Browse code

add names (id()) to QualityScaledXStringSet coercion

Martin Morgan authored on 10/12/2020 20:18:48
Showing1 changed files
... ...
@@ -194,29 +194,49 @@ test_FastqStreamer_IRanges <- function()
194 194
     checkException(FastqStreamer(fl, rng), silent=TRUE)
195 195
 }
196 196
 
197
+test_ShortReadQ_coerce_DNAStringSet <- function()
198
+{
199
+    checkIdentical(
200
+        as(ShortReadQ(), "DNAStringSet"),
201
+        DNAStringSet(setNames(nm = character()))
202
+    )
203
+
204
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
205
+    obj <- readFastq(sp, qualityType="SFastqQuality")
206
+    checkIdentical(sread(obj), unname(as(obj, "DNAStringSet")))
207
+    checkIdentical(as.character(id(obj)), names(as(obj, "DNAStringSet")))
208
+}
209
+
197 210
 test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function()
198 211
 {
199 212
     sp <- SolexaPath(system.file('extdata', package='ShortRead'))
200 213
     obj <- readFastq(sp, qualityType="SFastqQuality")
201 214
 
202 215
     res <- as(obj, "QualityScaledDNAStringSet")
203
-    checkIdentical(as.character(sread(obj)),
204
-                   as.character(as(res, "DNAStringSet")))
216
+    checkIdentical(sread(obj), unname(as(res, "DNAStringSet")))
205 217
     checkIdentical(as.character(quality(quality(obj))),
206 218
                    as.character(quality(res)))
219
+    checkIdentical(as.character(id(obj)), names(res))
207 220
     checkTrue(is(quality(res), "SolexaQuality"))
208 221
 
209 222
     obj <- initialize(obj, quality=FastqQuality(quality(quality(obj))))
210 223
     res <- as(obj, "QualityScaledDNAStringSet")
211
-    checkIdentical(as.character(sread(obj)),
212
-                   as.character(as(res, "DNAStringSet")))
224
+    checkIdentical(sread(obj), unname(as(res, "DNAStringSet")))
213 225
     checkIdentical(as.character(quality(quality(obj))),
214 226
                    as.character(quality(res)))
227
+    checkIdentical(as.character(id(obj)), names(res))
215 228
     checkTrue(is(quality(res), "PhredQuality"))
216 229
 
217 230
     q <- MatrixQuality(as(quality(obj), "matrix"))
218 231
     obj <- initialize(obj, quality=q)
219 232
     checkException(as(obj, "QualityScaledDNAStringSet"), silent=TRUE)
233
+
234
+    obj <- ShortReadQ()
235
+    res <- as(obj, "QualityScaledDNAStringSet")
236
+    checkIdentical(sread(obj), unname(as(res, "DNAStringSet")))
237
+    checkIdentical(as.character(quality(quality(obj))),
238
+                   as.character(quality(res)))
239
+    checkIdentical(as.character(id(obj)), names(res))
220 240
 }
221 241
 
222 242
 test_ShortReadQ_coerce_matrix <- function() 
Browse code

update unit test

- alias .qualityTypeAuto()

Martin Morgan authored on 03/02/2020 20:39:26
Showing1 changed files
... ...
@@ -22,6 +22,7 @@ checkShortReadQ <- function(obj, len, wd) {
22 22
 }
23 23
 
24 24
 test_qualityTypeAuto <- function() {
25
+    .qualityTypeAuto <- ShortRead:::.qualityTypeAuto
25 26
     ## SFastqQuality if all > ':'; some > 'J'
26 27
     quality <- BStringSet(":J")
27 28
     quality <- BStringSet(";J")
Browse code

readFastq argument qualityType = "Auto" detects SFastqQuality correctly

- closes #2

Martin Morgan authored on 30/01/2020 22:54:13
Showing1 changed files
... ...
@@ -21,6 +21,16 @@ checkShortReadQ <- function(obj, len, wd) {
21 21
     checkIdentical(as.character(id(x)), as.character(id(y)))
22 22
 }
23 23
 
24
+test_qualityTypeAuto <- function() {
25
+    ## SFastqQuality if all > ':'; some > 'J'
26
+    quality <- BStringSet(":J")
27
+    quality <- BStringSet(";J")
28
+    checkIdentical(.qualityTypeAuto(quality), FastqQuality)
29
+    checkIdentical(.qualityTypeAuto(quality), FastqQuality)
30
+    quality <- BStringSet(";K")
31
+    checkIdentical(.qualityTypeAuto(quality), SFastqQuality)
32
+}
33
+
24 34
 test_ShortReadQ_constructors <- function() {
25 35
     sp <- SolexaPath(system.file('extdata', package='ShortRead'))
26 36
     sr <- obj <- readFastq(sp)
Browse code

implement [<,ShortReadQ-method

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@105724 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 01/07/2015 13:08:43
Showing1 changed files
... ...
@@ -235,6 +235,15 @@ test_ShortReadQ_subset <- function() {
235 235
     checkIdentical(2L, length(obj[1:2,,drop=TRUE]))
236 236
 }
237 237
 
238
+
239
+test_ShortReadQ_subset_gets <- function() {
240
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
241
+    sr <- obj <- readFastq(sp)
242
+    i <- sample(length(obj))
243
+    sr[i] <- obj
244
+    .equals(sr[i], obj)
245
+}
246
+
238 247
 test_ShortReadQ_narrow <- function() {
239 248
     sp <- SolexaPath(system.file('extdata', package='ShortRead'))
240 249
     sr <- readFastq(sp)
Browse code

FastqStreamer / Sampler improvements

- warn on incomplete final line
- no warning when initialized with an (open) connection


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@80555 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 18/09/2013 19:10:25
Showing1 changed files
... ...
@@ -75,7 +75,7 @@ test_FastqSampler_rand <- function()
75 75
 
76 76
     ## different samples
77 77
     set.seed(123L)
78
-    samp <- open(FastqSampler(fl, 50))
78
+    samp <- FastqSampler(fl, 50)
79 79
     obs <- length(Reduce(intersect, replicate(2, id(yield(samp)))))
80 80
     checkIdentical(7L, obs)
81 81
     obs <- length(Reduce(intersect, replicate(3, id(yield(samp)))))
Browse code

allow for '\r' in end-of-line marks

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@71073 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 09/11/2012 03:51:09
Showing1 changed files
... ...
@@ -127,6 +127,14 @@ test_FastqStreamer <- function()
127 127
     checkIdentical(256L, len)
128 128
 }
129 129
 
130
+test_FastqStreamer_roundtrip <- function()
131
+{
132
+    out <- tempfile()
133
+    writeFastq(v1 <- readFastq(fl), out)
134
+    s <- FastqStreamer(out)
135
+    .equals(v1, yield(s))
136
+}
137
+
130 138
 test_FastqStreamer_IRanges <- function()
131 139
 {
132 140
     sr <- readFastq(fl)
Browse code

allow as(ShortReadQ, "matrix") to work on unequal width qualities

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@68853 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 28/08/2012 00:01:36
Showing1 changed files
... ...
@@ -200,6 +200,19 @@ test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function()
200 200
     checkException(as(obj, "QualityScaledDNAStringSet"), silent=TRUE)
201 201
 }
202 202
 
203
+test_ShortReadQ_coerce_matrix <- function() 
204
+{
205
+    ## 0-length
206
+    fq <- FastqQuality()
207
+    exp <- matrix(NA_integer_, 0, 0)
208
+    checkIdentical(exp, as(fq, "matrix"))
209
+
210
+    ## ragged matrix
211
+    fq <- FastqQuality(BStringSet(c("]]X", "]]]X")))
212
+    exp <- matrix(c(rep(60L, 4), 55L, 60L, NA_integer_, 55L), 2)
213
+    checkIdentical(exp, as(fq, "matrix"))
214
+}
215
+
203 216
 test_ShortReadQ_subset <- function() {
204 217
     sp <- SolexaPath(system.file('extdata', package='ShortRead'))
205 218
     obj <- readFastq(sp)
Browse code

IRanges indexing to FastqStreamer

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@66219 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 28/05/2012 23:08:38
Showing1 changed files
... ...
@@ -127,6 +127,54 @@ test_FastqStreamer <- function()
127 127
     checkIdentical(256L, len)
128 128
 }
129 129
 
130
+test_FastqStreamer_IRanges <- function()
131
+{
132
+    sr <- readFastq(fl)
133
+
134
+    ## basics
135
+    rng <- IRanges(c(50, 100, 200), width=c(5, 4, 3))
136
+    f <- FastqStreamer(fl, rng)
137
+    .equals(sr[50:54], yield(f))
138
+    .equals(sr[100:103], yield(f))
139
+    .equals(sr[200:202], yield(f))
140
+    .equals(ShortReadQ(), yield(f))
141
+    close(f)
142
+
143
+    ## successive
144
+    rng <- IRanges(c(50, 60), width=10)
145
+    f <- FastqStreamer(fl, rng)
146
+    .equals(sr[50:59], yield(f))
147
+    .equals(sr[60:69], yield(f))
148
+    .equals(ShortReadQ(), yield(f))
149
+    close(f)
150
+
151
+    ## off-the-end
152
+    rng <- IRanges(250, width=100)
153
+    f <- FastqStreamer(fl, rng)
154
+    .equals(sr[250:256], yield(f))
155
+    .equals(ShortReadQ(), yield(f))
156
+    close(f)
157
+
158
+    ## too-short buffer to skip all reads in one binary input
159
+    rng <- IRanges(250, width=5)
160
+    f <- FastqStreamer(fl, rng, readerBlockSize=10000)
161
+    .equals(sr[250:254], yield(f))
162
+    .equals(ShortReadQ(), yield(f))
163
+    close(f)
164
+
165
+    rng <- IRanges(241, width=5)
166
+    f <- FastqStreamer(fl, rng, readerBlockSize=10000)
167
+    .equals(sr[241:245], yield(f))
168
+    .equals(ShortReadQ(), yield(f))
169
+    close(f)
170
+
171
+    ## exceptions
172
+    rng <- IRanges(50, 49)              # non-zero
173
+    checkException(FastqStreamer(fl, rng), silent=TRUE)
174
+    rng <- IRanges(c(50, 59), c(60, 70)) # strictly increasing
175
+    checkException(FastqStreamer(fl, rng), silent=TRUE)
176
+}
177
+
130 178
 test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function()
131 179
 {
132 180
     sp <- SolexaPath(system.file('extdata', package='ShortRead'))
Browse code

update unit test to reflect bug fix changes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@65460 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 26/04/2012 19:08:42
Showing1 changed files
... ...
@@ -77,9 +77,9 @@ test_FastqSampler_rand <- function()
77 77
     set.seed(123L)
78 78
     samp <- open(FastqSampler(fl, 50))
79 79
     obs <- length(Reduce(intersect, replicate(2, id(yield(samp)))))
80
-    checkIdentical(20L, obs)
81
-    obs <- length(Reduce(intersect, replicate(5, id(yield(samp)))))
82
-    checkIdentical(8L, obs)
80
+    checkIdentical(7L, obs)
81
+    obs <- length(Reduce(intersect, replicate(3, id(yield(samp)))))
82
+    checkIdentical(0L, obs)
83 83
     close(samp)
84 84
 }
85 85
 
Browse code

FastqSampler would only replace first read during sampling

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@65394 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 25/04/2012 11:30:44
Showing1 changed files
... ...
@@ -72,6 +72,15 @@ test_FastqSampler_rand <- function()
72 72
     set.seed(123L); exp <- yield(samp)
73 73
     close(samp)
74 74
     .equals(obs, exp)
75
+
76
+    ## different samples
77
+    set.seed(123L)
78
+    samp <- open(FastqSampler(fl, 50))
79
+    obs <- length(Reduce(intersect, replicate(2, id(yield(samp)))))
80
+    checkIdentical(20L, obs)
81
+    obs <- length(Reduce(intersect, replicate(5, id(yield(samp)))))
82
+    checkIdentical(8L, obs)
83
+    close(samp)
75 84
 }
76 85
 
77 86
 test_FastqStreamer <- function()
Browse code

remove finalizer from FastqSampler / FastqStreamer

- connections and finalizers do not work well together


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@61871 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 06/01/2012 20:58:21
Showing1 changed files
... ...
@@ -48,14 +48,18 @@ test_FastqSampler <- function()
48 48
 {
49 49
     sr <- readFastq(fl)
50 50
     ## here to re-use equality checker
51
-    obj <- yield(FastqSampler(fl))
51
+    
52
+    obj <- yield(f <- FastqSampler(fl))
53
+    close(f)
52 54
     .equals(sr, obj)
53 55
 
54
-    yld <- yield(FastqSampler(fl, readerBlockSize=1000))
56
+    yld <- yield(f <- FastqSampler(fl, readerBlockSize=1000))
57
+    close(f)
55 58
     checkTrue(validObject(yld))
56 59
 
57 60
     ## regression
58
-    yld <- yield(FastqSampler(fl, readerBlockSize=256))
61
+    yld <- yield(f <- FastqSampler(fl, readerBlockSize=256))
62
+    close(f)
59 63
     checkIdentical(256L, length(yld))
60 64
 
61 65
 }
... ...
@@ -66,6 +70,7 @@ test_FastqSampler_rand <- function()
66 70
     samp <- FastqSampler(fl, 50)
67 71
     set.seed(123L); obs <- yield(samp)
68 72
     set.seed(123L); exp <- yield(samp)
73
+    close(samp)
69 74
     .equals(obs, exp)
70 75
 }
71 76
 
... ...
@@ -79,6 +84,7 @@ test_FastqStreamer <- function()
79 84
         len <- len + length(y)
80 85
         i <- i + 1L
81 86
     }
87
+    close(f)
82 88
     checkIdentical(6L, i)
83 89
     checkIdentical(256L, len)
84 90
 
... ...
@@ -86,6 +92,7 @@ test_FastqStreamer <- function()
86 92
     f <- FastqStreamer(fl, n=50)
87 93
     .equals(sr[1:50], yield(f))
88 94
     .equals(sr[50+1:50], yield(f))
95
+    close(f)
89 96
 
90 97
     ## whole file
91 98
     f <- FastqStreamer(fl, n=500)
... ...
@@ -95,6 +102,7 @@ test_FastqStreamer <- function()
95 102
         len <- len + length(y)
96 103
         i <- i + 1L
97 104
     }
105
+    close(f)
98 106
     checkIdentical(1L, i)
99 107
     checkIdentical(256L, len)
100 108
 
... ...
@@ -105,6 +113,7 @@ test_FastqStreamer <- function()
105 113
         len <- len + length(y)
106 114
         i <- i + 1L
107 115
     }
116
+    close(f)
108 117
     checkIdentical(6L, i)
109 118
     checkIdentical(256L, len)
110 119
 }
Browse code

FastqSampler/Streamer enhancements

- samples started with the same random number seed are identical
(regression introduced earlier in devel)
- faster streamer implementation; more shared code
- emphasize in docs that any connection (e.g., gzfile) works


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@60979 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 30/11/2011 14:02:21
Showing1 changed files
... ...
@@ -60,6 +60,15 @@ test_FastqSampler <- function()
60 60
 
61 61
 }
62 62
 
63
+test_FastqSampler_rand <- function()
64
+{
65
+    ## two samples with the same random number seed are identical
66
+    samp <- FastqSampler(fl, 50)
67
+    set.seed(123L); obs <- yield(samp)
68
+    set.seed(123L); exp <- yield(samp)
69
+    .equals(obs, exp)
70
+}
71
+
63 72
 test_FastqStreamer <- function()
64 73
 {
65 74
     sr <- readFastq(fl)
Browse code

improve FastqSampler

- considerably faster
- no artificial restriction on id or other sizes


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@60695 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 22/11/2011 04:40:26
Showing1 changed files
... ...
@@ -44,48 +44,6 @@ test_ShortReadQ_constructors <- function() {
44 44
     .equals(sr, obj)
45 45
 }
46 46
 
47
-test_FastqSampler_downsample <- function()
48
-{
49
-    sampler <- FastqSampler(file())
50
-    downsample <- sampler$.downsample
51
-
52
-    n <- length(letters)
53
-    lets <- as.list(letters)
54
-    LETS <- as.list(LETTERS)
55
-
56
-    sampler$n <- n; sampler$tot_n <- 0L
57
-    checkIdentical(lets, downsample(list(), lets))
58
-
59
-    sampler$n <- sampler$tot_n <- n
60
-    checkIdentical(lets, downsample(list(), lets))
61
-    checkIdentical(lets, downsample(lets, list()))
62
-
63
-    sampler$n <- 10L; sampler$tot_n <- n
64
-    ans <- downsample(list(), lets)
65
-    checkIdentical(sampler$n, length(ans))
66
-
67
-    sampler$n <- sampler$tot_n <- 2L * n
68
-    checkIdentical(c(lets, LETS), downsample(lets, LETS))
69
-
70
-    sampler$n <- 2L * n + 1L
71
-    checkIdentical(c(lets, LETS), downsample(lets, LETS))
72
-
73
-    set.seed(123L)
74
-    sampler$tot_n <- 2L * n; sampler$n <- n
75
-    ans <- unlist(downsample(lets, LETS))
76
-    checkIdentical(n, length(ans))
77
-    checkIdentical(14L, sum(ans %in% letters))
78
-
79
-    sampler$tot_n <- 200L * n
80
-    ans <- unlist(downsample(lets, LETS))
81
-    checkIdentical(n, length(ans))
82
-    checkIdentical(n, sum(ans %in% lets))
83
-
84
-    ans <- unlist(downsample(LETS, lets))
85
-    checkIdentical(n, length(ans))
86
-    checkIdentical(n, sum(ans %in% LETS))
87
-}
88
-
89 47
 test_FastqSampler <- function()
90 48
 {
91 49
     sr <- readFastq(fl)
Browse code

FastqSampler bug fixes & enhancements

- sample_rec_parser C could check for invalid access after the fact,
corrupting memory
- .fixedBinRecSampler duplicated work to be done by FastqSampler;
simplified
- FastqSampler could return fewer samples than requested / available
- split large lists of records to create ShortReadQ instances that fit
in memory and that can be appended


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@60557 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 20/11/2011 20:29:42
Showing1 changed files
... ...
@@ -44,6 +44,47 @@ test_ShortReadQ_constructors <- function() {
44 44
     .equals(sr, obj)
45 45
 }
46 46
 
47
+test_FastqSampler_downsample <- function()
48
+{
49
+    sampler <- FastqSampler(file())
50
+    downsample <- sampler$.downsample
51
+
52
+    n <- length(letters)
53
+    lets <- as.list(letters)
54
+    LETS <- as.list(LETTERS)
55
+
56
+    sampler$n <- n; sampler$tot_n <- 0L
57
+    checkIdentical(lets, downsample(list(), lets))
58
+
59
+    sampler$n <- sampler$tot_n <- n
60
+    checkIdentical(lets, downsample(list(), lets))
61
+    checkIdentical(lets, downsample(lets, list()))
62
+
63
+    sampler$n <- 10L; sampler$tot_n <- n
64
+    ans <- downsample(list(), lets)
65
+    checkIdentical(sampler$n, length(ans))
66
+
67
+    sampler$n <- sampler$tot_n <- 2L * n
68
+    checkIdentical(c(lets, LETS), downsample(lets, LETS))
69
+
70
+    sampler$n <- 2L * n + 1L
71
+    checkIdentical(c(lets, LETS), downsample(lets, LETS))
72
+
73
+    set.seed(123L)
74
+    sampler$tot_n <- 2L * n; sampler$n <- n
75
+    ans <- unlist(downsample(lets, LETS))
76
+    checkIdentical(n, length(ans))
77
+    checkIdentical(14L, sum(ans %in% letters))
78
+
79
+    sampler$tot_n <- 200L * n
80
+    ans <- unlist(downsample(lets, LETS))
81
+    checkIdentical(n, length(ans))
82
+    checkIdentical(n, sum(ans %in% lets))
83
+
84
+    ans <- unlist(downsample(LETS, lets))
85
+    checkIdentical(n, length(ans))
86
+    checkIdentical(n, sum(ans %in% LETS))
87
+}
47 88
 
48 89
 test_FastqSampler <- function()
49 90
 {
... ...
@@ -52,7 +93,13 @@ test_FastqSampler <- function()
52 93
     obj <- yield(FastqSampler(fl))
53 94
     .equals(sr, obj)
54 95
 
55
-    checkTrue(validObject(yield(FastqSampler(fl, readerBlockSize=1000))))
96
+    yld <- yield(FastqSampler(fl, readerBlockSize=1000))
97
+    checkTrue(validObject(yld))
98
+
99
+    ## regression
100
+    yld <- yield(FastqSampler(fl, readerBlockSize=256))
101
+    checkIdentical(256L, length(yld))
102
+
56 103
 }
57 104
 
58 105
 test_FastqStreamer <- function()
Browse code

handle multiple FastqStreamer input chunks correctly

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@58597 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 30/09/2011 22:55:08
Showing1 changed files
... ...
@@ -51,6 +51,8 @@ test_FastqSampler <- function()
51 51
     ## here to re-use equality checker
52 52
     obj <- yield(FastqSampler(fl))
53 53
     .equals(sr, obj)
54
+
55
+    checkTrue(validObject(yield(FastqSampler(fl, readerBlockSize=1000))))
54 56
 }
55 57
 
56 58
 test_FastqStreamer <- function()
... ...
@@ -66,6 +68,11 @@ test_FastqStreamer <- function()
66 68
     checkIdentical(6L, i)
67 69
     checkIdentical(256L, len)
68 70
 
71
+    ## values equal?
72
+    f <- FastqStreamer(fl, n=50)
73
+    .equals(sr[1:50], yield(f))
74
+    .equals(sr[50+1:50], yield(f))
75
+
69 76
     ## whole file
70 77
     f <- FastqStreamer(fl, n=500)
71 78
     i <- 0L; len <- 0L
... ...
@@ -77,9 +84,15 @@ test_FastqStreamer <- function()
77 84
     checkIdentical(1L, i)
78 85
     checkIdentical(256L, len)
79 86
 
80
-    f <- FastqStreamer(fl, n=50)
81
-    .equals(sr[1:50], yield(f))
82
-    .equals(sr[50+1:50], yield(f))
87
+    ## small reader block size
88
+    f <- FastqStreamer(fl, n=50, readerBlockSize=100)
89
+    i <- 0L; len <- 0L
90
+    while (length(y <- yield(f))) {
91
+        len <- len + length(y)
92
+        i <- i + 1L
93
+    }
94
+    checkIdentical(6L, i)
95
+    checkIdentical(256L, len)
83 96
 }
84 97
 
85 98
 test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function()
Browse code

add FastqStreamer class

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@58596 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 30/09/2011 22:23:46
Showing1 changed files
... ...
@@ -1,3 +1,6 @@
1
+sp <- SolexaPath(system.file('extdata', package='ShortRead'))
2
+fl <- file.path(analysisPath(sp), "s_1_sequence.txt")
3
+
1 4
 checkShortReadQ <- function(obj, len, wd) {
2 5
     checkStringSet <- function(obj, type, len, wd) {
3 6
         checkTrue(is(obj, type))
... ...
@@ -44,14 +47,41 @@ test_ShortReadQ_constructors <- function() {
44 47
 
45 48
 test_FastqSampler <- function()
46 49
 {
47
-    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
48
-    fl <- file.path(analysisPath(sp), "s_1_sequence.txt")
49 50
     sr <- readFastq(fl)
50 51
     ## here to re-use equality checker
51 52
     obj <- yield(FastqSampler(fl))
52 53
     .equals(sr, obj)
53 54
 }
54 55
 
56
+test_FastqStreamer <- function()
57
+{
58
+    sr <- readFastq(fl)
59
+
60
+    f <- FastqStreamer(fl, n=50)
61
+    i <- 0L; len <- 0L
62
+    while (length(y <- yield(f))) {
63
+        len <- len + length(y)
64
+        i <- i + 1L
65
+    }
66
+    checkIdentical(6L, i)
67
+    checkIdentical(256L, len)
68
+
69
+    ## whole file
70
+    f <- FastqStreamer(fl, n=500)
71
+    i <- 0L; len <- 0L
72
+    while (length(y <- yield(f))) {
73
+        .equals(sr, y)
74
+        len <- len + length(y)
75
+        i <- i + 1L
76
+    }
77
+    checkIdentical(1L, i)
78
+    checkIdentical(256L, len)
79
+
80
+    f <- FastqStreamer(fl, n=50)
81
+    .equals(sr[1:50], yield(f))
82
+    .equals(sr[50+1:50], yield(f))
83
+}
84
+
55 85
 test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function()
56 86
 {
57 87
     sp <- SolexaPath(system.file('extdata', package='ShortRead'))
Browse code

allow more lenient subsetting, e.g., [i, drop=TRUE]

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@53022 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 20/02/2011 20:34:47
Showing1 changed files
... ...
@@ -86,8 +86,9 @@ test_ShortReadQ_subset <- function() {
86 86
 
87 87
     checkException(obj[,1], silent=TRUE)
88 88
     checkException(obj[1,1], silent=TRUE)
89
-    checkException(obj[1,], silent=TRUE)
90
-    checkException(obj[1,], silent=TRUE)
89
+    checkIdentical(2L, length(obj[1:2,]))
90
+    checkIdentical(2L, length(obj[1:2,drop=TRUE]))
91
+    checkIdentical(2L, length(obj[1:2,,drop=TRUE]))
91 92
 }
92 93
 
93 94
 test_ShortReadQ_narrow <- function() {
Browse code

Add FastqSampler class and yield method for samples from large file

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@49637 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 27/09/2010 00:43:13
Showing1 changed files
... ...
@@ -41,6 +41,17 @@ test_ShortReadQ_constructors <- function() {
41 41
     .equals(sr, obj)
42 42
 }
43 43
 
44
+
45
+test_FastqSampler <- function()
46
+{
47
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
48
+    fl <- file.path(analysisPath(sp), "s_1_sequence.txt")
49
+    sr <- readFastq(fl)
50
+    ## here to re-use equality checker
51
+    obj <- yield(FastqSampler(fl))
52
+    .equals(sr, obj)
53
+}
54
+
44 55
 test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function()
45 56
 {
46 57
     sp <- SolexaPath(system.file('extdata', package='ShortRead'))
Browse code

Update how the unit tests are organized and run.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@49154 bc3139a8-67e5-0310-9ffc-ced21a209358

Valerie Obenchain authored on 30/08/2010 20:57:29
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,126 @@
1
+checkShortReadQ <- function(obj, len, wd) {
2
+    checkStringSet <- function(obj, type, len, wd) {
3
+        checkTrue(is(obj, type))
4
+        checkEquals(len, length(obj))
5
+        checkEquals(wd, unique(width(obj)))
6
+    }
7
+    checkStringSet(obj, "ShortReadQ", len, wd[[1]])
8
+    checkStringSet(sread(obj), "DNAStringSet", len, wd[[2]])
9
+    checkStringSet(id(obj), "BStringSet", len, wd[[3]]) # ids w/ diff lengths
10
+    checkStringSet(quality(obj), "QualityScore", len, wd[[4]])
11
+}
12
+
13
+.equals <- function(x, y)
14
+{
15
+    checkIdentical(as.character(sread(x)), as.character(sread(y)))
16
+    checkIdentical(as.character(quality(quality(x))),
17
+                   as.character(quality(quality(y))))
18
+    checkIdentical(as.character(id(x)), as.character(id(y)))
19
+}
20
+
21
+test_ShortReadQ_constructors <- function() {
22
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
23
+    sr <- obj <- readFastq(sp)
24
+    checkTrue(validObject(obj))
25
+    checkShortReadQ(obj, 256, list(36, 36, 24:22, 36))
26
+
27
+    obj <- ShortReadQ()
28
+    checkTrue(class(obj) == "ShortReadQ")
29
+    checkTrue(validObject(obj))
30
+
31
+    obj <- ShortReadQ(sread(sr), quality(sr))
32
+    checkTrue(class(obj) == "ShortReadQ")
33
+    checkTrue(validObject(obj))
34
+    .equals(new("ShortReadQ", sread=sread(sr),
35
+                id=BStringSet(rep("", length(sr))),
36
+                quality=quality(sr)), obj)
37
+
38
+    obj <- ShortReadQ(sread(sr), quality(sr), id(sr))
39
+    checkTrue(class(obj) == "ShortReadQ")
40
+    checkTrue(validObject(obj))
41
+    .equals(sr, obj)
42
+}
43
+
44
+test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function()
45
+{
46
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
47
+    obj <- readFastq(sp, qualityType="SFastqQuality")
48
+
49
+    res <- as(obj, "QualityScaledDNAStringSet")
50
+    checkIdentical(as.character(sread(obj)),
51
+                   as.character(as(res, "DNAStringSet")))
52
+    checkIdentical(as.character(quality(quality(obj))),
53
+                   as.character(quality(res)))
54
+    checkTrue(is(quality(res), "SolexaQuality"))
55
+
56
+    obj <- initialize(obj, quality=FastqQuality(quality(quality(obj))))
57
+    res <- as(obj, "QualityScaledDNAStringSet")
58
+    checkIdentical(as.character(sread(obj)),
59
+                   as.character(as(res, "DNAStringSet")))
60
+    checkIdentical(as.character(quality(quality(obj))),
61
+                   as.character(quality(res)))
62
+    checkTrue(is(quality(res), "PhredQuality"))
63
+
64
+    q <- MatrixQuality(as(quality(obj), "matrix"))
65
+    obj <- initialize(obj, quality=q)
66
+    checkException(as(obj, "QualityScaledDNAStringSet"), silent=TRUE)
67
+}
68
+
69
+test_ShortReadQ_subset <- function() {
70
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
71
+    obj <- readFastq(sp)
72
+
73
+    obj1 <- obj[c(3, 5:7, 9)]
74
+    checkShortReadQ(obj1, 5, list(36, 36, 23, 36))
75
+
76
+    checkException(obj[,1], silent=TRUE)
77
+    checkException(obj[1,1], silent=TRUE)
78
+    checkException(obj[1,], silent=TRUE)
79
+    checkException(obj[1,], silent=TRUE)
80
+}
81
+
82
+test_ShortReadQ_narrow <- function() {
83
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
84
+    sr <- readFastq(sp)
85
+
86
+    obj <- narrow(sr, start=1, end=10)
87
+    checkTrue(class(obj) == "ShortReadQ")
88
+    checkTrue(length(obj) == length(sr))
89
+    checkTrue(unique(width(obj)) == 10)
90
+    checkIdentical(as.character(sread(obj)),
91
+                   substr(as.character(sread(sr)), 1, 10))
92
+    checkIdentical(as.character(quality(quality(obj))),
93
+                   substr(as.character(quality(quality(sr))), 1, 10))
94
+    checkIdentical(as.character(id(obj)), as.character(id(sr)))
95
+
96
+    checkIdentical(narrow(sr, start=start(sread(sr))), sr)
97
+}
98
+
99
+test_ShortReadQ_compact <- function() {
100
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
101
+    sr <- readFastq(sp)[1:10]
102
+    res <- compact(sr)
103
+    checkIdentical(as.character(sread(sr)), as.character(sread(res)))
104
+    checkIdentical(as.character(quality(quality(sr))),
105
+                   as.character(quality(quality(res))))
106
+}
107
+
108
+test_ShortReadQ_clean <- function() {
109
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
110
+    obj <- readFastq(sp)
111
+
112
+    cln <- clean(obj)
113
+    checkIdentical(class(obj), class(cln))
114
+    ## FIXME: need a stronger test
115
+    checkEquals(length(obj), length(clean(obj)))
116
+}
117
+
118
+test_ShortReadQ_srsort <- function() {
119
+    sp <- SolexaPath(system.file('extdata', package='ShortRead'))
120
+    obj <- readFastq(sp)
121
+    srt <- srsort(obj)
122
+    checkIdentical(class(obj), class(srt))
123
+    checkIdentical(length(obj), length(srt))
124
+    checkIdentical(srsort(sread(obj)), sread(srt))
125
+    checkIdentical(quality(obj)[srorder(obj)], quality(srt))
126
+}