... | ... |
@@ -194,29 +194,49 @@ test_FastqStreamer_IRanges <- function() |
194 | 194 |
checkException(FastqStreamer(fl, rng), silent=TRUE) |
195 | 195 |
} |
196 | 196 |
|
197 |
+test_ShortReadQ_coerce_DNAStringSet <- function() |
|
198 |
+{ |
|
199 |
+ checkIdentical( |
|
200 |
+ as(ShortReadQ(), "DNAStringSet"), |
|
201 |
+ DNAStringSet(setNames(nm = character())) |
|
202 |
+ ) |
|
203 |
+ |
|
204 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
205 |
+ obj <- readFastq(sp, qualityType="SFastqQuality") |
|
206 |
+ checkIdentical(sread(obj), unname(as(obj, "DNAStringSet"))) |
|
207 |
+ checkIdentical(as.character(id(obj)), names(as(obj, "DNAStringSet"))) |
|
208 |
+} |
|
209 |
+ |
|
197 | 210 |
test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function() |
198 | 211 |
{ |
199 | 212 |
sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
200 | 213 |
obj <- readFastq(sp, qualityType="SFastqQuality") |
201 | 214 |
|
202 | 215 |
res <- as(obj, "QualityScaledDNAStringSet") |
203 |
- checkIdentical(as.character(sread(obj)), |
|
204 |
- as.character(as(res, "DNAStringSet"))) |
|
216 |
+ checkIdentical(sread(obj), unname(as(res, "DNAStringSet"))) |
|
205 | 217 |
checkIdentical(as.character(quality(quality(obj))), |
206 | 218 |
as.character(quality(res))) |
219 |
+ checkIdentical(as.character(id(obj)), names(res)) |
|
207 | 220 |
checkTrue(is(quality(res), "SolexaQuality")) |
208 | 221 |
|
209 | 222 |
obj <- initialize(obj, quality=FastqQuality(quality(quality(obj)))) |
210 | 223 |
res <- as(obj, "QualityScaledDNAStringSet") |
211 |
- checkIdentical(as.character(sread(obj)), |
|
212 |
- as.character(as(res, "DNAStringSet"))) |
|
224 |
+ checkIdentical(sread(obj), unname(as(res, "DNAStringSet"))) |
|
213 | 225 |
checkIdentical(as.character(quality(quality(obj))), |
214 | 226 |
as.character(quality(res))) |
227 |
+ checkIdentical(as.character(id(obj)), names(res)) |
|
215 | 228 |
checkTrue(is(quality(res), "PhredQuality")) |
216 | 229 |
|
217 | 230 |
q <- MatrixQuality(as(quality(obj), "matrix")) |
218 | 231 |
obj <- initialize(obj, quality=q) |
219 | 232 |
checkException(as(obj, "QualityScaledDNAStringSet"), silent=TRUE) |
233 |
+ |
|
234 |
+ obj <- ShortReadQ() |
|
235 |
+ res <- as(obj, "QualityScaledDNAStringSet") |
|
236 |
+ checkIdentical(sread(obj), unname(as(res, "DNAStringSet"))) |
|
237 |
+ checkIdentical(as.character(quality(quality(obj))), |
|
238 |
+ as.character(quality(res))) |
|
239 |
+ checkIdentical(as.character(id(obj)), names(res)) |
|
220 | 240 |
} |
221 | 241 |
|
222 | 242 |
test_ShortReadQ_coerce_matrix <- function() |
... | ... |
@@ -21,6 +21,16 @@ checkShortReadQ <- function(obj, len, wd) { |
21 | 21 |
checkIdentical(as.character(id(x)), as.character(id(y))) |
22 | 22 |
} |
23 | 23 |
|
24 |
+test_qualityTypeAuto <- function() { |
|
25 |
+ ## SFastqQuality if all > ':'; some > 'J' |
|
26 |
+ quality <- BStringSet(":J") |
|
27 |
+ quality <- BStringSet(";J") |
|
28 |
+ checkIdentical(.qualityTypeAuto(quality), FastqQuality) |
|
29 |
+ checkIdentical(.qualityTypeAuto(quality), FastqQuality) |
|
30 |
+ quality <- BStringSet(";K") |
|
31 |
+ checkIdentical(.qualityTypeAuto(quality), SFastqQuality) |
|
32 |
+} |
|
33 |
+ |
|
24 | 34 |
test_ShortReadQ_constructors <- function() { |
25 | 35 |
sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
26 | 36 |
sr <- obj <- readFastq(sp) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@105724 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -235,6 +235,15 @@ test_ShortReadQ_subset <- function() { |
235 | 235 |
checkIdentical(2L, length(obj[1:2,,drop=TRUE])) |
236 | 236 |
} |
237 | 237 |
|
238 |
+ |
|
239 |
+test_ShortReadQ_subset_gets <- function() { |
|
240 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
241 |
+ sr <- obj <- readFastq(sp) |
|
242 |
+ i <- sample(length(obj)) |
|
243 |
+ sr[i] <- obj |
|
244 |
+ .equals(sr[i], obj) |
|
245 |
+} |
|
246 |
+ |
|
238 | 247 |
test_ShortReadQ_narrow <- function() { |
239 | 248 |
sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
240 | 249 |
sr <- readFastq(sp) |
- warn on incomplete final line
- no warning when initialized with an (open) connection
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@80555 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -75,7 +75,7 @@ test_FastqSampler_rand <- function() |
75 | 75 |
|
76 | 76 |
## different samples |
77 | 77 |
set.seed(123L) |
78 |
- samp <- open(FastqSampler(fl, 50)) |
|
78 |
+ samp <- FastqSampler(fl, 50) |
|
79 | 79 |
obs <- length(Reduce(intersect, replicate(2, id(yield(samp))))) |
80 | 80 |
checkIdentical(7L, obs) |
81 | 81 |
obs <- length(Reduce(intersect, replicate(3, id(yield(samp))))) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@71073 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -127,6 +127,14 @@ test_FastqStreamer <- function() |
127 | 127 |
checkIdentical(256L, len) |
128 | 128 |
} |
129 | 129 |
|
130 |
+test_FastqStreamer_roundtrip <- function() |
|
131 |
+{ |
|
132 |
+ out <- tempfile() |
|
133 |
+ writeFastq(v1 <- readFastq(fl), out) |
|
134 |
+ s <- FastqStreamer(out) |
|
135 |
+ .equals(v1, yield(s)) |
|
136 |
+} |
|
137 |
+ |
|
130 | 138 |
test_FastqStreamer_IRanges <- function() |
131 | 139 |
{ |
132 | 140 |
sr <- readFastq(fl) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@68853 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -200,6 +200,19 @@ test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function() |
200 | 200 |
checkException(as(obj, "QualityScaledDNAStringSet"), silent=TRUE) |
201 | 201 |
} |
202 | 202 |
|
203 |
+test_ShortReadQ_coerce_matrix <- function() |
|
204 |
+{ |
|
205 |
+ ## 0-length |
|
206 |
+ fq <- FastqQuality() |
|
207 |
+ exp <- matrix(NA_integer_, 0, 0) |
|
208 |
+ checkIdentical(exp, as(fq, "matrix")) |
|
209 |
+ |
|
210 |
+ ## ragged matrix |
|
211 |
+ fq <- FastqQuality(BStringSet(c("]]X", "]]]X"))) |
|
212 |
+ exp <- matrix(c(rep(60L, 4), 55L, 60L, NA_integer_, 55L), 2) |
|
213 |
+ checkIdentical(exp, as(fq, "matrix")) |
|
214 |
+} |
|
215 |
+ |
|
203 | 216 |
test_ShortReadQ_subset <- function() { |
204 | 217 |
sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
205 | 218 |
obj <- readFastq(sp) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@66219 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -127,6 +127,54 @@ test_FastqStreamer <- function() |
127 | 127 |
checkIdentical(256L, len) |
128 | 128 |
} |
129 | 129 |
|
130 |
+test_FastqStreamer_IRanges <- function() |
|
131 |
+{ |
|
132 |
+ sr <- readFastq(fl) |
|
133 |
+ |
|
134 |
+ ## basics |
|
135 |
+ rng <- IRanges(c(50, 100, 200), width=c(5, 4, 3)) |
|
136 |
+ f <- FastqStreamer(fl, rng) |
|
137 |
+ .equals(sr[50:54], yield(f)) |
|
138 |
+ .equals(sr[100:103], yield(f)) |
|
139 |
+ .equals(sr[200:202], yield(f)) |
|
140 |
+ .equals(ShortReadQ(), yield(f)) |
|
141 |
+ close(f) |
|
142 |
+ |
|
143 |
+ ## successive |
|
144 |
+ rng <- IRanges(c(50, 60), width=10) |
|
145 |
+ f <- FastqStreamer(fl, rng) |
|
146 |
+ .equals(sr[50:59], yield(f)) |
|
147 |
+ .equals(sr[60:69], yield(f)) |
|
148 |
+ .equals(ShortReadQ(), yield(f)) |
|
149 |
+ close(f) |
|
150 |
+ |
|
151 |
+ ## off-the-end |
|
152 |
+ rng <- IRanges(250, width=100) |
|
153 |
+ f <- FastqStreamer(fl, rng) |
|
154 |
+ .equals(sr[250:256], yield(f)) |
|
155 |
+ .equals(ShortReadQ(), yield(f)) |
|
156 |
+ close(f) |
|
157 |
+ |
|
158 |
+ ## too-short buffer to skip all reads in one binary input |
|
159 |
+ rng <- IRanges(250, width=5) |
|
160 |
+ f <- FastqStreamer(fl, rng, readerBlockSize=10000) |
|
161 |
+ .equals(sr[250:254], yield(f)) |
|
162 |
+ .equals(ShortReadQ(), yield(f)) |
|
163 |
+ close(f) |
|
164 |
+ |
|
165 |
+ rng <- IRanges(241, width=5) |
|
166 |
+ f <- FastqStreamer(fl, rng, readerBlockSize=10000) |
|
167 |
+ .equals(sr[241:245], yield(f)) |
|
168 |
+ .equals(ShortReadQ(), yield(f)) |
|
169 |
+ close(f) |
|
170 |
+ |
|
171 |
+ ## exceptions |
|
172 |
+ rng <- IRanges(50, 49) # non-zero |
|
173 |
+ checkException(FastqStreamer(fl, rng), silent=TRUE) |
|
174 |
+ rng <- IRanges(c(50, 59), c(60, 70)) # strictly increasing |
|
175 |
+ checkException(FastqStreamer(fl, rng), silent=TRUE) |
|
176 |
+} |
|
177 |
+ |
|
130 | 178 |
test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function() |
131 | 179 |
{ |
132 | 180 |
sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@65460 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -77,9 +77,9 @@ test_FastqSampler_rand <- function() |
77 | 77 |
set.seed(123L) |
78 | 78 |
samp <- open(FastqSampler(fl, 50)) |
79 | 79 |
obs <- length(Reduce(intersect, replicate(2, id(yield(samp))))) |
80 |
- checkIdentical(20L, obs) |
|
81 |
- obs <- length(Reduce(intersect, replicate(5, id(yield(samp))))) |
|
82 |
- checkIdentical(8L, obs) |
|
80 |
+ checkIdentical(7L, obs) |
|
81 |
+ obs <- length(Reduce(intersect, replicate(3, id(yield(samp))))) |
|
82 |
+ checkIdentical(0L, obs) |
|
83 | 83 |
close(samp) |
84 | 84 |
} |
85 | 85 |
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@65394 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -72,6 +72,15 @@ test_FastqSampler_rand <- function() |
72 | 72 |
set.seed(123L); exp <- yield(samp) |
73 | 73 |
close(samp) |
74 | 74 |
.equals(obs, exp) |
75 |
+ |
|
76 |
+ ## different samples |
|
77 |
+ set.seed(123L) |
|
78 |
+ samp <- open(FastqSampler(fl, 50)) |
|
79 |
+ obs <- length(Reduce(intersect, replicate(2, id(yield(samp))))) |
|
80 |
+ checkIdentical(20L, obs) |
|
81 |
+ obs <- length(Reduce(intersect, replicate(5, id(yield(samp))))) |
|
82 |
+ checkIdentical(8L, obs) |
|
83 |
+ close(samp) |
|
75 | 84 |
} |
76 | 85 |
|
77 | 86 |
test_FastqStreamer <- function() |
- connections and finalizers do not work well together
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@61871 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -48,14 +48,18 @@ test_FastqSampler <- function() |
48 | 48 |
{ |
49 | 49 |
sr <- readFastq(fl) |
50 | 50 |
## here to re-use equality checker |
51 |
- obj <- yield(FastqSampler(fl)) |
|
51 |
+ |
|
52 |
+ obj <- yield(f <- FastqSampler(fl)) |
|
53 |
+ close(f) |
|
52 | 54 |
.equals(sr, obj) |
53 | 55 |
|
54 |
- yld <- yield(FastqSampler(fl, readerBlockSize=1000)) |
|
56 |
+ yld <- yield(f <- FastqSampler(fl, readerBlockSize=1000)) |
|
57 |
+ close(f) |
|
55 | 58 |
checkTrue(validObject(yld)) |
56 | 59 |
|
57 | 60 |
## regression |
58 |
- yld <- yield(FastqSampler(fl, readerBlockSize=256)) |
|
61 |
+ yld <- yield(f <- FastqSampler(fl, readerBlockSize=256)) |
|
62 |
+ close(f) |
|
59 | 63 |
checkIdentical(256L, length(yld)) |
60 | 64 |
|
61 | 65 |
} |
... | ... |
@@ -66,6 +70,7 @@ test_FastqSampler_rand <- function() |
66 | 70 |
samp <- FastqSampler(fl, 50) |
67 | 71 |
set.seed(123L); obs <- yield(samp) |
68 | 72 |
set.seed(123L); exp <- yield(samp) |
73 |
+ close(samp) |
|
69 | 74 |
.equals(obs, exp) |
70 | 75 |
} |
71 | 76 |
|
... | ... |
@@ -79,6 +84,7 @@ test_FastqStreamer <- function() |
79 | 84 |
len <- len + length(y) |
80 | 85 |
i <- i + 1L |
81 | 86 |
} |
87 |
+ close(f) |
|
82 | 88 |
checkIdentical(6L, i) |
83 | 89 |
checkIdentical(256L, len) |
84 | 90 |
|
... | ... |
@@ -86,6 +92,7 @@ test_FastqStreamer <- function() |
86 | 92 |
f <- FastqStreamer(fl, n=50) |
87 | 93 |
.equals(sr[1:50], yield(f)) |
88 | 94 |
.equals(sr[50+1:50], yield(f)) |
95 |
+ close(f) |
|
89 | 96 |
|
90 | 97 |
## whole file |
91 | 98 |
f <- FastqStreamer(fl, n=500) |
... | ... |
@@ -95,6 +102,7 @@ test_FastqStreamer <- function() |
95 | 102 |
len <- len + length(y) |
96 | 103 |
i <- i + 1L |
97 | 104 |
} |
105 |
+ close(f) |
|
98 | 106 |
checkIdentical(1L, i) |
99 | 107 |
checkIdentical(256L, len) |
100 | 108 |
|
... | ... |
@@ -105,6 +113,7 @@ test_FastqStreamer <- function() |
105 | 113 |
len <- len + length(y) |
106 | 114 |
i <- i + 1L |
107 | 115 |
} |
116 |
+ close(f) |
|
108 | 117 |
checkIdentical(6L, i) |
109 | 118 |
checkIdentical(256L, len) |
110 | 119 |
} |
- samples started with the same random number seed are identical
(regression introduced earlier in devel)
- faster streamer implementation; more shared code
- emphasize in docs that any connection (e.g., gzfile) works
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@60979 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -60,6 +60,15 @@ test_FastqSampler <- function() |
60 | 60 |
|
61 | 61 |
} |
62 | 62 |
|
63 |
+test_FastqSampler_rand <- function() |
|
64 |
+{ |
|
65 |
+ ## two samples with the same random number seed are identical |
|
66 |
+ samp <- FastqSampler(fl, 50) |
|
67 |
+ set.seed(123L); obs <- yield(samp) |
|
68 |
+ set.seed(123L); exp <- yield(samp) |
|
69 |
+ .equals(obs, exp) |
|
70 |
+} |
|
71 |
+ |
|
63 | 72 |
test_FastqStreamer <- function() |
64 | 73 |
{ |
65 | 74 |
sr <- readFastq(fl) |
- considerably faster
- no artificial restriction on id or other sizes
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@60695 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -44,48 +44,6 @@ test_ShortReadQ_constructors <- function() { |
44 | 44 |
.equals(sr, obj) |
45 | 45 |
} |
46 | 46 |
|
47 |
-test_FastqSampler_downsample <- function() |
|
48 |
-{ |
|
49 |
- sampler <- FastqSampler(file()) |
|
50 |
- downsample <- sampler$.downsample |
|
51 |
- |
|
52 |
- n <- length(letters) |
|
53 |
- lets <- as.list(letters) |
|
54 |
- LETS <- as.list(LETTERS) |
|
55 |
- |
|
56 |
- sampler$n <- n; sampler$tot_n <- 0L |
|
57 |
- checkIdentical(lets, downsample(list(), lets)) |
|
58 |
- |
|
59 |
- sampler$n <- sampler$tot_n <- n |
|
60 |
- checkIdentical(lets, downsample(list(), lets)) |
|
61 |
- checkIdentical(lets, downsample(lets, list())) |
|
62 |
- |
|
63 |
- sampler$n <- 10L; sampler$tot_n <- n |
|
64 |
- ans <- downsample(list(), lets) |
|
65 |
- checkIdentical(sampler$n, length(ans)) |
|
66 |
- |
|
67 |
- sampler$n <- sampler$tot_n <- 2L * n |
|
68 |
- checkIdentical(c(lets, LETS), downsample(lets, LETS)) |
|
69 |
- |
|
70 |
- sampler$n <- 2L * n + 1L |
|
71 |
- checkIdentical(c(lets, LETS), downsample(lets, LETS)) |
|
72 |
- |
|
73 |
- set.seed(123L) |
|
74 |
- sampler$tot_n <- 2L * n; sampler$n <- n |
|
75 |
- ans <- unlist(downsample(lets, LETS)) |
|
76 |
- checkIdentical(n, length(ans)) |
|
77 |
- checkIdentical(14L, sum(ans %in% letters)) |
|
78 |
- |
|
79 |
- sampler$tot_n <- 200L * n |
|
80 |
- ans <- unlist(downsample(lets, LETS)) |
|
81 |
- checkIdentical(n, length(ans)) |
|
82 |
- checkIdentical(n, sum(ans %in% lets)) |
|
83 |
- |
|
84 |
- ans <- unlist(downsample(LETS, lets)) |
|
85 |
- checkIdentical(n, length(ans)) |
|
86 |
- checkIdentical(n, sum(ans %in% LETS)) |
|
87 |
-} |
|
88 |
- |
|
89 | 47 |
test_FastqSampler <- function() |
90 | 48 |
{ |
91 | 49 |
sr <- readFastq(fl) |
- sample_rec_parser C could check for invalid access after the fact,
corrupting memory
- .fixedBinRecSampler duplicated work to be done by FastqSampler;
simplified
- FastqSampler could return fewer samples than requested / available
- split large lists of records to create ShortReadQ instances that fit
in memory and that can be appended
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@60557 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -44,6 +44,47 @@ test_ShortReadQ_constructors <- function() { |
44 | 44 |
.equals(sr, obj) |
45 | 45 |
} |
46 | 46 |
|
47 |
+test_FastqSampler_downsample <- function() |
|
48 |
+{ |
|
49 |
+ sampler <- FastqSampler(file()) |
|
50 |
+ downsample <- sampler$.downsample |
|
51 |
+ |
|
52 |
+ n <- length(letters) |
|
53 |
+ lets <- as.list(letters) |
|
54 |
+ LETS <- as.list(LETTERS) |
|
55 |
+ |
|
56 |
+ sampler$n <- n; sampler$tot_n <- 0L |
|
57 |
+ checkIdentical(lets, downsample(list(), lets)) |
|
58 |
+ |
|
59 |
+ sampler$n <- sampler$tot_n <- n |
|
60 |
+ checkIdentical(lets, downsample(list(), lets)) |
|
61 |
+ checkIdentical(lets, downsample(lets, list())) |
|
62 |
+ |
|
63 |
+ sampler$n <- 10L; sampler$tot_n <- n |
|
64 |
+ ans <- downsample(list(), lets) |
|
65 |
+ checkIdentical(sampler$n, length(ans)) |
|
66 |
+ |
|
67 |
+ sampler$n <- sampler$tot_n <- 2L * n |
|
68 |
+ checkIdentical(c(lets, LETS), downsample(lets, LETS)) |
|
69 |
+ |
|
70 |
+ sampler$n <- 2L * n + 1L |
|
71 |
+ checkIdentical(c(lets, LETS), downsample(lets, LETS)) |
|
72 |
+ |
|
73 |
+ set.seed(123L) |
|
74 |
+ sampler$tot_n <- 2L * n; sampler$n <- n |
|
75 |
+ ans <- unlist(downsample(lets, LETS)) |
|
76 |
+ checkIdentical(n, length(ans)) |
|
77 |
+ checkIdentical(14L, sum(ans %in% letters)) |
|
78 |
+ |
|
79 |
+ sampler$tot_n <- 200L * n |
|
80 |
+ ans <- unlist(downsample(lets, LETS)) |
|
81 |
+ checkIdentical(n, length(ans)) |
|
82 |
+ checkIdentical(n, sum(ans %in% lets)) |
|
83 |
+ |
|
84 |
+ ans <- unlist(downsample(LETS, lets)) |
|
85 |
+ checkIdentical(n, length(ans)) |
|
86 |
+ checkIdentical(n, sum(ans %in% LETS)) |
|
87 |
+} |
|
47 | 88 |
|
48 | 89 |
test_FastqSampler <- function() |
49 | 90 |
{ |
... | ... |
@@ -52,7 +93,13 @@ test_FastqSampler <- function() |
52 | 93 |
obj <- yield(FastqSampler(fl)) |
53 | 94 |
.equals(sr, obj) |
54 | 95 |
|
55 |
- checkTrue(validObject(yield(FastqSampler(fl, readerBlockSize=1000)))) |
|
96 |
+ yld <- yield(FastqSampler(fl, readerBlockSize=1000)) |
|
97 |
+ checkTrue(validObject(yld)) |
|
98 |
+ |
|
99 |
+ ## regression |
|
100 |
+ yld <- yield(FastqSampler(fl, readerBlockSize=256)) |
|
101 |
+ checkIdentical(256L, length(yld)) |
|
102 |
+ |
|
56 | 103 |
} |
57 | 104 |
|
58 | 105 |
test_FastqStreamer <- function() |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@58597 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -51,6 +51,8 @@ test_FastqSampler <- function() |
51 | 51 |
## here to re-use equality checker |
52 | 52 |
obj <- yield(FastqSampler(fl)) |
53 | 53 |
.equals(sr, obj) |
54 |
+ |
|
55 |
+ checkTrue(validObject(yield(FastqSampler(fl, readerBlockSize=1000)))) |
|
54 | 56 |
} |
55 | 57 |
|
56 | 58 |
test_FastqStreamer <- function() |
... | ... |
@@ -66,6 +68,11 @@ test_FastqStreamer <- function() |
66 | 68 |
checkIdentical(6L, i) |
67 | 69 |
checkIdentical(256L, len) |
68 | 70 |
|
71 |
+ ## values equal? |
|
72 |
+ f <- FastqStreamer(fl, n=50) |
|
73 |
+ .equals(sr[1:50], yield(f)) |
|
74 |
+ .equals(sr[50+1:50], yield(f)) |
|
75 |
+ |
|
69 | 76 |
## whole file |
70 | 77 |
f <- FastqStreamer(fl, n=500) |
71 | 78 |
i <- 0L; len <- 0L |
... | ... |
@@ -77,9 +84,15 @@ test_FastqStreamer <- function() |
77 | 84 |
checkIdentical(1L, i) |
78 | 85 |
checkIdentical(256L, len) |
79 | 86 |
|
80 |
- f <- FastqStreamer(fl, n=50) |
|
81 |
- .equals(sr[1:50], yield(f)) |
|
82 |
- .equals(sr[50+1:50], yield(f)) |
|
87 |
+ ## small reader block size |
|
88 |
+ f <- FastqStreamer(fl, n=50, readerBlockSize=100) |
|
89 |
+ i <- 0L; len <- 0L |
|
90 |
+ while (length(y <- yield(f))) { |
|
91 |
+ len <- len + length(y) |
|
92 |
+ i <- i + 1L |
|
93 |
+ } |
|
94 |
+ checkIdentical(6L, i) |
|
95 |
+ checkIdentical(256L, len) |
|
83 | 96 |
} |
84 | 97 |
|
85 | 98 |
test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function() |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@58596 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,3 +1,6 @@ |
1 |
+sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
2 |
+fl <- file.path(analysisPath(sp), "s_1_sequence.txt") |
|
3 |
+ |
|
1 | 4 |
checkShortReadQ <- function(obj, len, wd) { |
2 | 5 |
checkStringSet <- function(obj, type, len, wd) { |
3 | 6 |
checkTrue(is(obj, type)) |
... | ... |
@@ -44,14 +47,41 @@ test_ShortReadQ_constructors <- function() { |
44 | 47 |
|
45 | 48 |
test_FastqSampler <- function() |
46 | 49 |
{ |
47 |
- sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
48 |
- fl <- file.path(analysisPath(sp), "s_1_sequence.txt") |
|
49 | 50 |
sr <- readFastq(fl) |
50 | 51 |
## here to re-use equality checker |
51 | 52 |
obj <- yield(FastqSampler(fl)) |
52 | 53 |
.equals(sr, obj) |
53 | 54 |
} |
54 | 55 |
|
56 |
+test_FastqStreamer <- function() |
|
57 |
+{ |
|
58 |
+ sr <- readFastq(fl) |
|
59 |
+ |
|
60 |
+ f <- FastqStreamer(fl, n=50) |
|
61 |
+ i <- 0L; len <- 0L |
|
62 |
+ while (length(y <- yield(f))) { |
|
63 |
+ len <- len + length(y) |
|
64 |
+ i <- i + 1L |
|
65 |
+ } |
|
66 |
+ checkIdentical(6L, i) |
|
67 |
+ checkIdentical(256L, len) |
|
68 |
+ |
|
69 |
+ ## whole file |
|
70 |
+ f <- FastqStreamer(fl, n=500) |
|
71 |
+ i <- 0L; len <- 0L |
|
72 |
+ while (length(y <- yield(f))) { |
|
73 |
+ .equals(sr, y) |
|
74 |
+ len <- len + length(y) |
|
75 |
+ i <- i + 1L |
|
76 |
+ } |
|
77 |
+ checkIdentical(1L, i) |
|
78 |
+ checkIdentical(256L, len) |
|
79 |
+ |
|
80 |
+ f <- FastqStreamer(fl, n=50) |
|
81 |
+ .equals(sr[1:50], yield(f)) |
|
82 |
+ .equals(sr[50+1:50], yield(f)) |
|
83 |
+} |
|
84 |
+ |
|
55 | 85 |
test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function() |
56 | 86 |
{ |
57 | 87 |
sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@53022 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -86,8 +86,9 @@ test_ShortReadQ_subset <- function() { |
86 | 86 |
|
87 | 87 |
checkException(obj[,1], silent=TRUE) |
88 | 88 |
checkException(obj[1,1], silent=TRUE) |
89 |
- checkException(obj[1,], silent=TRUE) |
|
90 |
- checkException(obj[1,], silent=TRUE) |
|
89 |
+ checkIdentical(2L, length(obj[1:2,])) |
|
90 |
+ checkIdentical(2L, length(obj[1:2,drop=TRUE])) |
|
91 |
+ checkIdentical(2L, length(obj[1:2,,drop=TRUE])) |
|
91 | 92 |
} |
92 | 93 |
|
93 | 94 |
test_ShortReadQ_narrow <- function() { |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@49637 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -41,6 +41,17 @@ test_ShortReadQ_constructors <- function() { |
41 | 41 |
.equals(sr, obj) |
42 | 42 |
} |
43 | 43 |
|
44 |
+ |
|
45 |
+test_FastqSampler <- function() |
|
46 |
+{ |
|
47 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
48 |
+ fl <- file.path(analysisPath(sp), "s_1_sequence.txt") |
|
49 |
+ sr <- readFastq(fl) |
|
50 |
+ ## here to re-use equality checker |
|
51 |
+ obj <- yield(FastqSampler(fl)) |
|
52 |
+ .equals(sr, obj) |
|
53 |
+} |
|
54 |
+ |
|
44 | 55 |
test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function() |
45 | 56 |
{ |
46 | 57 |
sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@49154 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,126 @@ |
1 |
+checkShortReadQ <- function(obj, len, wd) { |
|
2 |
+ checkStringSet <- function(obj, type, len, wd) { |
|
3 |
+ checkTrue(is(obj, type)) |
|
4 |
+ checkEquals(len, length(obj)) |
|
5 |
+ checkEquals(wd, unique(width(obj))) |
|
6 |
+ } |
|
7 |
+ checkStringSet(obj, "ShortReadQ", len, wd[[1]]) |
|
8 |
+ checkStringSet(sread(obj), "DNAStringSet", len, wd[[2]]) |
|
9 |
+ checkStringSet(id(obj), "BStringSet", len, wd[[3]]) # ids w/ diff lengths |
|
10 |
+ checkStringSet(quality(obj), "QualityScore", len, wd[[4]]) |
|
11 |
+} |
|
12 |
+ |
|
13 |
+.equals <- function(x, y) |
|
14 |
+{ |
|
15 |
+ checkIdentical(as.character(sread(x)), as.character(sread(y))) |
|
16 |
+ checkIdentical(as.character(quality(quality(x))), |
|
17 |
+ as.character(quality(quality(y)))) |
|
18 |
+ checkIdentical(as.character(id(x)), as.character(id(y))) |
|
19 |
+} |
|
20 |
+ |
|
21 |
+test_ShortReadQ_constructors <- function() { |
|
22 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
23 |
+ sr <- obj <- readFastq(sp) |
|
24 |
+ checkTrue(validObject(obj)) |
|
25 |
+ checkShortReadQ(obj, 256, list(36, 36, 24:22, 36)) |
|
26 |
+ |
|
27 |
+ obj <- ShortReadQ() |
|
28 |
+ checkTrue(class(obj) == "ShortReadQ") |
|
29 |
+ checkTrue(validObject(obj)) |
|
30 |
+ |
|
31 |
+ obj <- ShortReadQ(sread(sr), quality(sr)) |
|
32 |
+ checkTrue(class(obj) == "ShortReadQ") |
|
33 |
+ checkTrue(validObject(obj)) |
|
34 |
+ .equals(new("ShortReadQ", sread=sread(sr), |
|
35 |
+ id=BStringSet(rep("", length(sr))), |
|
36 |
+ quality=quality(sr)), obj) |
|
37 |
+ |
|
38 |
+ obj <- ShortReadQ(sread(sr), quality(sr), id(sr)) |
|
39 |
+ checkTrue(class(obj) == "ShortReadQ") |
|
40 |
+ checkTrue(validObject(obj)) |
|
41 |
+ .equals(sr, obj) |
|
42 |
+} |
|
43 |
+ |
|
44 |
+test_ShortReadQ_coerce_QualityScaledDNAStringSet <- function() |
|
45 |
+{ |
|
46 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
47 |
+ obj <- readFastq(sp, qualityType="SFastqQuality") |
|
48 |
+ |
|
49 |
+ res <- as(obj, "QualityScaledDNAStringSet") |
|
50 |
+ checkIdentical(as.character(sread(obj)), |
|
51 |
+ as.character(as(res, "DNAStringSet"))) |
|
52 |
+ checkIdentical(as.character(quality(quality(obj))), |
|
53 |
+ as.character(quality(res))) |
|
54 |
+ checkTrue(is(quality(res), "SolexaQuality")) |
|
55 |
+ |
|
56 |
+ obj <- initialize(obj, quality=FastqQuality(quality(quality(obj)))) |
|
57 |
+ res <- as(obj, "QualityScaledDNAStringSet") |
|
58 |
+ checkIdentical(as.character(sread(obj)), |
|
59 |
+ as.character(as(res, "DNAStringSet"))) |
|
60 |
+ checkIdentical(as.character(quality(quality(obj))), |
|
61 |
+ as.character(quality(res))) |
|
62 |
+ checkTrue(is(quality(res), "PhredQuality")) |
|
63 |
+ |
|
64 |
+ q <- MatrixQuality(as(quality(obj), "matrix")) |
|
65 |
+ obj <- initialize(obj, quality=q) |
|
66 |
+ checkException(as(obj, "QualityScaledDNAStringSet"), silent=TRUE) |
|
67 |
+} |
|
68 |
+ |
|
69 |
+test_ShortReadQ_subset <- function() { |
|
70 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
71 |
+ obj <- readFastq(sp) |
|
72 |
+ |
|
73 |
+ obj1 <- obj[c(3, 5:7, 9)] |
|
74 |
+ checkShortReadQ(obj1, 5, list(36, 36, 23, 36)) |
|
75 |
+ |
|
76 |
+ checkException(obj[,1], silent=TRUE) |
|
77 |
+ checkException(obj[1,1], silent=TRUE) |
|
78 |
+ checkException(obj[1,], silent=TRUE) |
|
79 |
+ checkException(obj[1,], silent=TRUE) |
|
80 |
+} |
|
81 |
+ |
|
82 |
+test_ShortReadQ_narrow <- function() { |
|
83 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
84 |
+ sr <- readFastq(sp) |
|
85 |
+ |
|
86 |
+ obj <- narrow(sr, start=1, end=10) |
|
87 |
+ checkTrue(class(obj) == "ShortReadQ") |
|
88 |
+ checkTrue(length(obj) == length(sr)) |
|
89 |
+ checkTrue(unique(width(obj)) == 10) |
|
90 |
+ checkIdentical(as.character(sread(obj)), |
|
91 |
+ substr(as.character(sread(sr)), 1, 10)) |
|
92 |
+ checkIdentical(as.character(quality(quality(obj))), |
|
93 |
+ substr(as.character(quality(quality(sr))), 1, 10)) |
|
94 |
+ checkIdentical(as.character(id(obj)), as.character(id(sr))) |
|
95 |
+ |
|
96 |
+ checkIdentical(narrow(sr, start=start(sread(sr))), sr) |
|
97 |
+} |
|
98 |
+ |
|
99 |
+test_ShortReadQ_compact <- function() { |
|
100 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
101 |
+ sr <- readFastq(sp)[1:10] |
|
102 |
+ res <- compact(sr) |
|
103 |
+ checkIdentical(as.character(sread(sr)), as.character(sread(res))) |
|
104 |
+ checkIdentical(as.character(quality(quality(sr))), |
|
105 |
+ as.character(quality(quality(res)))) |
|
106 |
+} |
|
107 |
+ |
|
108 |
+test_ShortReadQ_clean <- function() { |
|
109 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
110 |
+ obj <- readFastq(sp) |
|
111 |
+ |
|
112 |
+ cln <- clean(obj) |
|
113 |
+ checkIdentical(class(obj), class(cln)) |
|
114 |
+ ## FIXME: need a stronger test |
|
115 |
+ checkEquals(length(obj), length(clean(obj))) |
|
116 |
+} |
|
117 |
+ |
|
118 |
+test_ShortReadQ_srsort <- function() { |
|
119 |
+ sp <- SolexaPath(system.file('extdata', package='ShortRead')) |
|
120 |
+ obj <- readFastq(sp) |
|
121 |
+ srt <- srsort(obj) |
|
122 |
+ checkIdentical(class(obj), class(srt)) |
|
123 |
+ checkIdentical(length(obj), length(srt)) |
|
124 |
+ checkIdentical(srsort(sread(obj)), sread(srt)) |
|
125 |
+ checkIdentical(quality(obj)[srorder(obj)], quality(srt)) |
|
126 |
+} |