... | ... |
@@ -111,6 +111,7 @@ makeClusters <- function(hasGRanges, maxGap = 10^8) { |
111 | 111 |
|
112 | 112 |
# Exported functions ----------------------------------------------------------- |
113 | 113 |
|
114 |
+# TODO: BSmooth() should warn if BSseq object contains mix of strands. |
|
114 | 115 |
# TODO: If BSmooth() encounteres errors, return `BPREDO`` as `metadata(BSseq)` |
115 | 116 |
# so as not to clobber the user's BSseq object; see |
116 | 117 |
# https://support.bioconductor.org/p/109374/#109459. |
... | ... |
@@ -200,6 +200,7 @@ setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges) |
200 | 200 |
|
201 | 201 |
# Internal functions ----------------------------------------------------------- |
202 | 202 |
|
203 |
+# TODO: Warn if x contains mix of + and * or - and * loci? |
|
203 | 204 |
.strandCollapse <- function(x) { |
204 | 205 |
stopifnot(is(x, "GenomicRanges")) |
205 | 206 |
if (all(strand(x) == "*")) return(x) |
... | ... |
@@ -11,7 +11,8 @@ setMethod( |
11 | 11 |
"findCytosines", |
12 | 12 |
"BSgenome", |
13 | 13 |
function(x, context, seqlevels = seqlevels(x)) { |
14 |
- # NOTE: vmatchPattern,BSgenome-method returns a GRanges instance. |
|
14 |
+ # NOTE: vmatchPattern,BSgenome-method returns a GRanges instance and |
|
15 |
+ # automatically checks both forward and reverse strands. |
|
15 | 16 |
gr <- vmatchPattern( |
16 | 17 |
pattern = context, |
17 | 18 |
subject = x, |
... | ... |
@@ -29,25 +29,20 @@ |
29 | 29 |
guessed_file_types |
30 | 30 |
} |
31 | 31 |
|
32 |
-# TODO: (longterm, see "Alternatively ..." for a better idea) |
|
33 |
-# .readBismarkAsDT2(): exact same as .readBismarkAsDT() but |
|
34 |
-# uses utils::read.delim() instead of readr::read_tsv(). In brief |
|
35 |
-# benchmarking, readr::read_csv() is ~1.3-1.6x faster than |
|
32 |
+# TODO: Choose between utils::read.delim(), readr::read_tsv(), and |
|
33 |
+# data.table::fread() based on 'file'. If plain text, use fread(). If a |
|
34 |
+# compressed file, use readr::read_tsv() if available, otherwise |
|
35 |
+# utils::read_delim(). Longer term, combine data.table::fread() with |
|
36 |
+# shell commands (where available) to pass compressed files. Will need to |
|
37 |
+# be careful of the interaction between BPPARAM and fread()'s nThread. |
|
38 |
+# Once implemented, move readr to Suggests. Finally, allow user to |
|
39 |
+# specify which function to use. |
|
40 |
+# NOTE: In brief benchmarking, readr::read_csv() is ~1.3-1.6x faster than |
|
36 | 41 |
# utils::read.delim() when reading a gzipped file, albeit it with ~1.6-2x |
37 | 42 |
# more total memory allocated. Therefore, there may be times users prefer |
38 |
-# to trade off faster speed for lower memory usage. When written, move |
|
39 |
-# readr to Suggests. Alternatively, re-write .readBismarkAsDT() using |
|
40 |
-# data.table::fread() for uncompressed files and utils::read.delim() for |
|
41 |
-# compressed files. This removes the dependency on readr, albeit it with |
|
42 |
-# slightly slower reading of compressed files. Could even then use |
|
43 |
-# data.table::fread() coupled with shell commands (where available) to |
|
44 |
-# pass compressed files. Ultimately, we want to use data.table beyond |
|
45 |
-# data.table::fread() whereas readr is only used for file input. |
|
46 |
-# NOTE: This returns the file as a data.table. However, to do this it uses |
|
47 |
-# readr::read_tsv() + data.table::setDT() instead of data.table::fread()! |
|
48 |
-# Although the latter is faster, this uses the former because Bismark |
|
49 |
-# files are commonly compressed and readr::read_tsv() supports reading |
|
50 |
-# directly from compressed files whereas data.table::fread() does not. |
|
43 |
+# to trade off faster speed for lower memory usage. |
|
44 |
+# TODO: Formalise these benchmarks as a document in the bsseq package so that |
|
45 |
+# we can readily re-visit these as needed. |
|
51 | 46 |
.readBismarkAsDT <- function(file, |
52 | 47 |
col_spec = c("all", "BSseq", "GRanges"), |
53 | 48 |
check = FALSE, |
... | ... |
@@ -482,7 +477,7 @@ read.bismark <- function(files, |
482 | 477 |
} |
483 | 478 |
} else { |
484 | 479 |
ptime1 <- proc.time() |
485 |
- if (verbose) message("[read.bismark] Using 'loci' as valid loci") |
|
480 |
+ if (verbose) message("[read.bismark] Using 'loci' as candidate loci") |
|
486 | 481 |
if (strandCollapse) { |
487 | 482 |
if (verbose) { |
488 | 483 |
message("[read.bismark] Collapsing strand of 'loci' ...") |