Browse code

Add some TODOs and NOTEs

Peter Hickey authored on 14/06/2018 02:05:45
Showing 4 changed files

... ...
@@ -111,6 +111,7 @@ makeClusters <- function(hasGRanges, maxGap = 10^8) {
111 111
 
112 112
 # Exported functions -----------------------------------------------------------
113 113
 
114
+# TODO: BSmooth() should warn if BSseq object contains mix of strands.
114 115
 # TODO: If BSmooth() encounteres errors, return `BPREDO`` as `metadata(BSseq)`
115 116
 #       so as not to clobber the user's BSseq object; see
116 117
 #       https://support.bioconductor.org/p/109374/#109459.
... ...
@@ -200,6 +200,7 @@ setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)
200 200
 
201 201
 # Internal functions -----------------------------------------------------------
202 202
 
203
+# TODO: Warn if x contains mix of + and * or - and * loci?
203 204
 .strandCollapse <- function(x) {
204 205
     stopifnot(is(x, "GenomicRanges"))
205 206
     if (all(strand(x) == "*")) return(x)
... ...
@@ -11,7 +11,8 @@ setMethod(
11 11
     "findCytosines",
12 12
     "BSgenome",
13 13
     function(x, context, seqlevels = seqlevels(x)) {
14
-        # NOTE: vmatchPattern,BSgenome-method returns a GRanges instance.
14
+        # NOTE: vmatchPattern,BSgenome-method returns a GRanges instance and
15
+        #       automatically checks both forward and reverse strands.
15 16
         gr <- vmatchPattern(
16 17
             pattern = context,
17 18
             subject = x,
... ...
@@ -29,25 +29,20 @@
29 29
     guessed_file_types
30 30
 }
31 31
 
32
-# TODO: (longterm, see "Alternatively ..." for a better idea)
33
-#       .readBismarkAsDT2(): exact same as .readBismarkAsDT() but
34
-#       uses utils::read.delim() instead of readr::read_tsv(). In brief
35
-#       benchmarking, readr::read_csv() is ~1.3-1.6x faster than
32
+# TODO: Choose between utils::read.delim(), readr::read_tsv(), and
33
+#       data.table::fread() based on 'file'. If plain text, use fread(). If a
34
+#       compressed file, use readr::read_tsv() if available, otherwise
35
+#       utils::read_delim(). Longer term, combine data.table::fread() with
36
+#       shell commands (where available) to pass compressed files. Will need to
37
+#       be careful of the interaction between BPPARAM and fread()'s nThread.
38
+#       Once implemented, move readr to Suggests. Finally, allow user to
39
+#       specify which function to use.
40
+# NOTE: In brief  benchmarking, readr::read_csv() is ~1.3-1.6x faster than
36 41
 #       utils::read.delim() when reading a gzipped file, albeit it with ~1.6-2x
37 42
 #       more total memory allocated. Therefore, there may be times users prefer
38
-#       to trade off faster speed for lower memory usage. When written, move
39
-#       readr to Suggests. Alternatively, re-write .readBismarkAsDT() using
40
-#       data.table::fread() for uncompressed files and utils::read.delim() for
41
-#       compressed files. This removes the dependency on readr, albeit it with
42
-#       slightly slower reading of compressed files. Could even then use
43
-#       data.table::fread() coupled with shell commands (where available) to
44
-#       pass compressed files. Ultimately, we want to use data.table beyond
45
-#       data.table::fread() whereas readr is only used for file input.
46
-# NOTE: This returns the file as a data.table. However, to do this it uses
47
-#       readr::read_tsv() + data.table::setDT() instead of data.table::fread()!
48
-#       Although the latter is faster, this uses the former because Bismark
49
-#       files are commonly compressed and readr::read_tsv() supports reading
50
-#       directly from compressed files whereas data.table::fread() does not.
43
+#       to trade off faster speed for lower memory usage.
44
+# TODO: Formalise these benchmarks as a document in the bsseq package so that
45
+#       we can readily re-visit these as needed.
51 46
 .readBismarkAsDT <- function(file,
52 47
                              col_spec = c("all", "BSseq", "GRanges"),
53 48
                              check = FALSE,
... ...
@@ -482,7 +477,7 @@ read.bismark <- function(files,
482 477
         }
483 478
     } else {
484 479
         ptime1 <- proc.time()
485
-        if (verbose) message("[read.bismark] Using 'loci' as valid loci")
480
+        if (verbose) message("[read.bismark] Using 'loci' as candidate loci")
486 481
         if (strandCollapse) {
487 482
             if (verbose) {
488 483
                 message("[read.bismark] Collapsing strand of 'loci' ...")