Browse code

Updates for recent changes

Peter Hickey authored on 20/07/2018 04:31:21
Showing7 changed files

... ...
@@ -25,7 +25,7 @@ importFrom(graphics, "abline", "axis", "layout", "legend", "lines",
25 25
 import(parallel)
26 26
 importFrom(locfit, "locfit", "lp")
27 27
 importFrom(DelayedMatrixStats, "rowSds", "rowVars", "colMeans2", "colSums2",
28
-           "rowSums2", "rowMeans2", "rowAlls")
28
+           "rowSums2", "rowMeans2", "rowAlls", "rowGrid", "colGrid")
29 29
 
30 30
 importFrom(scales, "alpha")
31 31
 importClassesFrom(Biobase, "AnnotatedDataFrame")
... ...
@@ -46,7 +46,7 @@ importFrom(Biostrings, "DNAString", "vmatchPattern", "reverseComplement")
46 46
 importFrom(utils, "read.delim")
47 47
 importFrom(BSgenome, "vmatchPattern")
48 48
 importFrom(tools, "file_path_as_absolute")
49
-importFrom(R.utils, "isGzipped", "gunzip")
49
+importFrom(R.utils, "isGzipped", "isBzipped", "gunzip", "bunzip2")
50 50
 
51 51
 ##
52 52
 ## Exporting
... ...
@@ -17,6 +17,7 @@ orderBSseq <- function(BSseq, seqOrder = NULL) {
17 17
 #       regions is non-NULL; discuss with Kasper
18 18
 # TODO: Add parallel support
19 19
 # TODO: Document withDimnames
20
+# TODO: Should is be an explicit withDimnames arg or simply passed via `...`?
20 21
 getMeth <- function(BSseq, regions = NULL, type = c("smooth", "raw"),
21 22
                     what = c("perBase", "perRegion"), confint = FALSE,
22 23
                     alpha = 0.95, withDimnames = TRUE) {
... ...
@@ -108,6 +109,7 @@ getMeth <- function(BSseq, regions = NULL, type = c("smooth", "raw"),
108 109
 # TODO: Whether or not colnames are added to returned value depends on whether
109 110
 #       regions is non-NULL; discuss with Kasper
110 111
 # TODO: Document withDimnames
112
+# TODO: Should is be an explicit withDimnames arg or simply passed via `...`?
111 113
 getCoverage <- function(BSseq, regions = NULL, type = c("Cov", "M"),
112 114
                         what = c("perBase", "perRegionAverage",
113 115
                                  "perRegionTotal"),
... ...
@@ -126,3 +126,34 @@ blockApplyWithRealization <- function(x, FUN, ..., sink = NULL, x_grid = NULL,
126 126
     BPPARAM = BPPARAM)
127 127
 }
128 128
 
129
+# TODO: Needed?
130
+.getSEDir <- function(x) {
131
+    paths <- lapply(assays(x, withDimnames = FALSE), function(a) {
132
+        try(path(a), silent = TRUE)
133
+    })
134
+    if (any(vapply(paths, is, logical(1L), "try-error"))) {
135
+        stop("Cannot extract 'dir'.")
136
+    }
137
+    unique_paths <- unique(unlist(paths, use.names = FALSE))
138
+    if (length(unique_paths) > 1) {
139
+        stop("Assay data spread across multiple HDF5 files.")
140
+    }
141
+    dirs <- dirname(unlist(paths, use.names = FALSE))
142
+    unique(dirs)
143
+}
144
+
145
+# Should return TRUE for BSseq object created with read.bismark() or saved with
146
+# HDF5Array::saveHDF5SummarizedExperiment().
147
+# TODO: Check dirname(paths[[1L]]) also contains 'se.rds'? It looks like dir
148
+#       can contain other files besides these; check.
149
+.isHDF5BackedBSseqUpdatable <- function(x) {
150
+    stopifnot(is(x, "BSseq"))
151
+    if (!identical(.getBSseqBackends(x), "HDF5Array")) {
152
+        return(FALSE)
153
+    }
154
+    paths <- vapply(assays(x, withDimnames = FALSE), path, character(1L))
155
+    if (all(paths == paths[[1L]]) && all(basename(paths) == "assays.h5")) {
156
+        return(TRUE)
157
+    }
158
+    FALSE
159
+}
... ...
@@ -217,9 +217,14 @@ setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)
217 217
 #       the smallest returned object (albeit at the small cost of a sort).
218 218
 .readBismarkAsFWGRanges <- function(file, rmZeroCov = FALSE,
219 219
                                     strandCollapse = FALSE, sort = TRUE,
220
-                                    is_zcat_available = TRUE, nThread = 1L,
221 220
                                     verbose = FALSE) {
222
-    # Quieten R CMD check about 'no visible binding for global variable' -------
221
+    # Argument checks ----------------------------------------------------------
222
+
223
+    stopifnot(isTRUEorFALSE(rmZeroCov))
224
+    stopifnot(isTRUEorFALSE(strandCollapse))
225
+    stopifnot(isTRUEorFALSE(sort))
226
+
227
+    # Quieten R CMD check about 'no visible binding for global variable'
223 228
     M <- U <- NULL
224 229
 
225 230
     # Read file to construct data.table of valid loci --------------------------
... ...
@@ -228,8 +233,6 @@ setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)
228 233
             file = file,
229 234
             col_spec = "BSseq",
230 235
             check = TRUE,
231
-            is_zcat_available = is_zcat_available,
232
-            nThread = nThread,
233 236
             verbose = verbose)
234 237
         if (strandCollapse && !is.null(dt[["strand"]]) &&
235 238
             !dt[, all(strand == "*")]) {
... ...
@@ -248,8 +251,6 @@ setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)
248 251
             file = file,
249 252
             col_spec = "GRanges",
250 253
             check = FALSE,
251
-            is_zcat_available = is_zcat_available,
252
-            nThread = nThread,
253 254
             verbose = verbose)
254 255
         if (strandCollapse && !is.null(dt[["strand"]]) &&
255 256
             !dt[, all(strand == "*")]) {
... ...
@@ -303,9 +304,7 @@ setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)
303 304
                                                rmZeroCov,
304 305
                                                strandCollapse,
305 306
                                                verbose,
306
-                                               BPPARAM,
307
-                                               is_zcat_available,
308
-                                               nThread) {
307
+                                               BPPARAM) {
309 308
     subverbose <- max(as.integer(verbose) - 1L, 0L)
310 309
 
311 310
     # TODO: Instead of using the 'largest' file, use the largest
... ...
@@ -338,8 +337,6 @@ setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)
338 337
         file = files[[1L]],
339 338
         rmZeroCov = rmZeroCov,
340 339
         strandCollapse = strandCollapse,
341
-        is_zcat_available = is_zcat_available,
342
-        nThread = nThread,
343 340
         verbose = subverbose)
344 341
     # Identify loci not found in first file.
345 342
     # TODO: Pre-process loci as a GNCList?
... ...
@@ -356,18 +353,12 @@ setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)
356 353
     }
357 354
     list_of_loci_from_other_files_not_in_first_file <- bplapply(
358 355
         files[-1L], function(file, loci_from_first_file) {
359
-            # TODO: This message won't appear in main process, so probably remove.
360
-            if (verbose) {
361
-                message("[.contructFWGRangesFromBismarkFiles] Extracting loci ",
362
-                        "from '", file, "'")
363
-            }
364 356
             # Read this file.
365 357
             loci_from_this_file <- .readBismarkAsFWGRanges(
366 358
                 file = file,
367 359
                 rmZeroCov = rmZeroCov,
368 360
                 strandCollapse = strandCollapse,
369
-                verbose = subverbose,
370
-                is_zcat_available = is_zcat_available)
361
+                verbose = subverbose)
371 362
             subsetByOverlaps(
372 363
                 x = loci_from_this_file,
373 364
                 ranges = loci_from_first_file,
... ...
@@ -144,6 +144,7 @@ collapseBSseq <- function(BSseq, columns) {
144 144
     #       pass down to constructors).
145 145
     se <- SummarizedExperiment(
146 146
         assays = SimpleList(M = M, Cov = Cov),
147
-        rowRanges = rowRanges(BSseq))
147
+        rowRanges = rowRanges(BSseq),
148
+        colData = DataFrame(row.names = unique(columns)))
148 149
     .BSseq(se)
149 150
 }
... ...
@@ -189,11 +189,7 @@ collapseBSseq(BStest, columns = c("A1" = "A", "A2" = "A", "A3" = "B"))
189 189
 # An example using a HDF5-backed BSseq object
190 190
 #
191 191
 library(HDF5Array)
192
-# See ?HDF5Array::writeHDF5Array for details
193
-hdf5_M <- writeHDF5Array(M)
194
-# NOTE: HDF5Array::writeHDF5Array() doesn't preserve dimnames, so have to add
195
-#       these manually
196
-dimnames(hdf5_M) <- dimnames(M)
192
+hdf5_M <- realize(M, "HDF5Array")
197 193
 hdf5_BStest <- BSseq(pos = 1:3,
198 194
                      chr = c("chr1", "chr2", "chr1"),
199 195
                      M = hdf5_M,
... ...
@@ -47,7 +47,10 @@ If you use this package, please cite our BSmooth paper [@Hansen:2012].
47 47
 The first step of the analysis is to smooth the data
48 48
 
49 49
 ```{r smooth,eval=FALSE}
50
-BS.cancer.ex.fit <- BSmooth(BS.cancer.ex, mc.cores = 1, verbose = TRUE)
50
+BS.cancer.ex.fit <- BSmooth(
51
+    BSseq = BS.cancer.ex, 
52
+    BPPARAM = MulticoreParam(workers = 1), 
53
+    verbose = TRUE)
51 54
 ``` 
52 55
 This particular piece of code is not being run when the vignette is being created.  It takes roughly 2 minutes per sample.  If you have 6 cores available, use `mc.cores = 6` and the total run time will be roughly 2 minutes.  Note that setting `mc.cores` to a value greater than 1 is not support on MS Windows due to a limitation of the operating system.
53 56