Browse code

Replace base/matrixStats/DelayedArray by DelayedMatrixStats

This is a straight find and replace of (col|row)(Sums|Means) with DelayedMatrixStats equivalents. Immediately, this is to work around an apparent bug in DelayedArray,rowSums-method (https://github.com/Bioconductor/DelayedArray/issues/16) but long term want to be using the optimised implementations in DelayedMatrixStat (e.g., using `cols` and `rows` args).

Peter Hickey authored on 24/04/2018 17:53:48
Showing 9 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: bsseq
2
-Version: 1.15.3
2
+Version: 1.15.4
3 3
 Encoding: UTF-8
4 4
 Title: Analyze, manage and store bisulfite sequencing data
5 5
 Description: A collection of tools for analyzing and visualizing bisulfite
... ...
@@ -26,7 +26,7 @@ Imports:
26 26
     data.table,
27 27
     S4Vectors,
28 28
     R.utils (>= 2.0.0),
29
-    matrixStats (>= 0.50.0),
29
+    DelayedMatrixStats (>= 1.1.12),
30 30
     permute,
31 31
     limma,
32 32
     DelayedArray (>= 0.5.27),
... ...
@@ -16,8 +16,8 @@ importFrom(graphics, "abline", "axis", "layout", "legend", "lines",
16 16
            "mtext", "par", "plot", "points", "polygon", "rect", "rug", "text")
17 17
 import(parallel)
18 18
 importFrom(locfit, "locfit", "lp")
19
-importFrom(matrixStats, "rowSds", "rowVars", "rowMaxs", "rowMins", "colMeans2",
20
-           "colSums2")
19
+importFrom(DelayedMatrixStats, "rowSds", "rowVars", "colMeans2", "colSums2",
20
+           "rowSums2", "rowMeans2")
21 21
 import(IRanges)
22 22
 import(GenomicRanges)
23 23
 import(SummarizedExperiment)
... ...
@@ -36,7 +36,8 @@ importFrom(R.utils, "isGzipped", "gunzip")
36 36
 import(limma)
37 37
 importFrom(permute, "shuffleSet", "how")
38 38
 importClassesFrom(DelayedArray, "DelayedArray", "DelayedMatrix")
39
-importFrom(DelayedArray, "DelayedArray", "plogis", "pmin2", "pmax2")
39
+importFrom(DelayedArray, "DelayedArray", "plogis", "pmin2", "pmax2", "rowMaxs",
40
+           "rowMins")
40 41
 
41 42
 ##
42 43
 ## Exporting
... ...
@@ -54,7 +54,7 @@ BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire
54 54
         ##       we skip it for the time being. Really noticeable when Cov
55 55
         ##       needs row subsetted/reordered (i.e. has Cov@index[[1]] exists)
56 56
         ##
57
-        if(any(rowSums(getCoverage(BSseq)[, c(group1, group2)]) == 0)) {
57
+        if(any(rowSums2(getCoverage(BSseq)[, c(group1, group2)]) == 0)) {
58 58
             warning("Computing t-statistics at locations where there is no data; consider subsetting the 'BSseq' object first")
59 59
         }
60 60
     }
... ...
@@ -75,8 +75,8 @@ BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire
75 75
     ptime1 <- proc.time()
76 76
     allPs <- getMeth(BSseq, type = "smooth", what = "perBase",
77 77
                      confint = FALSE)
78
-    group1.means <- rowMeans(allPs[, group1, drop = FALSE], na.rm = TRUE)
79
-    group2.means <- rowMeans(allPs[, group2, drop = FALSE], na.rm = TRUE)
78
+    group1.means <- rowMeans2(allPs[, group1, drop = FALSE], na.rm = TRUE)
79
+    group2.means <- rowMeans2(allPs[, group2, drop = FALSE], na.rm = TRUE)
80 80
     ptime2 <- proc.time()
81 81
     stime <- (ptime2 - ptime1)[3]
82 82
     if(verbose) cat(sprintf("done in %.1f sec\n", stime))
... ...
@@ -166,7 +166,7 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
166 166
         }
167 167
     }
168 168
     if(rmZeroCov) {
169
-        wh <- which(rowSums(Cov) == 0)
169
+        wh <- which(rowSums2(Cov) == 0)
170 170
         if(length(wh) > 0) {
171 171
             gr <- gr[-wh]
172 172
             M <- M[-wh,,drop = FALSE]
... ...
@@ -159,10 +159,10 @@ getCoverage <- function(BSseq, regions = NULL, type = c("Cov", "M"),
159 159
             return(getBSseq(BSseq, type = type))
160 160
         }
161 161
         if (what == "perRegionTotal") {
162
-            return(colSums(getBSseq(BSseq, type = type)))
162
+            return(colSums2(getBSseq(BSseq, type = type)))
163 163
         }
164 164
         if (what == "perRegionAverage") {
165
-            return(colMeans(getBSseq(BSseq, type = type)))
165
+            return(colMeans2(getBSseq(BSseq, type = type)))
166 166
         }
167 167
     }
168 168
     if (class(regions) == "data.frame") {
... ...
@@ -44,15 +44,15 @@
44 44
     is(x@seed, "matrix")
45 45
 }
46 46
 
47
-# NOTE: Equivalent to rowSums(x[, j, drop = FALSE]) but does it using a
47
+# NOTE: Equivalent to rowSums2(x[, j, drop = FALSE]) but does it using a
48 48
 #       delayed operation and always returns a nrow(x) x 1 DelayedMatrix
49
-.delayed_rowSums <- function(x, j) {
49
+.delayed_rowSums2 <- function(x, j) {
50 50
     Reduce(`+`, lapply(j, function(jj) x[, jj, drop = FALSE]))
51 51
 }
52 52
 
53
-# NOTE: Equivalent to colSums(x[i, , drop = FALSE]) but does it using a
53
+# NOTE: Equivalent to colSums2(x[i, , drop = FALSE]) but does it using a
54 54
 #       delayed operation and always returns a 1 x ncol(x) DelayedMatrix
55
-.delayed_colSums <- function(x, i) {
55
+.delayed_colSums2 <- function(x, i) {
56 56
     Reduce(`+`, lapply(i, function(ii) x[ii, , drop = FALSE]))
57 57
 }
58 58
 
... ...
@@ -63,11 +63,11 @@
63 63
     if (MARGIN == 1) {
64 64
         if (is.null(BACKEND)) {
65 65
             collapsed_x <- do.call(cbind, lapply(sp, function(j) {
66
-                rowSums(x[, j, drop = FALSE])
66
+                rowSums2(x[, j, drop = FALSE])
67 67
             }))
68 68
         } else {
69 69
             collapsed_x <- do.call(cbind, lapply(sp, function(j) {
70
-                .delayed_rowSums(x, j)
70
+                .delayed_rowSums2(x, j)
71 71
             }))
72 72
             # NOTE: Need to manually add colnames when using this method
73 73
             colnames(collapsed_x) <- names(sp)
... ...
@@ -75,11 +75,11 @@
75 75
     } else if (MARGIN == 2) {
76 76
         if (is.null(BACKEND)) {
77 77
             collapsed_x <- do.call(rbind, lapply(sp, function(i) {
78
-                colSums(x[i, , drop = FALSE])
78
+                colSums2(x[i, , drop = FALSE])
79 79
             }))
80 80
         } else {
81 81
             collapsed_x <- do.call(rbind, lapply(sp, function(i) {
82
-                .delayed_colSums(x, i)
82
+                .delayed_colSums2(x, i)
83 83
             }))
84 84
             # NOTE: Need to manually add rownames when using this method
85 85
             rownames(collapsed_x) <- names(sp)
... ...
@@ -12,7 +12,7 @@ fisherTests <- function(BSseq, group1, group2, lookup = NULL,
12 12
     if(is.character(group2)) {
13 13
         stopifnot(all(group2 %in% sampleNames(BSseq)))
14 14
         group2 <- match(group2, sampleNames(BSseq))
15
-    }    
15
+    }
16 16
     if(is.numeric(group2)) {
17 17
         stopifnot(min(group2) >= 1 & max(group2) <= ncol(BSseq))
18 18
     } else stop("problems with argument 'group2'")
... ...
@@ -26,10 +26,10 @@ fisherTests <- function(BSseq, group1, group2, lookup = NULL,
26 26
     }
27 27
     if(verbose) cat("[fisherTests] setup ... ")
28 28
     ptime1 <- proc.time()
29
-    M1 <- rowSums(getCoverage(BSseq, type = "M")[, group1, drop = FALSE])
30
-    M2 <- rowSums(getCoverage(BSseq, type = "M")[, group2, drop = FALSE])
31
-    Cov1 <- rowSums(getCoverage(BSseq, type = "Cov")[, group1, drop = FALSE])
32
-    Cov2 <- rowSums(getCoverage(BSseq, type = "Cov")[, group2, drop = FALSE])
29
+    M1 <- rowSums2(getCoverage(BSseq, type = "M")[, group1, drop = FALSE])
30
+    M2 <- rowSums2(getCoverage(BSseq, type = "M")[, group2, drop = FALSE])
31
+    Cov1 <- rowSums2(getCoverage(BSseq, type = "Cov")[, group1, drop = FALSE])
32
+    Cov2 <- rowSums2(getCoverage(BSseq, type = "Cov")[, group2, drop = FALSE])
33 33
     keys <- paste(Cov1, Cov2, M1, M2, sep = "_")
34 34
     newkeys <- setdiff(keys, oldkeys)
35 35
     expand <- matrix(as.integer(do.call(rbind, strsplit(newkeys, "_"))), ncol = 4)
... ...
@@ -1,8 +1,8 @@
1 1
 poissonGoodnessOfFit <- function(BSseq, nQuantiles = 10^5) {
2 2
     Cov <- getBSseq(BSseq, type = "Cov")
3
-    lambda <- rowMeans(Cov)
3
+    lambda <- rowMeans2(Cov)
4 4
     out <- list(chisq = NULL, df = ncol(Cov) - 1)
5
-    out$chisq <- rowSums((Cov - lambda)^2 / lambda)
5
+    out$chisq <- rowSums2((Cov - lambda)^2 / lambda)
6 6
     if(nQuantiles > length(out$chisq))
7 7
         nQuantiles <- length(out$chisq)
8 8
     probs <- seq(from = 0, to = 1, length.out = nQuantiles)
... ...
@@ -22,14 +22,14 @@ binomialGoodnessOfFit <- function(BSseq, method = c("MLE"), nQuantiles = 10^5) {
22 22
     M <- getCoverage(BSseq, type = "M", what = "perBase")
23 23
     Cov <- getCoverage(BSseq, type = "Cov", what = "perBase")
24 24
     if(method == "MLE") # This is the MLE under iid
25
-        p <- rowSums(M) / rowSums(Cov) 
25
+        p <- rowSums2(M) / rowSums2(Cov)
26 26
     ## p <- rowMeans(M / Cov, na.rm = TRUE)
27 27
     out <- list(chisq = NULL, quantiles = NULL, df = ncol(Cov) - 1)
28 28
     ## This gets tricky.  For the binomial model, we need Cov > 0, but
29 29
     ## this implies that a given location may have less then nCol(BSseq)
30 30
     ## number of observations, and hence the degrees of freedom will
31
-    ## vary.  
32
-    out$chisq <- rowSums((M - Cov*p)^2 / sqrt(Cov * p * (1-p)))
31
+    ## vary.
32
+    out$chisq <- rowSums2((M - Cov*p)^2 / sqrt(Cov * p * (1-p)))
33 33
     probs <- seq(from = 0, to = 1, length.out = nQuantiles)
34 34
     ## Next line will remove all locations where not all samples have coverage
35 35
     out$quantiles <- quantile(out$chisq, prob = probs, na.rm = TRUE)
... ...
@@ -11,7 +11,7 @@ read.umtab2 <- function(dirs, sampleNames = NULL, rmZeroCov = FALSE,
11 11
                                    readCycle = readCycle, keepFilt = keepFilt,
12 12
                                    verbose = verbose)
13 13
         if(rmZeroCov && length(chrRead) != 0){
14
-            wh <- which(rowSums(chrRead$U + chrRead$M) > 0)
14
+            wh <- which(rowSums2(chrRead$U + chrRead$M) > 0)
15 15
             chrRead$M <- chrRead$M[wh,]
16 16
             chrRead$U <- chrRead$U[wh,]
17 17
             if(readCycle) {
... ...
@@ -62,7 +62,7 @@ read.umtab2 <- function(dirs, sampleNames = NULL, rmZeroCov = FALSE,
62 62
 }
63 63
 
64 64
 
65
-read.umtab2.chr <- function(files, sampleNames = NULL, 
65
+read.umtab2.chr <- function(files, sampleNames = NULL,
66 66
                             keepM, keepU, readCycle = FALSE, keepFilt = FALSE,
67 67
                             verbose = TRUE) {
68 68
     columnHeaders <- strsplit(readLines(files[1], n = 1), "\t")[[1]]
... ...
@@ -148,7 +148,7 @@ read.umtab2.chr <- function(files, sampleNames = NULL,
148 148
                 csums = csums))
149 149
 }
150 150
 
151
-read.umtab2.chr2 <- function(files, sampleNames = NULL, 
151
+read.umtab2.chr2 <- function(files, sampleNames = NULL,
152 152
                              keepM, keepU, verbose = TRUE) {
153 153
     columnHeaders <- strsplit(readLines(files[1], n = 1), "\t")[[1]]
154 154
     if("strand" %in% columnHeaders)
... ...
@@ -187,7 +187,7 @@ read.umtab2.chr2 <- function(files, sampleNames = NULL,
187 187
     }, grLoc)
188 188
     nPos <- length(grLoc)
189 189
     nSamples <- length(files)
190
-    
190
+
191 191
     M <- matrix(0L, nrow = nPos, ncol = nSamples)
192 192
     colnames(M)  <- sampleNames
193 193
     U <- Mcy <- Ucy <- M
... ...
@@ -248,7 +248,7 @@ read.umtab <- function(dirs, sampleNames = NULL, rmZeroCov = FALSE,
248 248
                                   keepM = keepM, keepU = keepU, sampleNames = sampleNames,
249 249
                                   verbose = verbose)
250 250
         if(rmZeroCov && length(chrRead) != 0){
251
-            wh <- which(rowSums(chrRead$U + chrRead$M) > 0)
251
+            wh <- which(rowSums2(chrRead$U + chrRead$M) > 0)
252 252
             chrRead$M <- chrRead$M[wh,]
253 253
             chrRead$U <- chrRead$U[wh,]
254 254
             chrRead$Mcy <- chrRead$Mcy[wh,]
... ...
@@ -369,7 +369,7 @@ read.bsmoothDirRaw <- function(dir, seqnames = NULL, keepCycle = FALSE, keepFilt
369 369
     what0[int] <- replicate(length(int), integer(0))
370 370
     if(!keepCycle)
371 371
         what0[c("Mcy", "Ucy")] <- replicate(2, NULL)
372
-    if(!keepFilt) 
372
+    if(!keepFilt)
373 373
         what0[grep("^filt", names(what0))] <- replicate(length(grep("^filt", names(what0))), NULL)
374 374
     outList <- lapply(allChrFiles, function(thisfile) {
375 375
         if(verbose)
... ...
@@ -448,7 +448,7 @@ read.bsmooth <- function(dirs, sampleNames = NULL, seqnames = NULL, returnRaw =
448 448
         }
449 449
         ptime2 <- proc.time()
450 450
         stime <- (ptime2 - ptime1)[3]
451
-        if(verbose) cat(sprintf("done in %.1f secs\n", stime)) 
451
+        if(verbose) cat(sprintf("done in %.1f secs\n", stime))
452 452
         out
453 453
     })
454 454
     if(!returnRaw) {