This is a straight find and replace of (col|row)(Sums|Means) with DelayedMatrixStats equivalents. Immediately, this is to work around an apparent bug in DelayedArray,rowSums-method (https://github.com/Bioconductor/DelayedArray/issues/16) but long term want to be using the optimised implementations in DelayedMatrixStat (e.g., using `cols` and `rows` args).
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: bsseq |
2 |
-Version: 1.15.3 |
|
2 |
+Version: 1.15.4 |
|
3 | 3 |
Encoding: UTF-8 |
4 | 4 |
Title: Analyze, manage and store bisulfite sequencing data |
5 | 5 |
Description: A collection of tools for analyzing and visualizing bisulfite |
... | ... |
@@ -26,7 +26,7 @@ Imports: |
26 | 26 |
data.table, |
27 | 27 |
S4Vectors, |
28 | 28 |
R.utils (>= 2.0.0), |
29 |
- matrixStats (>= 0.50.0), |
|
29 |
+ DelayedMatrixStats (>= 1.1.12), |
|
30 | 30 |
permute, |
31 | 31 |
limma, |
32 | 32 |
DelayedArray (>= 0.5.27), |
... | ... |
@@ -16,8 +16,8 @@ importFrom(graphics, "abline", "axis", "layout", "legend", "lines", |
16 | 16 |
"mtext", "par", "plot", "points", "polygon", "rect", "rug", "text") |
17 | 17 |
import(parallel) |
18 | 18 |
importFrom(locfit, "locfit", "lp") |
19 |
-importFrom(matrixStats, "rowSds", "rowVars", "rowMaxs", "rowMins", "colMeans2", |
|
20 |
- "colSums2") |
|
19 |
+importFrom(DelayedMatrixStats, "rowSds", "rowVars", "colMeans2", "colSums2", |
|
20 |
+ "rowSums2", "rowMeans2") |
|
21 | 21 |
import(IRanges) |
22 | 22 |
import(GenomicRanges) |
23 | 23 |
import(SummarizedExperiment) |
... | ... |
@@ -36,7 +36,8 @@ importFrom(R.utils, "isGzipped", "gunzip") |
36 | 36 |
import(limma) |
37 | 37 |
importFrom(permute, "shuffleSet", "how") |
38 | 38 |
importClassesFrom(DelayedArray, "DelayedArray", "DelayedMatrix") |
39 |
-importFrom(DelayedArray, "DelayedArray", "plogis", "pmin2", "pmax2") |
|
39 |
+importFrom(DelayedArray, "DelayedArray", "plogis", "pmin2", "pmax2", "rowMaxs", |
|
40 |
+ "rowMins") |
|
40 | 41 |
|
41 | 42 |
## |
42 | 43 |
## Exporting |
... | ... |
@@ -54,7 +54,7 @@ BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire |
54 | 54 |
## we skip it for the time being. Really noticeable when Cov |
55 | 55 |
## needs row subsetted/reordered (i.e. has Cov@index[[1]] exists) |
56 | 56 |
## |
57 |
- if(any(rowSums(getCoverage(BSseq)[, c(group1, group2)]) == 0)) { |
|
57 |
+ if(any(rowSums2(getCoverage(BSseq)[, c(group1, group2)]) == 0)) { |
|
58 | 58 |
warning("Computing t-statistics at locations where there is no data; consider subsetting the 'BSseq' object first") |
59 | 59 |
} |
60 | 60 |
} |
... | ... |
@@ -75,8 +75,8 @@ BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire |
75 | 75 |
ptime1 <- proc.time() |
76 | 76 |
allPs <- getMeth(BSseq, type = "smooth", what = "perBase", |
77 | 77 |
confint = FALSE) |
78 |
- group1.means <- rowMeans(allPs[, group1, drop = FALSE], na.rm = TRUE) |
|
79 |
- group2.means <- rowMeans(allPs[, group2, drop = FALSE], na.rm = TRUE) |
|
78 |
+ group1.means <- rowMeans2(allPs[, group1, drop = FALSE], na.rm = TRUE) |
|
79 |
+ group2.means <- rowMeans2(allPs[, group2, drop = FALSE], na.rm = TRUE) |
|
80 | 80 |
ptime2 <- proc.time() |
81 | 81 |
stime <- (ptime2 - ptime1)[3] |
82 | 82 |
if(verbose) cat(sprintf("done in %.1f sec\n", stime)) |
... | ... |
@@ -166,7 +166,7 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
166 | 166 |
} |
167 | 167 |
} |
168 | 168 |
if(rmZeroCov) { |
169 |
- wh <- which(rowSums(Cov) == 0) |
|
169 |
+ wh <- which(rowSums2(Cov) == 0) |
|
170 | 170 |
if(length(wh) > 0) { |
171 | 171 |
gr <- gr[-wh] |
172 | 172 |
M <- M[-wh,,drop = FALSE] |
... | ... |
@@ -159,10 +159,10 @@ getCoverage <- function(BSseq, regions = NULL, type = c("Cov", "M"), |
159 | 159 |
return(getBSseq(BSseq, type = type)) |
160 | 160 |
} |
161 | 161 |
if (what == "perRegionTotal") { |
162 |
- return(colSums(getBSseq(BSseq, type = type))) |
|
162 |
+ return(colSums2(getBSseq(BSseq, type = type))) |
|
163 | 163 |
} |
164 | 164 |
if (what == "perRegionAverage") { |
165 |
- return(colMeans(getBSseq(BSseq, type = type))) |
|
165 |
+ return(colMeans2(getBSseq(BSseq, type = type))) |
|
166 | 166 |
} |
167 | 167 |
} |
168 | 168 |
if (class(regions) == "data.frame") { |
... | ... |
@@ -44,15 +44,15 @@ |
44 | 44 |
is(x@seed, "matrix") |
45 | 45 |
} |
46 | 46 |
|
47 |
-# NOTE: Equivalent to rowSums(x[, j, drop = FALSE]) but does it using a |
|
47 |
+# NOTE: Equivalent to rowSums2(x[, j, drop = FALSE]) but does it using a |
|
48 | 48 |
# delayed operation and always returns a nrow(x) x 1 DelayedMatrix |
49 |
-.delayed_rowSums <- function(x, j) { |
|
49 |
+.delayed_rowSums2 <- function(x, j) { |
|
50 | 50 |
Reduce(`+`, lapply(j, function(jj) x[, jj, drop = FALSE])) |
51 | 51 |
} |
52 | 52 |
|
53 |
-# NOTE: Equivalent to colSums(x[i, , drop = FALSE]) but does it using a |
|
53 |
+# NOTE: Equivalent to colSums2(x[i, , drop = FALSE]) but does it using a |
|
54 | 54 |
# delayed operation and always returns a 1 x ncol(x) DelayedMatrix |
55 |
-.delayed_colSums <- function(x, i) { |
|
55 |
+.delayed_colSums2 <- function(x, i) { |
|
56 | 56 |
Reduce(`+`, lapply(i, function(ii) x[ii, , drop = FALSE])) |
57 | 57 |
} |
58 | 58 |
|
... | ... |
@@ -63,11 +63,11 @@ |
63 | 63 |
if (MARGIN == 1) { |
64 | 64 |
if (is.null(BACKEND)) { |
65 | 65 |
collapsed_x <- do.call(cbind, lapply(sp, function(j) { |
66 |
- rowSums(x[, j, drop = FALSE]) |
|
66 |
+ rowSums2(x[, j, drop = FALSE]) |
|
67 | 67 |
})) |
68 | 68 |
} else { |
69 | 69 |
collapsed_x <- do.call(cbind, lapply(sp, function(j) { |
70 |
- .delayed_rowSums(x, j) |
|
70 |
+ .delayed_rowSums2(x, j) |
|
71 | 71 |
})) |
72 | 72 |
# NOTE: Need to manually add colnames when using this method |
73 | 73 |
colnames(collapsed_x) <- names(sp) |
... | ... |
@@ -75,11 +75,11 @@ |
75 | 75 |
} else if (MARGIN == 2) { |
76 | 76 |
if (is.null(BACKEND)) { |
77 | 77 |
collapsed_x <- do.call(rbind, lapply(sp, function(i) { |
78 |
- colSums(x[i, , drop = FALSE]) |
|
78 |
+ colSums2(x[i, , drop = FALSE]) |
|
79 | 79 |
})) |
80 | 80 |
} else { |
81 | 81 |
collapsed_x <- do.call(rbind, lapply(sp, function(i) { |
82 |
- .delayed_colSums(x, i) |
|
82 |
+ .delayed_colSums2(x, i) |
|
83 | 83 |
})) |
84 | 84 |
# NOTE: Need to manually add rownames when using this method |
85 | 85 |
rownames(collapsed_x) <- names(sp) |
... | ... |
@@ -12,7 +12,7 @@ fisherTests <- function(BSseq, group1, group2, lookup = NULL, |
12 | 12 |
if(is.character(group2)) { |
13 | 13 |
stopifnot(all(group2 %in% sampleNames(BSseq))) |
14 | 14 |
group2 <- match(group2, sampleNames(BSseq)) |
15 |
- } |
|
15 |
+ } |
|
16 | 16 |
if(is.numeric(group2)) { |
17 | 17 |
stopifnot(min(group2) >= 1 & max(group2) <= ncol(BSseq)) |
18 | 18 |
} else stop("problems with argument 'group2'") |
... | ... |
@@ -26,10 +26,10 @@ fisherTests <- function(BSseq, group1, group2, lookup = NULL, |
26 | 26 |
} |
27 | 27 |
if(verbose) cat("[fisherTests] setup ... ") |
28 | 28 |
ptime1 <- proc.time() |
29 |
- M1 <- rowSums(getCoverage(BSseq, type = "M")[, group1, drop = FALSE]) |
|
30 |
- M2 <- rowSums(getCoverage(BSseq, type = "M")[, group2, drop = FALSE]) |
|
31 |
- Cov1 <- rowSums(getCoverage(BSseq, type = "Cov")[, group1, drop = FALSE]) |
|
32 |
- Cov2 <- rowSums(getCoverage(BSseq, type = "Cov")[, group2, drop = FALSE]) |
|
29 |
+ M1 <- rowSums2(getCoverage(BSseq, type = "M")[, group1, drop = FALSE]) |
|
30 |
+ M2 <- rowSums2(getCoverage(BSseq, type = "M")[, group2, drop = FALSE]) |
|
31 |
+ Cov1 <- rowSums2(getCoverage(BSseq, type = "Cov")[, group1, drop = FALSE]) |
|
32 |
+ Cov2 <- rowSums2(getCoverage(BSseq, type = "Cov")[, group2, drop = FALSE]) |
|
33 | 33 |
keys <- paste(Cov1, Cov2, M1, M2, sep = "_") |
34 | 34 |
newkeys <- setdiff(keys, oldkeys) |
35 | 35 |
expand <- matrix(as.integer(do.call(rbind, strsplit(newkeys, "_"))), ncol = 4) |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
poissonGoodnessOfFit <- function(BSseq, nQuantiles = 10^5) { |
2 | 2 |
Cov <- getBSseq(BSseq, type = "Cov") |
3 |
- lambda <- rowMeans(Cov) |
|
3 |
+ lambda <- rowMeans2(Cov) |
|
4 | 4 |
out <- list(chisq = NULL, df = ncol(Cov) - 1) |
5 |
- out$chisq <- rowSums((Cov - lambda)^2 / lambda) |
|
5 |
+ out$chisq <- rowSums2((Cov - lambda)^2 / lambda) |
|
6 | 6 |
if(nQuantiles > length(out$chisq)) |
7 | 7 |
nQuantiles <- length(out$chisq) |
8 | 8 |
probs <- seq(from = 0, to = 1, length.out = nQuantiles) |
... | ... |
@@ -22,14 +22,14 @@ binomialGoodnessOfFit <- function(BSseq, method = c("MLE"), nQuantiles = 10^5) { |
22 | 22 |
M <- getCoverage(BSseq, type = "M", what = "perBase") |
23 | 23 |
Cov <- getCoverage(BSseq, type = "Cov", what = "perBase") |
24 | 24 |
if(method == "MLE") # This is the MLE under iid |
25 |
- p <- rowSums(M) / rowSums(Cov) |
|
25 |
+ p <- rowSums2(M) / rowSums2(Cov) |
|
26 | 26 |
## p <- rowMeans(M / Cov, na.rm = TRUE) |
27 | 27 |
out <- list(chisq = NULL, quantiles = NULL, df = ncol(Cov) - 1) |
28 | 28 |
## This gets tricky. For the binomial model, we need Cov > 0, but |
29 | 29 |
## this implies that a given location may have less then nCol(BSseq) |
30 | 30 |
## number of observations, and hence the degrees of freedom will |
31 |
- ## vary. |
|
32 |
- out$chisq <- rowSums((M - Cov*p)^2 / sqrt(Cov * p * (1-p))) |
|
31 |
+ ## vary. |
|
32 |
+ out$chisq <- rowSums2((M - Cov*p)^2 / sqrt(Cov * p * (1-p))) |
|
33 | 33 |
probs <- seq(from = 0, to = 1, length.out = nQuantiles) |
34 | 34 |
## Next line will remove all locations where not all samples have coverage |
35 | 35 |
out$quantiles <- quantile(out$chisq, prob = probs, na.rm = TRUE) |
... | ... |
@@ -11,7 +11,7 @@ read.umtab2 <- function(dirs, sampleNames = NULL, rmZeroCov = FALSE, |
11 | 11 |
readCycle = readCycle, keepFilt = keepFilt, |
12 | 12 |
verbose = verbose) |
13 | 13 |
if(rmZeroCov && length(chrRead) != 0){ |
14 |
- wh <- which(rowSums(chrRead$U + chrRead$M) > 0) |
|
14 |
+ wh <- which(rowSums2(chrRead$U + chrRead$M) > 0) |
|
15 | 15 |
chrRead$M <- chrRead$M[wh,] |
16 | 16 |
chrRead$U <- chrRead$U[wh,] |
17 | 17 |
if(readCycle) { |
... | ... |
@@ -62,7 +62,7 @@ read.umtab2 <- function(dirs, sampleNames = NULL, rmZeroCov = FALSE, |
62 | 62 |
} |
63 | 63 |
|
64 | 64 |
|
65 |
-read.umtab2.chr <- function(files, sampleNames = NULL, |
|
65 |
+read.umtab2.chr <- function(files, sampleNames = NULL, |
|
66 | 66 |
keepM, keepU, readCycle = FALSE, keepFilt = FALSE, |
67 | 67 |
verbose = TRUE) { |
68 | 68 |
columnHeaders <- strsplit(readLines(files[1], n = 1), "\t")[[1]] |
... | ... |
@@ -148,7 +148,7 @@ read.umtab2.chr <- function(files, sampleNames = NULL, |
148 | 148 |
csums = csums)) |
149 | 149 |
} |
150 | 150 |
|
151 |
-read.umtab2.chr2 <- function(files, sampleNames = NULL, |
|
151 |
+read.umtab2.chr2 <- function(files, sampleNames = NULL, |
|
152 | 152 |
keepM, keepU, verbose = TRUE) { |
153 | 153 |
columnHeaders <- strsplit(readLines(files[1], n = 1), "\t")[[1]] |
154 | 154 |
if("strand" %in% columnHeaders) |
... | ... |
@@ -187,7 +187,7 @@ read.umtab2.chr2 <- function(files, sampleNames = NULL, |
187 | 187 |
}, grLoc) |
188 | 188 |
nPos <- length(grLoc) |
189 | 189 |
nSamples <- length(files) |
190 |
- |
|
190 |
+ |
|
191 | 191 |
M <- matrix(0L, nrow = nPos, ncol = nSamples) |
192 | 192 |
colnames(M) <- sampleNames |
193 | 193 |
U <- Mcy <- Ucy <- M |
... | ... |
@@ -248,7 +248,7 @@ read.umtab <- function(dirs, sampleNames = NULL, rmZeroCov = FALSE, |
248 | 248 |
keepM = keepM, keepU = keepU, sampleNames = sampleNames, |
249 | 249 |
verbose = verbose) |
250 | 250 |
if(rmZeroCov && length(chrRead) != 0){ |
251 |
- wh <- which(rowSums(chrRead$U + chrRead$M) > 0) |
|
251 |
+ wh <- which(rowSums2(chrRead$U + chrRead$M) > 0) |
|
252 | 252 |
chrRead$M <- chrRead$M[wh,] |
253 | 253 |
chrRead$U <- chrRead$U[wh,] |
254 | 254 |
chrRead$Mcy <- chrRead$Mcy[wh,] |
... | ... |
@@ -369,7 +369,7 @@ read.bsmoothDirRaw <- function(dir, seqnames = NULL, keepCycle = FALSE, keepFilt |
369 | 369 |
what0[int] <- replicate(length(int), integer(0)) |
370 | 370 |
if(!keepCycle) |
371 | 371 |
what0[c("Mcy", "Ucy")] <- replicate(2, NULL) |
372 |
- if(!keepFilt) |
|
372 |
+ if(!keepFilt) |
|
373 | 373 |
what0[grep("^filt", names(what0))] <- replicate(length(grep("^filt", names(what0))), NULL) |
374 | 374 |
outList <- lapply(allChrFiles, function(thisfile) { |
375 | 375 |
if(verbose) |
... | ... |
@@ -448,7 +448,7 @@ read.bsmooth <- function(dirs, sampleNames = NULL, seqnames = NULL, returnRaw = |
448 | 448 |
} |
449 | 449 |
ptime2 <- proc.time() |
450 | 450 |
stime <- (ptime2 - ptime1)[3] |
451 |
- if(verbose) cat(sprintf("done in %.1f secs\n", stime)) |
|
451 |
+ if(verbose) cat(sprintf("done in %.1f secs\n", stime)) |
|
452 | 452 |
out |
453 | 453 |
}) |
454 | 454 |
if(!returnRaw) { |