- Addressed #71, thanks @scottgigante
- Removes undocumented behaviour of BSseq() sorting the output. In previous versions, this was done via `reduce(gr)`; see https://github.com/hansenlab/bsseq/blob/RELEASE_3_7/R/BSseq_class.R#L176-L223. In the refactor branch I had changed it to use order(gr). Now, it is completely removed.
... | ... |
@@ -69,10 +69,12 @@ setValidity2("BSseq", function(object) { |
69 | 69 |
|
70 | 70 |
# Exported functions ----------------------------------------------------------- |
71 | 71 |
|
72 |
+# TODO: Document that if duplicate loci are detected then the output `M` and |
|
73 |
+# `Cov` will be numeric even if input `M` and `Cov` were integer. |
|
72 | 74 |
# TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq |
73 | 75 |
# object when called without any arguments. It also does some pretty |
74 |
-# complicated parsing of the inputs. Still, I think we're stuck with it |
|
75 |
-# because it's been around for a long time. |
|
76 |
+# complicated parsing of the inputs. But we're stuck with it because it's |
|
77 |
+# been around for a long time. |
|
76 | 78 |
BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
77 | 79 |
trans = NULL, parameters = NULL, pData = NULL, |
78 | 80 |
gr = NULL, pos = NULL, chr = NULL, sampleNames = NULL, |
... | ... |
@@ -171,27 +173,15 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
171 | 173 |
if (!is.null(coef) || !is.null(se.coef)) { |
172 | 174 |
stop("Cannot collapse when 'coef' or 'se.coef' are present.") |
173 | 175 |
} |
174 |
- # NOTE: reduce() sorts the output |
|
175 |
- # TODO: Clarify above comment. |
|
176 | 176 |
unique_gr <- unique(gr) |
177 |
- # TODO: type = "equal"? |
|
178 |
- ol <- findOverlaps(unique_gr, gr) |
|
179 |
- gr <- unique(gr) |
|
177 |
+ ol <- findOverlaps(unique_gr, gr, type = "equal") |
|
178 |
+ gr <- unique_gr |
|
179 |
+ # TODO: Can we avoid making `idx`? |
|
180 | 180 |
idx <- as.list(ol) |
181 | 181 |
M <- .collapseMatrixLike(M, idx, MARGIN = 2L) |
182 | 182 |
Cov <- .collapseMatrixLike(Cov, idx, MARGIN = 2L) |
183 | 183 |
} |
184 | 184 |
|
185 |
- # Sort loci |
|
186 |
- if (is.unsorted(gr)) { |
|
187 |
- o <- order(gr) |
|
188 |
- gr <- gr[o] |
|
189 |
- M <- M[o, , drop = FALSE] |
|
190 |
- Cov <- Cov[o, , drop = FALSE] |
|
191 |
- coef <- coef[o, , drop = FALSE] |
|
192 |
- se.coef <- se.coef[o, , drop = FALSE] |
|
193 |
- } |
|
194 |
- |
|
195 | 185 |
# Construct BSseq object |
196 | 186 |
assays <- SimpleList(M = M, Cov = Cov, coef = coef, se.coef = se.coef) |
197 | 187 |
assays <- assays[!S4Vectors:::sapply_isNULL(assays)] |
... | ... |
@@ -13,16 +13,42 @@ setGeneric( |
13 | 13 |
|
14 | 14 |
# Internal methods ------------------------------------------------------------- |
15 | 15 |
|
16 |
+# TODO: Re-write in C/C++ to avoid allocating `ans` multiple times |
|
16 | 17 |
setMethod(".collapseMatrixLike", "matrix", function(x, idx, MARGIN) { |
17 | 18 |
if (MARGIN == 1L) { |
18 |
- # TODO: Need colnames? |
|
19 |
- return(do.call(cbind, lapply(idx, function(j) rowSums2(x, cols = j)))) |
|
19 |
+ ans <- matrix( |
|
20 |
+ # NOTE: rowSums2() always returns numeric |
|
21 |
+ data = NA_real_, |
|
22 |
+ nrow = nrow(x), |
|
23 |
+ ncol = length(idx), |
|
24 |
+ dimnames = list(rownames(x), NULL)) |
|
25 |
+ l <- lengths(idx) |
|
26 |
+ stopifnot(min(l) > 0) |
|
27 |
+ l1 <- which(l == 1) |
|
28 |
+ ans[, l1] <- x[, unlist(idx[l1])] |
|
29 |
+ lmore <- which(l != 1) |
|
30 |
+ ans[, lmore] <- do.call(cbind, lapply(idx[lmore], function(j) { |
|
31 |
+ rowSums2(x, cols = j) |
|
32 |
+ })) |
|
20 | 33 |
} else if (MARGIN == 2L) { |
21 |
- # TODO: Need rownames? |
|
22 |
- do.call(rbind, lapply(idx, function(i) colSums2(x, rows = i))) |
|
34 |
+ ans <- matrix( |
|
35 |
+ # NOTE: colSums2() always returns numeric |
|
36 |
+ data = NA_real_, |
|
37 |
+ nrow = length(idx), |
|
38 |
+ ncol = ncol(x), |
|
39 |
+ dimnames = list(NULL, colnames(x))) |
|
40 |
+ l <- lengths(idx) |
|
41 |
+ stopifnot(min(l) > 0) |
|
42 |
+ l1 <- which(l == 1) |
|
43 |
+ ans[l1, ] <- x[unlist(idx[l1]), ] |
|
44 |
+ lmore <- which(l != 1) |
|
45 |
+ ans[lmore, ] <- do.call(rbind, lapply(idx[lmore], function(i) { |
|
46 |
+ colSums2(x, rows = i) |
|
47 |
+ })) |
|
23 | 48 |
} else { |
24 | 49 |
stop("'MARGIN' must be 1 or 2") |
25 | 50 |
} |
51 |
+ ans |
|
26 | 52 |
}) |
27 | 53 |
|
28 | 54 |
setMethod( |
... | ... |
@@ -3,6 +3,13 @@ |
3 | 3 |
\encoding{UTF-8} |
4 | 4 |
\section{Version 1.17.x}{ |
5 | 5 |
\itemize{ |
6 |
+ \item{\code{BSseq()} will no longer reorder inputs. Previously, the |
|
7 |
+ returned \emph{BSseq} object was ordered by ordering the loci, although |
|
8 |
+ this behaviour was not documented. \code{BSseq()} may still filter out loci |
|
9 |
+ if \code{rmZeroCov = FALSE} or collapse loci if |
|
10 |
+ \code{strandCollapse = FALSE} or duplicate loci are detected, but the |
|
11 |
+ relative order of loci in the output will match that of the input. |
|
12 |
+ } |
|
6 | 13 |
\item{Fix bug with \code{maxGap} argument of \code{BSmooth()}. The bug |
7 | 14 |
meant that the 'maximum gap between two methylation loci' was incorrectly |
8 | 15 |
set to \code{2 * maxGap + 1} instead of \code{maxGap}. This likely did not |
... | ... |
@@ -47,7 +47,7 @@ BSseq(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
47 | 47 |
|
48 | 48 |
In case one or more methylation loci appears multiple times, the |
49 | 49 |
\code{M} and \code{Cov} matrices are summed over rows linked to the |
50 |
- same methylation loci. See the example below. |
|
50 |
+ same methylation loci. See the example below. |
|
51 | 51 |
|
52 | 52 |
Users should never have to specify \code{coef}, \code{se.coef}, |
53 | 53 |
\code{trans}, and \code{parameters}, this is for internal use (they |
... | ... |
@@ -29,14 +29,18 @@ test_that("BSseq", { |
29 | 29 |
|
30 | 30 |
BStest2 <- BSseq(pos = 3:1, chr = rep("chr1", 3), M = M[3:1, ], |
31 | 31 |
Cov = M[3:1, ] + 2) |
32 |
- expect_equal(BStest, BStest2) |
|
32 |
+ # NOTE: BSseq() does not reorder inputs |
|
33 |
+ expect_true(!isTRUE(all.equal(BStest, BStest2))) |
|
34 |
+ expect_equal(BStest, sort(BStest2)) |
|
33 | 35 |
M2 <- rbind(M[3:1, ], c(1, 1, 1)) |
34 | 36 |
Cov2 <- rbind(M[3:1, ] + 2, c(1, 1, 1)) |
35 | 37 |
M2[3, ] <- M2[3, ] - 1 |
36 | 38 |
Cov2[3, ] <- Cov2[3, ] - 1 |
37 | 39 |
BStest3 <- suppressWarnings( |
38 | 40 |
BSseq(pos = c(3:1,1), chr = rep("chr1", 4), M = M2, Cov = Cov2)) |
39 |
- expect_equal(BStest, BStest3) |
|
41 |
+ # NOTE: BSseq() does not reorder inputs |
|
42 |
+ expect_true(!isTRUE(all.equal(BStest, BStest3))) |
|
43 |
+ expect_equal(BStest, sort(BStest3)) |
|
40 | 44 |
}) |
41 | 45 |
|
42 | 46 |
test_that("BSseq with HDF5Backend", { |