&& is always preferred over & in if conditions as it prevents the
right operand of the && operation from being unnecessarily evaluated.
In this particular case this change preventively repairs the
updateObject() method for BSseq objects that will otherwise break
on objects with DataFrame instances in them when DataFrame becomes
a virtual class in the S4Vectors package (will happen soon).
... | ... |
@@ -371,7 +371,7 @@ setReplaceMethod( |
371 | 371 |
setMethod("updateObject", "BSseq", |
372 | 372 |
function(object, ...) { |
373 | 373 |
# NOTE: identical() is too strong |
374 |
- if (hasBeenSmoothed(object) & |
|
374 |
+ if (hasBeenSmoothed(object) && |
|
375 | 375 |
isTRUE(all.equal(getBSseq(object, "trans"), .oldTrans))) { |
376 | 376 |
object@trans <- plogis |
377 | 377 |
} |
... | ... |
@@ -252,9 +252,9 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
252 | 252 |
stop("'replace' must be TRUE or FALSE") |
253 | 253 |
} |
254 | 254 |
if (!dir.exists(dir)) { |
255 |
- HDF5Array:::.create_dir(dir) |
|
255 |
+ HDF5Array::create_dir(dir) |
|
256 | 256 |
} else { |
257 |
- HDF5Array:::.replace_dir(dir, replace) |
|
257 |
+ HDF5Array::replace_dir(dir, replace) |
|
258 | 258 |
} |
259 | 259 |
h5_path <- file.path(dir, "assays.h5") |
260 | 260 |
} else if (identical(BACKEND, NULL)) { |
... | ... |
@@ -310,7 +310,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
310 | 310 |
# NOTE: Save BSseq object; mimicing |
311 | 311 |
# HDF5Array::saveHDF5SummarizedExperiment(). |
312 | 312 |
x <- bsseq |
313 |
- x@assays <- HDF5Array:::.shorten_assay2h5_links(x@assays) |
|
313 |
+ x@assays <- HDF5Array::shorten_assay2h5_links(x@assays) |
|
314 | 314 |
saveRDS(x, file = file.path(dir, "se.rds")) |
315 | 315 |
} |
316 | 316 |
bsseq |
... | ... |
@@ -83,7 +83,7 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
83 | 83 |
} |
84 | 84 |
# Process 'trans' and 'parameters'. |
85 | 85 |
if (is.null(trans)) { |
86 |
- trans <- function(q) NULL |
|
86 |
+ trans <- function() NULL |
|
87 | 87 |
environment(trans) <- emptyenv() |
88 | 88 |
} |
89 | 89 |
if (is.null(parameters)) { |
... | ... |
@@ -83,7 +83,8 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
83 | 83 |
} |
84 | 84 |
# Process 'trans' and 'parameters'. |
85 | 85 |
if (is.null(trans)) { |
86 |
- trans <- function() NULL |
|
86 |
+ trans <- function(q) NULL |
|
87 |
+ environment(trans) <- emptyenv() |
|
87 | 88 |
} |
88 | 89 |
if (is.null(parameters)) { |
89 | 90 |
parameters <- list() |
... | ... |
@@ -200,7 +200,7 @@ getBSseq <- function(BSseq, |
200 | 200 |
|
201 | 201 |
# Move to BSseq-utils? |
202 | 202 |
strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
203 |
- BACKEND = getRealizationBackend(), |
|
203 |
+ BACKEND = getAutoRealizationBackend(), |
|
204 | 204 |
dir = tempfile("BSseq"), replace = FALSE, |
205 | 205 |
chunkdim = NULL, level = NULL, |
206 | 206 |
type = c("double", "integer")) { |
... | ... |
@@ -216,9 +216,9 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
216 | 216 |
} |
217 | 217 |
# Register 'BACKEND' and return to current value on exit. |
218 | 218 |
# TODO: Is this strictly necessary? |
219 |
- current_BACKEND <- getRealizationBackend() |
|
220 |
- on.exit(setRealizationBackend(current_BACKEND), add = TRUE) |
|
221 |
- setRealizationBackend(BACKEND) |
|
219 |
+ current_BACKEND <- getAutoRealizationBackend() |
|
220 |
+ on.exit(setAutoRealizationBackend(current_BACKEND), add = TRUE) |
|
221 |
+ setAutoRealizationBackend(BACKEND) |
|
222 | 222 |
# Check compatability of 'BPPARAM' with 'BACKEND'. |
223 | 223 |
if (!.areBackendsInMemory(BACKEND)) { |
224 | 224 |
if (!.isSingleMachineBackend(BPPARAM)) { |
... | ... |
@@ -92,7 +92,7 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
92 | 92 |
if (is.null(sampleNames)) { |
93 | 93 |
if (is.null(pData)) { |
94 | 94 |
# BSseq object will have no colnames. |
95 |
- pData <- S4Vectors:::new_DataFrame(nrows = ncol(M)) |
|
95 |
+ pData <- make_zero_col_DFrame(ncol(M)) |
|
96 | 96 |
} else { |
97 | 97 |
# BSseq object will have 'sampleNames' as colnames. |
98 | 98 |
pData <- DataFrame(row.names = sampleNames) |
- Temporary workaround for https://github.com/Bioconductor/DelayedArray/issues/41
- Ensures bsseq passes checks for next BioC release
... | ... |
@@ -272,7 +272,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
272 | 272 |
|
273 | 273 |
ol <- findOverlaps(loci, collapsed_loci, type = "equal") |
274 | 274 |
group <- subjectHits(ol) |
275 |
- M <- rowsum( |
|
275 |
+ M <- .rowsum( |
|
276 | 276 |
x = assay(BSseq, "M", withDimnames = FALSE), |
277 | 277 |
group = group, |
278 | 278 |
# NOTE: reorder = TRUE to ensure same row-order as collapsed_loci. |
... | ... |
@@ -283,7 +283,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
283 | 283 |
chunkdim = chunkdim, |
284 | 284 |
level = level, |
285 | 285 |
type = type) |
286 |
- Cov <- rowsum( |
|
286 |
+ Cov <- .rowsum( |
|
287 | 287 |
x = assay(BSseq, "Cov", withDimnames = FALSE), |
288 | 288 |
group = group, |
289 | 289 |
# NOTE: reorder = TRUE to ensure same row-order as collapsed_loci. |
... | ... |
@@ -250,7 +250,11 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
250 | 250 |
if (!isTRUEorFALSE(replace)) { |
251 | 251 |
stop("'replace' must be TRUE or FALSE") |
252 | 252 |
} |
253 |
- HDF5Array:::.create_dir(dir = dir, replace = replace) |
|
253 |
+ if (!dir.exists(dir)) { |
|
254 |
+ HDF5Array:::.create_dir(dir) |
|
255 |
+ } else { |
|
256 |
+ HDF5Array:::.replace_dir(dir, replace) |
|
257 |
+ } |
|
254 | 258 |
h5_path <- file.path(dir, "assays.h5") |
255 | 259 |
} else if (identical(BACKEND, NULL)) { |
256 | 260 |
h5_path <- NULL |
... | ... |
@@ -305,7 +309,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
305 | 309 |
# NOTE: Save BSseq object; mimicing |
306 | 310 |
# HDF5Array::saveHDF5SummarizedExperiment(). |
307 | 311 |
x <- bsseq |
308 |
- x@assays <- HDF5Array:::.shorten_h5_paths(x@assays) |
|
312 |
+ x@assays <- HDF5Array:::.shorten_assay2h5_links(x@assays) |
|
309 | 313 |
saveRDS(x, file = file.path(dir, "se.rds")) |
310 | 314 |
} |
311 | 315 |
bsseq |
- Small tweak to default @trans function in BSseq() and combine().
- Add unit test
... | ... |
@@ -83,7 +83,7 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
83 | 83 |
} |
84 | 84 |
# Process 'trans' and 'parameters'. |
85 | 85 |
if (is.null(trans)) { |
86 |
- trans <- function(x) NULL |
|
86 |
+ trans <- function() NULL |
|
87 | 87 |
} |
88 | 88 |
if (is.null(parameters)) { |
89 | 89 |
parameters <- list() |
... | ... |
@@ -39,21 +39,17 @@ setValidity2("BSseq", function(object) { |
39 | 39 |
} |
40 | 40 |
msg <- validMsg(msg, .checkAssayNames(object, c("Cov", "M"))) |
41 | 41 |
|
42 |
- # TODO: Are colnames strictly necessary? |
|
43 |
- if (is.null(colnames(object))) { |
|
44 |
- msg <- validMsg(msg, "colnames (aka sampleNames) need to be set") |
|
45 |
- } |
|
46 |
- |
|
47 |
- assay_rownames <- lapply(assays(object), rownames) |
|
48 |
- if (!all(S4Vectors:::sapply_isNULL(assay_rownames))) { |
|
49 |
- msg <- validMsg(msg, "unnecessary rownames on one-or-more assays") |
|
42 |
+ msg <- validMsg( |
|
43 |
+ msg = msg, |
|
44 |
+ result = .checkMandCov( |
|
45 |
+ M = assay(object, "M", withDimnames = FALSE), |
|
46 |
+ Cov = assay(object, "Cov", withDimnames = FALSE))) |
|
47 |
+ |
|
48 |
+ if (is.null(msg)) { |
|
49 |
+ TRUE |
|
50 |
+ } else { |
|
51 |
+ msg |
|
50 | 52 |
} |
51 |
- |
|
52 |
- msg <- validMsg(msg, |
|
53 |
- .checkMandCov(assay(object, "M", withDimnames = FALSE), |
|
54 |
- assay(object, "Cov", withDimnames = FALSE))) |
|
55 |
- |
|
56 |
- if (is.null(msg)) TRUE else msg |
|
57 | 53 |
}) |
58 | 54 |
|
59 | 55 |
# Internal functions ----------------------------------------------------------- |
... | ... |
@@ -69,135 +65,113 @@ setValidity2("BSseq", function(object) { |
69 | 65 |
|
70 | 66 |
# Exported functions ----------------------------------------------------------- |
71 | 67 |
|
72 |
-# TODO: Document that if duplicate loci are detected then the output `M` and |
|
73 |
-# `Cov` will be numeric even if input `M` and `Cov` were integer. |
|
74 | 68 |
# TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq |
75 | 69 |
# object when called without any arguments. It also does some pretty |
76 | 70 |
# complicated parsing of the inputs. But we're stuck with it because it's |
77 | 71 |
# been around for a long time. |
78 | 72 |
BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
79 |
- trans = NULL, parameters = NULL, pData = NULL, |
|
80 |
- gr = NULL, pos = NULL, chr = NULL, sampleNames = NULL, |
|
73 |
+ trans = NULL, parameters = NULL, pData = NULL, gr = NULL, |
|
74 |
+ pos = NULL, chr = NULL, sampleNames = NULL, |
|
81 | 75 |
rmZeroCov = FALSE) { |
82 | 76 |
|
83 |
- # Process 'gr', or 'pos' and 'chr' |
|
84 |
- if (is.null(gr)) { |
|
85 |
- if (is.null(pos) || is.null(chr)) { |
|
86 |
- stop("Need 'pos' and 'chr' if 'gr' not supplied.") |
|
87 |
- } |
|
88 |
- gr <- GRanges(seqnames = chr, ranges = IRanges(start = pos, width = 1L)) |
|
77 |
+ # Argument checks ---------------------------------------------------------- |
|
78 |
+ |
|
79 |
+ # Process assays. |
|
80 |
+ # NOTE: Nothing to do for 'coef', and 'se.coef'. |
|
81 |
+ if (is.null(M) || is.null(Cov)) { |
|
82 |
+ stop("Need 'M' and 'Cov'.") |
|
89 | 83 |
} |
90 |
- if (!is(gr, "GRanges")) stop("'gr' needs to be a GRanges.") |
|
91 |
- if (any(width(gr) != 1L)) stop("'gr' needs to have widths of 1.") |
|
92 |
- |
|
93 |
- # Process 'M' and 'Cov' |
|
94 |
- if (is.null(M) || is.null(Cov)) stop("Need 'M' and 'Cov'.") |
|
95 |
- if (length(gr) != nrow(M) || |
|
96 |
- length(gr) != nrow(Cov) || |
|
97 |
- ncol(Cov) != ncol(M)) { |
|
98 |
- stop("'gr', 'M' and 'Cov' need to have compatible dimensions.") |
|
84 |
+ # Process 'trans' and 'parameters'. |
|
85 |
+ if (is.null(trans)) { |
|
86 |
+ trans <- function(x) NULL |
|
99 | 87 |
} |
100 |
- if (!is.null(rownames(M))) rownames(M) <- NULL |
|
101 |
- if (!is.null(rownames(Cov))) rownames(Cov) <- NULL |
|
102 |
- if (!is.null(names(gr))) names(gr) <- NULL |
|
103 |
- |
|
104 |
- # Process 'sampleNames' and 'pData' |
|
105 |
- if (is.null(pData)) { |
|
106 |
- if (is.null(sampleNames)) { |
|
107 |
- if (!is.null(colnames(M))) { |
|
108 |
- sampleNames <- colnames(M) |
|
109 |
- } else if (!is.null(colnames(Cov))) { |
|
110 |
- sampleNames <- colnames(Cov) |
|
88 |
+ if (is.null(parameters)) { |
|
89 |
+ parameters <- list() |
|
90 |
+ } |
|
91 |
+ # Process 'sampleNames' and 'pData'. |
|
92 |
+ if (is.null(sampleNames)) { |
|
93 |
+ if (is.null(pData)) { |
|
94 |
+ # BSseq object will have no colnames. |
|
95 |
+ pData <- S4Vectors:::new_DataFrame(nrows = ncol(M)) |
|
96 |
+ } else { |
|
97 |
+ # BSseq object will have 'sampleNames' as colnames. |
|
98 |
+ pData <- DataFrame(row.names = sampleNames) |
|
99 |
+ } |
|
100 |
+ } else { |
|
101 |
+ if (is.null(pData)) { |
|
102 |
+ # BSseq object will have 'sampleNames' as colnames. |
|
103 |
+ pData <- DataFrame(row.names = sampleNames) |
|
104 |
+ } else { |
|
105 |
+ if (is.null(rownames(pData))) { |
|
106 |
+ rownames(pData) <- sampleNames |
|
111 | 107 |
} else { |
112 |
- sampleNames <- paste0("V", seq_len(ncol(M))) |
|
108 |
+ stopifnot(identical(rownames(pData), sampleNames)) |
|
113 | 109 |
} |
114 | 110 |
} |
115 |
- pData <- DataFrame(row.names = sampleNames) |
|
116 |
- } else { |
|
117 |
- pData <- as(pData, "DataFrame") |
|
118 | 111 |
} |
119 |
- if (is.null(sampleNames) && !is.null(pData) && !is.null(rownames(pData))) { |
|
120 |
- sampleNames <- rownames(pData) |
|
112 |
+ # Process 'gr', 'pos', and 'chr'. |
|
113 |
+ if (is.null(gr)) { |
|
114 |
+ if (is.null(pos) || is.null(chr)) { |
|
115 |
+ stop("Need 'pos' and 'chr' if 'gr' not supplied.") |
|
116 |
+ } |
|
117 |
+ gr <- GRanges(seqnames = chr, ranges = IRanges(start = pos, width = 1L)) |
|
121 | 118 |
} |
122 |
- if (length(unique(sampleNames)) != ncol(M)) { |
|
123 |
- stop("sampleNames need to be unique and of the right length.") |
|
119 |
+ if (!is(gr, "GRanges")) { |
|
120 |
+ stop("'gr' needs to be a GRanges.") |
|
124 | 121 |
} |
122 |
+ # Process 'rmZeroCov'. |
|
123 |
+ stopifnot(isTRUEorFALSE(rmZeroCov)) |
|
125 | 124 |
|
126 |
- # TODO: Is this really necessary? It complicates things because HDF5Matrix |
|
127 |
- # will be degraded to a DelayedMatrix |
|
128 |
- # Add column names to assays |
|
129 |
- if (is.null(colnames(M)) || any(sampleNames != colnames(M))) { |
|
130 |
- colnames(M) <- sampleNames |
|
131 |
- } |
|
132 |
- if (is.null(colnames(Cov)) || any(sampleNames != colnames(Cov))) { |
|
133 |
- colnames(Cov) <- sampleNames |
|
134 |
- } |
|
135 |
- if (!is.null(coef)) { |
|
136 |
- if (nrow(coef) != nrow(M) || ncol(coef) != ncol(M)) { |
|
137 |
- stop("'coef' does not have the right dimensions") |
|
138 |
- } |
|
139 |
- if (is.null(colnames(coef)) || any(sampleNames != colnames(coef))) { |
|
140 |
- colnames(coef) <- sampleNames |
|
141 |
- } |
|
142 |
- if (!is.null(rownames(coef))) { |
|
143 |
- rownames(coef) <- NULL |
|
144 |
- } |
|
145 |
- } |
|
146 |
- if (!is.null(se.coef)) { |
|
147 |
- if (nrow(se.coef) != nrow(M) || ncol(se.coef) != ncol(M)) { |
|
148 |
- stop("'se.coef' does not have the right dimensions") |
|
149 |
- } |
|
150 |
- if (is.null(colnames(se.coef)) || |
|
151 |
- any(sampleNames != colnames(se.coef))) { |
|
152 |
- colnames(se.coef) <- sampleNames |
|
153 |
- } |
|
154 |
- if (!is.null(rownames(se.coef))) { |
|
155 |
- rownames(se.coef) <- NULL |
|
125 |
+ # Collapse duplicate loci -------------------------------------------------- |
|
126 |
+ |
|
127 |
+ is_duplicated <- duplicated(gr) |
|
128 |
+ if (any(is_duplicated)) { |
|
129 |
+ warning("Detected duplicate loci. Collapsing counts in 'M' and 'Cov' ", |
|
130 |
+ "at these positions.") |
|
131 |
+ if (!is.null(coef) || !is.null(se.coef)) { |
|
132 |
+ stop("Cannot collapse when 'coef' or 'se.coef' are non-NULL.") |
|
156 | 133 |
} |
134 |
+ loci <- gr[!is_duplicated] |
|
135 |
+ ol <- findOverlaps(gr, loci, type = "equal") |
|
136 |
+ M <- rowsum(x = M, group = subjectHits(ol), reorder = FALSE) |
|
137 |
+ rownames(M) <- NULL |
|
138 |
+ Cov <- rowsum(x = Cov, group = subjectHits(ol), reorder = FALSE) |
|
139 |
+ rownames(Cov) <- NULL |
|
140 |
+ } else { |
|
141 |
+ loci <- gr |
|
157 | 142 |
} |
158 | 143 |
|
159 |
- # Optionally, remove positions with Cov == 0 |
|
144 |
+ # Optionally, remove positions with zero coverage -------------------------- |
|
145 |
+ |
|
160 | 146 |
if (rmZeroCov) { |
161 | 147 |
loci_with_zero_cov <- rowAlls(Cov, value = 0) |
162 | 148 |
if (any(loci_with_zero_cov)) { |
163 |
- gr <- gr[!loci_with_zero_cov] |
|
164 |
- M <- M[!loci_with_zero_cov, , drop = FALSE] |
|
165 |
- Cov <- Cov[!loci_with_zero_cov, , drop = FALSE] |
|
149 |
+ loci_with_nonzero_cov <- !loci_with_zero_cov |
|
150 |
+ gr <- gr[loci_with_nonzero_cov] |
|
151 |
+ M <- M[loci_with_nonzero_cov, , drop = FALSE] |
|
152 |
+ Cov <- Cov[loci_with_nonzero_cov, , drop = FALSE] |
|
153 |
+ coef <- coef[loci_with_nonzero_cov, , drop = FALSE] |
|
154 |
+ se.coef <- se.coef[loci_with_nonzero_cov, , drop = FALSE] |
|
166 | 155 |
} |
167 | 156 |
} |
168 | 157 |
|
169 |
- # Collapse duplicate loci |
|
170 |
- if (any(duplicated(gr))) { |
|
171 |
- warning("Detected duplicate loci. Collapsing counts in 'M' and 'Cov' ", |
|
172 |
- "at these positions.") |
|
173 |
- if (!is.null(coef) || !is.null(se.coef)) { |
|
174 |
- stop("Cannot collapse when 'coef' or 'se.coef' are present.") |
|
175 |
- } |
|
176 |
- unique_gr <- unique(gr) |
|
177 |
- ol <- findOverlaps(unique_gr, gr, type = "equal") |
|
178 |
- gr <- unique_gr |
|
179 |
- # TODO: Can we avoid making `idx`? |
|
180 |
- idx <- as.list(ol) |
|
181 |
- M <- .collapseMatrixLike(M, idx, MARGIN = 2L) |
|
182 |
- Cov <- .collapseMatrixLike(Cov, idx, MARGIN = 2L) |
|
183 |
- } |
|
158 |
+ # Construct BSseq object --------------------------------------------------- |
|
184 | 159 |
|
185 |
- # Construct BSseq object |
|
186 | 160 |
assays <- SimpleList(M = M, Cov = Cov, coef = coef, se.coef = se.coef) |
187 | 161 |
assays <- assays[!S4Vectors:::sapply_isNULL(assays)] |
188 |
- se <- SummarizedExperiment(assays = assays, rowRanges = gr, colData = pData) |
|
189 |
- if (is.null(parameters)) { |
|
190 |
- parameters <- list() |
|
191 |
- } |
|
192 |
- if (is.null(trans)) { |
|
193 |
- trans <- function(x) NULL |
|
194 |
- } |
|
162 |
+ se <- SummarizedExperiment( |
|
163 |
+ assays = assays, |
|
164 |
+ rowRanges = loci, |
|
165 |
+ colData = pData) |
|
195 | 166 |
.BSseq(se, trans = trans, parameters = parameters) |
196 | 167 |
} |
197 | 168 |
|
198 |
-hasBeenSmoothed <- function(BSseq) "coef" %in% assayNames(BSseq) |
|
169 |
+# Move to BSseq-utils? |
|
170 |
+hasBeenSmoothed <- function(BSseq) { |
|
171 |
+ "coef" %in% assayNames(BSseq) |
|
172 |
+} |
|
199 | 173 |
|
200 |
-# TODO: Document withDimnames |
|
174 |
+# Move to BSseq-utils? |
|
201 | 175 |
getBSseq <- function(BSseq, |
202 | 176 |
type = c("Cov", "M", "gr", "coef", "se.coef", "trans", |
203 | 177 |
"parameters"), |
... | ... |
@@ -206,33 +180,135 @@ getBSseq <- function(BSseq, |
206 | 180 |
if (type %in% c("M", "Cov")) { |
207 | 181 |
return(assay(BSseq, type, withDimnames = withDimnames)) |
208 | 182 |
} |
209 |
- if (type %in% c("coef", "se.coef") && type %in% assayNames(BSseq)) { |
|
210 |
- return(assay(BSseq, type, withDimnames = withDimnames)) |
|
183 |
+ if (type %in% c("coef", "se.coef")) { |
|
184 |
+ if (type %in% assayNames(BSseq)) { |
|
185 |
+ return(assay(BSseq, type, withDimnames = withDimnames)) |
|
186 |
+ } else { |
|
187 |
+ return(NULL) |
|
188 |
+ } |
|
189 |
+ } |
|
190 |
+ if (type == "trans") { |
|
191 |
+ return(BSseq@trans) |
|
192 |
+ } |
|
193 |
+ if (type == "parameters") { |
|
194 |
+ return(BSseq@parameters) |
|
195 |
+ } |
|
196 |
+ if (type == "gr") { |
|
197 |
+ return(BSseq@rowRanges) |
|
211 | 198 |
} |
212 |
- if (type %in% c("coef", "se.coef")) return(NULL) |
|
213 |
- if (type == "trans") return(BSseq@trans) |
|
214 |
- if (type == "parameters") return(BSseq@parameters) |
|
215 |
- if (type == "gr") return(BSseq@rowRanges) |
|
216 |
- |
|
217 | 199 |
} |
218 | 200 |
|
219 |
-strandCollapse <- function(BSseq, shift = TRUE) { |
|
201 |
+# Move to BSseq-utils? |
|
202 |
+strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(), |
|
203 |
+ BACKEND = getRealizationBackend(), |
|
204 |
+ dir = tempfile("BSseq"), replace = FALSE, |
|
205 |
+ chunkdim = NULL, level = NULL, |
|
206 |
+ type = c("double", "integer")) { |
|
207 |
+ |
|
208 |
+ # Argument checks ---------------------------------------------------------- |
|
209 |
+ |
|
220 | 210 |
if (all(runValue(strand(BSseq)) == "*")) { |
221 |
- warning("All loci are unstranded; nothing to collapse") |
|
211 |
+ warning("All loci are unstranded, nothing to collapse.", call. = FALSE) |
|
222 | 212 |
return(BSseq) |
223 | 213 |
} |
224 | 214 |
if (!(all(runValue(strand(BSseq)) %in% c("+", "-")))) { |
225 | 215 |
stop("'BSseq' object has a mix of stranded and unstranded loci.") |
226 | 216 |
} |
227 |
- BS.forward <- BSseq[strand(BSseq) == "+"] |
|
228 |
- strand(BS.forward) <- "*" |
|
229 |
- BS.reverse <- BSseq[strand(BSseq) == "-"] |
|
230 |
- strand(BS.reverse) <- "*" |
|
231 |
- if (shift) rowRanges(BS.reverse) <- shift(rowRanges(BS.reverse), -1L) |
|
232 |
- sampleNames(BS.reverse) <- paste0(sampleNames(BS.reverse), "_REVERSE") |
|
233 |
- BS.comb <- combine(BS.forward, BS.reverse) |
|
234 |
- newBSseq <- collapseBSseq(BS.comb, columns = rep(sampleNames(BSseq), 2)) |
|
235 |
- newBSseq |
|
217 |
+ # Register 'BACKEND' and return to current value on exit. |
|
218 |
+ # TODO: Is this strictly necessary? |
|
219 |
+ current_BACKEND <- getRealizationBackend() |
|
220 |
+ on.exit(setRealizationBackend(current_BACKEND), add = TRUE) |
|
221 |
+ setRealizationBackend(BACKEND) |
|
222 |
+ # Check compatability of 'BPPARAM' with 'BACKEND'. |
|
223 |
+ if (!.areBackendsInMemory(BACKEND)) { |
|
224 |
+ if (!.isSingleMachineBackend(BPPARAM)) { |
|
225 |
+ stop("The parallelisation strategy must use a single machine ", |
|
226 |
+ "when using an on-disk realization backend.\n", |
|
227 |
+ "See help(\"read.bismark\") for details.", |
|
228 |
+ call. = FALSE) |
|
229 |
+ } |
|
230 |
+ } else { |
|
231 |
+ if (!is.null(BACKEND)) { |
|
232 |
+ # NOTE: Currently do not support any in-memory realization |
|
233 |
+ # backends. If the realization backend is NULL then an |
|
234 |
+ # ordinary matrix is returned rather than a matrix-backed |
|
235 |
+ # DelayedMatrix. |
|
236 |
+ stop("The '", BACKEND, "' realization backend is not supported.", |
|
237 |
+ "\n See help(\"read.bismark\") for details.", |
|
238 |
+ call. = FALSE) |
|
239 |
+ } |
|
240 |
+ } |
|
241 |
+ # If using HDF5Array as BACKEND, check remaining options are sensible. |
|
242 |
+ if (identical(BACKEND, "HDF5Array")) { |
|
243 |
+ # NOTE: Most of this copied from |
|
244 |
+ # HDF5Array::saveHDF5SummarizedExperiment(). |
|
245 |
+ if (!isSingleString(dir)) { |
|
246 |
+ stop(wmsg("'dir' must be a single string specifying the path to ", |
|
247 |
+ "the directory where to save the BSseq object (the ", |
|
248 |
+ "directory will be created).")) |
|
249 |
+ } |
|
250 |
+ if (!isTRUEorFALSE(replace)) { |
|
251 |
+ stop("'replace' must be TRUE or FALSE") |
|
252 |
+ } |
|
253 |
+ HDF5Array:::.create_dir(dir = dir, replace = replace) |
|
254 |
+ h5_path <- file.path(dir, "assays.h5") |
|
255 |
+ } else if (identical(BACKEND, NULL)) { |
|
256 |
+ h5_path <- NULL |
|
257 |
+ } |
|
258 |
+ |
|
259 |
+ # Collapse loci ------------------------------------------------------------ |
|
260 |
+ |
|
261 |
+ loci <- rowRanges(BSseq) |
|
262 |
+ if (shift) { |
|
263 |
+ loci <- shift(loci, shift = as.integer(-1L * (strand(loci) == "-"))) |
|
264 |
+ } |
|
265 |
+ collapsed_loci <- reduce(loci, min.gapwidth = 0L, ignore.strand = TRUE) |
|
266 |
+ |
|
267 |
+ # Collapse 'M' and 'Cov' matrices ------------------------------------------ |
|
268 |
+ |
|
269 |
+ ol <- findOverlaps(loci, collapsed_loci, type = "equal") |
|
270 |
+ group <- subjectHits(ol) |
|
271 |
+ M <- rowsum( |
|
272 |
+ x = assay(BSseq, "M", withDimnames = FALSE), |
|
273 |
+ group = group, |
|
274 |
+ # NOTE: reorder = TRUE to ensure same row-order as collapsed_loci. |
|
275 |
+ reorder = TRUE, |
|
276 |
+ BPPARAM = BPPARAM, |
|
277 |
+ filepath = h5_path, |
|
278 |
+ name = "Cov", |
|
279 |
+ chunkdim = chunkdim, |
|
280 |
+ level = level, |
|
281 |
+ type = type) |
|
282 |
+ Cov <- rowsum( |
|
283 |
+ x = assay(BSseq, "Cov", withDimnames = FALSE), |
|
284 |
+ group = group, |
|
285 |
+ # NOTE: reorder = TRUE to ensure same row-order as collapsed_loci. |
|
286 |
+ reorder = TRUE, |
|
287 |
+ BPPARAM = BPPARAM, |
|
288 |
+ filepath = h5_path, |
|
289 |
+ name = "Cov", |
|
290 |
+ chunkdim = chunkdim, |
|
291 |
+ level = level, |
|
292 |
+ type = type) |
|
293 |
+ |
|
294 |
+ # Construct BSseq object, saving it if it is HDF5-backed ------------------- |
|
295 |
+ |
|
296 |
+ se <- SummarizedExperiment( |
|
297 |
+ assays = SimpleList(M = unname(M), Cov = unname(Cov)), |
|
298 |
+ rowRanges = collapsed_loci, |
|
299 |
+ colData = colData(BSseq)) |
|
300 |
+ # TODO: Is there a way to use the internal constructor with `check = FALSE`? |
|
301 |
+ # Assuming input was valid, the output is valid, too. |
|
302 |
+ # .BSseq(se, trans = function(x) NULL, parameters = list()) |
|
303 |
+ bsseq <- new2("BSseq", se, check = FALSE) |
|
304 |
+ if (!is.null(BACKEND) && BACKEND == "HDF5Array") { |
|
305 |
+ # NOTE: Save BSseq object; mimicing |
|
306 |
+ # HDF5Array::saveHDF5SummarizedExperiment(). |
|
307 |
+ x <- bsseq |
|
308 |
+ x@assays <- HDF5Array:::.shorten_h5_paths(x@assays) |
|
309 |
+ saveRDS(x, file = file.path(dir, "se.rds")) |
|
310 |
+ } |
|
311 |
+ bsseq |
|
236 | 312 |
} |
237 | 313 |
|
238 | 314 |
# Exported methods ------------------------------------------------------------- |
... | ... |
@@ -290,7 +366,8 @@ setReplaceMethod( |
290 | 366 |
setMethod("updateObject", "BSseq", |
291 | 367 |
function(object, ...) { |
292 | 368 |
# NOTE: identical() is too strong |
293 |
- if (isTRUE(all.equal(getBSseq(object, "trans"), .oldTrans))) { |
|
369 |
+ if (hasBeenSmoothed(object) & |
|
370 |
+ isTRUE(all.equal(getBSseq(object, "trans"), .oldTrans))) { |
|
294 | 371 |
object@trans <- plogis |
295 | 372 |
} |
296 | 373 |
if (is(object, "SummarizedExperiment")) { |
... | ... |
@@ -298,14 +375,15 @@ setMethod("updateObject", "BSseq", |
298 | 375 |
object <- callNextMethod() |
299 | 376 |
return(object) |
300 | 377 |
} else { |
301 |
- BSseq(gr = object@gr, |
|
302 |
- M = object@M, |
|
303 |
- Cov = object@Cov, |
|
304 |
- coef = object@coef, |
|
305 |
- se.coef = object@se.coef, |
|
306 |
- trans = object@trans, |
|
307 |
- parameters = object@parameters, |
|
308 |
- pData = object@phenoData@data) |
|
378 |
+ BSseq( |
|
379 |
+ gr = object@gr, |
|
380 |
+ M = object@M, |
|
381 |
+ Cov = object@Cov, |
|
382 |
+ coef = object@coef, |
|
383 |
+ se.coef = object@se.coef, |
|
384 |
+ trans = object@trans, |
|
385 |
+ parameters = object@parameters, |
|
386 |
+ pData = object@phenoData@data) |
|
309 | 387 |
} |
310 | 388 |
} |
311 | 389 |
) |
- Addressed #71, thanks @scottgigante
- Removes undocumented behaviour of BSseq() sorting the output. In previous versions, this was done via `reduce(gr)`; see https://github.com/hansenlab/bsseq/blob/RELEASE_3_7/R/BSseq_class.R#L176-L223. In the refactor branch I had changed it to use order(gr). Now, it is completely removed.
... | ... |
@@ -69,10 +69,12 @@ setValidity2("BSseq", function(object) { |
69 | 69 |
|
70 | 70 |
# Exported functions ----------------------------------------------------------- |
71 | 71 |
|
72 |
+# TODO: Document that if duplicate loci are detected then the output `M` and |
|
73 |
+# `Cov` will be numeric even if input `M` and `Cov` were integer. |
|
72 | 74 |
# TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq |
73 | 75 |
# object when called without any arguments. It also does some pretty |
74 |
-# complicated parsing of the inputs. Still, I think we're stuck with it |
|
75 |
-# because it's been around for a long time. |
|
76 |
+# complicated parsing of the inputs. But we're stuck with it because it's |
|
77 |
+# been around for a long time. |
|
76 | 78 |
BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
77 | 79 |
trans = NULL, parameters = NULL, pData = NULL, |
78 | 80 |
gr = NULL, pos = NULL, chr = NULL, sampleNames = NULL, |
... | ... |
@@ -171,27 +173,15 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
171 | 173 |
if (!is.null(coef) || !is.null(se.coef)) { |
172 | 174 |
stop("Cannot collapse when 'coef' or 'se.coef' are present.") |
173 | 175 |
} |
174 |
- # NOTE: reduce() sorts the output |
|
175 |
- # TODO: Clarify above comment. |
|
176 | 176 |
unique_gr <- unique(gr) |
177 |
- # TODO: type = "equal"? |
|
178 |
- ol <- findOverlaps(unique_gr, gr) |
|
179 |
- gr <- unique(gr) |
|
177 |
+ ol <- findOverlaps(unique_gr, gr, type = "equal") |
|
178 |
+ gr <- unique_gr |
|
179 |
+ # TODO: Can we avoid making `idx`? |
|
180 | 180 |
idx <- as.list(ol) |
181 | 181 |
M <- .collapseMatrixLike(M, idx, MARGIN = 2L) |
182 | 182 |
Cov <- .collapseMatrixLike(Cov, idx, MARGIN = 2L) |
183 | 183 |
} |
184 | 184 |
|
185 |
- # Sort loci |
|
186 |
- if (is.unsorted(gr)) { |
|
187 |
- o <- order(gr) |
|
188 |
- gr <- gr[o] |
|
189 |
- M <- M[o, , drop = FALSE] |
|
190 |
- Cov <- Cov[o, , drop = FALSE] |
|
191 |
- coef <- coef[o, , drop = FALSE] |
|
192 |
- se.coef <- se.coef[o, , drop = FALSE] |
|
193 |
- } |
|
194 |
- |
|
195 | 185 |
# Construct BSseq object |
196 | 186 |
assays <- SimpleList(M = M, Cov = Cov, coef = coef, se.coef = se.coef) |
197 | 187 |
assays <- assays[!S4Vectors:::sapply_isNULL(assays)] |
... | ... |
@@ -172,7 +172,9 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
172 | 172 |
stop("Cannot collapse when 'coef' or 'se.coef' are present.") |
173 | 173 |
} |
174 | 174 |
# NOTE: reduce() sorts the output |
175 |
+ # TODO: Clarify above comment. |
|
175 | 176 |
unique_gr <- unique(gr) |
177 |
+ # TODO: type = "equal"? |
|
176 | 178 |
ol <- findOverlaps(unique_gr, gr) |
177 | 179 |
gr <- unique(gr) |
178 | 180 |
idx <- as.list(ol) |
- BSseq objects can once again use ordinary matrix objects as assays.
- Reimplement `BSmooth()` more-or-less from scratch:
- Switch from 'parallel' to 'BiocParallel' for parallelization. This brings some notable improvements:
- Smoothed results can now be written directly to an on-disk realization backend by the worker. This dramatically reduces memory usage compared to previous versions of 'bsseq' that required all results be retained in-memory.
- Parallelization is now supported on Windows through the use of a 'SnowParam' object as the value of `BPPARAM`.
- Improved error handling makes it possible to gracefully resume `BSmooth()` jobs that encountered errors by re-doing only the necessary tasks.
- Detailed and extensive job logging facilities.
- Fix bug in `BSmooth()` with the `maxGap` parameter.
- Re-factor BSseq() constructor and add fast, internal .BSseq() constructor.
- Re-factor collapseBSseq() and combine(). Should be much more performant.
- Use beachmat to implement fast validity checking of 'M' and 'Cov' matrices.
- Resave BS.chr22 (supplied data) using integer for storage mode of assays to reduce size.
- Switch from RUnit to testthat. testthat has better integration with code coverage tools that help when refactoring.
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,318 @@ |
1 |
+# Exported classes ------------------------------------------------------------- |
|
2 |
+ |
|
3 |
+# NOTE: This create a 'classGeneratorFunction' (internal constructor), .BSseq() |
|
4 |
+.BSseq <- setClass( |
|
5 |
+ "BSseq", |
|
6 |
+ slots = representation( |
|
7 |
+ trans = "function", |
|
8 |
+ parameters = "list"), |
|
9 |
+ contains = "RangedSummarizedExperiment" |
|
10 |
+) |
|
11 |
+ |
|
12 |
+# Validity methods ------------------------------------------------------------- |
|
13 |
+ |
|
14 |
+.checkAssayNames <- function(object, names) { |
|
15 |
+ if (all(names %in% assayNames(object))) { |
|
16 |
+ return(NULL) |
|
17 |
+ } else { |
|
18 |
+ paste0( |
|
19 |
+ "object of class ", |
|
20 |
+ class(object), |
|
21 |
+ " needs to have assay slots with names ", |
|
22 |
+ paste0(names, collapse = ", ")) |
|
23 |
+ |
|
24 |
+ } |
|
25 |
+} |
|
26 |
+ |
|
27 |
+.checkMandCov <- function(M, Cov) { |
|
28 |
+ msg <- NULL |
|
29 |
+ validMsg(msg, .Call(cxx_check_M_and_Cov, M, Cov)) |
|
30 |
+} |
|
31 |
+ |
|
32 |
+setValidity2("BSseq", function(object) { |
|
33 |
+ msg <- NULL |
|
34 |
+ |
|
35 |
+ if (identical(object, .BSseq())) { |
|
36 |
+ # No validity checks for object returned by internal constructor |
|
37 |
+ return(msg) |
|
38 |
+ } |
|
39 |
+ msg <- validMsg(msg, .checkAssayNames(object, c("Cov", "M"))) |
|
40 |
+ |
|
41 |
+ # TODO: Are colnames strictly necessary? |
|
42 |
+ if (is.null(colnames(object))) { |
|
43 |
+ msg <- validMsg(msg, "colnames (aka sampleNames) need to be set") |
|
44 |
+ } |
|
45 |
+ |
|
46 |
+ assay_rownames <- lapply(assays(object), rownames) |
|
47 |
+ if (!all(S4Vectors:::sapply_isNULL(assay_rownames))) { |
|
48 |
+ msg <- validMsg(msg, "unnecessary rownames on one-or-more assays") |
|
49 |
+ } |
|
50 |
+ |
|
51 |
+ msg <- validMsg(msg, |
|
52 |
+ .checkMandCov(assay(object, "M", withDimnames = FALSE), |
|
53 |
+ assay(object, "Cov", withDimnames = FALSE))) |
|
54 |
+ |
|
55 |
+ if (is.null(msg)) TRUE else msg |
|
56 |
+}) |
|
57 |
+ |
|
58 |
+# Internal functions ----------------------------------------------------------- |
|
59 |
+ |
|
60 |
+.oldTrans <- function(x) { |
|
61 |
+ y <- x |
|
62 |
+ ix <- which(x < 0) |
|
63 |
+ ix2 <- which(x > 0) |
|
64 |
+ y[ix] <- exp(x[ix])/(1 + exp(x[ix])) |
|
65 |
+ y[ix2] <- 1/(1 + exp(-x[ix2])) |
|
66 |
+ y |
|
67 |
+} |
|
68 |
+ |
|
69 |
+# Exported functions ----------------------------------------------------------- |
|
70 |
+ |
|
71 |
+# TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq |
|
72 |
+# object when called without any arguments. It also does some pretty |
|
73 |
+# complicated parsing of the inputs. Still, I think we're stuck with it |
|
74 |
+# because it's been around for a long time. |
|
75 |
+BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL, |
|
76 |
+ trans = NULL, parameters = NULL, pData = NULL, |
|
77 |
+ gr = NULL, pos = NULL, chr = NULL, sampleNames = NULL, |
|
78 |
+ rmZeroCov = FALSE) { |
|
79 |
+ |
|
80 |
+ # Process 'gr', or 'pos' and 'chr' |
|
81 |
+ if (is.null(gr)) { |
|
82 |
+ if (is.null(pos) || is.null(chr)) { |
|
83 |
+ stop("Need 'pos' and 'chr' if 'gr' not supplied.") |
|
84 |
+ } |
|
85 |
+ gr <- GRanges(seqnames = chr, ranges = IRanges(start = pos, width = 1L)) |
|
86 |
+ } |
|
87 |
+ if (!is(gr, "GRanges")) stop("'gr' needs to be a GRanges.") |
|
88 |
+ if (any(width(gr) != 1L)) stop("'gr' needs to have widths of 1.") |
|
89 |
+ |
|
90 |
+ # Process 'M' and 'Cov' |
|
91 |
+ if (is.null(M) || is.null(Cov)) stop("Need 'M' and 'Cov'.") |
|
92 |
+ if (length(gr) != nrow(M) || |
|
93 |
+ length(gr) != nrow(Cov) || |
|
94 |
+ ncol(Cov) != ncol(M)) { |
|
95 |
+ stop("'gr', 'M' and 'Cov' need to have compatible dimensions.") |
|
96 |
+ } |
|
97 |
+ if (!is.null(rownames(M))) rownames(M) <- NULL |
|
98 |
+ if (!is.null(rownames(Cov))) rownames(Cov) <- NULL |
|
99 |
+ if (!is.null(names(gr))) names(gr) <- NULL |
|
100 |
+ |
|
101 |
+ # Process 'sampleNames' and 'pData' |
|
102 |
+ if (is.null(pData)) { |
|
103 |
+ if (is.null(sampleNames)) { |
|
104 |
+ if (!is.null(colnames(M))) { |
|
105 |
+ sampleNames <- colnames(M) |
|
106 |
+ } else if (!is.null(colnames(Cov))) { |
|
107 |
+ sampleNames <- colnames(Cov) |
|
108 |
+ } else { |
|
109 |
+ sampleNames <- paste0("V", seq_len(ncol(M))) |
|
110 |
+ } |
|
111 |
+ } |
|
112 |
+ pData <- DataFrame(row.names = sampleNames) |
|
113 |
+ } else { |
|
114 |
+ pData <- as(pData, "DataFrame") |
|
115 |
+ } |
|
116 |
+ if (is.null(sampleNames) && !is.null(pData) && !is.null(rownames(pData))) { |
|
117 |
+ sampleNames <- rownames(pData) |
|
118 |
+ } |
|
119 |
+ if (length(unique(sampleNames)) != ncol(M)) { |
|
120 |
+ stop("sampleNames need to be unique and of the right length.") |
|
121 |
+ } |
|
122 |
+ |
|
123 |
+ # TODO: Is this really necessary? It complicates things because HDF5Matrix |
|
124 |
+ # will be degraded to a DelayedMatrix |
|
125 |
+ # Add column names to assays |
|
126 |
+ if (is.null(colnames(M)) || any(sampleNames != colnames(M))) { |
|
127 |
+ colnames(M) <- sampleNames |
|
128 |
+ } |
|
129 |
+ if (is.null(colnames(Cov)) || any(sampleNames != colnames(Cov))) { |
|
130 |
+ colnames(Cov) <- sampleNames |
|
131 |
+ } |
|
132 |
+ if (!is.null(coef)) { |
|
133 |
+ if (nrow(coef) != nrow(M) || ncol(coef) != ncol(M)) { |
|
134 |
+ stop("'coef' does not have the right dimensions") |
|
135 |
+ } |
|
136 |
+ if (is.null(colnames(coef)) || any(sampleNames != colnames(coef))) { |
|
137 |
+ colnames(coef) <- sampleNames |
|
138 |
+ } |
|
139 |
+ if (!is.null(rownames(coef))) { |
|
140 |
+ rownames(coef) <- NULL |
|
141 |
+ } |
|
142 |
+ } |
|
143 |
+ if (!is.null(se.coef)) { |
|
144 |
+ if (nrow(se.coef) != nrow(M) || ncol(se.coef) != ncol(M)) { |
|
145 |
+ stop("'se.coef' does not have the right dimensions") |
|
146 |
+ } |
|
147 |
+ if (is.null(colnames(se.coef)) || |
|
148 |
+ any(sampleNames != colnames(se.coef))) { |
|
149 |
+ colnames(se.coef) <- sampleNames |
|
150 |
+ } |
|
151 |
+ if (!is.null(rownames(se.coef))) { |
|
152 |
+ rownames(se.coef) <- NULL |
|
153 |
+ } |
|
154 |
+ } |
|
155 |
+ |
|
156 |
+ # Optionally, remove positions with Cov == 0 |
|
157 |
+ if (rmZeroCov) { |
|
158 |
+ loci_with_zero_cov <- rowAlls(Cov, value = 0) |
|
159 |
+ if (any(loci_with_zero_cov)) { |
|
160 |
+ gr <- gr[!loci_with_zero_cov] |
|
161 |
+ M <- M[!loci_with_zero_cov, , drop = FALSE] |
|
162 |
+ Cov <- Cov[!loci_with_zero_cov, , drop = FALSE] |
|
163 |
+ } |
|
164 |
+ } |
|
165 |
+ |
|
166 |
+ # Collapse duplicate loci |
|
167 |
+ if (any(duplicated(gr))) { |
|
168 |
+ warning("Detected duplicate loci. Collapsing counts in 'M' and 'Cov' ", |
|
169 |
+ "at these positions.") |
|
170 |
+ if (!is.null(coef) || !is.null(se.coef)) { |
|
171 |
+ stop("Cannot collapse when 'coef' or 'se.coef' are present.") |
|
172 |
+ } |
|
173 |
+ # NOTE: reduce() sorts the output |
|
174 |
+ unique_gr <- unique(gr) |
|
175 |
+ ol <- findOverlaps(unique_gr, gr) |
|
176 |
+ gr <- unique(gr) |
|
177 |
+ idx <- as.list(ol) |
|
178 |
+ M <- .collapseMatrixLike(M, idx, MARGIN = 2L) |
|
179 |
+ Cov <- .collapseMatrixLike(Cov, idx, MARGIN = 2L) |
|
180 |
+ } |
|
181 |
+ |
|
182 |
+ # Sort loci |
|
183 |
+ if (is.unsorted(gr)) { |
|
184 |
+ o <- order(gr) |
|
185 |
+ gr <- gr[o] |
|
186 |
+ M <- M[o, , drop = FALSE] |
|
187 |
+ Cov <- Cov[o, , drop = FALSE] |
|
188 |
+ coef <- coef[o, , drop = FALSE] |
|
189 |
+ se.coef <- se.coef[o, , drop = FALSE] |
|
190 |
+ } |
|
191 |
+ |
|
192 |
+ # Construct BSseq object |
|
193 |
+ assays <- SimpleList(M = M, Cov = Cov, coef = coef, se.coef = se.coef) |
|
194 |
+ assays <- assays[!S4Vectors:::sapply_isNULL(assays)] |
|
195 |
+ se <- SummarizedExperiment(assays = assays, rowRanges = gr, colData = pData) |
|
196 |
+ if (is.null(parameters)) { |
|
197 |
+ parameters <- list() |
|
198 |
+ } |
|
199 |
+ if (is.null(trans)) { |
|
200 |
+ trans <- function(x) NULL |
|
201 |
+ } |
|
202 |
+ .BSseq(se, trans = trans, parameters = parameters) |
|
203 |
+} |
|
204 |
+ |
|
205 |
+hasBeenSmoothed <- function(BSseq) "coef" %in% assayNames(BSseq) |
|
206 |
+ |
|
207 |
+# TODO: Document withDimnames |
|
208 |
+getBSseq <- function(BSseq, |
|
209 |
+ type = c("Cov", "M", "gr", "coef", "se.coef", "trans", |
|
210 |
+ "parameters"), |
|
211 |
+ withDimnames = TRUE) { |
|
212 |
+ type <- match.arg(type) |
|
213 |
+ if (type %in% c("M", "Cov")) { |
|
214 |
+ return(assay(BSseq, type, withDimnames = withDimnames)) |
|
215 |
+ } |
|
216 |
+ if (type %in% c("coef", "se.coef") && type %in% assayNames(BSseq)) { |
|
217 |
+ return(assay(BSseq, type, withDimnames = withDimnames)) |
|
218 |
+ } |
|
219 |
+ if (type %in% c("coef", "se.coef")) return(NULL) |
|
220 |
+ if (type == "trans") return(BSseq@trans) |
|
221 |
+ if (type == "parameters") return(BSseq@parameters) |
|
222 |
+ if (type == "gr") return(BSseq@rowRanges) |
|
223 |
+ |
|
224 |
+} |
|
225 |
+ |
|
226 |
+strandCollapse <- function(BSseq, shift = TRUE) { |
|
227 |
+ if (all(runValue(strand(BSseq)) == "*")) { |
|
228 |
+ warning("All loci are unstranded; nothing to collapse") |
|
229 |
+ return(BSseq) |
|
230 |
+ } |
|
231 |
+ if (!(all(runValue(strand(BSseq)) %in% c("+", "-")))) { |
|
232 |
+ stop("'BSseq' object has a mix of stranded and unstranded loci.") |
|
233 |
+ } |
|
234 |
+ BS.forward <- BSseq[strand(BSseq) == "+"] |
|
235 |
+ strand(BS.forward) <- "*" |
|
236 |
+ BS.reverse <- BSseq[strand(BSseq) == "-"] |
|
237 |
+ strand(BS.reverse) <- "*" |
|
238 |
+ if (shift) rowRanges(BS.reverse) <- shift(rowRanges(BS.reverse), -1L) |
|
239 |
+ sampleNames(BS.reverse) <- paste0(sampleNames(BS.reverse), "_REVERSE") |
|
240 |
+ BS.comb <- combine(BS.forward, BS.reverse) |
|
241 |
+ newBSseq <- collapseBSseq(BS.comb, columns = rep(sampleNames(BSseq), 2)) |
|
242 |
+ newBSseq |
|
243 |
+} |
|
244 |
+ |
|
245 |
+# Exported methods ------------------------------------------------------------- |
|
246 |
+ |
|
247 |
+setMethod("show", signature(object = "BSseq"), function(object) { |
|
248 |
+ cat("An object of type 'BSseq' with\n") |
|
249 |
+ cat(" ", nrow(object), "methylation loci\n") |
|
250 |
+ cat(" ", ncol(object), "samples\n") |
|
251 |
+ if (hasBeenSmoothed(object)) { |
|
252 |
+ cat("has been smoothed with\n") |
|
253 |
+ cat(" ", object@parameters$smoothText, "\n") |
|
254 |
+ } else { |
|
255 |
+ cat("has not been smoothed\n") |
|
256 |
+ } |
|
257 |
+ if (.isHDF5ArrayBacked(object)) { |