Browse code

Use && instead of & in if condition

&& is always preferred over & in if conditions as it prevents the
right operand of the && operation from being unnecessarily evaluated.
In this particular case this change preventively repairs the
updateObject() method for BSseq objects that will otherwise break
on objects with DataFrame instances in them when DataFrame becomes
a virtual class in the S4Vectors package (will happen soon).

Hervé Pagès authored on 02/11/2021 01:52:48
Showing 1 changed files
... ...
@@ -371,7 +371,7 @@ setReplaceMethod(
371 371
 setMethod("updateObject", "BSseq",
372 372
           function(object, ...) {
373 373
               # NOTE: identical() is too strong
374
-              if (hasBeenSmoothed(object) &
374
+              if (hasBeenSmoothed(object) &&
375 375
                   isTRUE(all.equal(getBSseq(object, "trans"), .oldTrans))) {
376 376
                   object@trans <- plogis
377 377
               }
Browse code

resync with HDF5Array 1.19.11

Hervé Pagès authored on 26/04/2021 05:55:03
Showing 1 changed files
... ...
@@ -252,9 +252,9 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
252 252
             stop("'replace' must be TRUE or FALSE")
253 253
         }
254 254
         if (!dir.exists(dir)) {
255
-            HDF5Array:::.create_dir(dir)
255
+            HDF5Array::create_dir(dir)
256 256
         } else {
257
-            HDF5Array:::.replace_dir(dir, replace)
257
+            HDF5Array::replace_dir(dir, replace)
258 258
         }
259 259
         h5_path <- file.path(dir, "assays.h5")
260 260
     } else if (identical(BACKEND, NULL)) {
... ...
@@ -310,7 +310,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
310 310
         # NOTE: Save BSseq object; mimicing
311 311
         #       HDF5Array::saveHDF5SummarizedExperiment().
312 312
         x <- bsseq
313
-        x@assays <- HDF5Array:::.shorten_assay2h5_links(x@assays)
313
+        x@assays <- HDF5Array::shorten_assay2h5_links(x@assays)
314 314
         saveRDS(x, file = file.path(dir, "se.rds"))
315 315
     }
316 316
     bsseq
Browse code

more environment fix

kasperdanielhansen authored on 23/03/2021 08:07:57
Showing 1 changed files
... ...
@@ -83,7 +83,7 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
83 83
     }
84 84
     # Process 'trans' and 'parameters'.
85 85
     if (is.null(trans)) {
86
-        trans <- function(q) NULL
86
+        trans <- function() NULL
87 87
         environment(trans) <- emptyenv()
88 88
     }
89 89
     if (is.null(parameters)) {
Browse code

fixing a test error

Kasper Daniel Hansen authored on 15/03/2021 23:01:13
Showing 1 changed files
... ...
@@ -83,7 +83,8 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
83 83
     }
84 84
     # Process 'trans' and 'parameters'.
85 85
     if (is.null(trans)) {
86
-        trans <- function() NULL
86
+        trans <- function(q) NULL
87
+        environment(trans) <- emptyenv()
87 88
     }
88 89
     if (is.null(parameters)) {
89 90
         parameters <- list()
Browse code

Resync with DelayedArray 0.15.10

Hervé Pagès authored on 27/09/2020 01:24:50
Showing 1 changed files
... ...
@@ -200,7 +200,7 @@ getBSseq <- function(BSseq,
200 200
 
201 201
 # Move to BSseq-utils?
202 202
 strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
203
-                           BACKEND = getRealizationBackend(),
203
+                           BACKEND = getAutoRealizationBackend(),
204 204
                            dir = tempfile("BSseq"), replace = FALSE,
205 205
                            chunkdim = NULL, level = NULL,
206 206
                            type = c("double", "integer")) {
... ...
@@ -216,9 +216,9 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
216 216
     }
217 217
     # Register 'BACKEND' and return to current value on exit.
218 218
     # TODO: Is this strictly necessary?
219
-    current_BACKEND <- getRealizationBackend()
220
-    on.exit(setRealizationBackend(current_BACKEND), add = TRUE)
221
-    setRealizationBackend(BACKEND)
219
+    current_BACKEND <- getAutoRealizationBackend()
220
+    on.exit(setAutoRealizationBackend(current_BACKEND), add = TRUE)
221
+    setAutoRealizationBackend(BACKEND)
222 222
     # Check compatability of 'BPPARAM' with 'BACKEND'.
223 223
     if (!.areBackendsInMemory(BACKEND)) {
224 224
         if (!.isSingleMachineBackend(BPPARAM)) {
Browse code

Resync with latest internal changes to S4Vectors and IRanges

Hervé Pagès authored on 09/06/2020 01:36:15
Showing 1 changed files
... ...
@@ -92,7 +92,7 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
92 92
     if (is.null(sampleNames)) {
93 93
         if (is.null(pData)) {
94 94
             # BSseq object will have no colnames.
95
-            pData <- S4Vectors:::new_DataFrame(nrows = ncol(M))
95
+            pData <- make_zero_col_DFrame(ncol(M))
96 96
         } else {
97 97
             # BSseq object will have 'sampleNames' as colnames.
98 98
             pData <- DataFrame(row.names = sampleNames)
Browse code

Add .colsum() and .rowsum()

- Temporary workaround for https://github.com/Bioconductor/DelayedArray/issues/41
- Ensures bsseq passes checks for next BioC release

Peter Hickey authored on 23/04/2019 02:33:24
Showing 1 changed files
... ...
@@ -272,7 +272,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
272 272
 
273 273
     ol <- findOverlaps(loci, collapsed_loci, type = "equal")
274 274
     group <- subjectHits(ol)
275
-    M <- rowsum(
275
+    M <- .rowsum(
276 276
         x = assay(BSseq, "M", withDimnames = FALSE),
277 277
         group = group,
278 278
         # NOTE: reorder = TRUE to ensure same row-order as collapsed_loci.
... ...
@@ -283,7 +283,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
283 283
         chunkdim = chunkdim,
284 284
         level = level,
285 285
         type = type)
286
-    Cov <- rowsum(
286
+    Cov <- .rowsum(
287 287
         x = assay(BSseq, "Cov", withDimnames = FALSE),
288 288
         group = group,
289 289
         # NOTE: reorder = TRUE to ensure same row-order as collapsed_loci.
Browse code

resync with latest changes to HDF5Array package

Hervé Pagès authored on 13/12/2018 20:18:12
Showing 1 changed files
... ...
@@ -250,7 +250,11 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
250 250
         if (!isTRUEorFALSE(replace)) {
251 251
             stop("'replace' must be TRUE or FALSE")
252 252
         }
253
-        HDF5Array:::.create_dir(dir = dir, replace = replace)
253
+        if (!dir.exists(dir)) {
254
+            HDF5Array:::.create_dir(dir)
255
+        } else {
256
+            HDF5Array:::.replace_dir(dir, replace)
257
+        }
254 258
         h5_path <- file.path(dir, "assays.h5")
255 259
     } else if (identical(BACKEND, NULL)) {
256 260
         h5_path <- NULL
... ...
@@ -305,7 +309,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
305 309
         # NOTE: Save BSseq object; mimicing
306 310
         #       HDF5Array::saveHDF5SummarizedExperiment().
307 311
         x <- bsseq
308
-        x@assays <- HDF5Array:::.shorten_h5_paths(x@assays)
312
+        x@assays <- HDF5Array:::.shorten_assay2h5_links(x@assays)
309 313
         saveRDS(x, file = file.path(dir, "se.rds"))
310 314
     }
311 315
     bsseq
Browse code

Close issue #74

- Small tweak to default @trans function in BSseq() and combine().
- Add unit test

Peter Hickey authored on 15/10/2018 05:04:53
Showing 1 changed files
... ...
@@ -83,7 +83,7 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
83 83
     }
84 84
     # Process 'trans' and 'parameters'.
85 85
     if (is.null(trans)) {
86
-        trans <- function(x) NULL
86
+        trans <- function() NULL
87 87
     }
88 88
     if (is.null(parameters)) {
89 89
         parameters <- list()
Browse code

Simplify BSseq() and BSseq validity method, refactor strandCollapse()

Peter Hickey authored on 17/09/2018 11:07:27
Showing 1 changed files
... ...
@@ -39,21 +39,17 @@ setValidity2("BSseq", function(object) {
39 39
     }
40 40
     msg <- validMsg(msg, .checkAssayNames(object, c("Cov", "M")))
41 41
 
42
-    # TODO: Are colnames strictly necessary?
43
-    if (is.null(colnames(object))) {
44
-        msg <- validMsg(msg, "colnames (aka sampleNames) need to be set")
45
-    }
46
-
47
-    assay_rownames <- lapply(assays(object), rownames)
48
-    if (!all(S4Vectors:::sapply_isNULL(assay_rownames))) {
49
-        msg <- validMsg(msg, "unnecessary rownames on one-or-more assays")
42
+    msg <- validMsg(
43
+        msg = msg,
44
+        result = .checkMandCov(
45
+            M = assay(object, "M", withDimnames = FALSE),
46
+            Cov = assay(object, "Cov", withDimnames = FALSE)))
47
+
48
+    if (is.null(msg)) {
49
+        TRUE
50
+    } else {
51
+        msg
50 52
     }
51
-
52
-    msg <- validMsg(msg,
53
-                    .checkMandCov(assay(object, "M", withDimnames = FALSE),
54
-                                  assay(object, "Cov", withDimnames = FALSE)))
55
-
56
-    if (is.null(msg)) TRUE else msg
57 53
 })
58 54
 
59 55
 # Internal functions -----------------------------------------------------------
... ...
@@ -69,135 +65,113 @@ setValidity2("BSseq", function(object) {
69 65
 
70 66
 # Exported functions -----------------------------------------------------------
71 67
 
72
-# TODO: Document that if duplicate loci are detected then the output `M` and
73
-#       `Cov` will be numeric even if input `M` and `Cov` were integer.
74 68
 # TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq
75 69
 #       object when called without any arguments. It also does some pretty
76 70
 #       complicated parsing of the inputs. But we're stuck with it because it's
77 71
 #       been around for a long time.
78 72
 BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
79
-                  trans = NULL, parameters = NULL, pData = NULL,
80
-                  gr = NULL, pos = NULL, chr = NULL, sampleNames = NULL,
73
+                  trans = NULL, parameters = NULL, pData = NULL, gr = NULL,
74
+                  pos = NULL, chr = NULL, sampleNames = NULL,
81 75
                   rmZeroCov = FALSE) {
82 76
 
83
-    # Process 'gr', or 'pos' and 'chr'
84
-    if (is.null(gr)) {
85
-        if (is.null(pos) || is.null(chr)) {
86
-            stop("Need 'pos' and 'chr' if 'gr' not supplied.")
87
-        }
88
-        gr <- GRanges(seqnames = chr, ranges = IRanges(start = pos, width = 1L))
77
+    # Argument checks ----------------------------------------------------------
78
+
79
+    # Process assays.
80
+    # NOTE: Nothing to do for 'coef', and 'se.coef'.
81
+    if (is.null(M) || is.null(Cov)) {
82
+        stop("Need 'M' and 'Cov'.")
89 83
     }
90
-    if (!is(gr, "GRanges")) stop("'gr' needs to be a GRanges.")
91
-    if (any(width(gr) != 1L)) stop("'gr' needs to have widths of 1.")
92
-
93
-    # Process 'M' and 'Cov'
94
-    if (is.null(M) || is.null(Cov)) stop("Need 'M' and 'Cov'.")
95
-    if (length(gr) != nrow(M) ||
96
-        length(gr) != nrow(Cov) ||
97
-        ncol(Cov) != ncol(M)) {
98
-        stop("'gr', 'M' and 'Cov' need to have compatible dimensions.")
84
+    # Process 'trans' and 'parameters'.
85
+    if (is.null(trans)) {
86
+        trans <- function(x) NULL
99 87
     }
100
-    if (!is.null(rownames(M))) rownames(M) <- NULL
101
-    if (!is.null(rownames(Cov))) rownames(Cov) <- NULL
102
-    if (!is.null(names(gr))) names(gr) <- NULL
103
-
104
-    # Process 'sampleNames' and 'pData'
105
-    if (is.null(pData)) {
106
-        if (is.null(sampleNames)) {
107
-            if (!is.null(colnames(M))) {
108
-                sampleNames <- colnames(M)
109
-            } else if (!is.null(colnames(Cov))) {
110
-                sampleNames <- colnames(Cov)
88
+    if (is.null(parameters)) {
89
+        parameters <- list()
90
+    }
91
+    # Process 'sampleNames' and 'pData'.
92
+    if (is.null(sampleNames)) {
93
+        if (is.null(pData)) {
94
+            # BSseq object will have no colnames.
95
+            pData <- S4Vectors:::new_DataFrame(nrows = ncol(M))
96
+        } else {
97
+            # BSseq object will have 'sampleNames' as colnames.
98
+            pData <- DataFrame(row.names = sampleNames)
99
+        }
100
+    } else {
101
+        if (is.null(pData)) {
102
+            # BSseq object will have 'sampleNames' as colnames.
103
+            pData <- DataFrame(row.names = sampleNames)
104
+        } else {
105
+            if (is.null(rownames(pData))) {
106
+                rownames(pData) <- sampleNames
111 107
             } else {
112
-                sampleNames <- paste0("V", seq_len(ncol(M)))
108
+                stopifnot(identical(rownames(pData), sampleNames))
113 109
             }
114 110
         }
115
-        pData <- DataFrame(row.names = sampleNames)
116
-    } else {
117
-        pData <- as(pData, "DataFrame")
118 111
     }
119
-    if (is.null(sampleNames) && !is.null(pData) && !is.null(rownames(pData))) {
120
-        sampleNames <- rownames(pData)
112
+    # Process 'gr', 'pos', and 'chr'.
113
+    if (is.null(gr)) {
114
+        if (is.null(pos) || is.null(chr)) {
115
+            stop("Need 'pos' and 'chr' if 'gr' not supplied.")
116
+        }
117
+        gr <- GRanges(seqnames = chr, ranges = IRanges(start = pos, width = 1L))
121 118
     }
122
-    if (length(unique(sampleNames)) != ncol(M)) {
123
-        stop("sampleNames need to be unique and of the right length.")
119
+    if (!is(gr, "GRanges")) {
120
+        stop("'gr' needs to be a GRanges.")
124 121
     }
122
+    # Process 'rmZeroCov'.
123
+    stopifnot(isTRUEorFALSE(rmZeroCov))
125 124
 
126
-    # TODO: Is this really necessary? It complicates things because HDF5Matrix
127
-    #       will be degraded to a DelayedMatrix
128
-    # Add column names to assays
129
-    if (is.null(colnames(M)) || any(sampleNames != colnames(M))) {
130
-        colnames(M) <- sampleNames
131
-    }
132
-    if (is.null(colnames(Cov)) || any(sampleNames != colnames(Cov))) {
133
-        colnames(Cov) <- sampleNames
134
-    }
135
-    if (!is.null(coef)) {
136
-        if (nrow(coef) != nrow(M) || ncol(coef) != ncol(M)) {
137
-            stop("'coef' does not have the right dimensions")
138
-        }
139
-        if (is.null(colnames(coef)) || any(sampleNames != colnames(coef))) {
140
-            colnames(coef) <- sampleNames
141
-        }
142
-        if (!is.null(rownames(coef))) {
143
-            rownames(coef) <- NULL
144
-        }
145
-    }
146
-    if (!is.null(se.coef)) {
147
-        if (nrow(se.coef) != nrow(M) || ncol(se.coef) != ncol(M)) {
148
-            stop("'se.coef' does not have the right dimensions")
149
-        }
150
-        if (is.null(colnames(se.coef)) ||
151
-            any(sampleNames != colnames(se.coef))) {
152
-            colnames(se.coef) <- sampleNames
153
-        }
154
-        if (!is.null(rownames(se.coef))) {
155
-            rownames(se.coef) <- NULL
125
+    # Collapse duplicate loci --------------------------------------------------
126
+
127
+    is_duplicated <- duplicated(gr)
128
+    if (any(is_duplicated)) {
129
+        warning("Detected duplicate loci. Collapsing counts in 'M' and 'Cov' ",
130
+                "at these positions.")
131
+        if (!is.null(coef) || !is.null(se.coef)) {
132
+            stop("Cannot collapse when 'coef' or 'se.coef' are non-NULL.")
156 133
         }
134
+        loci <- gr[!is_duplicated]
135
+        ol <- findOverlaps(gr, loci, type = "equal")
136
+        M <- rowsum(x = M, group = subjectHits(ol), reorder = FALSE)
137
+        rownames(M) <- NULL
138
+        Cov <- rowsum(x = Cov, group = subjectHits(ol), reorder = FALSE)
139
+        rownames(Cov) <- NULL
140
+    } else {
141
+        loci <- gr
157 142
     }
158 143
 
159
-    # Optionally, remove positions with Cov == 0
144
+    # Optionally, remove positions with zero coverage --------------------------
145
+
160 146
     if (rmZeroCov) {
161 147
         loci_with_zero_cov <- rowAlls(Cov, value = 0)
162 148
         if (any(loci_with_zero_cov)) {
163
-            gr <- gr[!loci_with_zero_cov]
164
-            M <- M[!loci_with_zero_cov, , drop = FALSE]
165
-            Cov <- Cov[!loci_with_zero_cov, , drop = FALSE]
149
+            loci_with_nonzero_cov <- !loci_with_zero_cov
150
+            gr <- gr[loci_with_nonzero_cov]
151
+            M <- M[loci_with_nonzero_cov, , drop = FALSE]
152
+            Cov <- Cov[loci_with_nonzero_cov, , drop = FALSE]
153
+            coef <- coef[loci_with_nonzero_cov, , drop = FALSE]
154
+            se.coef <- se.coef[loci_with_nonzero_cov, , drop = FALSE]
166 155
         }
167 156
     }
168 157
 
169
-    # Collapse duplicate loci
170
-    if (any(duplicated(gr))) {
171
-        warning("Detected duplicate loci. Collapsing counts in 'M' and 'Cov' ",
172
-                "at these positions.")
173
-        if (!is.null(coef) || !is.null(se.coef)) {
174
-            stop("Cannot collapse when 'coef' or 'se.coef' are present.")
175
-        }
176
-        unique_gr <- unique(gr)
177
-        ol <- findOverlaps(unique_gr, gr, type = "equal")
178
-        gr <- unique_gr
179
-        # TODO: Can we avoid making `idx`?
180
-        idx <- as.list(ol)
181
-        M <- .collapseMatrixLike(M, idx, MARGIN = 2L)
182
-        Cov <- .collapseMatrixLike(Cov, idx, MARGIN = 2L)
183
-    }
158
+    # Construct BSseq object ---------------------------------------------------
184 159
 
185
-    # Construct BSseq object
186 160
     assays <- SimpleList(M = M, Cov = Cov, coef = coef, se.coef = se.coef)
187 161
     assays <- assays[!S4Vectors:::sapply_isNULL(assays)]
188
-    se <- SummarizedExperiment(assays = assays, rowRanges = gr, colData = pData)
189
-    if (is.null(parameters)) {
190
-        parameters <- list()
191
-    }
192
-    if (is.null(trans)) {
193
-        trans <- function(x) NULL
194
-    }
162
+    se <- SummarizedExperiment(
163
+        assays = assays,
164
+        rowRanges = loci,
165
+        colData = pData)
195 166
     .BSseq(se, trans = trans, parameters = parameters)
196 167
 }
197 168
 
198
-hasBeenSmoothed <- function(BSseq) "coef" %in% assayNames(BSseq)
169
+# Move to BSseq-utils?
170
+hasBeenSmoothed <- function(BSseq) {
171
+    "coef" %in% assayNames(BSseq)
172
+}
199 173
 
200
-# TODO: Document withDimnames
174
+# Move to BSseq-utils?
201 175
 getBSseq <- function(BSseq,
202 176
                      type = c("Cov", "M", "gr", "coef", "se.coef", "trans",
203 177
                               "parameters"),
... ...
@@ -206,33 +180,135 @@ getBSseq <- function(BSseq,
206 180
     if (type %in% c("M", "Cov")) {
207 181
         return(assay(BSseq, type, withDimnames = withDimnames))
208 182
     }
209
-    if (type %in% c("coef", "se.coef") && type %in% assayNames(BSseq)) {
210
-        return(assay(BSseq, type, withDimnames = withDimnames))
183
+    if (type %in% c("coef", "se.coef")) {
184
+        if (type %in% assayNames(BSseq)) {
185
+            return(assay(BSseq, type, withDimnames = withDimnames))
186
+        } else {
187
+            return(NULL)
188
+        }
189
+    }
190
+    if (type == "trans") {
191
+        return(BSseq@trans)
192
+    }
193
+    if (type == "parameters") {
194
+        return(BSseq@parameters)
195
+    }
196
+    if (type == "gr") {
197
+        return(BSseq@rowRanges)
211 198
     }
212
-    if (type %in% c("coef", "se.coef")) return(NULL)
213
-    if (type == "trans") return(BSseq@trans)
214
-    if (type == "parameters") return(BSseq@parameters)
215
-    if (type == "gr") return(BSseq@rowRanges)
216
-
217 199
 }
218 200
 
219
-strandCollapse <- function(BSseq, shift = TRUE) {
201
+# Move to BSseq-utils?
202
+strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
203
+                           BACKEND = getRealizationBackend(),
204
+                           dir = tempfile("BSseq"), replace = FALSE,
205
+                           chunkdim = NULL, level = NULL,
206
+                           type = c("double", "integer")) {
207
+
208
+    # Argument checks ----------------------------------------------------------
209
+
220 210
     if (all(runValue(strand(BSseq)) == "*")) {
221
-        warning("All loci are unstranded; nothing to collapse")
211
+        warning("All loci are unstranded, nothing to collapse.", call. = FALSE)
222 212
         return(BSseq)
223 213
     }
224 214
     if (!(all(runValue(strand(BSseq)) %in% c("+", "-")))) {
225 215
         stop("'BSseq' object has a mix of stranded and unstranded loci.")
226 216
     }
227
-    BS.forward <- BSseq[strand(BSseq) == "+"]
228
-    strand(BS.forward) <- "*"
229
-    BS.reverse <- BSseq[strand(BSseq) == "-"]
230
-    strand(BS.reverse) <- "*"
231
-    if (shift) rowRanges(BS.reverse) <- shift(rowRanges(BS.reverse), -1L)
232
-    sampleNames(BS.reverse) <- paste0(sampleNames(BS.reverse), "_REVERSE")
233
-    BS.comb <- combine(BS.forward, BS.reverse)
234
-    newBSseq <- collapseBSseq(BS.comb, columns = rep(sampleNames(BSseq), 2))
235
-    newBSseq
217
+    # Register 'BACKEND' and return to current value on exit.
218
+    # TODO: Is this strictly necessary?
219
+    current_BACKEND <- getRealizationBackend()
220
+    on.exit(setRealizationBackend(current_BACKEND), add = TRUE)
221
+    setRealizationBackend(BACKEND)
222
+    # Check compatability of 'BPPARAM' with 'BACKEND'.
223
+    if (!.areBackendsInMemory(BACKEND)) {
224
+        if (!.isSingleMachineBackend(BPPARAM)) {
225
+            stop("The parallelisation strategy must use a single machine ",
226
+                 "when using an on-disk realization backend.\n",
227
+                 "See help(\"read.bismark\") for details.",
228
+                 call. = FALSE)
229
+        }
230
+    } else {
231
+        if (!is.null(BACKEND)) {
232
+            # NOTE: Currently do not support any in-memory realization
233
+            #       backends. If the realization backend is NULL then an
234
+            #       ordinary matrix is returned rather than a matrix-backed
235
+            #       DelayedMatrix.
236
+            stop("The '", BACKEND, "' realization backend is not supported.",
237
+                 "\n  See help(\"read.bismark\") for details.",
238
+                 call. = FALSE)
239
+        }
240
+    }
241
+    # If using HDF5Array as BACKEND, check remaining options are sensible.
242
+    if (identical(BACKEND, "HDF5Array")) {
243
+        # NOTE: Most of this copied from
244
+        #       HDF5Array::saveHDF5SummarizedExperiment().
245
+        if (!isSingleString(dir)) {
246
+            stop(wmsg("'dir' must be a single string specifying the path to ",
247
+                      "the directory where to save the BSseq object (the ",
248
+                      "directory will be created)."))
249
+        }
250
+        if (!isTRUEorFALSE(replace)) {
251
+            stop("'replace' must be TRUE or FALSE")
252
+        }
253
+        HDF5Array:::.create_dir(dir = dir, replace = replace)
254
+        h5_path <- file.path(dir, "assays.h5")
255
+    } else if (identical(BACKEND, NULL)) {
256
+        h5_path <- NULL
257
+    }
258
+
259
+    # Collapse loci ------------------------------------------------------------
260
+
261
+    loci <- rowRanges(BSseq)
262
+    if (shift) {
263
+        loci <- shift(loci, shift = as.integer(-1L * (strand(loci) == "-")))
264
+    }
265
+    collapsed_loci <- reduce(loci, min.gapwidth = 0L, ignore.strand = TRUE)
266
+
267
+    # Collapse 'M' and 'Cov' matrices ------------------------------------------
268
+
269
+    ol <- findOverlaps(loci, collapsed_loci, type = "equal")
270
+    group <- subjectHits(ol)
271
+    M <- rowsum(
272
+        x = assay(BSseq, "M", withDimnames = FALSE),
273
+        group = group,
274
+        # NOTE: reorder = TRUE to ensure same row-order as collapsed_loci.
275
+        reorder = TRUE,
276
+        BPPARAM = BPPARAM,
277
+        filepath = h5_path,
278
+        name = "Cov",
279
+        chunkdim = chunkdim,
280
+        level = level,
281
+        type = type)
282
+    Cov <- rowsum(
283
+        x = assay(BSseq, "Cov", withDimnames = FALSE),
284
+        group = group,
285
+        # NOTE: reorder = TRUE to ensure same row-order as collapsed_loci.
286
+        reorder = TRUE,
287
+        BPPARAM = BPPARAM,
288
+        filepath = h5_path,
289
+        name = "Cov",
290
+        chunkdim = chunkdim,
291
+        level = level,
292
+        type = type)
293
+
294
+    # Construct BSseq object, saving it if it is HDF5-backed -------------------
295
+
296
+    se <- SummarizedExperiment(
297
+        assays = SimpleList(M = unname(M), Cov = unname(Cov)),
298
+        rowRanges = collapsed_loci,
299
+        colData = colData(BSseq))
300
+    # TODO: Is there a way to use the internal constructor with `check = FALSE`?
301
+    #       Assuming input was valid, the output is valid, too.
302
+    # .BSseq(se, trans = function(x) NULL, parameters = list())
303
+    bsseq <- new2("BSseq", se, check = FALSE)
304
+    if (!is.null(BACKEND) && BACKEND == "HDF5Array") {
305
+        # NOTE: Save BSseq object; mimicing
306
+        #       HDF5Array::saveHDF5SummarizedExperiment().
307
+        x <- bsseq
308
+        x@assays <- HDF5Array:::.shorten_h5_paths(x@assays)
309
+        saveRDS(x, file = file.path(dir, "se.rds"))
310
+    }
311
+    bsseq
236 312
 }
237 313
 
238 314
 # Exported methods -------------------------------------------------------------
... ...
@@ -290,7 +366,8 @@ setReplaceMethod(
290 366
 setMethod("updateObject", "BSseq",
291 367
           function(object, ...) {
292 368
               # NOTE: identical() is too strong
293
-              if (isTRUE(all.equal(getBSseq(object, "trans"), .oldTrans))) {
369
+              if (hasBeenSmoothed(object) &
370
+                  isTRUE(all.equal(getBSseq(object, "trans"), .oldTrans))) {
294 371
                   object@trans <- plogis
295 372
               }
296 373
               if (is(object, "SummarizedExperiment")) {
... ...
@@ -298,14 +375,15 @@ setMethod("updateObject", "BSseq",
298 375
                   object <- callNextMethod()
299 376
                   return(object)
300 377
               } else {
301
-                  BSseq(gr = object@gr,
302
-                        M = object@M,
303
-                        Cov = object@Cov,
304
-                        coef = object@coef,
305
-                        se.coef = object@se.coef,
306
-                        trans = object@trans,
307
-                        parameters = object@parameters,
308
-                        pData = object@phenoData@data)
378
+                  BSseq(
379
+                      gr = object@gr,
380
+                      M = object@M,
381
+                      Cov = object@Cov,
382
+                      coef = object@coef,
383
+                      se.coef = object@se.coef,
384
+                      trans = object@trans,
385
+                      parameters = object@parameters,
386
+                      pData = object@phenoData@data)
309 387
               }
310 388
           }
311 389
 )
Browse code

Speed up .collapseMatrixLike() and, in turn, BSseq()

- Addressed #71, thanks @scottgigante
- Removes undocumented behaviour of BSseq() sorting the output. In previous versions, this was done via `reduce(gr)`; see https://github.com/hansenlab/bsseq/blob/RELEASE_3_7/R/BSseq_class.R#L176-L223. In the refactor branch I had changed it to use order(gr). Now, it is completely removed.

Peter Hickey authored on 20/06/2018 16:19:24
Showing 1 changed files
... ...
@@ -69,10 +69,12 @@ setValidity2("BSseq", function(object) {
69 69
 
70 70
 # Exported functions -----------------------------------------------------------
71 71
 
72
+# TODO: Document that if duplicate loci are detected then the output `M` and
73
+#       `Cov` will be numeric even if input `M` and `Cov` were integer.
72 74
 # TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq
73 75
 #       object when called without any arguments. It also does some pretty
74
-#       complicated parsing of the inputs. Still, I think we're stuck with it
75
-#       because it's been around for a long time.
76
+#       complicated parsing of the inputs. But we're stuck with it because it's
77
+#       been around for a long time.
76 78
 BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
77 79
                   trans = NULL, parameters = NULL, pData = NULL,
78 80
                   gr = NULL, pos = NULL, chr = NULL, sampleNames = NULL,
... ...
@@ -171,27 +173,15 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
171 173
         if (!is.null(coef) || !is.null(se.coef)) {
172 174
             stop("Cannot collapse when 'coef' or 'se.coef' are present.")
173 175
         }
174
-        # NOTE: reduce() sorts the output
175
-        # TODO: Clarify above comment.
176 176
         unique_gr <- unique(gr)
177
-        # TODO: type = "equal"?
178
-        ol <- findOverlaps(unique_gr, gr)
179
-        gr <- unique(gr)
177
+        ol <- findOverlaps(unique_gr, gr, type = "equal")
178
+        gr <- unique_gr
179
+        # TODO: Can we avoid making `idx`?
180 180
         idx <- as.list(ol)
181 181
         M <- .collapseMatrixLike(M, idx, MARGIN = 2L)
182 182
         Cov <- .collapseMatrixLike(Cov, idx, MARGIN = 2L)
183 183
     }
184 184
 
185
-    # Sort loci
186
-    if (is.unsorted(gr)) {
187
-        o <- order(gr)
188
-        gr <- gr[o]
189
-        M <- M[o, , drop = FALSE]
190
-        Cov <- Cov[o, , drop = FALSE]
191
-        coef <- coef[o, , drop = FALSE]
192
-        se.coef <- se.coef[o, , drop = FALSE]
193
-    }
194
-
195 185
     # Construct BSseq object
196 186
     assays <- SimpleList(M = M, Cov = Cov, coef = coef, se.coef = se.coef)
197 187
     assays <- assays[!S4Vectors:::sapply_isNULL(assays)]
Browse code

Add TODOs

Peter Hickey authored on 13/06/2018 00:25:21
Showing 1 changed files
... ...
@@ -172,7 +172,9 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
172 172
             stop("Cannot collapse when 'coef' or 'se.coef' are present.")
173 173
         }
174 174
         # NOTE: reduce() sorts the output
175
+        # TODO: Clarify above comment.
175 176
         unique_gr <- unique(gr)
177
+        # TODO: type = "equal"?
176 178
         ol <- findOverlaps(unique_gr, gr)
177 179
         gr <- unique(gr)
178 180
         idx <- as.list(ol)
Browse code

Add TODOs

Peter Hickey authored on 29/05/2018 19:26:19
Showing 1 changed files
... ...
@@ -29,6 +29,7 @@
29 29
     validMsg(msg, .Call(cxx_check_M_and_Cov, M, Cov))
30 30
 }
31 31
 
32
+# TODO: Benchmark validity method
32 33
 setValidity2("BSseq", function(object) {
33 34
     msg <- NULL
34 35
 
Browse code

Work in progress: refactoring bsseq

- BSseq objects can once again use ordinary matrix objects as assays.
- Reimplement `BSmooth()` more-or-less from scratch:
- Switch from 'parallel' to 'BiocParallel' for parallelization. This brings some notable improvements:
- Smoothed results can now be written directly to an on-disk realization backend by the worker. This dramatically reduces memory usage compared to previous versions of 'bsseq' that required all results be retained in-memory.
- Parallelization is now supported on Windows through the use of a 'SnowParam' object as the value of `BPPARAM`.
- Improved error handling makes it possible to gracefully resume `BSmooth()` jobs that encountered errors by re-doing only the necessary tasks.
- Detailed and extensive job logging facilities.
- Fix bug in `BSmooth()` with the `maxGap` parameter.
- Re-factor BSseq() constructor and add fast, internal .BSseq() constructor.
- Re-factor collapseBSseq() and combine(). Should be much more performant.
- Use beachmat to implement fast validity checking of 'M' and 'Cov' matrices.
- Resave BS.chr22 (supplied data) using integer for storage mode of assays to reduce size.
- Switch from RUnit to testthat. testthat has better integration with code coverage tools that help when refactoring.

Peter Hickey authored on 28/05/2018 23:42:18
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,318 @@
1
+# Exported classes -------------------------------------------------------------
2
+
3
+# NOTE: This create a 'classGeneratorFunction' (internal constructor), .BSseq()
4
+.BSseq <- setClass(
5
+    "BSseq",
6
+    slots = representation(
7
+        trans = "function",
8
+        parameters = "list"),
9
+    contains = "RangedSummarizedExperiment"
10
+)
11
+
12
+# Validity methods -------------------------------------------------------------
13
+
14
+.checkAssayNames <- function(object, names) {
15
+    if (all(names %in% assayNames(object))) {
16
+        return(NULL)
17
+    } else {
18
+        paste0(
19
+            "object of class ",
20
+            class(object),
21
+            " needs to have assay slots with names ",
22
+            paste0(names, collapse = ", "))
23
+
24
+    }
25
+}
26
+
27
+.checkMandCov <- function(M, Cov) {
28
+    msg <- NULL
29
+    validMsg(msg, .Call(cxx_check_M_and_Cov, M, Cov))
30
+}
31
+
32
+setValidity2("BSseq", function(object) {
33
+    msg <- NULL
34
+
35
+    if (identical(object, .BSseq())) {
36
+        # No validity checks for object returned by internal constructor
37
+        return(msg)
38
+    }
39
+    msg <- validMsg(msg, .checkAssayNames(object, c("Cov", "M")))
40
+
41
+    # TODO: Are colnames strictly necessary?
42
+    if (is.null(colnames(object))) {
43
+        msg <- validMsg(msg, "colnames (aka sampleNames) need to be set")
44
+    }
45
+
46
+    assay_rownames <- lapply(assays(object), rownames)
47
+    if (!all(S4Vectors:::sapply_isNULL(assay_rownames))) {
48
+        msg <- validMsg(msg, "unnecessary rownames on one-or-more assays")
49
+    }
50
+
51
+    msg <- validMsg(msg,
52
+                    .checkMandCov(assay(object, "M", withDimnames = FALSE),
53
+                                  assay(object, "Cov", withDimnames = FALSE)))
54
+
55
+    if (is.null(msg)) TRUE else msg
56
+})
57
+
58
+# Internal functions -----------------------------------------------------------
59
+
60
+.oldTrans <- function(x) {
61
+    y <- x
62
+    ix <- which(x < 0)
63
+    ix2 <- which(x > 0)
64
+    y[ix] <- exp(x[ix])/(1 + exp(x[ix]))
65
+    y[ix2] <- 1/(1 + exp(-x[ix2]))
66
+    y
67
+}
68
+
69
+# Exported functions -----------------------------------------------------------
70
+
71
+# TODO: BSseq() is arguably a bad constructor. It doesn't return a valid BSseq
72
+#       object when called without any arguments. It also does some pretty
73
+#       complicated parsing of the inputs. Still, I think we're stuck with it
74
+#       because it's been around for a long time.
75
+BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
76
+                  trans = NULL, parameters = NULL, pData = NULL,
77
+                  gr = NULL, pos = NULL, chr = NULL, sampleNames = NULL,
78
+                  rmZeroCov = FALSE) {
79
+
80
+    # Process 'gr', or 'pos' and 'chr'
81
+    if (is.null(gr)) {
82
+        if (is.null(pos) || is.null(chr)) {
83
+            stop("Need 'pos' and 'chr' if 'gr' not supplied.")
84
+        }
85
+        gr <- GRanges(seqnames = chr, ranges = IRanges(start = pos, width = 1L))
86
+    }
87
+    if (!is(gr, "GRanges")) stop("'gr' needs to be a GRanges.")
88
+    if (any(width(gr) != 1L)) stop("'gr' needs to have widths of 1.")
89
+
90
+    # Process 'M' and 'Cov'
91
+    if (is.null(M) || is.null(Cov)) stop("Need 'M' and 'Cov'.")
92
+    if (length(gr) != nrow(M) ||
93
+        length(gr) != nrow(Cov) ||
94
+        ncol(Cov) != ncol(M)) {
95
+        stop("'gr', 'M' and 'Cov' need to have compatible dimensions.")
96
+    }
97
+    if (!is.null(rownames(M))) rownames(M) <- NULL
98
+    if (!is.null(rownames(Cov))) rownames(Cov) <- NULL
99
+    if (!is.null(names(gr))) names(gr) <- NULL
100
+
101
+    # Process 'sampleNames' and 'pData'
102
+    if (is.null(pData)) {
103
+        if (is.null(sampleNames)) {
104
+            if (!is.null(colnames(M))) {
105
+                sampleNames <- colnames(M)
106
+            } else if (!is.null(colnames(Cov))) {
107
+                sampleNames <- colnames(Cov)
108
+            } else {
109
+                sampleNames <- paste0("V", seq_len(ncol(M)))
110
+            }
111
+        }
112
+        pData <- DataFrame(row.names = sampleNames)
113
+    } else {
114
+        pData <- as(pData, "DataFrame")
115
+    }
116
+    if (is.null(sampleNames) && !is.null(pData) && !is.null(rownames(pData))) {
117
+        sampleNames <- rownames(pData)
118
+    }
119
+    if (length(unique(sampleNames)) != ncol(M)) {
120
+        stop("sampleNames need to be unique and of the right length.")
121
+    }
122
+
123
+    # TODO: Is this really necessary? It complicates things because HDF5Matrix
124
+    #       will be degraded to a DelayedMatrix
125
+    # Add column names to assays
126
+    if (is.null(colnames(M)) || any(sampleNames != colnames(M))) {
127
+        colnames(M) <- sampleNames
128
+    }
129
+    if (is.null(colnames(Cov)) || any(sampleNames != colnames(Cov))) {
130
+        colnames(Cov) <- sampleNames
131
+    }
132
+    if (!is.null(coef)) {
133
+        if (nrow(coef) != nrow(M) || ncol(coef) != ncol(M)) {
134
+            stop("'coef' does not have the right dimensions")
135
+        }
136
+        if (is.null(colnames(coef)) || any(sampleNames != colnames(coef))) {
137
+            colnames(coef) <- sampleNames
138
+        }
139
+        if (!is.null(rownames(coef))) {
140
+            rownames(coef) <- NULL
141
+        }
142
+    }
143
+    if (!is.null(se.coef)) {
144
+        if (nrow(se.coef) != nrow(M) || ncol(se.coef) != ncol(M)) {
145
+            stop("'se.coef' does not have the right dimensions")
146
+        }
147
+        if (is.null(colnames(se.coef)) ||
148
+            any(sampleNames != colnames(se.coef))) {
149
+            colnames(se.coef) <- sampleNames
150
+        }
151
+        if (!is.null(rownames(se.coef))) {
152
+            rownames(se.coef) <- NULL
153
+        }
154
+    }
155
+
156
+    # Optionally, remove positions with Cov == 0
157
+    if (rmZeroCov) {
158
+        loci_with_zero_cov <- rowAlls(Cov, value = 0)
159
+        if (any(loci_with_zero_cov)) {
160
+            gr <- gr[!loci_with_zero_cov]
161
+            M <- M[!loci_with_zero_cov, , drop = FALSE]
162
+            Cov <- Cov[!loci_with_zero_cov, , drop = FALSE]
163
+        }
164
+    }
165
+
166
+    # Collapse duplicate loci
167
+    if (any(duplicated(gr))) {
168
+        warning("Detected duplicate loci. Collapsing counts in 'M' and 'Cov' ",
169
+                "at these positions.")
170
+        if (!is.null(coef) || !is.null(se.coef)) {
171
+            stop("Cannot collapse when 'coef' or 'se.coef' are present.")
172
+        }
173
+        # NOTE: reduce() sorts the output
174
+        unique_gr <- unique(gr)
175
+        ol <- findOverlaps(unique_gr, gr)
176
+        gr <- unique(gr)
177
+        idx <- as.list(ol)
178
+        M <- .collapseMatrixLike(M, idx, MARGIN = 2L)
179
+        Cov <- .collapseMatrixLike(Cov, idx, MARGIN = 2L)
180
+    }
181
+
182
+    # Sort loci
183
+    if (is.unsorted(gr)) {
184
+        o <- order(gr)
185
+        gr <- gr[o]
186
+        M <- M[o, , drop = FALSE]
187
+        Cov <- Cov[o, , drop = FALSE]
188
+        coef <- coef[o, , drop = FALSE]
189
+        se.coef <- se.coef[o, , drop = FALSE]
190
+    }
191
+
192
+    # Construct BSseq object
193
+    assays <- SimpleList(M = M, Cov = Cov, coef = coef, se.coef = se.coef)
194
+    assays <- assays[!S4Vectors:::sapply_isNULL(assays)]
195
+    se <- SummarizedExperiment(assays = assays, rowRanges = gr, colData = pData)
196
+    if (is.null(parameters)) {
197
+        parameters <- list()
198
+    }
199
+    if (is.null(trans)) {
200
+        trans <- function(x) NULL
201
+    }
202
+    .BSseq(se, trans = trans, parameters = parameters)
203
+}
204
+
205
+hasBeenSmoothed <- function(BSseq) "coef" %in% assayNames(BSseq)
206
+
207
+# TODO: Document withDimnames
208
+getBSseq <- function(BSseq,
209
+                     type = c("Cov", "M", "gr", "coef", "se.coef", "trans",
210
+                              "parameters"),
211
+                     withDimnames = TRUE) {
212
+    type <- match.arg(type)
213
+    if (type %in% c("M", "Cov")) {
214
+        return(assay(BSseq, type, withDimnames = withDimnames))
215
+    }
216
+    if (type %in% c("coef", "se.coef") && type %in% assayNames(BSseq)) {
217
+        return(assay(BSseq, type, withDimnames = withDimnames))
218
+    }
219
+    if (type %in% c("coef", "se.coef")) return(NULL)
220
+    if (type == "trans") return(BSseq@trans)
221
+    if (type == "parameters") return(BSseq@parameters)
222
+    if (type == "gr") return(BSseq@rowRanges)
223
+
224
+}
225
+
226
+strandCollapse <- function(BSseq, shift = TRUE) {
227
+    if (all(runValue(strand(BSseq)) == "*")) {
228
+        warning("All loci are unstranded; nothing to collapse")
229
+        return(BSseq)
230
+    }
231
+    if (!(all(runValue(strand(BSseq)) %in% c("+", "-")))) {
232
+        stop("'BSseq' object has a mix of stranded and unstranded loci.")
233
+    }
234
+    BS.forward <- BSseq[strand(BSseq) == "+"]
235
+    strand(BS.forward) <- "*"
236
+    BS.reverse <- BSseq[strand(BSseq) == "-"]
237
+    strand(BS.reverse) <- "*"
238
+    if (shift) rowRanges(BS.reverse) <- shift(rowRanges(BS.reverse), -1L)
239
+    sampleNames(BS.reverse) <- paste0(sampleNames(BS.reverse), "_REVERSE")
240
+    BS.comb <- combine(BS.forward, BS.reverse)
241
+    newBSseq <- collapseBSseq(BS.comb, columns = rep(sampleNames(BSseq), 2))
242
+    newBSseq
243
+}
244
+
245
+# Exported methods -------------------------------------------------------------
246
+
247
+setMethod("show", signature(object = "BSseq"), function(object) {
248
+    cat("An object of type 'BSseq' with\n")
249
+    cat(" ", nrow(object), "methylation loci\n")
250
+    cat(" ", ncol(object), "samples\n")
251
+    if (hasBeenSmoothed(object)) {
252
+        cat("has been smoothed with\n")
253
+        cat(" ", object@parameters$smoothText, "\n")
254
+    } else {
255
+        cat("has not been smoothed\n")
256
+    }
257
+    if (.isHDF5ArrayBacked(object)) {