Browse code

Remove BPREDO from BSmooth()

- This was not fully/properly implemented and in this state was dangerous. E.g., you can accidentally clobber an existing BSseq object (see https://support.bioconductor.org/p/109374/#109459).
- I'll make it a long-term goal to support this functionality

Peter Hickey authored on 14/06/2018 14:31:16
Showing 2 changed files

... ...
@@ -112,23 +112,16 @@ makeClusters <- function(hasGRanges, maxGap = 10^8) {
112 112
 # Exported functions -----------------------------------------------------------
113 113
 
114 114
 # TODO: BSmooth() should warn if BSseq object contains mix of strands.
115
-# TODO: If BSmooth() encounteres errors, return `BPREDO`` as `metadata(BSseq)`
116
-#       so as not to clobber the user's BSseq object; see
117
-#       https://support.bioconductor.org/p/109374/#109459.
118 115
 # TODO: Make 'mc.cores', 'mc.preschedule', and 'verbose' defunct one release
119 116
 #       cycle with them deprecated.
120 117
 # TODO: Consider having BSmooth() create a 'smoothed' assay in addition to or
121 118
 #       instead of the 'coef' and 'se.coef' assays.
122
-# TODO: To benefit from error recovery requires that bpStopOnError(BPPARAM) is
123
-#       TRUE but the default is FALSE. How to help the user? Probably don't
124
-#       want to override the user-specified value.
125 119
 BSmooth <- function(BSseq,
126 120
                     ns = 70,
127 121
                     h = 1000,
128 122
                     maxGap = 10^8,
129 123
                     keep.se = FALSE,
130 124
                     verbose = TRUE,
131
-                    BPREDO = list(),
132 125
                     BPPARAM = bpparam(),
133 126
                     BACKEND = getRealizationBackend(),
134 127
                     ...,
... ...
@@ -137,100 +130,81 @@ BSmooth <- function(BSseq,
137 130
                     mc.cores = 1) {
138 131
     # Argument checks-----------------------------------------------------------
139 132
 
140
-    # Check if this is a re-do.
141
-    # NOTE: Under the assumptions of a re-do (i.e. BSmooth() is being re-run
142
-    #       with the same arguments), we skip straight ahead to re-running
143
-    #       failed smoothing tasks with no further argument checks.
144
-    if (length(BPREDO)) {
145
-        if (!is.list(BPREDO) ||
146
-            identical(names(BPREDO), c("smooth", "coef_sink", "se.coef_sink",
147
-                                       "BACKEND"))) {
148
-            stop("'BPREDO' should be a list with elements 'smooth', ",
149
-                 "'coef_sink', 'se.coef_sink', and 'BACKEND'.")
133
+    # Check validity of 'BSseq' argument
134
+    if (!is(BSseq, "BSseq")) {
135
+        stop("'BSseq' must be a BSseq object.")
136
+    }
137
+    if (!isTRUE(all(width(BSseq) == 1L))) {
138
+        stop("All loci in 'BSseq' must have width == 1.")
139
+    }
140
+    if (is.unsorted(BSseq)) {
141
+        stop("'BSseq' must be sorted before smoothing. Use 'sort(BSseq)'.")
142
+    }
143
+    # Check for deprecated arguments and issue warning(s) if found.
144
+    if (!missing(parallelBy)) {
145
+        warning(
146
+            "'parallelBy' is deprecated and ignored.\n",
147
+            "See help(\"BSmooth\") for details.",
148
+            call. = FALSE,
149
+            immediate. = TRUE)
150
+    }
151
+    if (!missing(mc.preschedule)) {
152
+        warning(
153
+            "'mc.preschedule' is deprecated and ignored.\n",
154
+            "See help(\"BSmooth\") for details.",
155
+            call. = FALSE,
156
+            immediate. = TRUE)
157
+    }
158
+    if (!missing(mc.cores)) {
159
+        # TODO: What if user has provided a BPPARAM?
160
+        warning(
161
+            "'mc.cores' is deprecated.\n",
162
+            "Replaced with 'BPPARAM = MulticoreParam(workers = mc.cores)'",
163
+            ".\nSee help(\"BSmooth\").",
164
+            call. = FALSE,
165
+            immediate. = TRUE)
166
+        BPPARAM <- MulticoreParam(workers = mc.cores)
167
+    }
168
+    if (!missing(verbose)) {
169
+        warning(
170
+            "'verbose' is deprecated.\n",
171
+            "Replaced by setting 'bpprogressbar(BPPARAM) <- TRUE'.\n",
172
+            "See help(\"BSmooth\") for details.",
173
+            call. = FALSE,
174
+            immediate. = TRUE)
175
+        if (verbose) bpprogressbar(BPPARAM) <- TRUE
176
+    }
177
+    # Register 'BACKEND' and return to current value on exit
178
+    current_BACKEND <- getRealizationBackend()
179
+    on.exit(setRealizationBackend(current_BACKEND), add = TRUE)
180
+    setRealizationBackend(BACKEND)
181
+    # Check compatability of 'BACKEND' with backend(s) of BSseq object.
182
+    BSseq_backends <- .getBSseqBackends(BSseq)
183
+    if (.areBackendsInMemory(BACKEND) &&
184
+        !.areBackendsInMemory(BSseq_backends)) {
185
+        stop("Using an in-memory backend for a disk-backed BSseq object ",
186
+             "is not supported.\n",
187
+             "See help(\"BSmooth\") for details.",
188
+             call. = FALSE)
189
+    }
190
+    # Check compatability of 'BPPARAM' with the realization backend.
191
+    if (!.areBackendsInMemory(BACKEND)) {
192
+        if (!.isSingleMachineBackend(BPPARAM)) {
193
+            stop("The parallelisation strategy must use a single machine ",
194
+                 "when using an on-disk realization backend.\n",
195
+                 "See help(\"BSmooth\") for details.",
196
+                 call. = FALSE)
150 197
         }
151
-        is_redo <- TRUE
152
-        coef_sink <- BPREDO[["coef_sink"]]
153
-        se.coef_sink <- BPREDO[["se.coef_sink"]]
154
-        BACKEND <- BPREDO[["BACKEND"]]
155
-        BPREDO <- BPREDO[["smooth"]]
156 198
     } else {
157
-        is_redo <- FALSE
158
-        # Check validity of 'BSseq' argument
159
-        if (!is(BSseq, "BSseq")) {
160
-            stop("'BSseq' must be a BSseq object.")
161
-        }
162
-        if (!isTRUE(all(width(BSseq) == 1L))) {
163
-            stop("All loci in 'BSseq' must have width == 1.")
164
-        }
165
-        if (is.unsorted(BSseq)) {
166
-            stop("'BSseq' must be sorted before smoothing. Use 'sort(BSseq)'.")
167
-        }
168
-        # Check for deprecated arguments and issue warning(s) if found.
169
-        if (!missing(parallelBy)) {
170
-            warning(
171
-                "'parallelBy' is deprecated and ignored.\n",
172
-                "See help(\"BSmooth\") for details.",
173
-                call. = FALSE,
174
-                immediate. = TRUE)
175
-        }
176
-        if (!missing(mc.preschedule)) {
177
-            warning(
178
-                "'mc.preschedule' is deprecated and ignored.\n",
179
-                "See help(\"BSmooth\") for details.",
180
-                call. = FALSE,
181
-                immediate. = TRUE)
182
-        }
183
-        if (!missing(mc.cores)) {
184
-            # TODO: What if user has provided a BPPARAM?
185
-            warning(
186
-                "'mc.cores' is deprecated.\n",
187
-                "Replaced with 'BPPARAM = MulticoreParam(workers = mc.cores)'",
188
-                ".\nSee help(\"BSmooth\").",
189
-                call. = FALSE,
190
-                immediate. = TRUE)
191
-            BPPARAM <- MulticoreParam(workers = mc.cores)
192
-        }
193
-        if (!missing(verbose)) {
194
-            warning(
195
-                "'verbose' is deprecated.\n",
196
-                "Replaced by setting 'bpprogressbar(BPPARAM) <- TRUE'.\n",
197
-                "See help(\"BSmooth\") for details.",
198
-                call. = FALSE,
199
-                immediate. = TRUE)
200
-            if (verbose) bpprogressbar(BPPARAM) <- TRUE
201
-        }
202
-        # Register 'BACKEND' and return to current value on exit
203
-        current_BACKEND <- getRealizationBackend()
204
-        on.exit(setRealizationBackend(current_BACKEND), add = TRUE)
205
-        setRealizationBackend(BACKEND)
206
-        # Check compatability of 'BACKEND' with backend(s) of BSseq object.
207
-        BSseq_backends <- .getBSseqBackends(BSseq)
208
-        if (.areBackendsInMemory(BACKEND) &&
209
-            !.areBackendsInMemory(BSseq_backends)) {
210
-            stop("Using an in-memory backend for a disk-backed BSseq object ",
211
-                 "is not supported.\n",
199
+        if (!is.null(BACKEND)) {
200
+            # NOTE: Currently do not support any in-memory realization
201
+            #       backends. If 'BACKEND' is NULL then an ordinary matrix
202
+            #       is returned rather than a matrix-backed DelayedMatrix.
203
+            stop("The '", BACKEND, "' realization backend is not ",
204
+                 "supported.\n",
212 205
                  "See help(\"BSmooth\") for details.",
213 206
                  call. = FALSE)
214 207
         }
215
-        # Check compatability of 'BPPARAM' with the realization backend.
216
-        if (!.areBackendsInMemory(BACKEND)) {
217
-            if (!.isSingleMachineBackend(BPPARAM)) {
218
-                stop("The parallelisation strategy must use a single machine ",
219
-                     "when using an on-disk realization backend.\n",
220
-                     "See help(\"BSmooth\") for details.",
221
-                     call. = FALSE)
222
-            }
223
-        } else {
224
-            if (!is.null(BACKEND)) {
225
-                # NOTE: Currently do not support any in-memory realization
226
-                #       backends. If 'BACKEND' is NULL then an ordinary matrix
227
-                #       is returned rather than a matrix-backed DelayedMatrix.
228
-                stop("The '", BACKEND, "' realization backend is not ",
229
-                     "supported.\n",
230
-                     "See help(\"BSmooth\") for details.",
231
-                     call. = FALSE)
232
-            }
233
-        }
234 208
     }
235 209
 
236 210
     # Smoothing ----------------------------------------------------------------
... ...
@@ -247,51 +221,45 @@ BSmooth <- function(BSseq,
247 221
     # Set up "parallel" ArrayGrid over pos
248 222
     pos_grid <- ArbitraryArrayGrid(list(row_tickmarks, 1L))
249 223
     # Construct RealizationSink objects (as required)
250
-    if (!is_redo) {
251
-        if (is.null(BACKEND)) {
252
-            coef_sink <- NULL
253
-            se.coef_sink <- NULL
254
-            sink_lock <- NULL
255
-        } else if (BACKEND == "HDF5Array") {
256
-            coef_sink <- HDF5RealizationSink(
224
+    if (BACKEND == "HDF5Array") {
225
+        coef_sink <- HDF5RealizationSink(
226
+            dim = dim(M),
227
+            # NOTE: Never allow dimnames.
228
+            dimnames = NULL,
229
+            type = "double",
230
+            name = "coef",
231
+            ...)
232
+        on.exit(close(coef_sink), add = TRUE)
233
+        sink_lock <- ipcid()
234
+        on.exit(ipcremove(sink_lock), add = TRUE)
235
+        if (keep.se) {
236
+            se.coef_sink <- HDF5RealizationSink(
257 237
                 dim = dim(M),
258 238
                 # NOTE: Never allow dimnames.
259 239
                 dimnames = NULL,
260 240
                 type = "double",
261
-                name = "coef",
241
+                name = "se.coef",
262 242
                 ...)
263
-            on.exit(close(coef_sink), add = TRUE)
264
-            sink_lock <- ipcid()
265
-            on.exit(ipcremove(sink_lock), add = TRUE)
266
-            if (keep.se) {
267
-                se.coef_sink <- HDF5RealizationSink(
268
-                    dim = dim(M),
269
-                    # NOTE: Never allow dimnames.
270
-                    dimnames = NULL,
271
-                    type = "double",
272
-                    name = "se.coef",
273
-                    ...)
274
-                on.exit(close(se.coef_sink), add = TRUE)
275
-            } else {
276
-                se.coef_sink <- NULL
277
-            }
243
+            on.exit(close(se.coef_sink), add = TRUE)
278 244
         } else {
279
-            # TODO: This branch should probably never be entered because we
280
-            #       (implicitly) only support in-memory or HDF5Array backends.
281
-            #       However, we retain it for now (e.g., fstArray backend would
282
-            #       use this until a dedicated branch was implemented).
283
-            coef_sink <- DelayedArray:::RealizationSink(dim(M), type = "double")
284
-            on.exit(close(coef_sink), add = TRUE)
285
-            sink_lock <- ipcid()
286
-            on.exit(ipcremove(sink_lock), add = TRUE)
287
-            if (keep.se) {
288
-                se.coef_sink <- DelayedArray:::RealizationSink(
289
-                    dim(M),
290
-                    type = "double")
291
-                on.exit(close(se.coef_sink), add = TRUE)
292
-            } else {
293
-                se.coef_sink <- NULL
294
-            }
245
+            se.coef_sink <- NULL
246
+        }
247
+    } else {
248
+        # TODO: This branch should probably never be entered because we
249
+        #       (implicitly) only support in-memory or HDF5Array backends.
250
+        #       However, we retain it for now (e.g., fstArray backend would
251
+        #       use this until a dedicated branch was implemented).
252
+        coef_sink <- DelayedArray:::RealizationSink(dim(M), type = "double")
253
+        on.exit(close(coef_sink), add = TRUE)
254
+        sink_lock <- ipcid()
255
+        on.exit(ipcremove(sink_lock), add = TRUE)
256
+        if (keep.se) {
257
+            se.coef_sink <- DelayedArray:::RealizationSink(
258
+                dim(M),
259
+                type = "double")
260
+            on.exit(close(se.coef_sink), add = TRUE)
261
+        } else {
262
+            se.coef_sink <- NULL
295 263
         }
296 264
     }
297 265
 
... ...
@@ -320,25 +288,11 @@ BSmooth <- function(BSseq,
320 288
         ns = ns,
321 289
         h = h,
322 290
         keep.se = keep.se,
323
-        BPREDO = BPREDO,
324 291
         BPPARAM = BPPARAM))
325 292
     if (!all(bpok(smooth))) {
326
-        # TODO: Feels like stop() rather than warning() should be used, but
327
-        #       stop() doesn't allow for the return of partial results;
328
-        #       see https://support.bioconductor.org/p/109374/
329
-        warning("BSmooth() encountered errors: ",
330
-                sum(!bpok(smooth)), " of ", length(smooth),
331
-                " smoothing tasks failed.\n",
332
-                "BSmooth() has returned partial results, including errors, ",
333
-                "for debugging purposes.\n",
334
-                "It may be possible to re-run just these failed smoothing ",
335
-                "tasks.\nSee help(\"BSmooth\")",
336
-                call. = FALSE)
337
-        # NOTE: Return intermediate results as well as all derived variables.
338
-        return(list(smooth = smooth,
339
-                    coef_sink = coef_sink,
340
-                    se.coef_sink = se.coef_sink,
341
-                    BACKEND = BACKEND))
293
+        stop("BSmooth() encountered errors: ",
294
+             sum(!bpok(smooth)), " of ", length(smooth),
295
+             " smoothing tasks failed.")
342 296
     }
343 297
     # Construct coef and se.coef from results of smooth().
344 298
     if (is.null(BACKEND)) {
... ...
@@ -384,6 +338,3 @@ BSmooth <- function(BSseq,
384 338
 #       For example, we could set custom messages within .BSmooth() using the
385 339
 #       futile.logger syntax; see the BiocParalell vignette 'Errors, Logs and
386 340
 #       Debugging in BiocParallel'.
387
-# TODO: Remove NOTEs that are really documentation issues to the docs
388
-# TODO: If the BSseq object is backed by a single HDF5 file then use that to
389
-#       write the 'coef' and 'se.coef' data.
... ...
@@ -12,7 +12,6 @@ BSmooth(BSseq,
12 12
         maxGap = 10^8,
13 13
         keep.se = FALSE,
14 14
         verbose = TRUE,
15
-        BPREDO = list(),
16 15
         BPPARAM = bpparam(),
17 16
         BACKEND = getRealizationBackend(),
18 17
         ...,
... ...
@@ -37,8 +36,6 @@ BSmooth(BSseq,
37 36
     percent bigger and is currently not be used for anything in \pkg{bsseq}.}
38 37
   \item{verbose}{\strong{Deprecated}.
39 38
     See section, 'Parallelization and progress monitoring' for further details.}
40
-  \item{BPREDO}{Typically not used.
41
-    See section, 'Parallelization and progress monitoring' for further details.}
42 39
   \item{BPPARAM}{An optional \linkS4class{BiocParallelParam} instance
43 40
     determining the parallel back-end to be used during evaluation. Currently
44 41
     supported are \linkS4class{SerialParam} (Unix, Mac, Windows),
... ...
@@ -113,9 +110,6 @@ BSmooth(BSseq,
113 110
       results be retained in-memory.
114 111
     \item Parallelization is now supported on Windows through the use of a
115 112
     \linkS4class{SnowParam} object as the value of \code{BPPARAM}.
116
-    \item Improved error handling makes it possible to gracefully resume
117
-    \code{BSmooth} jobs that encountered errors by re-doing only the necessary
118
-    tasks.
119 113
     \item Detailed and extensive job logging facilities.
120 114
   }
121 115
 
... ...
@@ -149,26 +143,6 @@ BSmooth(BSseq,
149 143
     Please consult the \pkg{BiocParallel} documentation to take full advantage
150 144
     these advanced features.
151 145
   }
152
-
153
-  \subsection{Re-running failed tasks}{
154
-    By using \pkg{BiocParallel} for parallelization, \code{BSmooth()} may be
155
-    able to resume smoothing runs that failed due to hardware issues, for
156
-    example. To take advantage of this feature requires that
157
-    \code{stop.on.error = FALSE} was used to construct the
158
-    \linkS4class{BiocParallelParam} object passed to \code{BPPARAM}. When
159
-    \code{BSmooth()} encounters a potentially recoverable error, it will return
160
-    the intermediate results (including errors) and print instructions for how
161
-    the job may be resumed. This typically means re-running the exact same
162
-    command but with the addition of the the intermediate results passed via
163
-    the \code{BPREDO} argument.
164
-
165
-    Please note that not all errors can be recovered from in this way. We
166
-    recommend inspecting the returned object of intermediate results to try to
167
-    identify the source of the error (e.g., bad input data, failed node on a
168
-    cluster job, memory-usage error on a shared-memory job). It may be helpful
169
-    to use the advanced logging capabilities of \pkg{BiocParallel} when
170
-    re-running the job.
171
-  }
172 146
 }
173 147
 
174 148
 \value{