- This was not fully/properly implemented and in this state was dangerous. E.g., you can accidentally clobber an existing BSseq object (see https://support.bioconductor.org/p/109374/#109459).
- I'll make it a long-term goal to support this functionality
... | ... |
@@ -112,23 +112,16 @@ makeClusters <- function(hasGRanges, maxGap = 10^8) { |
112 | 112 |
# Exported functions ----------------------------------------------------------- |
113 | 113 |
|
114 | 114 |
# TODO: BSmooth() should warn if BSseq object contains mix of strands. |
115 |
-# TODO: If BSmooth() encounteres errors, return `BPREDO`` as `metadata(BSseq)` |
|
116 |
-# so as not to clobber the user's BSseq object; see |
|
117 |
-# https://support.bioconductor.org/p/109374/#109459. |
|
118 | 115 |
# TODO: Make 'mc.cores', 'mc.preschedule', and 'verbose' defunct one release |
119 | 116 |
# cycle with them deprecated. |
120 | 117 |
# TODO: Consider having BSmooth() create a 'smoothed' assay in addition to or |
121 | 118 |
# instead of the 'coef' and 'se.coef' assays. |
122 |
-# TODO: To benefit from error recovery requires that bpStopOnError(BPPARAM) is |
|
123 |
-# TRUE but the default is FALSE. How to help the user? Probably don't |
|
124 |
-# want to override the user-specified value. |
|
125 | 119 |
BSmooth <- function(BSseq, |
126 | 120 |
ns = 70, |
127 | 121 |
h = 1000, |
128 | 122 |
maxGap = 10^8, |
129 | 123 |
keep.se = FALSE, |
130 | 124 |
verbose = TRUE, |
131 |
- BPREDO = list(), |
|
132 | 125 |
BPPARAM = bpparam(), |
133 | 126 |
BACKEND = getRealizationBackend(), |
134 | 127 |
..., |
... | ... |
@@ -137,100 +130,81 @@ BSmooth <- function(BSseq, |
137 | 130 |
mc.cores = 1) { |
138 | 131 |
# Argument checks----------------------------------------------------------- |
139 | 132 |
|
140 |
- # Check if this is a re-do. |
|
141 |
- # NOTE: Under the assumptions of a re-do (i.e. BSmooth() is being re-run |
|
142 |
- # with the same arguments), we skip straight ahead to re-running |
|
143 |
- # failed smoothing tasks with no further argument checks. |
|
144 |
- if (length(BPREDO)) { |
|
145 |
- if (!is.list(BPREDO) || |
|
146 |
- identical(names(BPREDO), c("smooth", "coef_sink", "se.coef_sink", |
|
147 |
- "BACKEND"))) { |
|
148 |
- stop("'BPREDO' should be a list with elements 'smooth', ", |
|
149 |
- "'coef_sink', 'se.coef_sink', and 'BACKEND'.") |
|
133 |
+ # Check validity of 'BSseq' argument |
|
134 |
+ if (!is(BSseq, "BSseq")) { |
|
135 |
+ stop("'BSseq' must be a BSseq object.") |
|
136 |
+ } |
|
137 |
+ if (!isTRUE(all(width(BSseq) == 1L))) { |
|
138 |
+ stop("All loci in 'BSseq' must have width == 1.") |
|
139 |
+ } |
|
140 |
+ if (is.unsorted(BSseq)) { |
|
141 |
+ stop("'BSseq' must be sorted before smoothing. Use 'sort(BSseq)'.") |
|
142 |
+ } |
|
143 |
+ # Check for deprecated arguments and issue warning(s) if found. |
|
144 |
+ if (!missing(parallelBy)) { |
|
145 |
+ warning( |
|
146 |
+ "'parallelBy' is deprecated and ignored.\n", |
|
147 |
+ "See help(\"BSmooth\") for details.", |
|
148 |
+ call. = FALSE, |
|
149 |
+ immediate. = TRUE) |
|
150 |
+ } |
|
151 |
+ if (!missing(mc.preschedule)) { |
|
152 |
+ warning( |
|
153 |
+ "'mc.preschedule' is deprecated and ignored.\n", |
|
154 |
+ "See help(\"BSmooth\") for details.", |
|
155 |
+ call. = FALSE, |
|
156 |
+ immediate. = TRUE) |
|
157 |
+ } |
|
158 |
+ if (!missing(mc.cores)) { |
|
159 |
+ # TODO: What if user has provided a BPPARAM? |
|
160 |
+ warning( |
|
161 |
+ "'mc.cores' is deprecated.\n", |
|
162 |
+ "Replaced with 'BPPARAM = MulticoreParam(workers = mc.cores)'", |
|
163 |
+ ".\nSee help(\"BSmooth\").", |
|
164 |
+ call. = FALSE, |
|
165 |
+ immediate. = TRUE) |
|
166 |
+ BPPARAM <- MulticoreParam(workers = mc.cores) |
|
167 |
+ } |
|
168 |
+ if (!missing(verbose)) { |
|
169 |
+ warning( |
|
170 |
+ "'verbose' is deprecated.\n", |
|
171 |
+ "Replaced by setting 'bpprogressbar(BPPARAM) <- TRUE'.\n", |
|
172 |
+ "See help(\"BSmooth\") for details.", |
|
173 |
+ call. = FALSE, |
|
174 |
+ immediate. = TRUE) |
|
175 |
+ if (verbose) bpprogressbar(BPPARAM) <- TRUE |
|
176 |
+ } |
|
177 |
+ # Register 'BACKEND' and return to current value on exit |
|
178 |
+ current_BACKEND <- getRealizationBackend() |
|
179 |
+ on.exit(setRealizationBackend(current_BACKEND), add = TRUE) |
|
180 |
+ setRealizationBackend(BACKEND) |
|
181 |
+ # Check compatability of 'BACKEND' with backend(s) of BSseq object. |
|
182 |
+ BSseq_backends <- .getBSseqBackends(BSseq) |
|
183 |
+ if (.areBackendsInMemory(BACKEND) && |
|
184 |
+ !.areBackendsInMemory(BSseq_backends)) { |
|
185 |
+ stop("Using an in-memory backend for a disk-backed BSseq object ", |
|
186 |
+ "is not supported.\n", |
|
187 |
+ "See help(\"BSmooth\") for details.", |
|
188 |
+ call. = FALSE) |
|
189 |
+ } |
|
190 |
+ # Check compatability of 'BPPARAM' with the realization backend. |
|
191 |
+ if (!.areBackendsInMemory(BACKEND)) { |
|
192 |
+ if (!.isSingleMachineBackend(BPPARAM)) { |
|
193 |
+ stop("The parallelisation strategy must use a single machine ", |
|
194 |
+ "when using an on-disk realization backend.\n", |
|
195 |
+ "See help(\"BSmooth\") for details.", |
|
196 |
+ call. = FALSE) |
|
150 | 197 |
} |
151 |
- is_redo <- TRUE |
|
152 |
- coef_sink <- BPREDO[["coef_sink"]] |
|
153 |
- se.coef_sink <- BPREDO[["se.coef_sink"]] |
|
154 |
- BACKEND <- BPREDO[["BACKEND"]] |
|
155 |
- BPREDO <- BPREDO[["smooth"]] |
|
156 | 198 |
} else { |
157 |
- is_redo <- FALSE |
|
158 |
- # Check validity of 'BSseq' argument |
|
159 |
- if (!is(BSseq, "BSseq")) { |
|
160 |
- stop("'BSseq' must be a BSseq object.") |
|
161 |
- } |
|
162 |
- if (!isTRUE(all(width(BSseq) == 1L))) { |
|
163 |
- stop("All loci in 'BSseq' must have width == 1.") |
|
164 |
- } |
|
165 |
- if (is.unsorted(BSseq)) { |
|
166 |
- stop("'BSseq' must be sorted before smoothing. Use 'sort(BSseq)'.") |
|
167 |
- } |
|
168 |
- # Check for deprecated arguments and issue warning(s) if found. |
|
169 |
- if (!missing(parallelBy)) { |
|
170 |
- warning( |
|
171 |
- "'parallelBy' is deprecated and ignored.\n", |
|
172 |
- "See help(\"BSmooth\") for details.", |
|
173 |
- call. = FALSE, |
|
174 |
- immediate. = TRUE) |
|
175 |
- } |
|
176 |
- if (!missing(mc.preschedule)) { |
|
177 |
- warning( |
|
178 |
- "'mc.preschedule' is deprecated and ignored.\n", |
|
179 |
- "See help(\"BSmooth\") for details.", |
|
180 |
- call. = FALSE, |
|
181 |
- immediate. = TRUE) |
|
182 |
- } |
|
183 |
- if (!missing(mc.cores)) { |
|
184 |
- # TODO: What if user has provided a BPPARAM? |
|
185 |
- warning( |
|
186 |
- "'mc.cores' is deprecated.\n", |
|
187 |
- "Replaced with 'BPPARAM = MulticoreParam(workers = mc.cores)'", |
|
188 |
- ".\nSee help(\"BSmooth\").", |
|
189 |
- call. = FALSE, |
|
190 |
- immediate. = TRUE) |
|
191 |
- BPPARAM <- MulticoreParam(workers = mc.cores) |
|
192 |
- } |
|
193 |
- if (!missing(verbose)) { |
|
194 |
- warning( |
|
195 |
- "'verbose' is deprecated.\n", |
|
196 |
- "Replaced by setting 'bpprogressbar(BPPARAM) <- TRUE'.\n", |
|
197 |
- "See help(\"BSmooth\") for details.", |
|
198 |
- call. = FALSE, |
|
199 |
- immediate. = TRUE) |
|
200 |
- if (verbose) bpprogressbar(BPPARAM) <- TRUE |
|
201 |
- } |
|
202 |
- # Register 'BACKEND' and return to current value on exit |
|
203 |
- current_BACKEND <- getRealizationBackend() |
|
204 |
- on.exit(setRealizationBackend(current_BACKEND), add = TRUE) |
|
205 |
- setRealizationBackend(BACKEND) |
|
206 |
- # Check compatability of 'BACKEND' with backend(s) of BSseq object. |
|
207 |
- BSseq_backends <- .getBSseqBackends(BSseq) |
|
208 |
- if (.areBackendsInMemory(BACKEND) && |
|
209 |
- !.areBackendsInMemory(BSseq_backends)) { |
|
210 |
- stop("Using an in-memory backend for a disk-backed BSseq object ", |
|
211 |
- "is not supported.\n", |
|
199 |
+ if (!is.null(BACKEND)) { |
|
200 |
+ # NOTE: Currently do not support any in-memory realization |
|
201 |
+ # backends. If 'BACKEND' is NULL then an ordinary matrix |
|
202 |
+ # is returned rather than a matrix-backed DelayedMatrix. |
|
203 |
+ stop("The '", BACKEND, "' realization backend is not ", |
|
204 |
+ "supported.\n", |
|
212 | 205 |
"See help(\"BSmooth\") for details.", |
213 | 206 |
call. = FALSE) |
214 | 207 |
} |
215 |
- # Check compatability of 'BPPARAM' with the realization backend. |
|
216 |
- if (!.areBackendsInMemory(BACKEND)) { |
|
217 |
- if (!.isSingleMachineBackend(BPPARAM)) { |
|
218 |
- stop("The parallelisation strategy must use a single machine ", |
|
219 |
- "when using an on-disk realization backend.\n", |
|
220 |
- "See help(\"BSmooth\") for details.", |
|
221 |
- call. = FALSE) |
|
222 |
- } |
|
223 |
- } else { |
|
224 |
- if (!is.null(BACKEND)) { |
|
225 |
- # NOTE: Currently do not support any in-memory realization |
|
226 |
- # backends. If 'BACKEND' is NULL then an ordinary matrix |
|
227 |
- # is returned rather than a matrix-backed DelayedMatrix. |
|
228 |
- stop("The '", BACKEND, "' realization backend is not ", |
|
229 |
- "supported.\n", |
|
230 |
- "See help(\"BSmooth\") for details.", |
|
231 |
- call. = FALSE) |
|
232 |
- } |
|
233 |
- } |
|
234 | 208 |
} |
235 | 209 |
|
236 | 210 |
# Smoothing ---------------------------------------------------------------- |
... | ... |
@@ -247,51 +221,45 @@ BSmooth <- function(BSseq, |
247 | 221 |
# Set up "parallel" ArrayGrid over pos |
248 | 222 |
pos_grid <- ArbitraryArrayGrid(list(row_tickmarks, 1L)) |
249 | 223 |
# Construct RealizationSink objects (as required) |
250 |
- if (!is_redo) { |
|
251 |
- if (is.null(BACKEND)) { |
|
252 |
- coef_sink <- NULL |
|
253 |
- se.coef_sink <- NULL |
|
254 |
- sink_lock <- NULL |
|
255 |
- } else if (BACKEND == "HDF5Array") { |
|
256 |
- coef_sink <- HDF5RealizationSink( |
|
224 |
+ if (BACKEND == "HDF5Array") { |
|
225 |
+ coef_sink <- HDF5RealizationSink( |
|
226 |
+ dim = dim(M), |
|
227 |
+ # NOTE: Never allow dimnames. |
|
228 |
+ dimnames = NULL, |
|
229 |
+ type = "double", |
|
230 |
+ name = "coef", |
|
231 |
+ ...) |
|
232 |
+ on.exit(close(coef_sink), add = TRUE) |
|
233 |
+ sink_lock <- ipcid() |
|
234 |
+ on.exit(ipcremove(sink_lock), add = TRUE) |
|
235 |
+ if (keep.se) { |
|
236 |
+ se.coef_sink <- HDF5RealizationSink( |
|
257 | 237 |
dim = dim(M), |
258 | 238 |
# NOTE: Never allow dimnames. |
259 | 239 |
dimnames = NULL, |
260 | 240 |
type = "double", |
261 |
- name = "coef", |
|
241 |
+ name = "se.coef", |
|
262 | 242 |
...) |
263 |
- on.exit(close(coef_sink), add = TRUE) |
|
264 |
- sink_lock <- ipcid() |
|
265 |
- on.exit(ipcremove(sink_lock), add = TRUE) |
|
266 |
- if (keep.se) { |
|
267 |
- se.coef_sink <- HDF5RealizationSink( |
|
268 |
- dim = dim(M), |
|
269 |
- # NOTE: Never allow dimnames. |
|
270 |
- dimnames = NULL, |
|
271 |
- type = "double", |
|
272 |
- name = "se.coef", |
|
273 |
- ...) |
|
274 |
- on.exit(close(se.coef_sink), add = TRUE) |
|
275 |
- } else { |
|
276 |
- se.coef_sink <- NULL |
|
277 |
- } |
|
243 |
+ on.exit(close(se.coef_sink), add = TRUE) |
|
278 | 244 |
} else { |
279 |
- # TODO: This branch should probably never be entered because we |
|
280 |
- # (implicitly) only support in-memory or HDF5Array backends. |
|
281 |
- # However, we retain it for now (e.g., fstArray backend would |
|
282 |
- # use this until a dedicated branch was implemented). |
|
283 |
- coef_sink <- DelayedArray:::RealizationSink(dim(M), type = "double") |
|
284 |
- on.exit(close(coef_sink), add = TRUE) |
|
285 |
- sink_lock <- ipcid() |
|
286 |
- on.exit(ipcremove(sink_lock), add = TRUE) |
|
287 |
- if (keep.se) { |
|
288 |
- se.coef_sink <- DelayedArray:::RealizationSink( |
|
289 |
- dim(M), |
|
290 |
- type = "double") |
|
291 |
- on.exit(close(se.coef_sink), add = TRUE) |
|
292 |
- } else { |
|
293 |
- se.coef_sink <- NULL |
|
294 |
- } |
|
245 |
+ se.coef_sink <- NULL |
|
246 |
+ } |
|
247 |
+ } else { |
|
248 |
+ # TODO: This branch should probably never be entered because we |
|
249 |
+ # (implicitly) only support in-memory or HDF5Array backends. |
|
250 |
+ # However, we retain it for now (e.g., fstArray backend would |
|
251 |
+ # use this until a dedicated branch was implemented). |
|
252 |
+ coef_sink <- DelayedArray:::RealizationSink(dim(M), type = "double") |
|
253 |
+ on.exit(close(coef_sink), add = TRUE) |
|
254 |
+ sink_lock <- ipcid() |
|
255 |
+ on.exit(ipcremove(sink_lock), add = TRUE) |
|
256 |
+ if (keep.se) { |
|
257 |
+ se.coef_sink <- DelayedArray:::RealizationSink( |
|
258 |
+ dim(M), |
|
259 |
+ type = "double") |
|
260 |
+ on.exit(close(se.coef_sink), add = TRUE) |
|
261 |
+ } else { |
|
262 |
+ se.coef_sink <- NULL |
|
295 | 263 |
} |
296 | 264 |
} |
297 | 265 |
|
... | ... |
@@ -320,25 +288,11 @@ BSmooth <- function(BSseq, |
320 | 288 |
ns = ns, |
321 | 289 |
h = h, |
322 | 290 |
keep.se = keep.se, |
323 |
- BPREDO = BPREDO, |
|
324 | 291 |
BPPARAM = BPPARAM)) |
325 | 292 |
if (!all(bpok(smooth))) { |
326 |
- # TODO: Feels like stop() rather than warning() should be used, but |
|
327 |
- # stop() doesn't allow for the return of partial results; |
|
328 |
- # see https://support.bioconductor.org/p/109374/ |
|
329 |
- warning("BSmooth() encountered errors: ", |
|
330 |
- sum(!bpok(smooth)), " of ", length(smooth), |
|
331 |
- " smoothing tasks failed.\n", |
|
332 |
- "BSmooth() has returned partial results, including errors, ", |
|
333 |
- "for debugging purposes.\n", |
|
334 |
- "It may be possible to re-run just these failed smoothing ", |
|
335 |
- "tasks.\nSee help(\"BSmooth\")", |
|
336 |
- call. = FALSE) |
|
337 |
- # NOTE: Return intermediate results as well as all derived variables. |
|
338 |
- return(list(smooth = smooth, |
|
339 |
- coef_sink = coef_sink, |
|
340 |
- se.coef_sink = se.coef_sink, |
|
341 |
- BACKEND = BACKEND)) |
|
293 |
+ stop("BSmooth() encountered errors: ", |
|
294 |
+ sum(!bpok(smooth)), " of ", length(smooth), |
|
295 |
+ " smoothing tasks failed.") |
|
342 | 296 |
} |
343 | 297 |
# Construct coef and se.coef from results of smooth(). |
344 | 298 |
if (is.null(BACKEND)) { |
... | ... |
@@ -384,6 +338,3 @@ BSmooth <- function(BSseq, |
384 | 338 |
# For example, we could set custom messages within .BSmooth() using the |
385 | 339 |
# futile.logger syntax; see the BiocParalell vignette 'Errors, Logs and |
386 | 340 |
# Debugging in BiocParallel'. |
387 |
-# TODO: Remove NOTEs that are really documentation issues to the docs |
|
388 |
-# TODO: If the BSseq object is backed by a single HDF5 file then use that to |
|
389 |
-# write the 'coef' and 'se.coef' data. |
... | ... |
@@ -12,7 +12,6 @@ BSmooth(BSseq, |
12 | 12 |
maxGap = 10^8, |
13 | 13 |
keep.se = FALSE, |
14 | 14 |
verbose = TRUE, |
15 |
- BPREDO = list(), |
|
16 | 15 |
BPPARAM = bpparam(), |
17 | 16 |
BACKEND = getRealizationBackend(), |
18 | 17 |
..., |
... | ... |
@@ -37,8 +36,6 @@ BSmooth(BSseq, |
37 | 36 |
percent bigger and is currently not be used for anything in \pkg{bsseq}.} |
38 | 37 |
\item{verbose}{\strong{Deprecated}. |
39 | 38 |
See section, 'Parallelization and progress monitoring' for further details.} |
40 |
- \item{BPREDO}{Typically not used. |
|
41 |
- See section, 'Parallelization and progress monitoring' for further details.} |
|
42 | 39 |
\item{BPPARAM}{An optional \linkS4class{BiocParallelParam} instance |
43 | 40 |
determining the parallel back-end to be used during evaluation. Currently |
44 | 41 |
supported are \linkS4class{SerialParam} (Unix, Mac, Windows), |
... | ... |
@@ -113,9 +110,6 @@ BSmooth(BSseq, |
113 | 110 |
results be retained in-memory. |
114 | 111 |
\item Parallelization is now supported on Windows through the use of a |
115 | 112 |
\linkS4class{SnowParam} object as the value of \code{BPPARAM}. |
116 |
- \item Improved error handling makes it possible to gracefully resume |
|
117 |
- \code{BSmooth} jobs that encountered errors by re-doing only the necessary |
|
118 |
- tasks. |
|
119 | 113 |
\item Detailed and extensive job logging facilities. |
120 | 114 |
} |
121 | 115 |
|
... | ... |
@@ -149,26 +143,6 @@ BSmooth(BSseq, |
149 | 143 |
Please consult the \pkg{BiocParallel} documentation to take full advantage |
150 | 144 |
these advanced features. |
151 | 145 |
} |
152 |
- |
|
153 |
- \subsection{Re-running failed tasks}{ |
|
154 |
- By using \pkg{BiocParallel} for parallelization, \code{BSmooth()} may be |
|
155 |
- able to resume smoothing runs that failed due to hardware issues, for |
|
156 |
- example. To take advantage of this feature requires that |
|
157 |
- \code{stop.on.error = FALSE} was used to construct the |
|
158 |
- \linkS4class{BiocParallelParam} object passed to \code{BPPARAM}. When |
|
159 |
- \code{BSmooth()} encounters a potentially recoverable error, it will return |
|
160 |
- the intermediate results (including errors) and print instructions for how |
|
161 |
- the job may be resumed. This typically means re-running the exact same |
|
162 |
- command but with the addition of the the intermediate results passed via |
|
163 |
- the \code{BPREDO} argument. |
|
164 |
- |
|
165 |
- Please note that not all errors can be recovered from in this way. We |
|
166 |
- recommend inspecting the returned object of intermediate results to try to |
|
167 |
- identify the source of the error (e.g., bad input data, failed node on a |
|
168 |
- cluster job, memory-usage error on a shared-memory job). It may be helpful |
|
169 |
- to use the advanced logging capabilities of \pkg{BiocParallel} when |
|
170 |
- re-running the job. |
|
171 |
- } |
|
172 | 146 |
} |
173 | 147 |
|
174 | 148 |
\value{ |