... | ... |
@@ -109,7 +109,7 @@ |
109 | 109 |
ncol = 1) |
110 | 110 |
} |
111 | 111 |
ipclock(sink_lock) |
112 |
- write_block(x = sink, viewport = sink_grid[[b]], block = ans) |
|
112 |
+ write_block(sink, viewport = sink_grid[[b]], block = ans) |
|
113 | 113 |
ipcunlock(sink_lock) |
114 | 114 |
NULL |
115 | 115 |
}, |
... | ... |
@@ -194,7 +194,7 @@ |
194 | 194 |
nrow = 1) |
195 | 195 |
} |
196 | 196 |
ipclock(sink_lock) |
197 |
- write_block(x = sink, viewport = sink_grid[[b]], block = ans) |
|
197 |
+ write_block(sink, viewport = sink_grid[[b]], block = ans) |
|
198 | 198 |
ipcunlock(sink_lock) |
199 | 199 |
NULL |
200 | 200 |
}, |
... | ... |
@@ -272,7 +272,7 @@ blockApplyWithRealization <- function(x, FUN, ..., sink = NULL, x_grid = NULL, |
272 | 272 |
block_ans <- FUN(block, ...) |
273 | 273 |
# NOTE: This is the only part different from DelayedArray::blockApply() |
274 | 274 |
if (!is.null(sink)) { |
275 |
- write_block(x = sink, viewport = sink_viewport, block = block_ans) |
|
275 |
+ write_block(sink, viewport = sink_viewport, block = block_ans) |
|
276 | 276 |
block_ans <- NULL |
277 | 277 |
} |
278 | 278 |
if (DelayedArray:::get_verbose_block_processing()) { |
... | ... |
@@ -89,7 +89,7 @@ |
89 | 89 |
|
90 | 90 |
# Construct ArrayGrid -------------------------------------------------- |
91 | 91 |
|
92 |
- sink_grid <- colGrid(x = sink, ncol = 1L) |
|
92 |
+ sink_grid <- colAutoGrid(x = sink, ncol = 1L) |
|
93 | 93 |
list_of_cols <- split(seq_along(group), group)[ugroup] |
94 | 94 |
|
95 | 95 |
# Compute colsum() ----------------------------------------------------- |
... | ... |
@@ -174,7 +174,7 @@ |
174 | 174 |
|
175 | 175 |
# Construct ArrayGrid -------------------------------------------------- |
176 | 176 |
|
177 |
- sink_grid <- rowGrid(x = sink, nrow = 1L) |
|
177 |
+ sink_grid <- rowAutoGrid(x = sink, nrow = 1L) |
|
178 | 178 |
list_of_rows <- split(seq_along(group), group)[as.character(ugroup)] |
179 | 179 |
|
180 | 180 |
# Compute colsum() ----------------------------------------------------- |
... | ... |
@@ -251,8 +251,8 @@ blockApplyWithRealization <- function(x, FUN, ..., sink = NULL, x_grid = NULL, |
251 | 251 |
FUN <- match.fun(FUN) |
252 | 252 |
|
253 | 253 |
# Check conformable dots_grids and sinks_grids |
254 |
- x_grid <- DelayedArray:::.normarg_grid(x_grid, x) |
|
255 |
- sink_grid <- DelayedArray:::.normarg_grid(sink_grid, sink) |
|
254 |
+ x_grid <- DelayedArray:::normarg_grid(x_grid, x) |
|
255 |
+ sink_grid <- DelayedArray:::normarg_grid(sink_grid, sink) |
|
256 | 256 |
if (!identical(dim(x_grid), dim(sink_grid))) { |
257 | 257 |
stop("non-conformable 'x_grid' and 'sink_grid'") |
258 | 258 |
} |
- Temporary workaround for https://github.com/Bioconductor/DelayedArray/issues/41
- Ensures bsseq passes checks for next BioC release
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-# Functions/methods that would be good to have in DelayedArray |
|
1 |
+# Functions/methods that would be good to have in DelayedArray ----------------- |
|
2 | 2 |
|
3 | 3 |
.rowVars <- function(x, rows = NULL, cols = NULL, ...) { |
4 | 4 |
if (is(x, "DelayedArray")) { |
... | ... |
@@ -41,6 +41,176 @@ |
41 | 41 |
} |
42 | 42 |
} |
43 | 43 |
|
44 |
+# A temporary workaround to |
|
45 |
+# https://github.com/Bioconductor/DelayedArray/issues/41. |
|
46 |
+.colsum <- function(x, group, reorder = TRUE, na.rm = FALSE, filepath = NULL, |
|
47 |
+ name = NULL, chunkdim = NULL, level = NULL, |
|
48 |
+ type = c("double", "integer"), BPPARAM = bpparam()) { |
|
49 |
+ |
|
50 |
+ # NOTE: Special case for HDF5Matrix, otherwise defer to rowsum(). |
|
51 |
+ if (is(x, "HDF5Matrix")) { |
|
52 |
+ # Check arguments ------------------------------------------------------ |
|
53 |
+ |
|
54 |
+ type <- match.arg(type) |
|
55 |
+ if (any(!c(type(x), type) %in% c("integer", "double"))) { |
|
56 |
+ stop("'type(x)' must be 'integer' or 'double'.") |
|
57 |
+ } |
|
58 |
+ if (length(group) != NCOL(x)) { |
|
59 |
+ stop("incorrect length for 'group'") |
|
60 |
+ } |
|
61 |
+ if (anyNA(group)) { |
|
62 |
+ warning("missing values for 'group'") |
|
63 |
+ } |
|
64 |
+ ugroup <- unique(group) |
|
65 |
+ if (reorder) { |
|
66 |
+ ugroup <- sort(ugroup, na.last = TRUE, method = "quick") |
|
67 |
+ } |
|
68 |
+ # TODO: Default is type = "double" because rowSums2() returns numeric, |
|
69 |
+ # but it can be useful to manually override this when you know |
|
70 |
+ # the result is integer. |
|
71 |
+ |
|
72 |
+ # Construct RealizationSink -------------------------------------------- |
|
73 |
+ |
|
74 |
+ # NOTE: This is ultimately coerced to the output DelayedMatrix |
|
75 |
+ # object |
|
76 |
+ ans_nrow <- nrow(x) |
|
77 |
+ ans_ncol <- length(ugroup) |
|
78 |
+ ans_dim <- c(ans_nrow, ans_ncol) |
|
79 |
+ sink <- HDF5RealizationSink( |
|
80 |
+ dim = ans_dim, |
|
81 |
+ dimnames = list(rownames(x), as.character(ugroup)), |
|
82 |
+ type = type, |
|
83 |
+ filepath = filepath, |
|
84 |
+ name = name, |
|
85 |
+ chunkdim = chunkdim, |
|
86 |
+ level = level) |
|
87 |
+ sink_lock <- ipcid() |
|
88 |
+ on.exit(ipcremove(sink_lock), add = TRUE) |
|
89 |
+ |
|
90 |
+ # Construct ArrayGrid -------------------------------------------------- |
|
91 |
+ |
|
92 |
+ sink_grid <- colGrid(x = sink, ncol = 1L) |
|
93 |
+ list_of_cols <- split(seq_along(group), group)[ugroup] |
|
94 |
+ |
|
95 |
+ # Compute colsum() ----------------------------------------------------- |
|
96 |
+ |
|
97 |
+ bplapply( |
|
98 |
+ X = seq_along(sink_grid), |
|
99 |
+ FUN = function(b, x, sink, sink_lock, sink_grid, list_of_cols) { |
|
100 |
+ cols <- list_of_cols[[b]] |
|
101 |
+ if (length(cols) == 1L) { |
|
102 |
+ ans <- as.matrix(x[, cols, drop = FALSE]) |
|
103 |
+ if (na.rm) { |
|
104 |
+ ans[is.na(ans)] <- 0L |
|
105 |
+ } |
|
106 |
+ } else { |
|
107 |
+ ans <- matrix( |
|
108 |
+ rowSums2(x, cols = cols, na.rm = na.rm), |
|
109 |
+ ncol = 1) |
|
110 |
+ } |
|
111 |
+ ipclock(sink_lock) |
|
112 |
+ write_block(x = sink, viewport = sink_grid[[b]], block = ans) |
|
113 |
+ ipcunlock(sink_lock) |
|
114 |
+ NULL |
|
115 |
+ }, |
|
116 |
+ x = x, |
|
117 |
+ sink = sink, |
|
118 |
+ sink_lock = sink_lock, |
|
119 |
+ sink_grid = sink_grid, |
|
120 |
+ list_of_cols = list_of_cols, |
|
121 |
+ BPPARAM = BPPARAM) |
|
122 |
+ return(as(sink, "DelayedArray")) |
|
123 |
+ } |
|
124 |
+ |
|
125 |
+ colsum(x, group, reorder) |
|
126 |
+} |
|
127 |
+ |
|
128 |
+# A temporary workaround to |
|
129 |
+# https://github.com/Bioconductor/DelayedArray/issues/41. |
|
130 |
+.rowsum <- function(x, group, reorder = TRUE, na.rm = FALSE, filepath = NULL, |
|
131 |
+ name = NULL, chunkdim = NULL, level = NULL, |
|
132 |
+ type = c("double", "integer"), BPPARAM = bpparam()) { |
|
133 |
+ |
|
134 |
+ # NOTE: Special case for HDF5Matrix, otherwise defer to rowsum(). |
|
135 |
+ if (is(x, "HDF5Matrix")) { |
|
136 |
+ |
|
137 |
+ # Check arguments ------------------------------------------------------ |
|
138 |
+ |
|
139 |
+ if (any(!c(type(x), type) %in% c("integer", "double"))) { |
|
140 |
+ stop("'type(x)' must be 'integer' or 'double'.") |
|
141 |
+ } |
|
142 |
+ if (length(group) != NROW(x)) { |
|
143 |
+ stop("incorrect length for 'group'") |
|
144 |
+ } |
|
145 |
+ if (anyNA(group)) { |
|
146 |
+ warning("missing values for 'group'") |
|
147 |
+ } |
|
148 |
+ ugroup <- unique(group) |
|
149 |
+ if (reorder) { |
|
150 |
+ ugroup <- sort(ugroup, na.last = TRUE, method = "quick") |
|
151 |
+ } |
|
152 |
+ # NOTE: Default is type = "double" because colSums2() returns numeric, |
|
153 |
+ # but it can be useful to manually override this when you know the |
|
154 |
+ # result is integer. |
|
155 |
+ type <- match.arg(type) |
|
156 |
+ |
|
157 |
+ # Construct RealizationSink -------------------------------------------- |
|
158 |
+ |
|
159 |
+ # NOTE: This is ultimately coerced to the output DelayedMatrix |
|
160 |
+ # object |
|
161 |
+ ans_nrow <- length(ugroup) |
|
162 |
+ ans_ncol <- ncol(x) |
|
163 |
+ ans_dim <- c(ans_nrow, ans_ncol) |
|
164 |
+ sink <- HDF5RealizationSink( |
|
165 |
+ dim = ans_dim, |
|
166 |
+ dimnames = list(as.character(ugroup), colnames(x)), |
|
167 |
+ type = type, |
|
168 |
+ filepath = filepath, |
|
169 |
+ name = name, |
|
170 |
+ chunkdim = chunkdim, |
|
171 |
+ level = level) |
|
172 |
+ sink_lock <- ipcid() |
|
173 |
+ on.exit(ipcremove(sink_lock), add = TRUE) |
|
174 |
+ |
|
175 |
+ # Construct ArrayGrid -------------------------------------------------- |
|
176 |
+ |
|
177 |
+ sink_grid <- rowGrid(x = sink, nrow = 1L) |
|
178 |
+ list_of_rows <- split(seq_along(group), group)[as.character(ugroup)] |
|
179 |
+ |
|
180 |
+ # Compute colsum() ----------------------------------------------------- |
|
181 |
+ |
|
182 |
+ bplapply( |
|
183 |
+ X = seq_along(sink_grid), |
|
184 |
+ FUN = function(b, x, sink, sink_lock, sink_grid, list_of_rows) { |
|
185 |
+ rows <- list_of_rows[[b]] |
|
186 |
+ if (length(rows) == 1L) { |
|
187 |
+ ans <- as.matrix(x[rows, , drop = FALSE]) |
|
188 |
+ if (na.rm) { |
|
189 |
+ ans[is.na(ans)] <- 0L |
|
190 |
+ } |
|
191 |
+ } else { |
|
192 |
+ ans <- matrix( |
|
193 |
+ colSums2(x, rows = rows, na.rm = na.rm), |
|
194 |
+ nrow = 1) |
|
195 |
+ } |
|
196 |
+ ipclock(sink_lock) |
|
197 |
+ write_block(x = sink, viewport = sink_grid[[b]], block = ans) |
|
198 |
+ ipcunlock(sink_lock) |
|
199 |
+ NULL |
|
200 |
+ }, |
|
201 |
+ x = x, |
|
202 |
+ sink = sink, |
|
203 |
+ sink_lock = sink_lock, |
|
204 |
+ sink_grid = sink_grid, |
|
205 |
+ list_of_rows = list_of_rows, |
|
206 |
+ BPPARAM = BPPARAM) |
|
207 |
+ return(as(sink, "DelayedArray")) |
|
208 |
+ } |
|
209 |
+ |
|
210 |
+ rowsum(x, group, reorder) |
|
211 |
+} |
|
212 |
+ |
|
213 |
+ |
|
44 | 214 |
# Missing methods -------------------------------------------------------------- |
45 | 215 |
|
46 | 216 |
# NOTE: Copied from minfi |
... | ... |
@@ -135,12 +135,13 @@ blockApplyWithRealization <- function(x, FUN, ..., sink = NULL, x_grid = NULL, |
135 | 135 |
# can contain other files besides these; check. |
136 | 136 |
.isHDF5BackedBSseqUpdatable <- function(x) { |
137 | 137 |
stopifnot(is(x, "BSseq")) |
138 |
- if (!identical(.getBSseqBackends(x), "HDF5Array")) { |
|
138 |
+ assay_class <- vapply(assays(x, withDimnames = FALSE), class, character(1L)) |
|
139 |
+ if (!all(assay_class == "HDF5Matrix")) { |
|
139 | 140 |
return(FALSE) |
140 | 141 |
} |
141 | 142 |
paths <- vapply(assays(x, withDimnames = FALSE), path, character(1L)) |
142 |
- if (all(paths == paths[[1L]]) && all(basename(paths) == "assays.h5")) { |
|
143 |
- return(TRUE) |
|
143 |
+ if (!all(paths == paths[[1L]]) || !all(basename(paths) == "assays.h5")) { |
|
144 |
+ return(FALSE) |
|
144 | 145 |
} |
145 |
- FALSE |
|
146 |
+ TRUE |
|
146 | 147 |
} |
... | ... |
@@ -27,19 +27,6 @@ |
27 | 27 |
quantile(x, ...) |
28 | 28 |
} |
29 | 29 |
|
30 |
-.DelayedMatrix <- function(x) { |
|
31 |
- x_name <- deparse(substitute(x)) |
|
32 |
- X <- try(DelayedArray(x), silent = TRUE) |
|
33 |
- if (is(X, "try-error")) { |
|
34 |
- stop("Could not construct DelayedMatrix from '", x_name, "'", |
|
35 |
- call. = FALSE) |
|
36 |
- } |
|
37 |
- if (!is(X, "DelayedMatrix")) { |
|
38 |
- stop("'", x_name, "' must be matrix-like", call. = FALSE) |
|
39 |
- } |
|
40 |
- X |
|
41 |
-} |
|
42 |
- |
|
43 | 30 |
.isSimpleDelayedMatrix <- function(x) { |
44 | 31 |
is(x@seed, "matrix") |
45 | 32 |
} |
... | ... |
@@ -126,3 +126,34 @@ blockApplyWithRealization <- function(x, FUN, ..., sink = NULL, x_grid = NULL, |
126 | 126 |
BPPARAM = BPPARAM) |
127 | 127 |
} |
128 | 128 |
|
129 |
+# TODO: Needed? |
|
130 |
+.getSEDir <- function(x) { |
|
131 |
+ paths <- lapply(assays(x, withDimnames = FALSE), function(a) { |
|
132 |
+ try(path(a), silent = TRUE) |
|
133 |
+ }) |
|
134 |
+ if (any(vapply(paths, is, logical(1L), "try-error"))) { |
|
135 |
+ stop("Cannot extract 'dir'.") |
|
136 |
+ } |
|
137 |
+ unique_paths <- unique(unlist(paths, use.names = FALSE)) |
|
138 |
+ if (length(unique_paths) > 1) { |
|
139 |
+ stop("Assay data spread across multiple HDF5 files.") |
|
140 |
+ } |
|
141 |
+ dirs <- dirname(unlist(paths, use.names = FALSE)) |
|
142 |
+ unique(dirs) |
|
143 |
+} |
|
144 |
+ |
|
145 |
+# Should return TRUE for BSseq object created with read.bismark() or saved with |
|
146 |
+# HDF5Array::saveHDF5SummarizedExperiment(). |
|
147 |
+# TODO: Check dirname(paths[[1L]]) also contains 'se.rds'? It looks like dir |
|
148 |
+# can contain other files besides these; check. |
|
149 |
+.isHDF5BackedBSseqUpdatable <- function(x) { |
|
150 |
+ stopifnot(is(x, "BSseq")) |
|
151 |
+ if (!identical(.getBSseqBackends(x), "HDF5Array")) { |
|
152 |
+ return(FALSE) |
|
153 |
+ } |
|
154 |
+ paths <- vapply(assays(x, withDimnames = FALSE), path, character(1L)) |
|
155 |
+ if (all(paths == paths[[1L]]) && all(basename(paths) == "assays.h5")) { |
|
156 |
+ return(TRUE) |
|
157 |
+ } |
|
158 |
+ FALSE |
|
159 |
+} |
... | ... |
@@ -79,30 +79,6 @@ setMethod("dimnames", "arrayRealizationSink", function(x) { |
79 | 79 |
dimnames(x@result_envir$result) |
80 | 80 |
}) |
81 | 81 |
|
82 |
-# Helper functions for setting up ArrayGrid instances -------------------------- |
|
83 |
- |
|
84 |
-# NOTE: Copy of minfi:::colGrid() |
|
85 |
-# TODO: Perhaps move this to DelayedMatrixStats? |
|
86 |
-colGrid <- function(x) { |
|
87 |
- block_maxlen <- max(nrow(x), |
|
88 |
- DelayedArray:::get_default_block_maxlength(type(x))) |
|
89 |
- spacings <- DelayedArray:::get_spacings_for_linear_capped_length_blocks( |
|
90 |
- refdim = dim(x), |
|
91 |
- block_maxlen = block_maxlen) |
|
92 |
- RegularArrayGrid(dim(x), spacings) |
|
93 |
-} |
|
94 |
- |
|
95 |
-# NOTE: Copy of minfi:::rowGrid() |
|
96 |
-# TODO: Perhaps move this to DelayedMatrixStats? |
|
97 |
-rowGrid <- function(x) { |
|
98 |
- block_maxlen <- max(ncol(x), |
|
99 |
- DelayedArray:::get_default_block_maxlength(type(x))) |
|
100 |
- spacings <- DelayedArray:::get_spacings_for_hypercube_capped_length_blocks( |
|
101 |
- refdim = dim(x), |
|
102 |
- block_maxlen = block_maxlen) |
|
103 |
- RegularArrayGrid(dim(x), spacings) |
|
104 |
-} |
|
105 |
- |
|
106 | 82 |
# Advanced block processing routines ------------------------------------------- |
107 | 83 |
|
108 | 84 |
# NOTE: Copy of minfi:::blockApplyWithRealization() |
... | ... |
@@ -133,10 +133,7 @@ blockApplyWithRealization <- function(x, FUN, ..., sink = NULL, x_grid = NULL, |
133 | 133 |
} |
134 | 134 |
x_viewport <- x_grid[[b]] |
135 | 135 |
sink_viewport <- sink_grid[[b]] |
136 |
- block <- DelayedArray:::extract_block(x, x_viewport) |
|
137 |
- if (!is.array(block)) { |
|
138 |
- block <- DelayedArray:::.as_array_or_matrix(block) |
|
139 |
- } |
|
136 |
+ block <- read_block(x, x_viewport) |
|
140 | 137 |
attr(block, "from_grid") <- x_grid |
141 | 138 |
attr(block, "block_id") <- b |
142 | 139 |
block_ans <- FUN(block, ...) |
... | ... |
@@ -142,7 +142,7 @@ blockApplyWithRealization <- function(x, FUN, ..., sink = NULL, x_grid = NULL, |
142 | 142 |
block_ans <- FUN(block, ...) |
143 | 143 |
# NOTE: This is the only part different from DelayedArray::blockApply() |
144 | 144 |
if (!is.null(sink)) { |
145 |
- write_block_to_sink(block_ans, sink, sink_viewport) |
|
145 |
+ write_block(x = sink, viewport = sink_viewport, block = block_ans) |
|
146 | 146 |
block_ans <- NULL |
147 | 147 |
} |
148 | 148 |
if (DelayedArray:::get_verbose_block_processing()) { |
- There are still warnings and notes
- And still more stuff to re-factor
... | ... |
@@ -54,6 +54,31 @@ |
54 | 54 |
} |
55 | 55 |
} |
56 | 56 |
|
57 |
+# Missing methods -------------------------------------------------------------- |
|
58 |
+ |
|
59 |
+# NOTE: Copied from minfi |
|
60 |
+# TODO: Perhaps move this to DelayedMatrixStats? |
|
61 |
+# TODO: DelayedArray::type() for all RealizationSink subclasses |
|
62 |
+setMethod("type", "HDF5RealizationSink", function(x) { |
|
63 |
+ x@type |
|
64 |
+}) |
|
65 |
+# NOTE: Copied from minfi |
|
66 |
+# TODO: Perhaps move this to DelayedMatrixStats? |
|
67 |
+setMethod("type", "arrayRealizationSink", function(x) { |
|
68 |
+ DelayedArray::type(x@result_envir$result) |
|
69 |
+}) |
|
70 |
+# NOTE: Copied from minfi |
|
71 |
+# TODO: Perhaps move this to DelayedMatrixStats? |
|
72 |
+setMethod("type", "RleRealizationSink", function(x) { |
|
73 |
+ x@type |
|
74 |
+}) |
|
75 |
+# NOTE: Copied from minfi |
|
76 |
+# TODO: Perhaps move this to DelayedMatrixStats? |
|
77 |
+# TODO: dimnames() for all RealizationSink subclasses |
|
78 |
+setMethod("dimnames", "arrayRealizationSink", function(x) { |
|
79 |
+ dimnames(x@result_envir$result) |
|
80 |
+}) |
|
81 |
+ |
|
57 | 82 |
# Helper functions for setting up ArrayGrid instances -------------------------- |
58 | 83 |
|
59 | 84 |
# NOTE: Copy of minfi:::colGrid() |
... | ... |
@@ -53,3 +53,78 @@ |
53 | 53 |
stop("'type' = ", type, " is not supported") |
54 | 54 |
} |
55 | 55 |
} |
56 |
+ |
|
57 |
+# Helper functions for setting up ArrayGrid instances -------------------------- |
|
58 |
+ |
|
59 |
+# NOTE: Copy of minfi:::colGrid() |
|
60 |
+# TODO: Perhaps move this to DelayedMatrixStats? |
|
61 |
+colGrid <- function(x) { |
|
62 |
+ block_maxlen <- max(nrow(x), |
|
63 |
+ DelayedArray:::get_default_block_maxlength(type(x))) |
|
64 |
+ spacings <- DelayedArray:::get_spacings_for_linear_capped_length_blocks( |
|
65 |
+ refdim = dim(x), |
|
66 |
+ block_maxlen = block_maxlen) |
|
67 |
+ RegularArrayGrid(dim(x), spacings) |
|
68 |
+} |
|
69 |
+ |
|
70 |
+# NOTE: Copy of minfi:::rowGrid() |
|
71 |
+# TODO: Perhaps move this to DelayedMatrixStats? |
|
72 |
+rowGrid <- function(x) { |
|
73 |
+ block_maxlen <- max(ncol(x), |
|
74 |
+ DelayedArray:::get_default_block_maxlength(type(x))) |
|
75 |
+ spacings <- DelayedArray:::get_spacings_for_hypercube_capped_length_blocks( |
|
76 |
+ refdim = dim(x), |
|
77 |
+ block_maxlen = block_maxlen) |
|
78 |
+ RegularArrayGrid(dim(x), spacings) |
|
79 |
+} |
|
80 |
+ |
|
81 |
+# Advanced block processing routines ------------------------------------------- |
|
82 |
+ |
|
83 |
+# NOTE: Copy of minfi:::blockApplyWithRealization() |
|
84 |
+# TODO: Perhaps move this to DelayedMatrixStats? |
|
85 |
+# NOTE: DelayedArray::blockApply() with the option to write the blocks to |
|
86 |
+# 'sink'. Useful, for example, to apply a function across column-blocks |
|
87 |
+# of a DelayedMatrix, write these results to disk, and then wrap |
|
88 |
+# these in a DelayedMatrix. |
|
89 |
+# TODO: See https://github.com/Bioconductor/DelayedArray/issues/10 |
|
90 |
+blockApplyWithRealization <- function(x, FUN, ..., sink = NULL, x_grid = NULL, |
|
91 |
+ sink_grid = NULL, BPREDO = list(), |
|
92 |
+ BPPARAM = bpparam()) { |
|
93 |
+ FUN <- match.fun(FUN) |
|
94 |
+ |
|
95 |
+ # Check conformable dots_grids and sinks_grids |
|
96 |
+ x_grid <- DelayedArray:::.normarg_grid(x_grid, x) |
|
97 |
+ sink_grid <- DelayedArray:::.normarg_grid(sink_grid, sink) |
|
98 |
+ if (!identical(dim(x_grid), dim(sink_grid))) { |
|
99 |
+ stop("non-conformable 'x_grid' and 'sink_grid'") |
|
100 |
+ } |
|
101 |
+ |
|
102 |
+ # Loop over blocks of `x` and write to `sink` |
|
103 |
+ nblock <- length(x_grid) |
|
104 |
+ bplapply(seq_len(nblock), function(b) { |
|
105 |
+ if (DelayedArray:::get_verbose_block_processing()) { |
|
106 |
+ message("Processing block ", b, "/", nblock, " ... ", |
|
107 |
+ appendLF = FALSE) |
|
108 |
+ } |
|
109 |
+ x_viewport <- x_grid[[b]] |
|
110 |
+ sink_viewport <- sink_grid[[b]] |
|
111 |
+ block <- DelayedArray:::extract_block(x, x_viewport) |
|
112 |
+ if (!is.array(block)) { |
|
113 |
+ block <- DelayedArray:::.as_array_or_matrix(block) |
|
114 |
+ } |
|
115 |
+ attr(block, "from_grid") <- x_grid |
|
116 |
+ attr(block, "block_id") <- b |
|
117 |
+ block_ans <- FUN(block, ...) |
|
118 |
+ # NOTE: This is the only part different from DelayedArray::blockApply() |
|
119 |
+ if (!is.null(sink)) { |
|
120 |
+ write_block_to_sink(block_ans, sink, sink_viewport) |
|
121 |
+ block_ans <- NULL |
|
122 |
+ } |
|
123 |
+ if (DelayedArray:::get_verbose_block_processing()) { |
|
124 |
+ message("OK") |
|
125 |
+ } |
|
126 |
+ }, |
|
127 |
+ BPREDO = BPREDO, |
|
128 |
+ BPPARAM = BPPARAM) |
|
129 |
+} |
|
130 |
+ |
- BSseq objects can once again use ordinary matrix objects as assays.
- Reimplement `BSmooth()` more-or-less from scratch:
- Switch from 'parallel' to 'BiocParallel' for parallelization. This brings some notable improvements:
- Smoothed results can now be written directly to an on-disk realization backend by the worker. This dramatically reduces memory usage compared to previous versions of 'bsseq' that required all results be retained in-memory.
- Parallelization is now supported on Windows through the use of a 'SnowParam' object as the value of `BPPARAM`.
- Improved error handling makes it possible to gracefully resume `BSmooth()` jobs that encountered errors by re-doing only the necessary tasks.
- Detailed and extensive job logging facilities.
- Fix bug in `BSmooth()` with the `maxGap` parameter.
- Re-factor BSseq() constructor and add fast, internal .BSseq() constructor.
- Re-factor collapseBSseq() and combine(). Should be much more performant.
- Use beachmat to implement fast validity checking of 'M' and 'Cov' matrices.
- Resave BS.chr22 (supplied data) using integer for storage mode of assays to reduce size.
- Switch from RUnit to testthat. testthat has better integration with code coverage tools that help when refactoring.
... | ... |
@@ -44,48 +44,12 @@ |
44 | 44 |
is(x@seed, "matrix") |
45 | 45 |
} |
46 | 46 |
|
47 |
-# NOTE: Equivalent to rowSums2(x[, j, drop = FALSE]) but does it using a |
|
48 |
-# delayed operation and always returns a nrow(x) x 1 DelayedMatrix |
|
49 |
-.delayed_rowSums2 <- function(x, j) { |
|
50 |
- Reduce(`+`, lapply(j, function(jj) x[, jj, drop = FALSE])) |
|
51 |
-} |
|
52 |
- |
|
53 |
-# NOTE: Equivalent to colSums2(x[i, , drop = FALSE]) but does it using a |
|
54 |
-# delayed operation and always returns a 1 x ncol(x) DelayedMatrix |
|
55 |
-.delayed_colSums2 <- function(x, i) { |
|
56 |
- Reduce(`+`, lapply(i, function(ii) x[ii, , drop = FALSE])) |
|
57 |
-} |
|
58 |
- |
|
59 |
-# MARGIN = 1: collapse using rowSums |
|
60 |
-# MARGIN = 2: collapse using colSums |
|
61 |
-.collapseDelayedMatrix <- function(x, sp, MARGIN, BACKEND = NULL) { |
|
62 |
- stopifnot(is(x, "DelayedMatrix")) |
|
63 |
- if (MARGIN == 1) { |
|
64 |
- if (is.null(BACKEND)) { |
|
65 |
- collapsed_x <- do.call(cbind, lapply(sp, function(j) { |
|
66 |
- rowSums2(x[, j, drop = FALSE]) |
|
67 |
- })) |
|
68 |
- } else { |
|
69 |
- collapsed_x <- do.call(cbind, lapply(sp, function(j) { |
|
70 |
- .delayed_rowSums2(x, j) |
|
71 |
- })) |
|
72 |
- # NOTE: Need to manually add colnames when using this method |
|
73 |
- colnames(collapsed_x) <- names(sp) |
|
74 |
- } |
|
75 |
- } else if (MARGIN == 2) { |
|
76 |
- if (is.null(BACKEND)) { |
|
77 |
- collapsed_x <- do.call(rbind, lapply(sp, function(i) { |
|
78 |
- colSums2(x[i, , drop = FALSE]) |
|
79 |
- })) |
|
80 |
- } else { |
|
81 |
- collapsed_x <- do.call(rbind, lapply(sp, function(i) { |
|
82 |
- .delayed_colSums2(x, i) |
|
83 |
- })) |
|
84 |
- # NOTE: Need to manually add rownames when using this method |
|
85 |
- rownames(collapsed_x) <- names(sp) |
|
86 |
- } |
|
47 |
+.zero_type <- function(type) { |
|
48 |
+ if (identical(type, "integer")) { |
|
49 |
+ fill <- 0L |
|
50 |
+ } else if (identical(type, "double")) { |
|
51 |
+ fill <- 0 |
|
87 | 52 |
} else { |
88 |
- stop("'MARGIN' must be 1 or 2") |
|
53 |
+ stop("'type' = ", type, " is not supported") |
|
89 | 54 |
} |
90 |
- realize(collapsed_x, BACKEND = BACKEND) |
|
91 | 55 |
} |
This is a straight find and replace of (col|row)(Sums|Means) with DelayedMatrixStats equivalents. Immediately, this is to work around an apparent bug in DelayedArray,rowSums-method (https://github.com/Bioconductor/DelayedArray/issues/16) but long term want to be using the optimised implementations in DelayedMatrixStat (e.g., using `cols` and `rows` args).
... | ... |
@@ -44,15 +44,15 @@ |
44 | 44 |
is(x@seed, "matrix") |
45 | 45 |
} |
46 | 46 |
|
47 |
-# NOTE: Equivalent to rowSums(x[, j, drop = FALSE]) but does it using a |
|
47 |
+# NOTE: Equivalent to rowSums2(x[, j, drop = FALSE]) but does it using a |
|
48 | 48 |
# delayed operation and always returns a nrow(x) x 1 DelayedMatrix |
49 |
-.delayed_rowSums <- function(x, j) { |
|
49 |
+.delayed_rowSums2 <- function(x, j) { |
|
50 | 50 |
Reduce(`+`, lapply(j, function(jj) x[, jj, drop = FALSE])) |
51 | 51 |
} |
52 | 52 |
|
53 |
-# NOTE: Equivalent to colSums(x[i, , drop = FALSE]) but does it using a |
|
53 |
+# NOTE: Equivalent to colSums2(x[i, , drop = FALSE]) but does it using a |
|
54 | 54 |
# delayed operation and always returns a 1 x ncol(x) DelayedMatrix |
55 |
-.delayed_colSums <- function(x, i) { |
|
55 |
+.delayed_colSums2 <- function(x, i) { |
|
56 | 56 |
Reduce(`+`, lapply(i, function(ii) x[ii, , drop = FALSE])) |
57 | 57 |
} |
58 | 58 |
|
... | ... |
@@ -63,11 +63,11 @@ |
63 | 63 |
if (MARGIN == 1) { |
64 | 64 |
if (is.null(BACKEND)) { |
65 | 65 |
collapsed_x <- do.call(cbind, lapply(sp, function(j) { |
66 |
- rowSums(x[, j, drop = FALSE]) |
|
66 |
+ rowSums2(x[, j, drop = FALSE]) |
|
67 | 67 |
})) |
68 | 68 |
} else { |
69 | 69 |
collapsed_x <- do.call(cbind, lapply(sp, function(j) { |
70 |
- .delayed_rowSums(x, j) |
|
70 |
+ .delayed_rowSums2(x, j) |
|
71 | 71 |
})) |
72 | 72 |
# NOTE: Need to manually add colnames when using this method |
73 | 73 |
colnames(collapsed_x) <- names(sp) |
... | ... |
@@ -75,11 +75,11 @@ |
75 | 75 |
} else if (MARGIN == 2) { |
76 | 76 |
if (is.null(BACKEND)) { |
77 | 77 |
collapsed_x <- do.call(rbind, lapply(sp, function(i) { |
78 |
- colSums(x[i, , drop = FALSE]) |
|
78 |
+ colSums2(x[i, , drop = FALSE]) |
|
79 | 79 |
})) |
80 | 80 |
} else { |
81 | 81 |
collapsed_x <- do.call(rbind, lapply(sp, function(i) { |
82 |
- .delayed_colSums(x, i) |
|
82 |
+ .delayed_colSums2(x, i) |
|
83 | 83 |
})) |
84 | 84 |
# NOTE: Need to manually add rownames when using this method |
85 | 85 |
rownames(collapsed_x) <- names(sp) |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,91 @@ |
1 |
+# Functions/methods that would be good to have in DelayedArray |
|
2 |
+ |
|
3 |
+.rowVars <- function(x, rows = NULL, cols = NULL, ...) { |
|
4 |
+ if (is(x, "DelayedArray")) { |
|
5 |
+ if (!is.null(rows)) { |
|
6 |
+ x <- x[rows, ] |
|
7 |
+ } |
|
8 |
+ if (!is.null(cols)) { |
|
9 |
+ x <- x[, cols] |
|
10 |
+ } |
|
11 |
+ row_vars <- rowVars(as.array(x), ...) |
|
12 |
+ } else { |
|
13 |
+ row_vars <- rowVars(x, rows = rows, cols = cols, ...) |
|
14 |
+ } |
|
15 |
+ row_vars |
|
16 |
+} |
|
17 |
+ |
|
18 |
+.rowSds <- function(x, rows = NULL, cols = NULL, ...) { |
|
19 |
+ row_vars <- .rowVars(x, rows = rows, cols = cols, ...) |
|
20 |
+ sqrt(row_vars) |
|
21 |
+} |
|
22 |
+ |
|
23 |
+.quantile <- function(x, ...) { |
|
24 |
+ if (is(x, "DelayedArray")) { |
|
25 |
+ x <- as.array(x) |
|
26 |
+ } |
|
27 |
+ quantile(x, ...) |
|
28 |
+} |
|
29 |
+ |
|
30 |
+.DelayedMatrix <- function(x) { |
|
31 |
+ x_name <- deparse(substitute(x)) |
|
32 |
+ X <- try(DelayedArray(x), silent = TRUE) |
|
33 |
+ if (is(X, "try-error")) { |
|
34 |
+ stop("Could not construct DelayedMatrix from '", x_name, "'", |
|
35 |
+ call. = FALSE) |
|
36 |
+ } |
|
37 |
+ if (!is(X, "DelayedMatrix")) { |
|
38 |
+ stop("'", x_name, "' must be matrix-like", call. = FALSE) |
|
39 |
+ } |
|
40 |
+ X |
|
41 |
+} |
|
42 |
+ |
|
43 |
+.isSimpleDelayedMatrix <- function(x) { |
|
44 |
+ is(x@seed, "matrix") |
|
45 |
+} |
|
46 |
+ |
|
47 |
+# NOTE: Equivalent to rowSums(x[, j, drop = FALSE]) but does it using a |
|
48 |
+# delayed operation and always returns a nrow(x) x 1 DelayedMatrix |
|
49 |
+.delayed_rowSums <- function(x, j) { |
|
50 |
+ Reduce(`+`, lapply(j, function(jj) x[, jj, drop = FALSE])) |
|
51 |
+} |
|
52 |
+ |
|
53 |
+# NOTE: Equivalent to colSums(x[i, , drop = FALSE]) but does it using a |
|
54 |
+# delayed operation and always returns a 1 x ncol(x) DelayedMatrix |
|
55 |
+.delayed_colSums <- function(x, i) { |
|
56 |
+ Reduce(`+`, lapply(i, function(ii) x[ii, , drop = FALSE])) |
|
57 |
+} |
|
58 |
+ |
|
59 |
+# MARGIN = 1: collapse using rowSums |
|
60 |
+# MARGIN = 2: collapse using colSums |
|
61 |
+.collapseDelayedMatrix <- function(x, sp, MARGIN, BACKEND = NULL) { |
|
62 |
+ stopifnot(is(x, "DelayedMatrix")) |
|
63 |
+ if (MARGIN == 1) { |
|
64 |
+ if (is.null(BACKEND)) { |
|
65 |
+ collapsed_x <- do.call(cbind, lapply(sp, function(j) { |
|
66 |
+ rowSums(x[, j, drop = FALSE]) |
|
67 |
+ })) |
|
68 |
+ } else { |
|
69 |
+ collapsed_x <- do.call(cbind, lapply(sp, function(j) { |
|
70 |
+ .delayed_rowSums(x, j) |
|
71 |
+ })) |
|
72 |
+ # NOTE: Need to manually add colnames when using this method |
|
73 |
+ colnames(collapsed_x) <- names(sp) |
|
74 |
+ } |
|
75 |
+ } else if (MARGIN == 2) { |
|
76 |
+ if (is.null(BACKEND)) { |
|
77 |
+ collapsed_x <- do.call(rbind, lapply(sp, function(i) { |
|
78 |
+ colSums(x[i, , drop = FALSE]) |
|
79 |
+ })) |
|
80 |
+ } else { |
|
81 |
+ collapsed_x <- do.call(rbind, lapply(sp, function(i) { |
|
82 |
+ .delayed_colSums(x, i) |
|
83 |
+ })) |
|
84 |
+ # NOTE: Need to manually add rownames when using this method |
|
85 |
+ rownames(collapsed_x) <- names(sp) |
|
86 |
+ } |
|
87 |
+ } else { |
|
88 |
+ stop("'MARGIN' must be 1 or 2") |
|
89 |
+ } |
|
90 |
+ realize(collapsed_x, BACKEND = BACKEND) |
|
91 |
+} |