Browse code

Add .colsum() and .rowsum()

- Temporary workaround for https://github.com/Bioconductor/DelayedArray/issues/41
- Ensures bsseq passes checks for next BioC release

Peter Hickey authored on 23/04/2019 02:33:24
Showing3 changed files

... ...
@@ -272,7 +272,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
272 272
 
273 273
     ol <- findOverlaps(loci, collapsed_loci, type = "equal")
274 274
     group <- subjectHits(ol)
275
-    M <- rowsum(
275
+    M <- .rowsum(
276 276
         x = assay(BSseq, "M", withDimnames = FALSE),
277 277
         group = group,
278 278
         # NOTE: reorder = TRUE to ensure same row-order as collapsed_loci.
... ...
@@ -283,7 +283,7 @@ strandCollapse <- function(BSseq, shift = TRUE, BPPARAM = bpparam(),
283 283
         chunkdim = chunkdim,
284 284
         level = level,
285 285
         type = type)
286
-    Cov <- rowsum(
286
+    Cov <- .rowsum(
287 287
         x = assay(BSseq, "Cov", withDimnames = FALSE),
288 288
         group = group,
289 289
         # NOTE: reorder = TRUE to ensure same row-order as collapsed_loci.
... ...
@@ -1,4 +1,4 @@
1
-# Functions/methods that would be good to have in DelayedArray
1
+# Functions/methods that would be good to have in DelayedArray -----------------
2 2
 
3 3
 .rowVars <- function(x, rows = NULL, cols = NULL, ...) {
4 4
     if (is(x, "DelayedArray")) {
... ...
@@ -41,6 +41,176 @@
41 41
     }
42 42
 }
43 43
 
44
+# A temporary workaround to
45
+# https://github.com/Bioconductor/DelayedArray/issues/41.
46
+.colsum <- function(x, group, reorder = TRUE, na.rm = FALSE, filepath = NULL,
47
+                    name = NULL, chunkdim = NULL, level = NULL,
48
+                    type = c("double", "integer"), BPPARAM = bpparam()) {
49
+
50
+    # NOTE: Special case for HDF5Matrix, otherwise defer to rowsum().
51
+    if (is(x, "HDF5Matrix")) {
52
+        # Check arguments ------------------------------------------------------
53
+
54
+        type <- match.arg(type)
55
+        if (any(!c(type(x), type) %in% c("integer", "double"))) {
56
+            stop("'type(x)' must be 'integer' or 'double'.")
57
+        }
58
+        if (length(group) != NCOL(x)) {
59
+            stop("incorrect length for 'group'")
60
+        }
61
+        if (anyNA(group)) {
62
+            warning("missing values for 'group'")
63
+        }
64
+        ugroup <- unique(group)
65
+        if (reorder) {
66
+            ugroup <- sort(ugroup, na.last = TRUE, method = "quick")
67
+        }
68
+        # TODO: Default is type = "double" because rowSums2() returns numeric,
69
+        #       but it can be useful to manually override this when you know
70
+        #       the result is integer.
71
+
72
+        # Construct RealizationSink --------------------------------------------
73
+
74
+        # NOTE: This is ultimately coerced to the output DelayedMatrix
75
+        #       object
76
+        ans_nrow <- nrow(x)
77
+        ans_ncol <- length(ugroup)
78
+        ans_dim <- c(ans_nrow, ans_ncol)
79
+        sink <- HDF5RealizationSink(
80
+            dim = ans_dim,
81
+            dimnames = list(rownames(x), as.character(ugroup)),
82
+            type = type,
83
+            filepath = filepath,
84
+            name = name,
85
+            chunkdim = chunkdim,
86
+            level = level)
87
+        sink_lock <- ipcid()
88
+        on.exit(ipcremove(sink_lock), add = TRUE)
89
+
90
+        # Construct ArrayGrid --------------------------------------------------
91
+
92
+        sink_grid <- colGrid(x = sink, ncol = 1L)
93
+        list_of_cols <- split(seq_along(group), group)[ugroup]
94
+
95
+        # Compute colsum() -----------------------------------------------------
96
+
97
+        bplapply(
98
+            X = seq_along(sink_grid),
99
+            FUN = function(b, x, sink, sink_lock, sink_grid, list_of_cols) {
100
+                cols <- list_of_cols[[b]]
101
+                if (length(cols) == 1L) {
102
+                    ans <- as.matrix(x[, cols, drop = FALSE])
103
+                    if (na.rm) {
104
+                        ans[is.na(ans)] <- 0L
105
+                    }
106
+                } else {
107
+                    ans <- matrix(
108
+                        rowSums2(x, cols = cols, na.rm = na.rm),
109
+                        ncol = 1)
110
+                }
111
+                ipclock(sink_lock)
112
+                write_block(x = sink, viewport = sink_grid[[b]], block = ans)
113
+                ipcunlock(sink_lock)
114
+                NULL
115
+            },
116
+            x = x,
117
+            sink = sink,
118
+            sink_lock = sink_lock,
119
+            sink_grid = sink_grid,
120
+            list_of_cols = list_of_cols,
121
+            BPPARAM = BPPARAM)
122
+        return(as(sink, "DelayedArray"))
123
+    }
124
+
125
+    colsum(x, group, reorder)
126
+}
127
+
128
+# A temporary workaround to
129
+# https://github.com/Bioconductor/DelayedArray/issues/41.
130
+.rowsum <- function(x, group, reorder = TRUE, na.rm = FALSE, filepath = NULL,
131
+                    name = NULL, chunkdim = NULL, level = NULL,
132
+                    type = c("double", "integer"), BPPARAM = bpparam()) {
133
+
134
+    # NOTE: Special case for HDF5Matrix, otherwise defer to rowsum().
135
+    if (is(x, "HDF5Matrix")) {
136
+
137
+        # Check arguments ------------------------------------------------------
138
+
139
+        if (any(!c(type(x), type) %in% c("integer", "double"))) {
140
+            stop("'type(x)' must be 'integer' or 'double'.")
141
+        }
142
+        if (length(group) != NROW(x)) {
143
+            stop("incorrect length for 'group'")
144
+        }
145
+        if (anyNA(group)) {
146
+            warning("missing values for 'group'")
147
+        }
148
+        ugroup <- unique(group)
149
+        if (reorder) {
150
+            ugroup <- sort(ugroup, na.last = TRUE, method = "quick")
151
+        }
152
+        # NOTE: Default is type = "double" because colSums2() returns numeric,
153
+        #       but it can be useful to manually override this when you know the
154
+        #       result is integer.
155
+        type <- match.arg(type)
156
+
157
+        # Construct RealizationSink --------------------------------------------
158
+
159
+        # NOTE: This is ultimately coerced to the output DelayedMatrix
160
+        #       object
161
+        ans_nrow <- length(ugroup)
162
+        ans_ncol <- ncol(x)
163
+        ans_dim <- c(ans_nrow, ans_ncol)
164
+        sink <- HDF5RealizationSink(
165
+            dim = ans_dim,
166
+            dimnames = list(as.character(ugroup), colnames(x)),
167
+            type = type,
168
+            filepath = filepath,
169
+            name = name,
170
+            chunkdim = chunkdim,
171
+            level = level)
172
+        sink_lock <- ipcid()
173
+        on.exit(ipcremove(sink_lock), add = TRUE)
174
+
175
+        # Construct ArrayGrid --------------------------------------------------
176
+
177
+        sink_grid <- rowGrid(x = sink, nrow = 1L)
178
+        list_of_rows <- split(seq_along(group), group)[as.character(ugroup)]
179
+
180
+        # Compute colsum() -----------------------------------------------------
181
+
182
+        bplapply(
183
+            X = seq_along(sink_grid),
184
+            FUN = function(b, x, sink, sink_lock, sink_grid, list_of_rows) {
185
+                rows <- list_of_rows[[b]]
186
+                if (length(rows) == 1L) {
187
+                    ans <- as.matrix(x[rows, , drop = FALSE])
188
+                    if (na.rm) {
189
+                        ans[is.na(ans)] <- 0L
190
+                    }
191
+                } else {
192
+                    ans <- matrix(
193
+                        colSums2(x, rows = rows, na.rm = na.rm),
194
+                        nrow = 1)
195
+                }
196
+                ipclock(sink_lock)
197
+                write_block(x = sink, viewport = sink_grid[[b]], block = ans)
198
+                ipcunlock(sink_lock)
199
+                NULL
200
+            },
201
+            x = x,
202
+            sink = sink,
203
+            sink_lock = sink_lock,
204
+            sink_grid = sink_grid,
205
+            list_of_rows = list_of_rows,
206
+            BPPARAM = BPPARAM)
207
+        return(as(sink, "DelayedArray"))
208
+    }
209
+
210
+    rowsum(x, group, reorder)
211
+}
212
+
213
+
44 214
 # Missing methods --------------------------------------------------------------
45 215
 
46 216
 # NOTE: Copied from minfi
... ...
@@ -98,7 +98,7 @@ collapseBSseq <- function(BSseq, group, BPPARAM = bpparam(),
98 98
 
99 99
     # Collapse 'M' and 'Cov' matrices ------------------------------------------
100 100
 
101
-    M <- colsum(
101
+    M <- .colsum(
102 102
         x = getCoverage(BSseq, type = "M", withDimnames = FALSE),
103 103
         group = group,
104 104
         reorder = FALSE,
... ...
@@ -108,7 +108,7 @@ collapseBSseq <- function(BSseq, group, BPPARAM = bpparam(),
108 108
         chunkdim = chunkdim,
109 109
         level = level,
110 110
         type = type)
111
-    Cov <- colsum(
111
+    Cov <- .colsum(
112 112
         x = getCoverage(BSseq, type = "Cov", withDimnames = FALSE),
113 113
         group = group,
114 114
         reorder = FALSE,