... | ... |
@@ -23,15 +23,15 @@ |
23 | 23 |
|
24 | 24 |
## because we keep the argument 'parallel.sz' for backwards compatibility |
25 | 25 |
## we need to harmonize it with the contents of BPPARAM |
26 |
- if (parallel.sz > 1L && class(BPPARAM) == "SerialParam") { |
|
26 |
+ if (parallel.sz > 1L && inherits(BPPARAM, "SerialParam")) { |
|
27 | 27 |
BPPARAM=MulticoreParam(progressbar=verbose, workers=parallel.sz, tasks=100) |
28 |
- } else if (parallel.sz == 1L && class(BPPARAM) != "SerialParam") { |
|
28 |
+ } else if (parallel.sz == 1L && !inherits(BPPARAM, "SerialParam")) { |
|
29 | 29 |
parallel.sz <- bpnworkers(BPPARAM) |
30 |
- } else if (parallel.sz > 1L && class(BPPARAM) != "SerialParam") { |
|
30 |
+ } else if (parallel.sz > 1L && !inherits(BPPARAM, "SerialParam")) { |
|
31 | 31 |
bpworkers(BPPARAM) <- parallel.sz |
32 | 32 |
} |
33 | 33 |
|
34 |
- if (class(BPPARAM) != "SerialParam" && verbose) |
|
34 |
+ if (!inherits(BPPARAM, "SerialParam") && verbose) |
|
35 | 35 |
cat(sprintf("Setting parallel calculations through a %s back-end\nwith workers=%d and tasks=100.\n", |
36 | 36 |
class(BPPARAM), parallel.sz)) |
37 | 37 |
|
... | ... |
@@ -149,7 +149,7 @@ rankHDF5 <- function(X){ |
149 | 149 |
.fastRndWalk2 <- function(gSetIdx, geneRanking, ra_block) { |
150 | 150 |
n <- length(geneRanking) |
151 | 151 |
k <- length(gSetIdx) |
152 |
- idxs <- sort.int(fastmatch::fmatch(gSetIdx, geneRanking)) |
|
152 |
+ idxs <- sort.int(match(gSetIdx, geneRanking)) |
|
153 | 153 |
stepCDFinGeneSet2 <- |
154 | 154 |
sum(ra_block[geneRanking[idxs]] * (n - idxs + 1)) / |
155 | 155 |
sum((ra_block[geneRanking[idxs]])) |
... | ... |
@@ -116,15 +116,13 @@ zscoreDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
116 | 116 |
|
117 | 117 |
Z <- t(DelayedArray::scale(t(X))) |
118 | 118 |
|
119 |
- es <- bplapply(geneSets, h5BackendRealization, combinezDelayed, |
|
120 |
- Z, BPPARAM = BPPARAM) |
|
121 |
- |
|
122 |
- es <- do.call(rbind, es) |
|
123 |
- rownames(es) <- names(geneSets) |
|
124 |
- colnames(es) <- colnames(X) |
|
125 |
- |
|
119 |
+ es <- bplapply(geneSets, function(gSetIdx, Z){ |
|
120 |
+ x <- combinezDelayed(gSetIdx, Z) |
|
121 |
+ x <- matrix(x, 1, length(x)) |
|
122 |
+ x <- writeHDF5Array(x) |
|
123 |
+ }, Z, BPPARAM = BPPARAM) |
|
124 |
+ es <- do.call(DelayedArray::rbind, es) |
|
126 | 125 |
es <- as(es, "HDF5Array") |
127 |
- |
|
128 | 126 |
es |
129 | 127 |
} |
130 | 128 |
|
... | ... |
@@ -183,7 +181,7 @@ ssgseaDelayed <- function(X, geneSets, alpha=0.25, parallel.sz, |
183 | 181 |
res |
184 | 182 |
}, BPPARAM=BPPARAM) |
185 | 183 |
|
186 |
- es <- do.call(cbind, es) |
|
184 |
+ es <- do.call(DelayedArray::cbind, es) |
|
187 | 185 |
es |
188 | 186 |
|
189 | 187 |
if (normalization) { |
... | ... |
@@ -85,7 +85,7 @@ h5BackendRealization <- function(gSetIdx, FUN, Z, bpp) { |
85 | 85 |
|
86 | 86 |
} |
87 | 87 |
|
88 |
-rightsingularsvdvectorgset <- function(gSetIdx, Z, bpp) { |
|
88 |
+rightsvdDelayed <- function(gSetIdx, Z, bpp) { |
|
89 | 89 |
s <-runRandomSVD(Z[gSetIdx,], k=1, nu=0, BPPARAM = bpp) |
90 | 90 |
s$v[, 1] |
91 | 91 |
} |
... | ... |
@@ -95,7 +95,7 @@ plageDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
95 | 95 |
|
96 | 96 |
Z <- t(DelayedArray::scale(t(X))) |
97 | 97 |
|
98 |
- es <- bplapply(geneSets, h5BackendRealization, rightsingularsvdvectorgset, |
|
98 |
+ es <- bplapply(geneSets, h5BackendRealization, rightsvdDelayed, |
|
99 | 99 |
Z, bpp=BPPARAM, BPPARAM=BPPARAM) |
100 | 100 |
|
101 | 101 |
es <- do.call(rbind, es) |
... | ... |
@@ -171,8 +171,6 @@ ssgseaDelayed <- function(X, geneSets, alpha=0.25, parallel.sz, |
171 | 171 |
|
172 | 172 |
Ra <- abs(R)^alpha |
173 | 173 |
|
174 |
- Ra <- as(Ra, "HDF5Array") |
|
175 |
- |
|
176 | 174 |
es <- bplapply(as.list(1:n), function(j) { |
177 | 175 |
geneRanking <- order(R[, j], decreasing=TRUE) |
178 | 176 |
colRa <- Ra[,j] |
... | ... |
@@ -65,7 +65,7 @@ |
65 | 65 |
|
66 | 66 |
} |
67 | 67 |
|
68 |
-h5BackendRealization <- function(gSetIdx, FUN, Z) { |
|
68 |
+h5BackendRealization <- function(gSetIdx, FUN, Z, bpp) { |
|
69 | 69 |
|
70 | 70 |
FUN <- match.fun(FUN) |
71 | 71 |
|
... | ... |
@@ -74,7 +74,7 @@ h5BackendRealization <- function(gSetIdx, FUN, Z) { |
74 | 74 |
# step 2: create grid over sink |
75 | 75 |
sink_grid <- rowAutoGrid(sink, nrow = 1) |
76 | 76 |
# step 3: create block using FUN and write it on sink |
77 |
- block <- FUN(gSetIdx, Z) |
|
77 |
+ block <- FUN(gSetIdx, Z, bpp) |
|
78 | 78 |
block <- matrix(block, 1, length(block)) |
79 | 79 |
sink <- DelayedArray::write_block(sink, sink_grid[[1L]], block) |
80 | 80 |
# step 4: close the sink as an hdf5Array |
... | ... |
@@ -85,8 +85,8 @@ h5BackendRealization <- function(gSetIdx, FUN, Z) { |
85 | 85 |
|
86 | 86 |
} |
87 | 87 |
|
88 |
-rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
|
89 |
- s <- svd(Z[gSetIdx, ]) |
|
88 |
+rightsingularsvdvectorgset <- function(gSetIdx, Z, bpp) { |
|
89 |
+ s <-runRandomSVD(Z[gSetIdx,], k=1, nu=0, BPPARAM = bpp) |
|
90 | 90 |
s$v[, 1] |
91 | 91 |
} |
92 | 92 |
|
... | ... |
@@ -96,7 +96,7 @@ plageDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
96 | 96 |
Z <- t(DelayedArray::scale(t(X))) |
97 | 97 |
|
98 | 98 |
es <- bplapply(geneSets, h5BackendRealization, rightsingularsvdvectorgset, |
99 |
- Z, BPPARAM=BPPARAM) |
|
99 |
+ Z, bpp=BPPARAM, BPPARAM=BPPARAM) |
|
100 | 100 |
|
101 | 101 |
es <- do.call(rbind, es) |
102 | 102 |
rownames(es) <- names(geneSets) |
... | ... |
@@ -70,9 +70,9 @@ h5BackendRealization <- function(gSetIdx, FUN, Z) { |
70 | 70 |
FUN <- match.fun(FUN) |
71 | 71 |
|
72 | 72 |
# step 1: create realization sink |
73 |
- sink <- HDF5Array::HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
73 |
+ sink <- HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
74 | 74 |
# step 2: create grid over sink |
75 |
- sink_grid <- DelayedArray::rowAutoGrid(sink, nrow = 1) |
|
75 |
+ sink_grid <- rowAutoGrid(sink, nrow = 1) |
|
76 | 76 |
# step 3: create block using FUN and write it on sink |
77 | 77 |
block <- FUN(gSetIdx, Z) |
78 | 78 |
block <- matrix(block, 1, length(block)) |
... | ... |
@@ -131,26 +131,22 @@ zscoreDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
131 | 131 |
#### rank function for hdf5 files using sink and grid methods |
132 | 132 |
rankHDF5 <- function(X){ |
133 | 133 |
sink <- HDF5RealizationSink(dim(X)) |
134 |
- sink_grid <- colAutoGrid(sink, ncol = 1) |
|
135 |
- X_grid <- colAutoGrid(X, ncol=1) |
|
134 |
+ grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
136 | 135 |
|
137 |
- |
|
138 |
- FUN <- function(sink_grid, sink){ |
|
139 |
- bid <- currentBlockId() |
|
140 |
- X_viewport <- X_grid[[bid]] |
|
141 |
- block <- read_block(X, X_viewport) |
|
142 |
- block <- as.integer(rank(block)) |
|
143 |
- block <- matrix(block, length(block), 1) |
|
144 |
- write_block(sink, sink_grid, block) |
|
136 |
+ colRanks_byBlock <- function(grid, sink){ |
|
137 |
+ block <- read_block(X, grid) |
|
138 |
+ block <- t(colRanks(block, ties.method = "average")) |
|
139 |
+ mode(block) <- "integer" |
|
140 |
+ write_block(sink, grid, block) |
|
145 | 141 |
} |
146 | 142 |
|
147 |
- sink <- viewportReduce(FUN, sink_grid, sink) |
|
143 |
+ sink <- viewportReduce(colRanks_byBlock, grid, sink) |
|
148 | 144 |
close(sink) |
149 | 145 |
res <- as(sink, "DelayedArray") |
150 | 146 |
res |
151 | 147 |
} |
152 | 148 |
|
153 |
-## slightly modified .fastRndWalk() for porpuse of only |
|
149 |
+## slightly modified .fastRndWalk() for porpoise of only |
|
154 | 150 |
## receiving a vector and not a matrix column for sums |
155 | 151 |
.fastRndWalk2 <- function(gSetIdx, geneRanking, ra_block) { |
156 | 152 |
n <- length(geneRanking) |
... | ... |
@@ -164,58 +160,45 @@ rankHDF5 <- function(X){ |
164 | 160 |
walkStat |
165 | 161 |
} |
166 | 162 |
|
167 |
-delayedGeneRanking <- function(r_block, ra_block, geneSets, BPPARAM){ |
|
168 |
- geneRanking <- order(r_block, decreasing=TRUE) |
|
169 |
- res <- bplapply(geneSets, .fastRndWalk2, geneRanking, ra_block, BPPARAM = BPPARAM) |
|
170 |
- return(unlist(res)) |
|
171 |
-} |
|
172 | 163 |
|
173 | 164 |
ssgseaDelayed <- function(X, geneSets, alpha=0.25, parallel.sz, |
174 | 165 |
normalization=TRUE, verbose=TRUE, |
175 | 166 |
BPPARAM=SerialParam(progressbar=verbose)) { |
176 |
- |
|
167 |
+ |
|
177 | 168 |
n <- ncol(X) |
178 | 169 |
|
179 | 170 |
R <- rankHDF5(X) |
180 | 171 |
|
181 | 172 |
Ra <- abs(R)^alpha |
182 | 173 |
|
183 |
- # step1: creating a grid for both DelayedArray objects |
|
184 |
- # that will be iterated |
|
185 |
- R_grid <- colAutoGrid(R, ncol=1) |
|
186 |
- Ra_grid <- colAutoGrid(Ra, ncol=1) |
|
187 |
- |
|
188 |
- # step2: create a sink and a grid for it |
|
189 |
- sink <- HDF5RealizationSink(dim = c(1L, ncol(R))) |
|
190 |
- sink_grid <- colAutoGrid(sink, ncol=1) |
|
191 |
- |
|
192 |
- # step3: function that will read blocks of R and |
|
193 |
- # Ra grids, apply transformation and write into |
|
194 |
- # sink object |
|
195 |
- FUN <- function(sink_grid, sink, geneSets){ |
|
196 |
- bid <- currentBlockId() |
|
197 |
- r_block <- read_block(R, R_grid[[bid]]) |
|
198 |
- ra_block <- read_block(Ra, Ra_grid[[bid]]) |
|
199 |
- res_block <- delayedGeneRanking(r_block, ra_block, geneSets, BPPARAM) |
|
200 |
- res_block <- matrix(res_block, 1, length(res_block)) |
|
201 |
- write_block(sink, sink_grid, res_block) |
|
202 |
- } |
|
203 |
- |
|
204 |
- sink <- viewportReduce(FUN, sink_grid, sink, geneSets) |
|
174 |
+ Ra <- as(Ra, "HDF5Array") |
|
205 | 175 |
|
206 |
- # step4: close sink as a DelayedArray object |
|
207 |
- close(sink) |
|
208 |
- es <- as(sink, "DelayedArray") |
|
176 |
+ es <- bplapply(as.list(1:n), function(j) { |
|
177 |
+ geneRanking <- order(R[, j], decreasing=TRUE) |
|
178 |
+ colRa <- Ra[,j] |
|
179 |
+ sink <- HDF5RealizationSink(c(length(names(geneSets)), 1L)) |
|
180 |
+ sink_grid <- colAutoGrid(sink, ncol=1) |
|
181 |
+ es_sample <- lapply(geneSets, .fastRndWalk2, geneRanking, colRa) |
|
182 |
+ sink <- DelayedArray::write_block(sink, sink_grid[[1L]], do.call("rbind", es_sample)) |
|
183 |
+ DelayedArray::close(sink) |
|
184 |
+ res <- as(sink, "DelayedArray") |
|
185 |
+ res |
|
186 |
+ }, BPPARAM=BPPARAM) |
|
187 |
+ |
|
188 |
+ es <- do.call(cbind, es) |
|
189 |
+ es |
|
209 | 190 |
|
210 | 191 |
if (normalization) { |
211 | 192 |
## normalize enrichment scores by using the entire data set, as indicated |
212 | 193 |
## by Barbie et al., 2009, online methods, pg. 2 |
213 | 194 |
sink <- HDF5RealizationSink(dim(es)) |
214 |
- sink_grid <- colAutoGrid(sink, ncol=1) |
|
215 |
- es_grid <- colAutoGrid(es, ncol=1) |
|
195 |
+ sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
196 |
+ es_grid <- defaultAutoGrid(es, block.shape="first-dim-grows-first") |
|
197 |
+ fin <- range(es)[2] |
|
198 |
+ ini <- range(es)[1] |
|
216 | 199 |
for(bid in seq_along(sink_grid)){ |
217 | 200 |
block <- read_block(es, es_grid[[bid]]) |
218 |
- block <- block / (range(es)[2] - range(es)[1]) |
|
201 |
+ block <- apply(block, 2, function(x) x / ( fin - ini)) |
|
219 | 202 |
write_block(sink, sink_grid[[bid]], block) |
220 | 203 |
} |
221 | 204 |
close(sink) |
... | ... |
@@ -34,6 +34,14 @@ |
34 | 34 |
if (class(BPPARAM) != "SerialParam" && verbose) |
35 | 35 |
cat(sprintf("Setting parallel calculations through a %s back-end\nwith workers=%d and tasks=100.\n", |
36 | 36 |
class(BPPARAM), parallel.sz)) |
37 |
+ |
|
38 |
+ if (method == "ssgsea") { |
|
39 |
+ if(verbose) |
|
40 |
+ cat("Estimating ssGSEA scores for", length(gset.idx.list),"gene sets.\n") |
|
41 |
+ |
|
42 |
+ return(ssgseaDelayed(expr, gset.idx.list, alpha=tau, parallel.sz=parallel.sz, |
|
43 |
+ normalization=ssgsea.norm, verbose=verbose, BPPARAM=BPPARAM)) |
|
44 |
+ } |
|
37 | 45 |
|
38 | 46 |
if (method == "zscore") { |
39 | 47 |
if (rnaseq) |
... | ... |
@@ -118,4 +126,106 @@ zscoreDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
118 | 126 |
es <- as(es, "HDF5Array") |
119 | 127 |
|
120 | 128 |
es |
121 |
-} |
|
122 | 129 |
\ No newline at end of file |
130 |
+} |
|
131 |
+ |
|
132 |
+#### rank function for hdf5 files using sink and grid methods |
|
133 |
+rankHDF5 <- function(X){ |
|
134 |
+ sink <- HDF5RealizationSink(dim(X)) |
|
135 |
+ sink_grid <- colAutoGrid(sink, ncol = 1) |
|
136 |
+ X_grid <- colAutoGrid(X, ncol=1) |
|
137 |
+ |
|
138 |
+ |
|
139 |
+ FUN <- function(sink_grid, sink){ |
|
140 |
+ bid <- currentBlockId() |
|
141 |
+ X_viewport <- X_grid[[bid]] |
|
142 |
+ block <- read_block(X, X_viewport) |
|
143 |
+ block <- as.integer(rank(block)) |
|
144 |
+ block <- matrix(block, length(block), 1) |
|
145 |
+ write_block(sink, sink_grid, block) |
|
146 |
+ } |
|
147 |
+ |
|
148 |
+ sink <- viewportReduce(FUN, sink_grid, sink) |
|
149 |
+ close(sink) |
|
150 |
+ res <- as(sink, "DelayedArray") |
|
151 |
+ res |
|
152 |
+} |
|
153 |
+ |
|
154 |
+## slightly modified .fastRndWalk() for porpuse of only |
|
155 |
+## receiving a vector and not a matrix column for sums |
|
156 |
+.fastRndWalk2 <- function(gSetIdx, geneRanking, ra_block) { |
|
157 |
+ n <- length(geneRanking) |
|
158 |
+ k <- length(gSetIdx) |
|
159 |
+ idxs <- sort.int(fastmatch::fmatch(gSetIdx, geneRanking)) |
|
160 |
+ stepCDFinGeneSet2 <- |
|
161 |
+ sum(ra_block[geneRanking[idxs]] * (n - idxs + 1)) / |
|
162 |
+ sum((ra_block[geneRanking[idxs]])) |
|
163 |
+ stepCDFoutGeneSet2 <- (n * (n + 1) / 2 - sum(n - idxs + 1)) / (n - k) |
|
164 |
+ walkStat <- stepCDFinGeneSet2 - stepCDFoutGeneSet2 |
|
165 |
+ walkStat |
|
166 |
+} |
|
167 |
+ |
|
168 |
+delayedGeneRanking <- function(r_block, ra_block, geneSets, BPPARAM){ |
|
169 |
+ geneRanking <- order(r_block, decreasing=TRUE) |
|
170 |
+ res <- bplapply(geneSets, .fastRndWalk2, geneRanking, ra_block, BPPARAM = BPPARAM) |
|
171 |
+ return(unlist(res)) |
|
172 |
+} |
|
173 |
+ |
|
174 |
+ssgseaDelayed <- function(X, geneSets, alpha=0.25, parallel.sz, |
|
175 |
+ normalization=TRUE, verbose=TRUE, |
|
176 |
+ BPPARAM=SerialParam(progressbar=verbose)) { |
|
177 |
+ |
|
178 |
+ n <- ncol(X) |
|
179 |
+ |
|
180 |
+ R <- rankHDF5(X) |
|
181 |
+ |
|
182 |
+ Ra <- abs(R)^alpha |
|
183 |
+ |
|
184 |
+ # step1: creating a grid for both DelayedArray objects |
|
185 |
+ # that will be iterated |
|
186 |
+ R_grid <- colAutoGrid(R, ncol=1) |
|
187 |
+ Ra_grid <- colAutoGrid(Ra, ncol=1) |
|
188 |
+ |
|
189 |
+ # step2: create a sink and a grid for it |
|
190 |
+ sink <- HDF5RealizationSink(dim = c(1L, ncol(R))) |
|
191 |
+ sink_grid <- colAutoGrid(sink, ncol=1) |
|
192 |
+ |
|
193 |
+ # step3: function that will read blocks of R and |
|
194 |
+ # Ra grids, apply transformation and write into |
|
195 |
+ # sink object |
|
196 |
+ FUN <- function(sink_grid, sink, geneSets){ |
|
197 |
+ bid <- currentBlockId() |
|
198 |
+ r_block <- read_block(R, R_grid[[bid]]) |
|
199 |
+ ra_block <- read_block(Ra, Ra_grid[[bid]]) |
|
200 |
+ res_block <- delayedGeneRanking(r_block, ra_block, geneSets, BPPARAM) |
|
201 |
+ res_block <- matrix(res_block, 1, length(res_block)) |
|
202 |
+ write_block(sink, sink_grid, res_block) |
|
203 |
+ } |
|
204 |
+ |
|
205 |
+ sink <- viewportReduce(FUN, sink_grid, sink, geneSets) |
|
206 |
+ |
|
207 |
+ # step4: close sink as a DelayedArray object |
|
208 |
+ close(sink) |
|
209 |
+ es <- as(sink, "DelayedArray") |
|
210 |
+ |
|
211 |
+ if (normalization) { |
|
212 |
+ ## normalize enrichment scores by using the entire data set, as indicated |
|
213 |
+ ## by Barbie et al., 2009, online methods, pg. 2 |
|
214 |
+ sink <- HDF5RealizationSink(dim(es)) |
|
215 |
+ sink_grid <- colAutoGrid(sink, ncol=1) |
|
216 |
+ es_grid <- colAutoGrid(es, ncol=1) |
|
217 |
+ for(bid in seq_along(sink_grid)){ |
|
218 |
+ block <- read_block(es, es_grid[[bid]]) |
|
219 |
+ block <- block / (range(es)[2] - range(es)[1]) |
|
220 |
+ write_block(sink, sink_grid[[bid]], block) |
|
221 |
+ } |
|
222 |
+ close(sink) |
|
223 |
+ es <- as(sink, "DelayedArray") |
|
224 |
+ } |
|
225 |
+ |
|
226 |
+ rownames(es) <- names(geneSets) |
|
227 |
+ colnames(es) <- colnames(X) |
|
228 |
+ |
|
229 |
+ es <- as(es, "HDF5Array") |
|
230 |
+ es |
|
231 |
+} |
|
232 |
+ |
... | ... |
@@ -57,27 +57,19 @@ |
57 | 57 |
|
58 | 58 |
} |
59 | 59 |
|
60 |
-rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
|
61 |
- s <- svd(Z[gSetIdx, ]) |
|
62 |
- s$v[, 1] |
|
63 |
-} |
|
64 |
- |
|
65 |
-svdDelayed <- function(gSetIdx, Z) { |
|
60 |
+h5BackendRealization <- function(gSetIdx, FUN, Z) { |
|
66 | 61 |
|
67 |
- # step 1: creating a HDF5 sink object |
|
68 |
- sink <- HDF5Array::HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
62 |
+ FUN <- match.fun(FUN) |
|
69 | 63 |
|
70 |
- #step 2: creating the sink's grid |
|
64 |
+ # step 1: create realization sink |
|
65 |
+ sink <- HDF5Array::HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
66 |
+ # step 2: create grid over sink |
|
71 | 67 |
sink_grid <- DelayedArray::rowAutoGrid(sink, nrow = 1) |
72 |
- |
|
73 |
- # step 3: creating the block and writing it in the sink |
|
74 |
- # object using the sink's grid |
|
75 |
- block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
68 |
+ # step 3: create block using FUN and write it on sink |
|
69 |
+ block <- FUN(gSetIdx, Z) |
|
76 | 70 |
block <- matrix(block, 1, length(block)) |
77 | 71 |
sink <- DelayedArray::write_block(sink, sink_grid[[1L]], block) |
78 |
- |
|
79 |
- # step 4: closing the sink and realizing it |
|
80 |
- # in a hdf5 array object |
|
72 |
+ # step 4: close the sink as an hdf5Array |
|
81 | 73 |
DelayedArray::close(sink) |
82 | 74 |
res <- as(sink, "DelayedArray") |
83 | 75 |
|
... | ... |
@@ -85,13 +77,18 @@ svdDelayed <- function(gSetIdx, Z) { |
85 | 77 |
|
86 | 78 |
} |
87 | 79 |
|
80 |
+rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
|
81 |
+ s <- svd(Z[gSetIdx, ]) |
|
82 |
+ s$v[, 1] |
|
83 |
+} |
|
84 |
+ |
|
88 | 85 |
plageDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
89 | 86 |
BPPARAM=SerialParam(progressbar=verbose)) { |
90 | 87 |
|
91 | 88 |
Z <- t(DelayedArray::scale(t(X))) |
92 | 89 |
|
93 |
- es <- bplapply(geneSets, svdDelayed, Z, |
|
94 |
- BPPARAM=BPPARAM) |
|
90 |
+ es <- bplapply(geneSets, h5BackendRealization, rightsingularsvdvectorgset, |
|
91 |
+ Z, BPPARAM=BPPARAM) |
|
95 | 92 |
|
96 | 93 |
es <- do.call(rbind, es) |
97 | 94 |
rownames(es) <- names(geneSets) |
... | ... |
@@ -102,26 +99,23 @@ plageDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
102 | 99 |
es |
103 | 100 |
} |
104 | 101 |
|
105 |
-combinez2 <- function(gSetIdx, Z){ |
|
102 |
+combinezDelayed <- function(gSetIdx, Z){ |
|
106 | 103 |
DelayedMatrixStats::colSums2(Z[gSetIdx,]) / sqrt(length(gSetIdx)) |
107 | 104 |
} |
108 | 105 |
|
109 | 106 |
zscoreDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
110 |
- BPPARAM=SerialParam(progressbar=verbose)) { |
|
107 |
+ BPPARAM=SerialParam(progressbar=verbose)){ |
|
111 | 108 |
|
112 | 109 |
Z <- t(DelayedArray::scale(t(X))) |
113 | 110 |
|
114 |
- sink <- HDF5Array::HDF5RealizationSink(dim=c(length(names(geneSets)), ncol(X)), |
|
115 |
- dimnames = list(names(geneSets), colnames(X))) |
|
111 |
+ es <- bplapply(geneSets, h5BackendRealization, combinezDelayed, |
|
112 |
+ Z, BPPARAM = BPPARAM) |
|
116 | 113 |
|
117 |
- sink_grid <- DelayedArray::rowAutoGrid(sink, nrow = 1) |
|
114 |
+ es <- do.call(rbind, es) |
|
115 |
+ rownames(es) <- names(geneSets) |
|
116 |
+ colnames(es) <- colnames(X) |
|
117 |
+ |
|
118 |
+ es <- as(es, "HDF5Array") |
|
118 | 119 |
|
119 |
- for(bid in seq_along(sink_grid)){ |
|
120 |
- block <- combinez2(geneSets[[bid]], Z) |
|
121 |
- block <- matrix(block, 1, length(block)) |
|
122 |
- sink <- DelayedArray::write_block(sink, sink_grid[[bid]], block) |
|
123 |
- } |
|
124 |
- DelayedArray::close(sink) |
|
125 |
- es <- as(sink, "DelayedArray") |
|
126 | 120 |
es |
127 |
-} |
|
121 |
+} |
|
128 | 122 |
\ No newline at end of file |
... | ... |
@@ -35,6 +35,15 @@ |
35 | 35 |
cat(sprintf("Setting parallel calculations through a %s back-end\nwith workers=%d and tasks=100.\n", |
36 | 36 |
class(BPPARAM), parallel.sz)) |
37 | 37 |
|
38 |
+ if (method == "zscore") { |
|
39 |
+ if (rnaseq) |
|
40 |
+ stop("rnaseq=TRUE does not work with method='zscore'.") |
|
41 |
+ |
|
42 |
+ if(verbose) |
|
43 |
+ cat("Estimating combined z-scores for", length(gset.idx.list), "gene sets.\n") |
|
44 |
+ |
|
45 |
+ return(zscoreDelayed(expr, gset.idx.list, parallel.sz, verbose, BPPARAM=BPPARAM)) |
|
46 |
+ } |
|
38 | 47 |
|
39 | 48 |
if (method == "plage") { |
40 | 49 |
if (rnaseq) |
... | ... |
@@ -91,3 +100,26 @@ plageDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
91 | 100 |
es |
92 | 101 |
} |
93 | 102 |
|
103 |
+combinez2 <- function(gSetIdx, Z){ |
|
104 |
+ DelayedMatrixStats::colSums2(Z[gSetIdx,]) / sqrt(length(gSetIdx)) |
|
105 |
+} |
|
106 |
+ |
|
107 |
+zscoreDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
|
108 |
+ BPPARAM=SerialParam(progressbar=verbose)) { |
|
109 |
+ |
|
110 |
+ Z <- t(DelayedArray::scale(t(X))) |
|
111 |
+ |
|
112 |
+ sink <- HDF5Array::HDF5RealizationSink(dim=c(length(names(geneSets)), ncol(X)), |
|
113 |
+ dimnames = list(names(geneSets), colnames(X))) |
|
114 |
+ |
|
115 |
+ sink_grid <- DelayedArray::rowAutoGrid(sink, nrow = 1) |
|
116 |
+ |
|
117 |
+ for(bid in seq_along(sink_grid)){ |
|
118 |
+ block <- combinez2(geneSets[[bid]], Z) |
|
119 |
+ block <- matrix(block, 1, length(block)) |
|
120 |
+ sink <- DelayedArray::write_block(sink, sink_grid[[bid]], block) |
|
121 |
+ } |
|
122 |
+ DelayedArray::close(sink) |
|
123 |
+ es <- as(sink, "DelayedArray") |
|
124 |
+ es |
|
125 |
+} |
... | ... |
@@ -68,7 +68,7 @@ svdDelayed <- function(gSetIdx, Z) { |
68 | 68 |
block <- matrix(block, 1, length(block)) |
69 | 69 |
sink <- DelayedArray::write_block(sink, sink_grid[[1L]], block) |
70 | 70 |
|
71 |
- # step 4: closing the sink and realizating it |
|
71 |
+ # step 4: closing the sink and realizing it |
|
72 | 72 |
# in a hdf5 array object |
73 | 73 |
DelayedArray::close(sink) |
74 | 74 |
res <- as(sink, "DelayedArray") |
... | ... |
@@ -79,7 +79,7 @@ svdDelayed <- function(gSetIdx, Z) { |
79 | 79 |
|
80 | 80 |
plageDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
81 | 81 |
BPPARAM=SerialParam(progressbar=verbose)) { |
82 |
- |
|
82 |
+ |
|
83 | 83 |
Z <- t(DelayedArray::scale(t(X))) |
84 | 84 |
|
85 | 85 |
es <- bplapply(geneSets, svdDelayed, Z, |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,94 @@ |
1 |
+.gsvaDelayedArray <- function(expr, gset.idx.list, |
|
2 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
3 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
4 |
+ rnaseq=FALSE, |
|
5 |
+ abs.ranking=FALSE, |
|
6 |
+ parallel.sz=1L, |
|
7 |
+ mx.diff=TRUE, |
|
8 |
+ tau=1, |
|
9 |
+ kernel=TRUE, |
|
10 |
+ ssgsea.norm=TRUE, |
|
11 |
+ verbose=TRUE, |
|
12 |
+ BPPARAM=SerialParam(progressbar=verbose)) { |
|
13 |
+ |
|
14 |
+ if (length(gset.idx.list) == 0) |
|
15 |
+ stop("The gene set list is empty! Filter may be too stringent.") |
|
16 |
+ |
|
17 |
+ if (any(lengths(gset.idx.list) == 1)) |
|
18 |
+ warning("Some gene sets have size one. Consider setting 'min.sz > 1'.") |
|
19 |
+ |
|
20 |
+ parallel.sz <- as.integer(parallel.sz) |
|
21 |
+ if (parallel.sz < 1L) |
|
22 |
+ parallel.sz <- 1L |
|
23 |
+ |
|
24 |
+ ## because we keep the argument 'parallel.sz' for backwards compatibility |
|
25 |
+ ## we need to harmonize it with the contents of BPPARAM |
|
26 |
+ if (parallel.sz > 1L && class(BPPARAM) == "SerialParam") { |
|
27 |
+ BPPARAM=MulticoreParam(progressbar=verbose, workers=parallel.sz, tasks=100) |
|
28 |
+ } else if (parallel.sz == 1L && class(BPPARAM) != "SerialParam") { |
|
29 |
+ parallel.sz <- bpnworkers(BPPARAM) |
|
30 |
+ } else if (parallel.sz > 1L && class(BPPARAM) != "SerialParam") { |
|
31 |
+ bpworkers(BPPARAM) <- parallel.sz |
|
32 |
+ } |
|
33 |
+ |
|
34 |
+ if (class(BPPARAM) != "SerialParam" && verbose) |
|
35 |
+ cat(sprintf("Setting parallel calculations through a %s back-end\nwith workers=%d and tasks=100.\n", |
|
36 |
+ class(BPPARAM), parallel.sz)) |
|
37 |
+ |
|
38 |
+ |
|
39 |
+ if (method == "plage") { |
|
40 |
+ if (rnaseq) |
|
41 |
+ stop("rnaseq=TRUE does not work with method='plage'.") |
|
42 |
+ |
|
43 |
+ if(verbose) |
|
44 |
+ cat("Estimating PLAGE scores for", length(gset.idx.list),"gene sets.\n") |
|
45 |
+ |
|
46 |
+ return(plageDelayed(expr, gset.idx.list, parallel.sz, verbose, BPPARAM=BPPARAM)) |
|
47 |
+ } |
|
48 |
+ |
|
49 |
+ |
|
50 |
+} |
|
51 |
+ |
|
52 |
+rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
|
53 |
+ s <- svd(Z[gSetIdx, ]) |
|
54 |
+ s$v[, 1] |
|
55 |
+} |
|
56 |
+ |
|
57 |
+svdDelayed <- function(gSetIdx, Z) { |
|
58 |
+ |
|
59 |
+ # step 1: creating a HDF5 sink object |
|
60 |
+ sink <- HDF5Array::HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
61 |
+ |
|
62 |
+ #step 2: creating the sink's grid |
|
63 |
+ sink_grid <- DelayedArray::rowAutoGrid(sink, nrow = 1) |
|
64 |
+ |
|
65 |
+ # step 3: creating the block and writing it in the sink |
|
66 |
+ # object using the sink's grid |
|
67 |
+ block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
68 |
+ block <- matrix(block, 1, length(block)) |
|
69 |
+ sink <- DelayedArray::write_block(sink, sink_grid[[1L]], block) |
|
70 |
+ |
|
71 |
+ # step 4: closing the sink and realizating it |
|
72 |
+ # in a hdf5 array object |
|
73 |
+ DelayedArray::close(sink) |
|
74 |
+ res <- as(sink, "DelayedArray") |
|
75 |
+ |
|
76 |
+ res |
|
77 |
+ |
|
78 |
+} |
|
79 |
+ |
|
80 |
+plageDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
|
81 |
+ BPPARAM=SerialParam(progressbar=verbose)) { |
|
82 |
+ |
|
83 |
+ Z <- t(DelayedArray::scale(t(X))) |
|
84 |
+ |
|
85 |
+ es <- bplapply(geneSets, svdDelayed, Z, |
|
86 |
+ BPPARAM=BPPARAM) |
|
87 |
+ |
|
88 |
+ es <- do.call(rbind, es) |
|
89 |
+ |
|
90 |
+ es <- as(es, "HDF5Array") |
|
91 |
+ |
|
92 |
+ es |
|
93 |
+} |
|
94 |
+ |