... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: GSVA |
2 |
-Version: 1.39.10 |
|
2 |
+Version: 1.39.11 |
|
3 | 3 |
Title: Gene Set Variation Analysis for microarray and RNA-seq data |
4 | 4 |
Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.guinney@sagebase.org"), |
5 | 5 |
person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"), |
... | ... |
@@ -52,6 +52,10 @@ importFrom(BiocParallel, SerialParam, |
52 | 52 |
importFrom(SingleCellExperiment, SingleCellExperiment) |
53 | 53 |
importFrom(sparseMatrixStats, colRanks) |
54 | 54 |
importFrom(DelayedArray, rowAutoGrid, |
55 |
+ colAutoGrid, |
|
56 |
+ currentBlockId, |
|
57 |
+ read_block, |
|
58 |
+ viewportReduce, |
|
55 | 59 |
write_block, |
56 | 60 |
close) |
57 | 61 |
importFrom(HDF5Array, HDF5RealizationSink) |
... | ... |
@@ -34,6 +34,14 @@ |
34 | 34 |
if (class(BPPARAM) != "SerialParam" && verbose) |
35 | 35 |
cat(sprintf("Setting parallel calculations through a %s back-end\nwith workers=%d and tasks=100.\n", |
36 | 36 |
class(BPPARAM), parallel.sz)) |
37 |
+ |
|
38 |
+ if (method == "ssgsea") { |
|
39 |
+ if(verbose) |
|
40 |
+ cat("Estimating ssGSEA scores for", length(gset.idx.list),"gene sets.\n") |
|
41 |
+ |
|
42 |
+ return(ssgseaDelayed(expr, gset.idx.list, alpha=tau, parallel.sz=parallel.sz, |
|
43 |
+ normalization=ssgsea.norm, verbose=verbose, BPPARAM=BPPARAM)) |
|
44 |
+ } |
|
37 | 45 |
|
38 | 46 |
if (method == "zscore") { |
39 | 47 |
if (rnaseq) |
... | ... |
@@ -118,4 +126,106 @@ zscoreDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
118 | 126 |
es <- as(es, "HDF5Array") |
119 | 127 |
|
120 | 128 |
es |
121 |
-} |
|
122 | 129 |
\ No newline at end of file |
130 |
+} |
|
131 |
+ |
|
132 |
+#### rank function for hdf5 files using sink and grid methods |
|
133 |
+rankHDF5 <- function(X){ |
|
134 |
+ sink <- HDF5RealizationSink(dim(X)) |
|
135 |
+ sink_grid <- colAutoGrid(sink, ncol = 1) |
|
136 |
+ X_grid <- colAutoGrid(X, ncol=1) |
|
137 |
+ |
|
138 |
+ |
|
139 |
+ FUN <- function(sink_grid, sink){ |
|
140 |
+ bid <- currentBlockId() |
|
141 |
+ X_viewport <- X_grid[[bid]] |
|
142 |
+ block <- read_block(X, X_viewport) |
|
143 |
+ block <- as.integer(rank(block)) |
|
144 |
+ block <- matrix(block, length(block), 1) |
|
145 |
+ write_block(sink, sink_grid, block) |
|
146 |
+ } |
|
147 |
+ |
|
148 |
+ sink <- viewportReduce(FUN, sink_grid, sink) |
|
149 |
+ close(sink) |
|
150 |
+ res <- as(sink, "DelayedArray") |
|
151 |
+ res |
|
152 |
+} |
|
153 |
+ |
|
154 |
+## slightly modified .fastRndWalk() for porpuse of only |
|
155 |
+## receiving a vector and not a matrix column for sums |
|
156 |
+.fastRndWalk2 <- function(gSetIdx, geneRanking, ra_block) { |
|
157 |
+ n <- length(geneRanking) |
|
158 |
+ k <- length(gSetIdx) |
|
159 |
+ idxs <- sort.int(fastmatch::fmatch(gSetIdx, geneRanking)) |
|
160 |
+ stepCDFinGeneSet2 <- |
|
161 |
+ sum(ra_block[geneRanking[idxs]] * (n - idxs + 1)) / |
|
162 |
+ sum((ra_block[geneRanking[idxs]])) |
|
163 |
+ stepCDFoutGeneSet2 <- (n * (n + 1) / 2 - sum(n - idxs + 1)) / (n - k) |
|
164 |
+ walkStat <- stepCDFinGeneSet2 - stepCDFoutGeneSet2 |
|
165 |
+ walkStat |
|
166 |
+} |
|
167 |
+ |
|
168 |
+delayedGeneRanking <- function(r_block, ra_block, geneSets, BPPARAM){ |
|
169 |
+ geneRanking <- order(r_block, decreasing=TRUE) |
|
170 |
+ res <- bplapply(geneSets, .fastRndWalk2, geneRanking, ra_block, BPPARAM = BPPARAM) |
|
171 |
+ return(unlist(res)) |
|
172 |
+} |
|
173 |
+ |
|
174 |
+ssgseaDelayed <- function(X, geneSets, alpha=0.25, parallel.sz, |
|
175 |
+ normalization=TRUE, verbose=TRUE, |
|
176 |
+ BPPARAM=SerialParam(progressbar=verbose)) { |
|
177 |
+ |
|
178 |
+ n <- ncol(X) |
|
179 |
+ |
|
180 |
+ R <- rankHDF5(X) |
|
181 |
+ |
|
182 |
+ Ra <- abs(R)^alpha |
|
183 |
+ |
|
184 |
+ # step1: creating a grid for both DelayedArray objects |
|
185 |
+ # that will be iterated |
|
186 |
+ R_grid <- colAutoGrid(R, ncol=1) |
|
187 |
+ Ra_grid <- colAutoGrid(Ra, ncol=1) |
|
188 |
+ |
|
189 |
+ # step2: create a sink and a grid for it |
|
190 |
+ sink <- HDF5RealizationSink(dim = c(1L, ncol(R))) |
|
191 |
+ sink_grid <- colAutoGrid(sink, ncol=1) |
|
192 |
+ |
|
193 |
+ # step3: function that will read blocks of R and |
|
194 |
+ # Ra grids, apply transformation and write into |
|
195 |
+ # sink object |
|
196 |
+ FUN <- function(sink_grid, sink, geneSets){ |
|
197 |
+ bid <- currentBlockId() |
|
198 |
+ r_block <- read_block(R, R_grid[[bid]]) |
|
199 |
+ ra_block <- read_block(Ra, Ra_grid[[bid]]) |
|
200 |
+ res_block <- delayedGeneRanking(r_block, ra_block, geneSets, BPPARAM) |
|
201 |
+ res_block <- matrix(res_block, 1, length(res_block)) |
|
202 |
+ write_block(sink, sink_grid, res_block) |
|
203 |
+ } |
|
204 |
+ |
|
205 |
+ sink <- viewportReduce(FUN, sink_grid, sink, geneSets) |
|
206 |
+ |
|
207 |
+ # step4: close sink as a DelayedArray object |
|
208 |
+ close(sink) |
|
209 |
+ es <- as(sink, "DelayedArray") |
|
210 |
+ |
|
211 |
+ if (normalization) { |
|
212 |
+ ## normalize enrichment scores by using the entire data set, as indicated |
|
213 |
+ ## by Barbie et al., 2009, online methods, pg. 2 |
|
214 |
+ sink <- HDF5RealizationSink(dim(es)) |
|
215 |
+ sink_grid <- colAutoGrid(sink, ncol=1) |
|
216 |
+ es_grid <- colAutoGrid(es, ncol=1) |
|
217 |
+ for(bid in seq_along(sink_grid)){ |
|
218 |
+ block <- read_block(es, es_grid[[bid]]) |
|
219 |
+ block <- block / (range(es)[2] - range(es)[1]) |
|
220 |
+ write_block(sink, sink_grid[[bid]], block) |
|
221 |
+ } |
|
222 |
+ close(sink) |
|
223 |
+ es <- as(sink, "DelayedArray") |
|
224 |
+ } |
|
225 |
+ |
|
226 |
+ rownames(es) <- names(geneSets) |
|
227 |
+ colnames(es) <- colnames(X) |
|
228 |
+ |
|
229 |
+ es <- as(es, "HDF5Array") |
|
230 |
+ es |
|
231 |
+} |
|
232 |
+ |