Browse code

added ssgsea method for DelayedArray

pablo-rodr-bio2 authored on 15/12/2020 13:14:39
Showing 3 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: GSVA
2
-Version: 1.39.10
2
+Version: 1.39.11
3 3
 Title: Gene Set Variation Analysis for microarray and RNA-seq data
4 4
 Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.guinney@sagebase.org"),
5 5
              person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"),
... ...
@@ -52,6 +52,10 @@ importFrom(BiocParallel, SerialParam,
52 52
 importFrom(SingleCellExperiment, SingleCellExperiment)
53 53
 importFrom(sparseMatrixStats, colRanks)
54 54
 importFrom(DelayedArray, rowAutoGrid,
55
+			  colAutoGrid,
56
+			  currentBlockId,
57
+			  read_block, 
58
+			  viewportReduce,
55 59
 			  write_block,
56 60
 			  close)
57 61
 importFrom(HDF5Array, HDF5RealizationSink)
... ...
@@ -34,6 +34,14 @@
34 34
   if (class(BPPARAM) != "SerialParam" && verbose)
35 35
     cat(sprintf("Setting parallel calculations through a %s back-end\nwith workers=%d and tasks=100.\n",
36 36
                 class(BPPARAM), parallel.sz))
37
+  
38
+  if (method == "ssgsea") {
39
+    if(verbose)
40
+      cat("Estimating ssGSEA scores for", length(gset.idx.list),"gene sets.\n")
41
+    
42
+    return(ssgseaDelayed(expr, gset.idx.list, alpha=tau, parallel.sz=parallel.sz,
43
+                  normalization=ssgsea.norm, verbose=verbose, BPPARAM=BPPARAM))
44
+  }
37 45
 
38 46
   if (method == "zscore") {
39 47
     if (rnaseq)
... ...
@@ -118,4 +126,106 @@ zscoreDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE,
118 126
   es <- as(es, "HDF5Array")
119 127
   
120 128
   es
121
-}
122 129
\ No newline at end of file
130
+}
131
+
132
+#### rank function for hdf5 files using sink and grid methods
133
+rankHDF5 <- function(X){
134
+  sink <- HDF5RealizationSink(dim(X))
135
+  sink_grid <- colAutoGrid(sink, ncol = 1)
136
+  X_grid <- colAutoGrid(X, ncol=1)
137
+  
138
+  
139
+  FUN <- function(sink_grid, sink){
140
+    bid <- currentBlockId()
141
+    X_viewport <- X_grid[[bid]]
142
+    block <- read_block(X, X_viewport)
143
+    block <- as.integer(rank(block))
144
+    block <- matrix(block, length(block), 1)
145
+    write_block(sink, sink_grid, block)
146
+  }
147
+  
148
+  sink <- viewportReduce(FUN, sink_grid, sink)
149
+  close(sink)
150
+  res <- as(sink, "DelayedArray")
151
+  res
152
+}
153
+
154
+## slightly modified .fastRndWalk() for porpuse of only 
155
+## receiving a vector and not a matrix column for sums
156
+.fastRndWalk2 <- function(gSetIdx, geneRanking, ra_block) {
157
+  n <- length(geneRanking)
158
+  k <- length(gSetIdx)
159
+  idxs <- sort.int(fastmatch::fmatch(gSetIdx, geneRanking))
160
+  stepCDFinGeneSet2 <-
161
+    sum(ra_block[geneRanking[idxs]] * (n - idxs + 1)) /
162
+    sum((ra_block[geneRanking[idxs]]))    
163
+  stepCDFoutGeneSet2 <- (n * (n + 1) / 2 - sum(n - idxs + 1)) / (n - k)
164
+  walkStat <- stepCDFinGeneSet2 - stepCDFoutGeneSet2
165
+  walkStat
166
+}
167
+
168
+delayedGeneRanking <- function(r_block, ra_block, geneSets, BPPARAM){
169
+  geneRanking <- order(r_block, decreasing=TRUE)
170
+  res <- bplapply(geneSets, .fastRndWalk2, geneRanking, ra_block, BPPARAM = BPPARAM)
171
+  return(unlist(res))
172
+}
173
+
174
+ssgseaDelayed <- function(X, geneSets, alpha=0.25, parallel.sz,
175
+                   normalization=TRUE, verbose=TRUE,
176
+                   BPPARAM=SerialParam(progressbar=verbose)) {
177
+
178
+  n <- ncol(X)
179
+  
180
+  R <- rankHDF5(X)
181
+  
182
+  Ra <- abs(R)^alpha
183
+  
184
+  # step1: creating a grid for both DelayedArray objects
185
+  # that will be iterated
186
+  R_grid <- colAutoGrid(R, ncol=1)
187
+  Ra_grid <- colAutoGrid(Ra, ncol=1)
188
+  
189
+  # step2: create a sink and a grid for it
190
+  sink <- HDF5RealizationSink(dim = c(1L, ncol(R)))
191
+  sink_grid <- colAutoGrid(sink, ncol=1)
192
+  
193
+  # step3: function that will read blocks of R and
194
+  # Ra grids, apply transformation and write into
195
+  # sink object
196
+  FUN <- function(sink_grid, sink, geneSets){
197
+    bid <- currentBlockId()
198
+    r_block <- read_block(R, R_grid[[bid]])
199
+    ra_block <- read_block(Ra, Ra_grid[[bid]])
200
+    res_block <- delayedGeneRanking(r_block, ra_block, geneSets, BPPARAM)
201
+    res_block <- matrix(res_block, 1, length(res_block))
202
+    write_block(sink, sink_grid, res_block)
203
+  }
204
+  
205
+  sink <- viewportReduce(FUN, sink_grid, sink, geneSets)
206
+  
207
+  # step4: close sink as a DelayedArray object
208
+  close(sink)
209
+  es <- as(sink, "DelayedArray")
210
+  
211
+  if (normalization) {
212
+    ## normalize enrichment scores by using the entire data set, as indicated
213
+    ## by Barbie et al., 2009, online methods, pg. 2
214
+    sink <- HDF5RealizationSink(dim(es))
215
+    sink_grid <- colAutoGrid(sink, ncol=1)
216
+    es_grid <- colAutoGrid(es, ncol=1)
217
+    for(bid in seq_along(sink_grid)){
218
+      block <- read_block(es, es_grid[[bid]])
219
+      block <- block / (range(es)[2] - range(es)[1])
220
+      write_block(sink, sink_grid[[bid]], block)
221
+    }
222
+    close(sink)
223
+    es <- as(sink, "DelayedArray")
224
+  }
225
+  
226
+  rownames(es) <- names(geneSets)
227
+  colnames(es) <- colnames(X)
228
+  
229
+  es <- as(es, "HDF5Array")
230
+  es
231
+}
232
+