... | ... |
@@ -1,512 +1,512 @@ |
1 |
-x1 <- t(scale(t(m))) |
|
2 |
-x1 |
|
3 |
-m[gene.sets[[1]],] |
|
4 |
-x1[gene.sets[[1]], ] |
|
5 |
-x1 <- t(scale(t(m))) |
|
6 |
-x1 <- svd(x1) |
|
7 |
-x1 <- x1$v[,1] |
|
8 |
-x1 |
|
9 |
-x2 <- runExactSVD(m, scale = TRUE, center = TRUE) |
|
10 |
-x2 <- x2$v[,1] |
|
11 |
-x2 |
|
12 |
-x2 <- runExactSVD(t(m), scale = TRUE, center = TRUE) |
|
13 |
-x2 <- x2$v[,1] |
|
14 |
-x2 |
|
15 |
-x1 |
|
16 |
-t(m) |
|
17 |
-scale(t(m)) |
|
18 |
-x2 <- runExactSVD(m, scale = TRUE, center = TRUE) |
|
19 |
-x1 <- t(scale(t(m))) |
|
20 |
-x1 <- svd(x1) |
|
21 |
-x1 <- x1$v[,1] |
|
22 |
-x1 |
|
23 |
-x2 <- runExactSVD(m, scale = colMeans(m), center = TRUE) |
|
24 |
-x2 <- x2$v[,1] |
|
25 |
-x2 |
|
26 |
-colMeans(m) |
|
27 |
-x2 <- runExactSVD(m, scale = colSds(m), center = TRUE) |
|
28 |
-x2 <- x2$v[,1] |
|
29 |
-x2 |
|
30 |
-x1 |
|
31 |
-x2 |
|
32 |
-x2 <- runExactSVD(m, scale = colSds(m), center = colMeans(m)) |
|
33 |
-x2 <- x2$v[,1] |
|
34 |
-x1 |
|
35 |
-x2 |
|
36 |
-colSds(m) |
|
37 |
-colMeans(m) |
|
38 |
-x2 <- runExactSVD(t(m), scale = colSds(m), center = colMeans(m)) |
|
39 |
-x2 <- x2$v[,1] |
|
40 |
-x2 |
|
41 |
-x1 |
|
42 |
-x2 <- runExactSVD(m, scale = colSds(t(m)), center = colMeans(t(m))) |
|
43 |
-x2 <- x2$v[,1] |
|
44 |
-x2 |
|
45 |
-x1 |
|
46 |
-x1 <- t(scale(t(m))) |
|
47 |
-x1 |
|
48 |
-colSds(m) |
|
49 |
-rowSds(m) |
|
50 |
-x1 <- t(scale(t(m))) |
|
51 |
-x1 <- svd(x1) |
|
52 |
-x1 <- x1$v[,1] |
|
53 |
-x1 |
|
54 |
-x2 <- runExactSVD(m, center= colMeans(m), scale= rowSds(m)) |
|
55 |
-x2 <- x2$v[,1] |
|
56 |
-x2 |
|
57 |
-x2 <- runExactSVD(m, center= colMeans(m), scale= rowSds(m)) |
|
58 |
-x2$v[,1] |
|
59 |
-x3 <- runSVD(m, k=Inf) |
|
60 |
-x4 <- runExactSVD(m) |
|
61 |
-x3 |
|
62 |
-x4 <- runExactSVD(m) |
|
63 |
-x4 |
|
64 |
-x1 <- t(scale(t(m))) |
|
65 |
-x1 <- svd(x1[gene.sets[[1]],]) |
|
66 |
-x1 <- x1$v[,1] |
|
67 |
-x1 |
|
68 |
-x2 <- runExactSVD(m[gene.sets[[1]],], center= colMeans(m), scale= rowSds(m)) |
|
69 |
-x2 <- x2$v[,1] |
|
70 |
-x2 |
|
71 |
-x3 <- runSVD(m[gene.sets[[1]],], k=Inf) |
|
72 |
-x3$v[,1] |
|
73 |
-x2 |
|
74 |
-x2 <- runExactSVD(m[gene.sets[[1]],], center= colMeans(m), scale= rowSds(m)) |
|
75 |
-x2 |
|
76 |
-x1 |
|
77 |
-x1 <- t(scale(t(m))) |
|
78 |
-x1 |
|
79 |
-x1 <- svd(x1[gene.sets[[1]],]) |
|
80 |
-x1 |
|
81 |
-x2 <- runExactSVD(m[gene.sets[[1]],], center= colMeans(m), scale= rowSds(m)) |
|
82 |
-x2 |
|
83 |
-rowSds(m) |
|
84 |
-x1 <- t(scale(t(m))) |
|
85 |
-x1 <- svd(x1[gene.sets[[1]],]) |
|
86 |
-x1 <- x1$v[,1] |
|
87 |
-x1 |
|
88 |
-x2 <- runExactSVD(m[gene.sets[[1]],], center= colMeans(m), scale= rowSds(t(m))) |
|
89 |
-x2 <- x2$v[,1] |
|
90 |
-x2 |
|
91 |
-colMeans(m) |
|
92 |
-apply(m, 2, sd) |
|
93 |
-x1 <- t(scale(t(m))) |
|
94 |
-x1 <- svd(x1[gene.sets[[1]],]) |
|
95 |
-x1 <- x1$v[,1] |
|
96 |
-x1 |
|
97 |
-x2 <- runExactSVD(m[gene.sets[[1]],], center= colMeans(m), scale= TRUE) |
|
98 |
-x2 <- x2$v[,1] |
|
99 |
-x2 |
|
100 |
-x1 <- scale(m) |
|
101 |
-x1 <- t(scale(t(m))) |
|
102 |
-x1 <- svd(x1[1,]) |
|
103 |
-x1 |
|
104 |
-x1 <- svd(x1[1,,drop=FALSE]) |
|
105 |
-x1 <- svd(x1[1, , drop=FALSE]) |
|
106 |
-x1[1, , drop=FALSE] |
|
107 |
-x1[1, drop=FALSE] |
|
108 |
-x1 <- t(scale(t(m))) |
|
109 |
-x1 <- t(scale(t(m))) |
|
110 |
-x1 <- svd(x1[1, ,drop=FALSE]) |
|
111 |
-x1 |
|
112 |
-x2 <- runExactSVD(m[1, ]) |
|
113 |
-x2 <- runExactSVD(m[1, , drop=FALSE]) |
|
114 |
-x2 |
|
115 |
-x2 <- runExactSVD(m[1, , drop=FALSE], center=TRUE, scale=TRUE) |
|
116 |
-x2 |
|
117 |
-x1 <- t(scale(t(m))) |
|
118 |
-x1 <- svd(x1[1:2,]) |
|
119 |
-x1 |
|
120 |
-x2 <- runExactSVD(m[1:2,], center=TRUE, scale=TRUE) |
|
121 |
-x2 |
|
122 |
-x2 <- runExactSVD(m[1:2,], center=colMeans(t(m)), scale=TRUE) |
|
123 |
-x2 |
|
124 |
-x <- gene.sets[[1]] |
|
125 |
-x |
|
126 |
-x1 <- svd(x1[x,]) |
|
127 |
-x <- gene.sets[[1]] |
|
128 |
-x1 <- t(scale(t(m))) |
|
129 |
-x1 <- svd(x1[x,]) |
|
130 |
-x1 <- x1$v[,1] |
|
131 |
-x1 |
|
132 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= colSds(t(m))) |
|
133 |
-x2 <- x2$v[,1] |
|
134 |
-x2 |
|
135 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= colSds(m)) |
|
136 |
-x2 <- x2$v[,1] |
|
137 |
-x2 |
|
138 |
-x1 <- t(scale(t(m))) |
|
139 |
-x1 <- svd(x1[x,]) |
|
140 |
-x1 <- x1$v[,1] |
|
141 |
-x1 |
|
142 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= rowSds(t(m))) |
|
143 |
-x2 <- x2$v[,1] |
|
144 |
-x2 |
|
145 |
-x2 <- runExactSVD(m[x,], center= colMeans(m), scale= rowSds(t(m))) |
|
146 |
-x2 <- x2$v[,1] |
|
147 |
-x2 |
|
148 |
-x2 <- runExactSVD(m[x,], center= colMeans(m), scale= colSds(t(m))) |
|
149 |
-x2 <- x2$v[,1] |
|
150 |
-x2 |
|
151 |
-x1 |
|
152 |
-x2 <- runExactSVD(m[x,], center= colMeans(m), scale= colSds(m)) |
|
153 |
-x2 <- x2$v[,1] |
|
154 |
-x2 |
|
155 |
-x1 |
|
156 |
-scale(t(m)) |
|
157 |
-colMeans(t(m)) |
|
158 |
-colSds(m) |
|
159 |
-colSds(t(m)) |
|
160 |
-rowSds(m) |
|
161 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= rowSds(m)) |
|
162 |
-x2 <- x2$v[,1] |
|
163 |
-x2 |
|
164 |
-x1 |
|
165 |
-x1 <- t(scale(t(m))) |
|
166 |
-x1 <- svd(x1[x,]) |
|
167 |
-x1 |
|
168 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= rowSds(m)) |
|
169 |
-x2 |
|
170 |
-min(dim(m[x,])) |
|
171 |
-x2 <- runSvd(m[x,], k=2, center= colMeans(t(m)), scale= rowSds(m)) |
|
172 |
-x2 <- runSVD(m[x,], k=2, center= colMeans(t(m)), scale= rowSds(m)) |
|
173 |
-x2 |
|
174 |
-x1$v[,1] |
|
175 |
-x2$v[,1] |
|
176 |
-x2 <- runSVD(m[x,], k=2, center= colMeans(t(m)), scale= rowSds(m), BSPARAM = ExactParam()) |
|
177 |
-x2$v[,1] |
|
178 |
-x2 <- runSVD(m[x,], k=2, center= colMeans(t(m)), scale= rowSds(m), BSPARAM = FastAutoParam()) |
|
179 |
-x2$v[,1] |
|
180 |
-x2 <- runSVD(m[x,], k=2, center= colMeans(t(m)), scale= rowSds(m), BSPARAM = bsparam()) |
|
181 |
-x2$v[,1] |
|
182 |
-x2 <- runSVD(m[x,], k=2, center= colMeans(t(m)), scale= rowSds(m), BSPARAM = ExactParam(deferred=TRUE)) |
|
183 |
-x2$v[,1] |
|
184 |
-x1 <- t(scale(t(m))) |
|
185 |
-x1 |
|
186 |
-colMeans(t(m)) |
|
187 |
-owSds(m) |
|
188 |
-rowSds(m) |
|
189 |
-m |
|
190 |
-m <- matrix(sample.int(10, 25, T), 5, 5) |
|
191 |
-colnames(m) <- paste0("cell_", 1:5) |
|
192 |
-rownames(m) <- paste0("gene_", 1:5) |
|
193 |
-set.seed(123) |
|
194 |
-m <- matrix(sample.int(10, 25, T), 5, 5) |
|
195 |
-colnames(m) <- paste0("cell_", 1:5) |
|
196 |
-rownames(m) <- paste0("gene_", 1:5) |
|
197 |
-gene.sets <- list("my_list1"= c(1,2)) |
|
198 |
-m <- matrix(sample.int(10, 25, T), 5, 5) |
|
199 |
-colnames(m) <- paste0("cell_", 1:5) |
|
200 |
-rownames(m) <- paste0("gene_", 1:5) |
|
201 |
-x <- c(1,2) |
|
202 |
-x1 <- t(scale(t(m))) |
|
203 |
-x1 <- svd(x1[x,]) |
|
204 |
-x1$v[,1] |
|
205 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= rowSds(m)) |
|
206 |
-x2$v[,1] |
|
207 |
-rowSds(m) |
|
208 |
-scale(t(m)) |
|
209 |
-t(scale(t(m))) |
|
210 |
-x1 <- t(scale(t(m))) |
|
211 |
-x1@scaled |
|
212 |
-str(x) |
|
213 |
-str(x1) |
|
214 |
-x1$scaled |
|
215 |
-x1@scaled |
|
216 |
-attr(x1) |
|
217 |
-attr(x1, "scaled:scale") |
|
218 |
-x2 <- runExactSVD(t(m[x,]), center= colMeans(t(m)), scale= rowSds(m)) |
|
219 |
-x2$v[,1] |
|
220 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= rowSds(m)) |
|
221 |
-x2$v[,1] |
|
222 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= rowSds(t(m))) |
|
223 |
-x1$v[,1] |
|
224 |
-x1 <- t(scale(t(m))) |
|
225 |
-x1 <- svd(x1[x,]) |
|
226 |
-x1$v[,1] |
|
227 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= rowSds(t(m))) |
|
228 |
-x2$v[,1] |
|
229 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= rowSds(m)) |
|
230 |
-x2$v[,1] |
|
231 |
-C <- colMeans(t(m)) |
|
232 |
-i <- 1 |
|
233 |
-sqrt(sum((m[,i] - C[i])^2)/(ncol(m)-1)) |
|
234 |
-x2 <- runExactSVD(m[x,], center= colMeans(t(m)), scale= TRUE) |
|
235 |
-x2$v[,1] |
|
236 |
-x1$v[,1] |
|
237 |
-colMeans(t(m)) |
|
238 |
-rowMeans(m) |
|
239 |
-rowSds(m) |
|
240 |
-x1 <- t(scale(t(m))) |
|
241 |
-x1 <- svd(x1[x,]) |
|
242 |
-x1$v[,1] |
|
243 |
-x2 <- runExactSVD(m[x,], center= rowMeans(m), scale= rowSds(m)) |
|
244 |
-x2$v[,1] |
|
245 |
-library(BiocSingular) |
|
246 |
-set.seed(123) |
|
247 |
-m <- matrix(sample.int(10, 25, T), 5, 5) |
|
248 |
-x <- c(1,2) |
|
249 |
-x1 <- t(scale(t(m))) |
|
250 |
-x1 <- svd(x1[x,]) |
|
251 |
-x1$v[,1] |
|
252 |
-x2 <- runExactSVD(t(scale(t(m)))) |
|
253 |
-x2$v[,1] |
|
254 |
-x1 <- t(scale(t(m))) |
|
255 |
-res1 <- svd(x1[x,]) |
|
256 |
-res1$v[,1] |
|
257 |
-x2 <- runExactSVD(x1[x,]) |
|
258 |
-x2$v[,1] |
|
259 |
-x3 <- runExactSVD(m[x,], center=colMeans(t(m))) |
|
260 |
-x3 |
|
261 |
-library(matrixStats) |
|
262 |
-x1 <- t(scale(t(m))) |
|
263 |
-x2 <- t( (t(m) - colMeans(t(m))) / colSds(t(m)) ) |
|
264 |
-x1 |
|
265 |
-x2 |
|
266 |
-x2 <- t( (t(m) - colMeans(t(m))) / rowSds(t(m)) ) |
|
267 |
-x2 |
|
268 |
-x1 <- t(scale(t(m))) |
|
269 |
-x2 <- t( (t(m) - colMeans(t(m))) / apply(t(m), 2, sd) ) |
|
270 |
-x1 |
|
271 |
-x2 |
|
272 |
-apply(t(m), 2, sd) |
|
273 |
-m |
|
274 |
-x1 |
|
275 |
-t(m) |
|
276 |
-scale(m) |
|
277 |
-m |
|
278 |
-x1 <- t(scale(t(m))) |
|
279 |
-x1 |
|
280 |
-x1 <- t(scale(t(m))) |
|
281 |
-x2 <- t( (t(m) - colMeans(t(m))) / apply(t(m), 2, sd) ) |
|
282 |
-x1 |
|
283 |
-x2 |
|
284 |
-colMeans(t(m)) |
|
285 |
-apply(t(m), 2, sd) |
|
1 |
+sdGenes <- apply(expr, 1, sd) |
|
2 |
+} |
|
3 |
+if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
4 |
+warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
5 |
+" genes with constant expression values throuhgout the samples.") |
|
6 |
+if (method != "ssgsea") { |
|
7 |
+warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
8 |
+expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
9 |
+} |
|
10 |
+} |
|
11 |
+expr |
|
12 |
+} |
|
13 |
+expr <- .filterFeatures(expr, method) |
|
14 |
+expr |
|
15 |
+m <- matrix(sample.int(10, 25, T), 15, 15) |
|
16 |
+colnames(m) <- paste0("cell_", 1:10) |
|
17 |
+colnames(m) <- paste0("cell_", 1:15) |
|
18 |
+rownames(m) <- paste0("gene_", 1:15) |
|
19 |
+m <- as(m, "HDF5Array") |
|
286 | 20 |
m |
287 |
-x1 <- scale(t(m)) |
|
288 |
-x2 <- (t(m) - colMeans(t(m))) / apply(t(m), 2, sd) |
|
289 |
-x1 |
|
290 |
-x2 |
|
291 |
-x2 <- (t(m) - colMeans(t(m))) / (apply(t(m), 2, sd)) |
|
292 |
-x1 |
|
293 |
-x2 |
|
294 |
-x1 <- scale(m) |
|
295 |
-x2 <- (m - colMeans(m)) / (apply(m, 2, sd)) |
|
296 |
-x1 |
|
297 |
-x2 |
|
21 |
+gset.idx.list <- list("my_list1"= paste0("gene_", 1:2), |
|
22 |
+"my_list2"= paste0("gene_", 6:7)) |
|
23 |
+expr <- m |
|
24 |
+.filterFeatures <- function(expr, method) { |
|
25 |
+if(is(expr, "DelayedArray")){ |
|
26 |
+sdGenes <- DelayedMatrixStats::rowSds(expr) |
|
27 |
+} else { |
|
28 |
+sdGenes <- apply(expr, 1, sd) |
|
29 |
+} |
|
30 |
+if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
31 |
+warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
32 |
+" genes with constant expression values throuhgout the samples.") |
|
33 |
+if (method != "ssgsea") { |
|
34 |
+warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
35 |
+expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
36 |
+} |
|
37 |
+} |
|
38 |
+expr |
|
39 |
+} |
|
40 |
+expr <- .filterFeatures(expr, method) |
|
41 |
+expr |
|
42 |
+X <- expr |
|
43 |
+Z <- t(DelayedArray::scale(t(X))) |
|
44 |
+Z |
|
45 |
+dim(m) |
|
46 |
+nrow(m) |
|
47 |
+rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
|
48 |
+s <- svd(Z[gSetIdx, ]) |
|
49 |
+s$v[, 1] |
|
50 |
+} |
|
51 |
+sink <- HDF5RealizationSink(dim = c(1L, nrow(Z))) |
|
52 |
+dim(sink) |
|
53 |
+rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
|
54 |
+s <- svd(Z[gSetIdx, ]) |
|
55 |
+s$v[, 1] |
|
56 |
+} |
|
57 |
+right2 <- function(gSetIdx, Z) { |
|
58 |
+sink <- HDF5RealizationSink(dim = c(1L, nrow(Z))) |
|
59 |
+sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
60 |
+for(bid in seq_along(sink_grid)){ |
|
61 |
+block <- rightsingularsvdvectorgset(Z, gSetIdx) |
|
62 |
+block <- as(matrix(block, 1, nrow(Z)), "HDF5Matrix") |
|
63 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
64 |
+} |
|
65 |
+close(sink) |
|
66 |
+res <- as(sink, "DelayedArray") |
|
67 |
+res |
|
68 |
+} |
|
69 |
+Z <- t(DelayedArray::scale(t(X))) |
|
70 |
+Z <- as(Z, "HDF5Array") |
|
71 |
+es <- bplapply(geneSets, right2, Z, |
|
72 |
+BPPARAM=BPPARAM) |
|
73 |
+geneSets <- list("my_list1"=c(1,2), "mylist2"=c(3,4)) |
|
74 |
+es <- bplapply(geneSets, right2, Z, |
|
75 |
+BPPARAM=BPPARAM) |
|
76 |
+names(geneSets) |
|
77 |
+length(names(geneSets)) |
|
78 |
+right2 <- function(gSetIdx, Z) { |
|
79 |
+sink <- HDF5RealizationSink(dim = c(1L, nrow(Z))) |
|
80 |
+sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
81 |
+for(bid in seq_along(sink_grid)){ |
|
82 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
83 |
+block <- as(matrix(block, 1, nrow(Z)), "HDF5Matrix") |
|
84 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
85 |
+} |
|
86 |
+close(sink) |
|
87 |
+res <- as(sink, "DelayedArray") |
|
88 |
+res |
|
89 |
+} |
|
90 |
+Z <- t(DelayedArray::scale(t(X))) |
|
91 |
+Z <- as(Z, "HDF5Array") |
|
92 |
+es <- bplapply(geneSets, right2, Z, |
|
93 |
+BPPARAM=BPPARAM) |
|
94 |
+es |
|
95 |
+es <- do.call(rbind, es) |
|
96 |
+es |
|
97 |
+es <- as(es, "HDF5Array") |
|
98 |
+es |
|
99 |
+geneSets |
|
100 |
+es2 <- es |
|
101 |
+Z <- t(scale(t(X))) |
|
102 |
+es <- bplapply(geneSets, rightsingularsvdvectorgset, Z, |
|
103 |
+BPPARAM=BPPARAM) |
|
104 |
+es <- do.call(rbind, es) |
|
105 |
+es |
|
106 |
+es2 |
|
107 |
+library(scRNAseq) |
|
108 |
+sce <- ReprocessedAllenData("tophat_counts") |
|
109 |
+library(scRNAseq) |
|
110 |
+x <- do.call(cbind, lapply(1:20, function(j) { |
|
111 |
+rpois(n = 10000, lambda = sample(20:40, 10000, replace = TRUE)) |
|
112 |
+})) |
|
113 |
+colnames(x) <- paste0("S", 1:20) |
|
114 |
+x <- realize(x, "HDF5Array") |
|
115 |
+x |
|
116 |
+geneSets <- list("my_list1"=1:500, |
|
117 |
+"my_list2"=1001:1500) |
|
118 |
+geneSets |
|
119 |
+X <- x |
|
120 |
+X |
|
121 |
+plage2 <- function(X, geneSets, parallel.sz, verbose=TRUE, |
|
122 |
+BPPARAM=SerialParam(progressbar=verbose)) { |
|
123 |
+Z <- t(DelayedArray::scale(t(X))) |
|
124 |
+Z <- as(Z, "HDF5Array") |
|
125 |
+es <- bplapply(geneSets, right2, Z, |
|
126 |
+BPPARAM=BPPARAM) |
|
127 |
+es <- do.call(rbind, es) |
|
128 |
+es <- as(es, "HDF5Array") |
|
129 |
+es |
|
130 |
+} |
|
131 |
+res1 <- plage(x, geneSets) |
|
132 |
+plage <- function(X, geneSets, parallel.sz, verbose=TRUE, |
|
133 |
+BPPARAM=SerialParam(progressbar=verbose)) { |
|
134 |
+Z <- t(scale(t(X))) |
|
135 |
+es <- bplapply(geneSets, rightsingularsvdvectorgset, Z, |
|
136 |
+BPPARAM=BPPARAM) |
|
137 |
+es <- do.call(rbind, es) |
|
138 |
+if (length(geneSets) == 1) |
|
139 |
+es <- matrix(es, nrow=1) |
|
140 |
+rownames(es) <- names(geneSets) |
|
141 |
+colnames(es) <- colnames(X) |
|
142 |
+es |
|
143 |
+} |
|
144 |
+res1 <- plage(x, geneSets) |
|
145 |
+res1 |
|
146 |
+res2 <- plage2(x, geneSets) |
|
147 |
+res2 |
|
298 | 148 |
library(bench) |
299 |
-m <- matrix(sample.int(10, 25, T), 10, 10) |
|
300 |
-genes <- c(1,2) |
|
301 |
-x <- t(scale(t(m))) |
|
302 |
-m <- matrix(sample.int(10, 25, T), 10, 10) |
|
303 |
-genes <- c(1,2) |
|
304 | 149 |
bench::mark( |
305 |
-svd(x[genes,]), |
|
306 |
-runExactSVD(x[genes,]) |
|
150 |
+plage1(x, geneSets), |
|
151 |
+plage2(x, geneSets), |
|
307 | 152 |
) |
308 |
-library(Matrix) |
|
309 |
-m<-rsparsematrix(10,10,.5) |
|
310 |
-m |
|
311 |
-# m <- matrix(sample.int(10, 25, T), 10, 10) |
|
312 |
-m<-rsparsematrix(1000,10000,.5) |
|
313 |
-genes <- c(1:400) |
|
314 |
-x <- t(scale(t(m))) |
|
315 |
-X |
|
316 |
-x |
|
317 |
-x1 <- t(scale(t(m))) |
|
318 |
-x2 <- as(x1, "dgCMatrix") |
|
319 | 153 |
bench::mark( |
320 |
-svd(x1[genes,]), |
|
321 |
-runExactSVD(x2[genes,]) |
|
154 |
+plage(x, geneSets), |
|
155 |
+plage2(x, geneSets), |
|
156 |
+check=FALSE |
|
322 | 157 |
) |
158 |
+print(object.size(res), unit="Kb") |
|
159 |
+print(object.size(res2), unit="Kb") |
|
160 |
+res |
|
161 |
+print(object.size(res1), unit="Kb") |
|
162 |
+print(object.size(res2), unit="Kb") |
|
163 |
+sce <- readRDS("/home/bort/Downloads/pbmc.sce.rds") |
|
164 |
+# sce <- readRDS("/home/bort/curro/gsva-devel/shortdgC.sce.rds") |
|
165 |
+gset.idx.list <- list("my_genes1" = sample(rownames(sce), 5000), |
|
166 |
+"my_genes2" = sample(rownames(sce), 5000)) |
|
167 |
+se <- sce |
|
168 |
+annotation <- names(assays(se))[1] |
|
169 |
+expr <- assays(se)[[annotation]] |
|
170 |
+.filterFeatures <- function(expr, method) { |
|
171 |
+if(is(expr, "DelayedArray")){ |
|
172 |
+sdGenes <- DelayedMatrixStats::rowSds(expr) |
|
173 |
+} else { |
|
174 |
+sdGenes <- apply(expr, 1, sd) |
|
175 |
+} |
|
176 |
+if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
177 |
+warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
178 |
+" genes with constant expression values throuhgout the samples.") |
|
179 |
+if (method != "ssgsea") { |
|
180 |
+warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
181 |
+expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
182 |
+} |
|
183 |
+} |
|
184 |
+expr |
|
185 |
+} |
|
186 |
+expr <- .filterFeatures(expr, method) |
|
187 |
+expr |
|
188 |
+expr <- as(expr, "HDF5Array") |
|
189 |
+expr <- NULL |
|
190 |
+sce <- NULL |
|
191 |
+se <- NULL |
|
192 |
+library(TEN) |
|
193 |
+library(TENxBrainData) |
|
194 |
+sce <- TENxBrainData20k() |
|
195 |
+sce |
|
196 |
+rownames(sce) <- rowData(sce)$Symbol |
|
197 |
+colData(sce) |
|
198 |
+colnames(sce) <- colData(sce)$Sequence |
|
199 |
+sce |
|
200 |
+se <- sce |
|
201 |
+annotation <- names(assays(se))[1] |
|
202 |
+expr <- assays(se)[[annotation]] |
|
203 |
+expr |
|
204 |
+expr <- as(expr, "HDF5Array") |
|
205 |
+se <- sce |
|
206 |
+annotation <- names(assays(se))[1] |
|
207 |
+expr <- assays(se)[[annotation]] |
|
208 |
+expr |
|
209 |
+# sce <- readRDS("/home/bort/curro/gsva-devel/shortdgC.sce.rds") |
|
210 |
+gset.idx.list <- list("my_genes1" = sample(rownames(sce), 5000), |
|
211 |
+"my_genes2" = sample(rownames(sce), 5000)) |
|
212 |
+gset.idx.list |
|
213 |
+se <- NULL |
|
214 |
+sce <- NULL |
|
215 |
+expr <- .filterFeatures(expr, method) |
|
216 |
+expr |
|
217 |
+mapped.gset.idx.list <- gset.idx.list |
|
218 |
+mapped.gset.idx.list <- lapply(mapped.gset.idx.list, |
|
219 |
+function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
220 |
+rownames(expr)) |
|
221 |
+filterGeneSets <- function(gSets, min.sz=1, max.sz=Inf) { |
|
222 |
+gSetsLen <- sapply(gSets,length) |
|
223 |
+return (gSets[gSetsLen >= min.sz & gSetsLen <= max.sz]) |
|
224 |
+} |
|
225 |
+mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
226 |
+min.sz=max(1, min.sz), |
|
227 |
+max.sz=max.sz) |
|
228 |
+eSco2 <- plage2(expr, mapped.gset.idx.list , parallel.sz, verbose, BPPARAM=BPPARAM) |
|
229 |
+X <- expr |
|
230 |
+Z <- t(DelayedArray::scale(t(X))) |
|
231 |
+t(X) |
|
232 |
+dimnames(X) |
|
233 |
+t(X) |
|
234 |
+colnames(X) |
|
235 |
+rownames(X) |
|
236 |
+t(X) |
|
237 |
+is.character(dimnames(X)) |
|
238 |
+which(is.character(dimnames(X))) |
|
239 |
+as.character(dimnames(X)) |
|
240 |
+dinmaes <- as.character(dimnames(X)) |
|
241 |
+dinmaes <- NULL |
|
242 |
+colnames(X) <- NULL |
|
243 |
+sce |
|
244 |
+BiocManager::install("scRNAseq") |
|
245 |
+options(Ncpus=6) |
|
246 |
+BiocManager::install("scRNAseq") |
|
247 |
+sce <- ReprocessedAllenData("tophat_counts") |
|
248 |
+library(scRNAseq) |
|
249 |
+sce <- ReprocessedAllenData("tophat_counts") |
|
250 |
+counts <- assay(sce, "tophat_counts") |
|
251 |
+counts |
|
252 |
+str(counts) |
|
253 |
+print(object.size(counts), unit="Kb") |
|
254 |
+print(object.size(counts), unit="Mb") |
|
255 |
+counts <- as(counts, "HDF5Array") |
|
256 |
+library(HDF5Array) |
|
257 |
+counts <- as(counts, "HDF5Array") |
|
258 |
+counts |
|
259 |
+expr <- as(t(counts), "HDF5Array") |
|
260 |
+expr |
|
261 |
+# sce <- readRDS("/home/bort/curro/gsva-devel/shortdgC.sce.rds") |
|
262 |
+gset.idx.list <- list("my_genes1" = sample(rownames(expr), 100), |
|
263 |
+"my_genes2" = sample(rownames(expr), 100)) |
|
264 |
+method <- "plage" |
|
265 |
+kcdf <- "Gaussian" |
|
266 |
+min.sz <- 1 |
|
267 |
+max.sz <- Inf |
|
268 |
+abs.ranking <- FALSE |
|
269 |
+parallel.sz=1L |
|
270 |
+mx.diff=TRUE |
|
271 |
+tau=1 |
|
272 |
+kernel=TRUE |
|
273 |
+ssgsea.norm=TRUE |
|
274 |
+verbose=TRUE |
|
275 |
+rnaseq=FALSE |
|
276 |
+BPPARAM=SerialParam(progressbar=verbose) |
|
277 |
+library(BiocParallel) |
|
278 |
+BPPARAM=SerialParam(progressbar=verbose) |
|
279 |
+.filterFeatures <- function(expr, method) { |
|
280 |
+if(is(expr, "DelayedArray")){ |
|
281 |
+sdGenes <- DelayedMatrixStats::rowSds(expr) |
|
282 |
+} else { |
|
283 |
+sdGenes <- apply(expr, 1, sd) |
|
284 |
+} |
|
285 |
+if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
286 |
+warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
287 |
+" genes with constant expression values throuhgout the samples.") |
|
288 |
+if (method != "ssgsea") { |
|
289 |
+warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
290 |
+expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
291 |
+} |
|
292 |
+} |
|
293 |
+expr |
|
294 |
+} |
|
295 |
+expr <- .filterFeatures(expr, method) |
|
296 |
+expr |
|
297 |
+mapped.gset.idx.list <- gset.idx.list |
|
298 |
+mapped.gset.idx.list <- lapply(mapped.gset.idx.list, |
|
299 |
+function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
300 |
+rownames(expr)) |
|
301 |
+filterGeneSets <- function(gSets, min.sz=1, max.sz=Inf) { |
|
302 |
+gSetsLen <- sapply(gSets,length) |
|
303 |
+return (gSets[gSetsLen >= min.sz & gSetsLen <= max.sz]) |
|
304 |
+} |
|
305 |
+mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
306 |
+min.sz=max(1, min.sz), |
|
307 |
+max.sz=max.sz) |
|
308 |
+rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
|
309 |
+s <- svd(Z[gSetIdx, ]) |
|
310 |
+s$v[, 1] |
|
311 |
+} |
|
312 |
+right2 <- function(gSetIdx, Z) { |
|
313 |
+sink <- HDF5RealizationSink(dim = c(1L, nrow(Z))) |
|
314 |
+sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
315 |
+for(bid in seq_along(sink_grid)){ |
|
316 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
317 |
+block <- as(matrix(block, 1, nrow(Z)), "HDF5Matrix") |
|
318 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
319 |
+} |
|
320 |
+close(sink) |
|
321 |
+res <- as(sink, "DelayedArray") |
|
322 |
+res |
|
323 |
+} |
|
324 |
+plage2 <- function(X, geneSets, parallel.sz, verbose=TRUE, |
|
325 |
+BPPARAM=SerialParam(progressbar=verbose)) { |
|
326 |
+Z <- t(DelayedArray::scale(t(X))) |
|
327 |
+Z <- as(Z, "HDF5Array") |
|
328 |
+es <- bplapply(geneSets, right2, Z, |
|
329 |
+BPPARAM=BPPARAM) |
|
330 |
+es <- do.call(rbind, es) |
|
331 |
+es <- as(es, "HDF5Array") |
|
332 |
+es |
|
333 |
+} |
|
334 |
+plage3 <- function(X, geneSets, parallel.sz, verbose=TRUE, |
|
335 |
+BPPARAM=SerialParam(progressbar=verbose)) { |
|
336 |
+Z <- t(DelayedArray::scale(t(X))) |
|
337 |
+es <- bplapply(geneSets, right2, Z, |
|
338 |
+BPPARAM=BPPARAM) |
|
339 |
+es <- do.call(rbind, es) |
|
340 |
+es <- as(es, "HDF5Array") |
|
341 |
+es |
|
342 |
+} |
|
343 |
+plage <- function(X, geneSets, parallel.sz, verbose=TRUE, |
|
344 |
+BPPARAM=SerialParam(progressbar=verbose)) { |
|
345 |
+Z <- t(scale(t(X))) |
|
346 |
+es <- bplapply(geneSets, rightsingularsvdvectorgset, Z, |
|
347 |
+BPPARAM=BPPARAM) |
|
348 |
+es <- do.call(rbind, es) |
|
349 |
+if (length(geneSets) == 1) |
|
350 |
+es <- matrix(es, nrow=1) |
|
351 |
+rownames(es) <- names(geneSets) |
|
352 |
+colnames(es) <- colnames(X) |
|
353 |
+es |
|
354 |
+} |
|
355 |
+library(bench) |
|
323 | 356 |
bench::mark( |
324 |
-svd(x1[genes,]), |
|
325 |
-runExactSVD(m[genes,]) |
|
357 |
+plage(expr, mapped.gset.idx.list), |
|
358 |
+plage2(expr, mapped.gset.idx.list), |
|
359 |
+plage3(expr, mapped.gset.idx.list), |
|
360 |
+check=FALSE |
|
326 | 361 |
) |
327 | 362 |
bench::mark( |
328 |
-svd(x1[genes,]), |
|
329 |
-runExactSVD(m[genes,]), |
|
363 |
+plage(expr, mapped.gset.idx.list), |
|
364 |
+plage2(expr, mapped.gset.idx.list), |
|
365 |
+plage3(expr, mapped.gset.idx.list), |
|
330 | 366 |
check=FALSE |
331 | 367 |
) |
368 |
+plage3(expr, mapped.gset.idx.list) |
|
369 |
+plage2(expr, mapped.gset.idx.list) |
|
370 |
+expr |
|
371 |
+sink <- HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
372 |
+sink <- HDF5RealizationSink(dim = c(1L, ncol(expr))) |
|
373 |
+sink |
|
374 |
+right2 <- function(gSetIdx, Z) { |
|
375 |
+sink <- HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
376 |
+sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
377 |
+for(bid in seq_along(sink_grid)){ |
|
378 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
379 |
+block <- as(matrix(block, 1, nrow(Z)), "HDF5Matrix") |
|
380 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
381 |
+} |
|
382 |
+close(sink) |
|
383 |
+res <- as(sink, "DelayedArray") |
|
384 |
+res |
|
385 |
+} |
|
332 | 386 |
bench::mark( |
333 |
-svd(x1[genes,]), |
|
334 |
-runExactSVD(x1[genes,]), |
|
387 |
+plage(expr, mapped.gset.idx.list), |
|
388 |
+plage2(expr, mapped.gset.idx.list), |
|
389 |
+plage3(expr, mapped.gset.idx.list), |
|
335 | 390 |
check=FALSE |
336 | 391 |
) |
337 |
-x1 <- NULL |
|
338 |
-x2 <- NULL |
|
339 |
-m <- NULL |
|
340 |
-x <- NULL |
|
341 |
-library(BiocSingular) |
|
342 |
-set.seed(123) |
|
343 |
-m <- matrix(sample.int(10, 25, T), 10, 10) |
|
344 |
-genes <- 1:2 |
|
345 |
-genes |
|
346 |
-genes <- c(1:2) |
|
347 |
-genes |
|
348 |
-genes <- 1:2 |
|
349 |
-m <- matrix(sample.int(10, 25, T), 10, 10) |
|
350 |
-genes <- 1:2 |
|
351 |
-x1 <- t(scale(t(m))) |
|
352 |
-res1 <- svd(x1[genes,]) |
|
353 |
-res$v[,1] |
|
354 |
-res1$v[,1] |
|
355 |
-m |
|
356 |
-t(m) |
|
357 |
-apply(t(m),2,sd) |
|
358 |
-res2 <- runExactSVD(m, center=colMeans(t(m)), scale=apply(t(m),2,sd)) |
|
359 |
-res2$v[,1] |
|
360 |
-res1$v[,1] |
|
361 |
-res2 <- runExactSVD(m, center=colMeans(t(m)), scale=apply(m,2,sd)) |
|
362 |
-res2$v[,1] |
|
363 |
-svd( t( (t(m) - C) / colSds(t(m)) )[genes,] ) |
|
364 |
-t(m) - colMeans(t(m)) |
|
365 |
-t(m) |
|
366 |
-colMeans(t(m)) |
|
367 |
-colSds(t(m)) |
|
368 |
-( t(m) - colMeans(t(m)) ) / colSds(t(m)) |
|
369 |
-sweep(t(m), 2, colMeanst(m)) |
|
370 |
-sweep(t(m), 2, colMeans(t(m))) |
|
371 |
-svd( t( sweep(t(m), 2, colMeans(t(m))) / colSds(t(m)) ) ) |
|
372 |
-svd( t( sweep(t(m), 2, colMeans(t(m))) / colSds(t(m)) )[genes,] ) |
|
373 |
-svd( t( sweep(t(m), 2, colMeans(t(m))) / colSds(t(m)) )[genes,] )$v[,1] |
|
374 |
-x1 <- t(scale(t(m))) |
|
375 |
-res1 <- svd(x1[genes,]) |
|
376 |
-res1$v[,1] |
|
377 |
-svd( t( sweep(t(m), 2, colMeans(t(m))) / rowSds(t(m)) )[genes,] )$v[,1] |
|
378 |
-res1$v[,1] |
|
379 |
-X <- m |
|
380 |
-Z <- Matrix::t(X) |
|
381 |
-Z <- .dgCapply(Z, scale, 2) |
|
382 |
-Z <- Matrix::t(Z) |
|
383 |
-Z |
|
384 |
-library(Matrix) |
|
385 |
-library(SingleCellExperiment) |
|
386 |
-library(BiocParallel) |
|
387 |
-library(sparseMatrixStats) |
|
388 |
-m<-rsparsematrix(10,10,.5) |
|
389 |
-colnames(m) <- paste0("cell_", 1:10) |
|
390 |
-rownames(m) <- paste0("gene_", 1:10) |
|
391 |
-# gset.idx.list <- list("my_genes" = sample(rownames(m), 2)) |
|
392 |
-geneSets <- list("my_genes1" = c(1,3)) |
|
393 |
-library(GSVA) |
|
394 |
-x <- gsva(m, geneSets, method="plage") |
|
395 |
-colnames(m) <- paste0("cell_", 1:10) |
|
396 |
-rownames(m) <- paste0("gene_", 1:10) |
|
397 |
-x <- gsva(m, geneSets, method="plage") |
|
398 |
-# gset.idx.list <- list("my_genes" = sample(rownames(m), 2)) |
|
399 |
-geneSets <- list("my_genes1" = c("gene_1","gene_3")) |
|
400 |
-x <- gsva(m, geneSets, method="plage") |
|
401 |
-x |
|
402 |
-x <- gsva(m, geneSets, method="zscore") |
|
403 |
-x |
|
404 |
-x <- gsva(m, geneSets, method="ssgsea") |
|
405 |
-x |
|
406 |
-X <- m |
|
407 |
-n <- ncol(X) |
|
408 |
-if(is(X, "dgCMatrix")){ |
|
409 |
-R <- t(sparseMatrixStats::colRanks(X, ties.method = "average")) |
|
410 |
-mode(R) <- "integer" |
|
411 |
-} else { |
|
412 |
-R <- apply(X, 2, function(x, p) as.integer(rank(x)), p) |
|
413 |
-} |
|
414 |
-R |
|
415 |
-R <- t(sparseMatrixStats::colRanks(X, ties.method = "average")) |
|
416 |
-mode(R) <- "integer" |
|
417 |
-R |
|
418 |
-R <- apply(X, 2, function(x, p) as.integer(rank(x)), p) |
|
419 |
-R |
|
420 |
-R <- t(sparseMatrixStats::colRanks(X, ties.method = "average")) |
|
421 |
-source('~/curro/gsva-devel/devel3_ssgsea.R', echo=TRUE) |
|
422 |
-mode(R) <- "integer" |
|
423 |
-R |
|
424 |
-Ra <- abs(R)^alpha |
|
425 |
-Ra |
|
426 |
-es <- bplapply(as.list(1:n), function(j) { |
|
427 |
-geneRanking <- order(R[, j], decreasing=TRUE) |
|
428 |
-es_sample <- lapply(geneSets, .fastRndWalk, geneRanking, j, Ra) |
|
429 |
-unlist(es_sample) |
|
430 |
-}, BPPARAM=BPPARAM) |
|
431 |
-.fastRndWalk <- function(gSetIdx, geneRanking, j, Ra) { |
|
432 |
-n <- length(geneRanking) |
|
433 |
-k <- length(gSetIdx) |
|
434 |
-idxs <- sort.int(fastmatch::fmatch(gSetIdx, geneRanking)) |
|
435 |
-stepCDFinGeneSet2 <- |
|
436 |
-sum(Ra[geneRanking[idxs], j] * (n - idxs + 1)) / |
|
437 |
-sum((Ra[geneRanking[idxs], j])) |
|
438 |
-stepCDFoutGeneSet2 <- (n * (n + 1) / 2 - sum(n - idxs + 1)) / (n - k) |
|
439 |
-walkStat <- stepCDFinGeneSet2 - stepCDFoutGeneSet2 |
|
440 |
-walkStat |
|
441 |
-} |
|
442 |
-n <- ncol(X) |
|
443 |
-es <- bplapply(as.list(1:n), function(j) { |
|
444 |
-geneRanking <- order(R[, j], decreasing=TRUE) |
|
445 |
-es_sample <- lapply(geneSets, .fastRndWalk, geneRanking, j, Ra) |
|
446 |
-unlist(es_sample) |
|
447 |
-}, BPPARAM=BPPARAM) |
|
448 |
-es |
|
449 |
-geneSets <- 1,3 |
|
450 |
-geneSets <- c(1,3) |
|
451 |
-es <- bplapply(as.list(1:n), function(j) { |
|
452 |
-geneRanking <- order(R[, j], decreasing=TRUE) |
|
453 |
-es_sample <- lapply(geneSets, .fastRndWalk, geneRanking, j, Ra) |
|
454 |
-unlist(es_sample) |
|
455 |
-}, BPPARAM=BPPARAM) |
|
456 |
-es |
|
457 |
-es <- do.call("cbind", es) |
|
458 |
-es |
|
459 |
-geneSets <- 1 |
|
460 |
-es <- bplapply(as.list(1:n), function(j) { |
|
461 |
-geneRanking <- order(R[, j], decreasing=TRUE) |
|
462 |
-es_sample <- lapply(geneSets, .fastRndWalk, geneRanking, j, Ra) |
|
463 |
-unlist(es_sample) |
|
464 |
-}, BPPARAM=BPPARAM) |
|
465 |
-es |
|
466 |
-es <- do.call("cbind", es) |
|
467 |
-es |
|
468 |
-if (normalization) { |
|
469 |
-## normalize enrichment scores by using the entire data set, as indicated |
|
470 |
-## by Barbie et al., 2009, online methods, pg. 2 |
|
471 |
-es <- apply(es, 2, function(x, es) x / (range(es)[2] - range(es)[1]), es) |
|
392 |
+right2 <- function(gSetIdx, Z) { |
|
393 |
+sink <- HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
394 |
+sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
395 |
+for(bid in seq_along(sink_grid)){ |
|
396 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
397 |
+block <- as(matrix(block, 1, nrow(Z)), "HDF5Matrix") |
|
398 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
472 | 399 |
} |
473 |
-normalization=TRUE |
|
474 |
-if (normalization) { |
|
475 |
-## normalize enrichment scores by using the entire data set, as indicated |
|
476 |
-## by Barbie et al., 2009, online methods, pg. 2 |
|
477 |
-es <- apply(es, 2, function(x, es) x / (range(es)[2] - range(es)[1]), es) |
|
400 |
+close(sink) |
|
401 |
+res <- as(sink, "DelayedArray") |
|
402 |
+res |
|
478 | 403 |
} |
479 |
-es |
|
480 |
-rownames(es) <- names(geneSets) |
|
481 |
-colnames(es) <- colnames(X) |
|
482 |
-geneSets <- c(1,3) |
|
483 |
-es <- bplapply(as.list(1:n), function(j) { |
|
484 |
-geneRanking <- order(R[, j], decreasing=TRUE) |
|
485 |
-es_sample <- lapply(geneSets, .fastRndWalk, geneRanking, j, Ra) |
|
486 |
-unlist(es_sample) |
|
487 |
-}, BPPARAM=BPPARAM) |
|
488 |
-es <- do.call("cbind", es) |
|
489 |
-if (normalization) { |
|
490 |
-## normalize enrichment scores by using the entire data set, as indicated |
|
491 |
-## by Barbie et al., 2009, online methods, pg. 2 |
|
492 |
-es <- apply(es, 2, function(x, es) x / (range(es)[2] - range(es)[1]), es) |
|
404 |
+right2(mapped.gset.idx.list[[1]], expr) |
|
405 |
+expr |
|
406 |
+showtree(expr) |
|
407 |
+right2 <- function(gSetIdx, Z) { |
|
408 |
+sink <- HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
409 |
+sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
410 |
+for(bid in seq_along(sink_grid)){ |
|
411 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
412 |
+# block <- as(matrix(block, 1, nrow(Z)), "HDF5Matrix") |
|
413 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
493 | 414 |
} |
494 |
-es |
|
495 |
-if(is(X, "dgCMatrix")){ |
|
496 |
-es <- as(es, "dgCMatrix") |
|
415 |
+close(sink) |
|
416 |
+res <- as(sink, "DelayedArray") |
|
417 |
+res |
|
497 | 418 |
} |
498 |
-es |
|
499 |
-rownames(es) <- names(geneSets) |
|
500 |
-colnames(es) <- colnames(X) |
|
501 |
-if(is(X, "dgCMatrix")){ |
|
502 |
-es <- as(es, "dgCMatrix") |
|
419 |
+right2(mapped.gset.idx.list[[1]], expr) |
|
420 |
+nrow(expr) |
|
421 |
+sink <- HDF5RealizationSink(dim = c(1L, nrow(Z))) |
|
422 |
+sink <- HDF5RealizationSink(dim = c(1L, nrow(expr))) |
|
423 |
+sink |
|
424 |
+matrix(block, 1, nrow(Z)) |
|
425 |
+matrix(block, 1, nrow(expr)) |
|
426 |
+Z <- expr |
|
427 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
428 |
+gSetIdx <- mapped.gset.idx.list[[1]] |
|
429 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
430 |
+block |
|
431 |
+dim(block) |
|
432 |
+length(block) |
|
433 |
+ncol(Z) |
|
434 |
+block <- as(matrix(block, 1, length(block)), "HDF5Matrix") |
|
435 |
+block |
|
436 |
+sink <- HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
437 |
+seq_along(sink_grid) |
|
438 |
+sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
439 |
+sink_grid <- defaultAutoGrid(sink, block.shape="first-dim-grows-first") |
|
440 |
+seq_along(sink_grid) |
|
441 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
442 |
+sink <- write_block(sink, sink_grid[[1]], block) |
|
443 |
+sink <- write_block(sink, sink_grid[[1L]], block) |
|
444 |
+sink |
|
445 |
+close(sink) |
|
446 |
+res <- as(sink, "DelayedArray") |
|
447 |
+res |
|
448 |
+rightsingularsvdvectorgset(mapped.gset.idx.list[[1]], Z) |
|
449 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
450 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
451 |
+sink <- write_block(sink, sink_grid[[1L]], block) |
|
452 |
+block <- matrix(block, 1, length(block)) |
|
453 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
454 |
+sink <- write_block(sink, sink_grid[[1L]], block) |
|
455 |
+close(sink) |
|
456 |
+res <- as(sink, "DelayedArray") |
|
457 |
+res |
|
458 |
+right2 <- function(gSetIdx, Z) { |
|
459 |
+sink <- HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
460 |
+sink_grid <- rowAutoGrid(sink, nrow = 1) |
|
461 |
+for(bid in seq_along(sink_grid)){ |
|
462 |
+block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
463 |
+block <- matrix(block, 1, length(block)) |
|
464 |
+sink <- write_block(sink, sink_grid[[bid]], block) |
|
503 | 465 |
} |
504 |
-es |
|
466 |
+close(sink) |
|
467 |
+res <- as(sink, "DelayedArray") |
|
468 |
+res |
|
469 |
+} |
|
470 |
+right2(mapped.gset.idx.list[[1]], Z) |
|
471 |
+bench::mark( |
|
472 |
+plage(expr, mapped.gset.idx.list), |
|
473 |
+plage2(expr, mapped.gset.idx.list), |
|
474 |
+plage3(expr, mapped.gset.idx.list), |
|
475 |
+check=FALSE |
|
476 |
+) |
|
477 |
+expr |
|
478 |
+expr2 <- as(expr, "matrix") |
|
479 |
+expr2 |
|
480 |
+str(expr2) |
|
481 |
+x1 <- DelayedMatrixStats::rowSds(expr) |
|
482 |
+x2 <- apply(expr, 1, sd) |
|
483 |
+x1 |
|
484 |
+x2 |
|
485 |
+all.equal(x1, x2) |
|
486 |
+x1 |
|
487 |
+x2 |
|
488 |
+names(x2) |
|
489 |
+names(x2) <- NULL |
|
490 |
+x1 |
|
491 |
+x2 |
|
492 |
+all.equal(x1, x2) |
|
493 |
+x1 <- DelayedMatrixStats::rowSds(expr2) |
|
494 |
+x2 <- apply(expr2, 1, sd) |
|
495 |
+x1 |
|
496 |
+x2 |
|
497 |
+names(x2) <- NULL |
|
498 |
+all.equal(x1, x2) |
|
505 | 499 |
setwd("~/curro/gsva-devel/GSVA") |
506 | 500 |
devtools::check(vignettes = FALSE) |
507 | 501 |
devtools::check(vignettes = FALSE) |
502 |
+devtools::check(vignettes = FALSE) |
|
503 |
+devtools::check(vignettes = FALSE) |
|
508 | 504 |
devtools::build(vignettes = FALSE) |
509 | 505 |
devtools::install(build_vignettes = FALSE) |
510 |
-x <- gsva(sce, gset.idx.list, method="zscore") |
|
511 |
-x <- gsva(sce, gset.idx.list, method="ssgsea") |
|
512 |
-X |
|
506 |
+library(GSVA) |
|
507 |
+m <- matrix(sample.int(10, 25, T), 10, 10) |
|
508 |
+colnames(m) <- paste0("cell_", 1:10) |
|
509 |
+rownames(m) <- paste0("gene_", 1:10) |
|
510 |
+genes <- c(3,5) |
|
511 |
+m <- as(m, "HDF5Array") |
|
512 |
+res <- gsva(m, genes, method="plage") |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: GSVA |
2 |
-Version: 1.39.7 |
|
2 |
+Version: 1.39.8 |
|
3 | 3 |
Title: Gene Set Variation Analysis for microarray and RNA-seq data |
4 | 4 |
Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.guinney@sagebase.org"), |
5 | 5 |
person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"), |
... | ... |
@@ -8,8 +8,9 @@ Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.gui |
8 | 8 |
Depends: R (>= 3.5.0) |
9 | 9 |
Imports: methods, stats, utils, graphics, BiocGenerics, S4Vectors, |
10 | 10 |
Biobase, SummarizedExperiment, GSEABase, Matrix, |
11 |
- parallel, BiocParallel, fastmatch, SingleCellExperiment, |
|
12 |
- BiocSingular, sparseMatrixStats |
|
11 |
+ parallel, BiocParallel, fastmatch, SingleCellExperiment, |
|
12 |
+ sparseMatrixStats, DelayedArray, DelayedMatrixStats, |
|
13 |
+ HDF5Array |
|
13 | 14 |
Suggests: RUnit, limma, RColorBrewer, genefilter, edgeR, GSVAdata, |
14 | 15 |
shiny, shinythemes, ggplot2, data.table, plotly |
15 | 16 |
Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner. |
... | ... |
@@ -8,6 +8,8 @@ importClassesFrom(SummarizedExperiment, SummarizedExperiment) |
8 | 8 |
importClassesFrom(GSEABase, GeneSetCollection) |
9 | 9 |
importClassesFrom(SingleCellExperiment, SingleCellExperiment) |
10 | 10 |
importClassesFrom(Matrix, dgCMatrix) |
11 |
+importClassesFrom(DelayedArray, DelayedArray) |
|
12 |
+importClassesFrom(HDF5Array, HDF5Array) |
|
11 | 13 |
|
12 | 14 |
importMethodsFrom(Biobase, featureNames, |
13 | 15 |
phenoData, |
... | ... |
@@ -48,8 +50,12 @@ importFrom(BiocParallel, SerialParam, |
48 | 50 |
multicoreWorkers, |
49 | 51 |
bpnworkers) |
50 | 52 |
importFrom(SingleCellExperiment, SingleCellExperiment) |
51 |
-importFrom(BiocSingular, runExactSVD) |
|
52 | 53 |
importFrom(sparseMatrixStats, colRanks) |
54 |
+importFrom(DelayedArray, rowAutoGrid, |
|
55 |
+ write_block, |
|
56 |
+ close) |
|
57 |
+importFrom(HDF5Array, HDF5RealizationSink) |
|
58 |
+importFrom(DelayedMatrixStats, rowSds) |
|
53 | 59 |
|
54 | 60 |
exportMethods(gsva, |
55 | 61 |
filterGeneSets, |
... | ... |
@@ -5,6 +5,61 @@ |
5 | 5 |
|
6 | 6 |
setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva")) |
7 | 7 |
|
8 |
+setMethod("gsva", signature(expr="HDF5Array", gset.idx.list="list"), |
|
9 |
+ function(expr, gset.idx.list, annotation, |
|
10 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
11 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
12 |
+ abs.ranking=FALSE, |
|
13 |
+ min.sz=1, |
|
14 |
+ max.sz=Inf, |
|
15 |
+ parallel.sz=1L, |
|
16 |
+ mx.diff=TRUE, |
|
17 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
18 |
+ ssgsea.norm=TRUE, |
|
19 |
+ verbose=TRUE, |
|
20 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
21 |
+{ |
|
22 |
+ method <- match.arg(method) |
|
23 |
+ kcdf <- match.arg(kcdf) |
|
24 |
+ |
|
25 |
+ ## filter genes according to verious criteria, |
|
26 |
+ ## e.g., constant expression |
|
27 |
+ expr <- .filterFeatures(expr, method) |
|
28 |
+ |
|
29 |
+ if (nrow(expr) < 2) |
|
30 |
+ stop("Less than two genes in the input assay object\n") |
|
31 |
+ |
|
32 |
+ ## map to the actual features for which expression data is available |
|
33 |
+ mapped.gset.idx.list <- lapply(gset.idx.list, |
|
34 |
+ function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
35 |
+ rownames(expr)) |
|
36 |
+ |
|
37 |
+ if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
38 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
39 |
+ |
|
40 |
+ ## remove gene sets from the analysis for which no features are available |
|
41 |
+ ## and meet the minimum and maximum gene-set size specified by the user |
|
42 |
+ mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
43 |
+ min.sz=max(1, min.sz), |
|
44 |
+ max.sz=max.sz) |
|
45 |
+ |
|
46 |
+ if (!missing(kcdf)) { |
|
47 |
+ if (kcdf == "Gaussian") { |
|
48 |
+ rnaseq <- FALSE |
|
49 |
+ kernel <- TRUE |
|
50 |
+ } else if (kcdf == "Poisson") { |
|
51 |
+ rnaseq <- TRUE |
|
52 |
+ kernel <- TRUE |
|
53 |
+ } else |
|
54 |
+ kernel <- FALSE |
|
55 |
+ } |
|
56 |
+ |
|
57 |
+ rval <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
58 |
+ parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) |
|
59 |
+ |
|
60 |
+ rval |
|
61 |
+}) |
|
62 |
+ |
|
8 | 63 |
setMethod("gsva", signature(expr="SingleCellExperiment", gset.idx.list="list"), |
9 | 64 |
function(expr, gset.idx.list, annotation, |
10 | 65 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
... | ... |
@@ -589,6 +644,11 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list"), |
589 | 644 |
ssgsea.norm=TRUE, |
590 | 645 |
verbose=TRUE, |
591 | 646 |
BPPARAM=SerialParam(progressbar=verbose)) { |
647 |
+ |
|
648 |
+ if(is(expr, "DelayedArray")){ |
|
649 |
+ return(.gsvaDelayedArray(expr, gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
650 |
+ parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM)) |
|
651 |
+ } |
|
592 | 652 |
|
593 | 653 |
if (length(gset.idx.list) == 0) |
594 | 654 |
stop("The gene set list is empty! Filter may be too stringent.") |
595 | 655 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,94 @@ |
1 |
+.gsvaDelayedArray <- function(expr, gset.idx.list, |
|
2 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
3 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
4 |
+ rnaseq=FALSE, |
|
5 |
+ abs.ranking=FALSE, |
|
6 |
+ parallel.sz=1L, |
|
7 |
+ mx.diff=TRUE, |
|
8 |
+ tau=1, |
|
9 |
+ kernel=TRUE, |
|
10 |
+ ssgsea.norm=TRUE, |
|
11 |
+ verbose=TRUE, |
|
12 |
+ BPPARAM=SerialParam(progressbar=verbose)) { |
|
13 |
+ |
|
14 |
+ if (length(gset.idx.list) == 0) |
|
15 |
+ stop("The gene set list is empty! Filter may be too stringent.") |
|
16 |
+ |
|
17 |
+ if (any(lengths(gset.idx.list) == 1)) |
|
18 |
+ warning("Some gene sets have size one. Consider setting 'min.sz > 1'.") |
|
19 |
+ |
|
20 |
+ parallel.sz <- as.integer(parallel.sz) |
|
21 |
+ if (parallel.sz < 1L) |
|
22 |
+ parallel.sz <- 1L |
|
23 |
+ |
|
24 |
+ ## because we keep the argument 'parallel.sz' for backwards compatibility |
|
25 |
+ ## we need to harmonize it with the contents of BPPARAM |
|
26 |
+ if (parallel.sz > 1L && class(BPPARAM) == "SerialParam") { |
|
27 |
+ BPPARAM=MulticoreParam(progressbar=verbose, workers=parallel.sz, tasks=100) |
|
28 |
+ } else if (parallel.sz == 1L && class(BPPARAM) != "SerialParam") { |
|
29 |
+ parallel.sz <- bpnworkers(BPPARAM) |
|
30 |
+ } else if (parallel.sz > 1L && class(BPPARAM) != "SerialParam") { |
|
31 |
+ bpworkers(BPPARAM) <- parallel.sz |
|
32 |
+ } |
|
33 |
+ |
|
34 |
+ if (class(BPPARAM) != "SerialParam" && verbose) |
|
35 |
+ cat(sprintf("Setting parallel calculations through a %s back-end\nwith workers=%d and tasks=100.\n", |
|
36 |
+ class(BPPARAM), parallel.sz)) |
|
37 |
+ |
|
38 |
+ |
|
39 |
+ if (method == "plage") { |
|
40 |
+ if (rnaseq) |
|
41 |
+ stop("rnaseq=TRUE does not work with method='plage'.") |
|
42 |
+ |
|
43 |
+ if(verbose) |
|
44 |
+ cat("Estimating PLAGE scores for", length(gset.idx.list),"gene sets.\n") |
|
45 |
+ |
|
46 |
+ return(plageDelayed(expr, gset.idx.list, parallel.sz, verbose, BPPARAM=BPPARAM)) |
|
47 |
+ } |
|
48 |
+ |
|
49 |
+ |
|
50 |
+} |
|
51 |
+ |
|
52 |
+rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
|
53 |
+ s <- svd(Z[gSetIdx, ]) |
|
54 |
+ s$v[, 1] |
|
55 |
+} |
|
56 |
+ |
|
57 |
+svdDelayed <- function(gSetIdx, Z) { |
|
58 |
+ |
|
59 |
+ # step 1: creating a HDF5 sink object |
|
60 |
+ sink <- HDF5Array::HDF5RealizationSink(dim = c(1L, ncol(Z))) |
|
61 |
+ |
|
62 |
+ #step 2: creating the sink's grid |
|
63 |
+ sink_grid <- DelayedArray::rowAutoGrid(sink, nrow = 1) |
|
64 |
+ |
|
65 |
+ # step 3: creating the block and writing it in the sink |
|
66 |
+ # object using the sink's grid |
|
67 |
+ block <- rightsingularsvdvectorgset(gSetIdx, Z) |
|
68 |
+ block <- matrix(block, 1, length(block)) |
|
69 |
+ sink <- DelayedArray::write_block(sink, sink_grid[[1L]], block) |
|
70 |
+ |
|
71 |
+ # step 4: closing the sink and realizating it |
|
72 |
+ # in a hdf5 array object |
|
73 |
+ DelayedArray::close(sink) |
|
74 |
+ res <- as(sink, "DelayedArray") |
|
75 |
+ |
|
76 |
+ res |
|
77 |
+ |
|
78 |
+} |
|
79 |
+ |
|
80 |
+plageDelayed <- function(X, geneSets, parallel.sz, verbose=TRUE, |
|
81 |
+ BPPARAM=SerialParam(progressbar=verbose)) { |
|
82 |
+ |
|
83 |
+ Z <- t(DelayedArray::scale(t(X))) |
|
84 |
+ |
|
85 |
+ es <- bplapply(geneSets, svdDelayed, Z, |
|
86 |
+ BPPARAM=BPPARAM) |
|
87 |
+ |
|
88 |
+ es <- do.call(rbind, es) |
|
89 |
+ |
|
90 |
+ es <- as(es, "HDF5Array") |
|
91 |
+ |
|
92 |
+ es |
|
93 |
+} |
|
94 |
+ |
... | ... |
@@ -1,6 +1,8 @@ |
1 | 1 |
.filterFeatures <- function(expr, method) { |
2 | 2 |
|
3 | 3 |
## filter out genes with constant expression values |
4 |
+ ## DelayedMatrixStats::rowSds() works for both base and |
|
5 |
+ ## DelayedArray matrices |
|
4 | 6 |
sdGenes <- DelayedMatrixStats::rowSds(expr) |
5 | 7 |
if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
6 | 8 |
warning(sum(sdGenes == 0 | is.na(sdGenes)), |
... | ... |
@@ -1,5 +1,6 @@ |
1 | 1 |
\name{gsva} |
2 | 2 |
\alias{gsva} |
3 |
+\alias{gsva,HDF5Array,list-method} |
|
3 | 4 |
\alias{gsva,SingleCellExperiment,list-method} |
4 | 5 |
\alias{gsva,dgCMatrix,list-method} |
5 | 6 |
\alias{gsva,SummarizedExperiment,list-method} |
... | ... |
@@ -30,6 +31,18 @@ Estimates GSVA enrichment scores. |
30 | 31 |
ssgsea.norm=TRUE, |
31 | 32 |
verbose=TRUE, |
32 | 33 |
BPPARAM=SerialParam(progressbar=verbose)) |
34 |
+\S4method{gsva}{HDF5Array,list}(expr, gset.idx.list, annotation, |
|
35 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
36 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
37 |
+ abs.ranking=FALSE, |
|
38 |
+ min.sz=1, |
|
39 |
+ max.sz=Inf, |
|
40 |
+ parallel.sz=1L, |
|
41 |
+ mx.diff=TRUE, |
|
42 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
43 |
+ ssgsea.norm=TRUE, |
|
44 |
+ verbose=TRUE, |
|
45 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
33 | 46 |
\S4method{gsva}{dgCMatrix,list}(expr, gset.idx.list, annotation, |
34 | 47 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
35 | 48 |
kcdf=c("Gaussian", "Poisson", "none"), |
... | ... |
@@ -120,7 +133,8 @@ Estimates GSVA enrichment scores. |
120 | 133 |
\code{SummarizedExperiment}, \code{SingleCellExperiment} |
121 | 134 |
\code{ExpressionSet} object, or as a matrix of expression |
122 | 135 |
values where rows correspond to genes and columns correspond to samples. |
123 |
- This matrix can be also in a sparse format, as a \code{dgCMatrix}.} |
|
136 |
+ This matrix can be also in a sparse format, as a \code{dgCMatrix}, or |
|
137 |
+ as an on-disk backend representation, such as \code{HDF5Array} .} |
|
124 | 138 |
\item{gset.idx.list}{Gene sets provided either as a \code{list} object or as a |
125 | 139 |
\code{GeneSetCollection} object.} |
126 | 140 |
\item{annotation}{In the case of calling \code{gsva()} on a |