git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GSVA@64337 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,10 +1,10 @@ |
1 | 1 |
Package: GSVA |
2 |
-Version: 1.3.9 |
|
3 |
-Date: 2012-03-16 |
|
2 |
+Version: 1.3.10 |
|
3 |
+Date: 2012-03-23 |
|
4 | 4 |
Title: Gene Set Variation Analysis |
5 | 5 |
Author: Justin Guinney <justin.guinney@sagebase.org> (with contributions from Robert Castelo <robert.castelo@upf.edu> and Sonja Haenzelmann <shanzelmann@imim.es) |
6 | 6 |
Maintainer: Justin Guinney <justin.guinney@sagebase.org> |
7 |
-Depends: R (>= 2.13.0), methods |
|
7 |
+Depends: R (>= 2.13.0), methods, GSEABase (>= 1.17.4) |
|
8 | 8 |
Imports: methods, Biobase, GSEABase |
9 | 9 |
Enhances: snow, parallel |
10 | 10 |
Suggests: limma, qpgraph, RBGL, graph, Rgraphviz, RColorBrewer, genefilter, GSVAdata |
... | ... |
@@ -1,10 +1,12 @@ |
1 | 1 |
useDynLib(GSVA) |
2 | 2 |
|
3 | 3 |
import(methods) |
4 |
-importClassesFrom(Biobase) |
|
5 |
-importClassesFrom(GSEABase) |
|
4 |
+importClassesFrom(Biobase, ExpressionSet) |
|
5 |
+importClassesFrom(GSEABase, GeneSetCollection) |
|
6 | 6 |
importFrom(Biobase, exprs) |
7 | 7 |
importFrom(Biobase, annotation) |
8 |
+importFrom(GSEABase, AnnoOrEntrezIdentifier) |
|
9 |
+importFrom(GSEABase, mapIdentifiers) |
|
8 | 10 |
|
9 | 11 |
exportMethods("gsva", |
10 | 12 |
"filterGeneSets", |
... | ... |
@@ -1,3 +1,20 @@ |
1 |
+CHANGES IN VERSION 1.4 |
|
2 |
+---------------------- |
|
3 |
+ |
|
4 |
+USER VISIBLE CHANGES |
|
5 |
+ |
|
6 |
+ o removed the system-requirement dependency from the GNU Scientific Library |
|
7 |
+ |
|
8 |
+ o added two additional gene-set expression summarization methods: single-sample GSEA from Barbie et al. (Nature, 2009) and a combined Z-score method similar to the one used by Lee et al. (PLos Comp Biol, 2008) via a new 'method' argument in the 'gsva()' function |
|
9 |
+ |
|
10 |
+ o added handling of RNA-seq expression data matrices by the GSVA method with a new 'rnaseq' argument in the 'gsva()' function |
|
11 |
+ |
|
12 |
+ o added a method with signature(expr="matrix", gset.idx.list="GeneSetCollection", annotation="character") which did not exist before. Now gsva() accepts the following pairs of data structures storing expression data and gene sets: ExpressionSet-GeneSetCollection, ExpressionSet-list, matrix-GeneSetCollection and matrix-list |
|
13 |
+ |
|
14 |
+BUG FIXES |
|
15 |
+ |
|
16 |
+ o matching of gene IDs from ExpressionSet objects to GeneSetCollection objects now also works with Entrez-based gene IDs in ExpressionSet objects (e.g., when annotation(eset) == "org.Hs.eg.db") by using GSEABase >= 1.17.4 |
|
17 |
+ |
|
1 | 18 |
CHANGES IN VERSION 0.9 |
2 | 19 |
---------------------- |
3 | 20 |
|
... | ... |
@@ -14,12 +14,12 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati |
14 | 14 |
max.sz=Inf, |
15 | 15 |
no.bootstraps=0, |
16 | 16 |
bootstrap.percent = .632, |
17 |
- alpha=0.25, |
|
18 | 17 |
parallel.sz=0, |
19 | 18 |
parallel.type="SOCK", |
20 |
- verbose=TRUE, |
|
21 | 19 |
mx.diff=TRUE, |
22 |
- kernel=TRUE) |
|
20 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
21 |
+ kernel=TRUE, |
|
22 |
+ verbose=TRUE) |
|
23 | 23 |
{ |
24 | 24 |
method <- match.arg(method) |
25 | 25 |
|
... | ... |
@@ -35,8 +35,8 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati |
35 | 35 |
max.sz=max.sz) |
36 | 36 |
|
37 | 37 |
eSco <- GSVA:::.gsva(Biobase::exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking, |
38 |
- no.bootstraps, bootstrap.percent, alpha, parallel.sz, parallel.type, |
|
39 |
- verbose, mx.diff, kernel) |
|
38 |
+ no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
39 |
+ mx.diff, tau, kernel, verbose) |
|
40 | 40 |
eScoEset <- expr |
41 | 41 |
eScoEset <- Biobase::`exprs<-`(eScoEset, eSco$es.obs) |
42 | 42 |
eScoEset <- Biobase::`annotation<-`(eScoEset, "") |
... | ... |
@@ -55,12 +55,12 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
55 | 55 |
max.sz=Inf, |
56 | 56 |
no.bootstraps=0, |
57 | 57 |
bootstrap.percent = .632, |
58 |
- alpha=0.25, |
|
59 | 58 |
parallel.sz=0, |
60 | 59 |
parallel.type="SOCK", |
61 |
- verbose=TRUE, |
|
62 | 60 |
mx.diff=TRUE, |
63 |
- kernel=TRUE) |
|
61 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
62 |
+ kernel=TRUE, |
|
63 |
+ verbose=TRUE) |
|
64 | 64 |
{ |
65 | 65 |
method <- match.arg(method) |
66 | 66 |
|
... | ... |
@@ -68,8 +68,8 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
68 | 68 |
cat("Mapping identifiers between gene sets and feature names\n") |
69 | 69 |
|
70 | 70 |
## map gene identifiers of the gene sets to the features in the chip |
71 |
- mapped.gset.idx.list <- mapIdentifiers(gset.idx.list, |
|
72 |
- AnnotationIdentifier(annotation(expr))) |
|
71 |
+ mapped.gset.idx.list <- GSEABase::mapIdentifiers(gset.idx.list, |
|
72 |
+ GSEABase::AnnoOrEntrezIdentifier(annotation(expr))) |
|
73 | 73 |
|
74 | 74 |
## map to the actual features for which expression data is available |
75 | 75 |
tmp <- lapply(geneIds(mapped.gset.idx.list), |
... | ... |
@@ -83,8 +83,8 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
83 | 83 |
max.sz=max.sz) |
84 | 84 |
|
85 | 85 |
eSco <- GSVA:::.gsva(Biobase::exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking, |
86 |
- no.bootstraps, bootstrap.percent, alpha, parallel.sz, parallel.type, |
|
87 |
- verbose, mx.diff, kernel) |
|
86 |
+ no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
87 |
+ mx.diff, tau, kernel, verbose) |
|
88 | 88 |
eScoEset <- expr |
89 | 89 |
eScoEset <- Biobase::`exprs<-`(eScoEset, eSco$es.obs) |
90 | 90 |
eScoEset <- Biobase::`annotation<-`(eScoEset, "") |
... | ... |
@@ -103,12 +103,12 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an |
103 | 103 |
max.sz=Inf, |
104 | 104 |
no.bootstraps=0, |
105 | 105 |
bootstrap.percent = .632, |
106 |
- alpha=0.25, |
|
107 | 106 |
parallel.sz=0, |
108 | 107 |
parallel.type="SOCK", |
109 |
- verbose=TRUE, |
|
110 | 108 |
mx.diff=TRUE, |
111 |
- kernel=TRUE) |
|
109 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
110 |
+ kernel=TRUE, |
|
111 |
+ verbose=TRUE) |
|
112 | 112 |
{ |
113 | 113 |
method <- match.arg(method) |
114 | 114 |
|
... | ... |
@@ -118,8 +118,8 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an |
118 | 118 |
if (verbose) |
119 | 119 |
cat("Mapping identifiers between gene sets and feature names\n") |
120 | 120 |
|
121 |
- mapped.gset.idx.list <- mapIdentifiers(gset.idx.list, |
|
122 |
- AnnotationIdentifier(annotation)) |
|
121 |
+ mapped.gset.idx.list <- GSEABase::mapIdentifiers(gset.idx.list, |
|
122 |
+ GSEABase::AnnoOrEntrezIdentifier(annotation)) |
|
123 | 123 |
} |
124 | 124 |
|
125 | 125 |
## map to the actual features for which expression data is available |
... | ... |
@@ -135,8 +135,8 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an |
135 | 135 |
max.sz=max.sz) |
136 | 136 |
|
137 | 137 |
GSVA:::.gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, |
138 |
- no.bootstraps, bootstrap.percent, alpha, parallel.sz, parallel.type, |
|
139 |
- verbose, mx.diff, kernel) |
|
138 |
+ no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
139 |
+ mx.diff, tau, kernel, verbose) |
|
140 | 140 |
}) |
141 | 141 |
|
142 | 142 |
setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="missing"), |
... | ... |
@@ -148,12 +148,12 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
148 | 148 |
max.sz=Inf, |
149 | 149 |
no.bootstraps=0, |
150 | 150 |
bootstrap.percent = .632, |
151 |
- alpha=0.25, |
|
152 | 151 |
parallel.sz=0, |
153 | 152 |
parallel.type="SOCK", |
154 |
- verbose=TRUE, |
|
155 | 153 |
mx.diff=TRUE, |
156 |
- kernel=TRUE) |
|
154 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
155 |
+ kernel=TRUE, |
|
156 |
+ verbose=TRUE) |
|
157 | 157 |
{ |
158 | 158 |
method <- match.arg(method) |
159 | 159 |
|
... | ... |
@@ -168,8 +168,8 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
168 | 168 |
max.sz=max.sz) |
169 | 169 |
|
170 | 170 |
GSVA:::.gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, no.bootstraps, |
171 |
- bootstrap.percent, alpha, parallel.sz, parallel.type, |
|
172 |
- verbose, mx.diff, kernel) |
|
171 |
+ bootstrap.percent, parallel.sz, parallel.type, |
|
172 |
+ mx.diff, tau, kernel, verbose) |
|
173 | 173 |
}) |
174 | 174 |
|
175 | 175 |
.gsva <- function(expr, gset.idx.list, |
... | ... |
@@ -178,12 +178,12 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
178 | 178 |
abs.ranking=FALSE, |
179 | 179 |
no.bootstraps=0, |
180 | 180 |
bootstrap.percent = .632, |
181 |
- alpha=0.25, |
|
182 | 181 |
parallel.sz=0, |
183 | 182 |
parallel.type="SOCK", |
184 |
- verbose=TRUE, |
|
185 | 183 |
mx.diff=TRUE, |
186 |
- kernel=TRUE) |
|
184 |
+ tau=1, |
|
185 |
+ kernel=TRUE, |
|
186 |
+ verbose=TRUE) |
|
187 | 187 |
{ |
188 | 188 |
if(length(gset.idx.list) == 0){ |
189 | 189 |
stop("The gene set list is empty! Filter may be too stringent.") |
... | ... |
@@ -193,7 +193,8 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
193 | 193 |
if(verbose) |
194 | 194 |
cat("Estimating ssGSEA scores for", length(gset.idx.list),"gene sets.\n") |
195 | 195 |
|
196 |
- return(ssgsea(expr, gset.idx.list, alpha, parallel.sz, parallel.type, verbose)) |
|
196 |
+ return(ssgsea(expr, gset.idx.list, alpha=tau, parallel.sz=parallel.sz, |
|
197 |
+ parallel.type=parallel.type, verbose=verbose)) |
|
197 | 198 |
} |
198 | 199 |
|
199 | 200 |
if (method == "zscore") { |
... | ... |
@@ -225,8 +226,9 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
225 | 226 |
if (verbose) |
226 | 227 |
cat("Computing observed enrichment scores\n") |
227 | 228 |
es.obs <- compute.geneset.es(expr, gset.idx.list, 1:n.samples, |
228 |
- rnaseq, abs.ranking,parallel.sz, |
|
229 |
- parallel.type,verbose=verbose, mx.diff=mx.diff, kernel=kernel) |
|
229 |
+ rnaseq=rnaseq, abs.ranking=abs.ranking, parallel.sz=parallel.sz, |
|
230 |
+ parallel.type=parallel.type, mx.diff=mx.diff, tau=tau, |
|
231 |
+ kernel=kernel, verbose=verbose) |
|
230 | 232 |
|
231 | 233 |
# es.bootstraps -> n.gset by n.samples by n.resamples |
232 | 234 |
es.bootstraps=NULL |
... | ... |
@@ -273,7 +275,8 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
273 | 275 |
if(verbose) cat("bootstrap cycle ", i, "\n") |
274 | 276 |
r <- clEvalQ(cl, compute.geneset.es(expr, gset.idx.list, |
275 | 277 |
sample(n.samples, bootstrap.nsamples, replace=T), |
276 |
- rnaseq, abs.ranking)) |
|
278 |
+ rnaseq=rnaseq, abs.ranking=abs.ranking, mx.diff=mx.diff, |
|
279 |
+ tau=tau, kernel=kernel, verbose=verbose)) |
|
277 | 280 |
for(j in 1:length(r)){ |
278 | 281 |
es.bootstraps[,,(parallel.sz * (i-1) + j)] <- r[[j]] |
279 | 282 |
} |
... | ... |
@@ -284,7 +287,8 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
284 | 287 |
for(i in 1:no.bootstraps){ |
285 | 288 |
es.bootstraps[,,i] <- compute.geneset.es(expr, gset.idx.list, |
286 | 289 |
sample(n.samples, bootstrap.nsamples, replace=T), |
287 |
- rnaseq, abs.ranking) |
|
290 |
+ rnaseq=rnaseq, abs.ranking=abs.ranking, mx.diff=mx.diff, |
|
291 |
+ tau=tau, kernel=kernel, verbose=verbose) |
|
288 | 292 |
} |
289 | 293 |
} |
290 | 294 |
|
... | ... |
@@ -340,7 +344,7 @@ compute.gene.density <- function(expr, sample.idxs, rnaseq=FALSE, kernel=TRUE){ |
340 | 344 |
|
341 | 345 |
compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
342 | 346 |
abs.ranking, parallel.sz=0, parallel.type="SOCK", |
343 |
- verbose=FALSE, mx.diff=TRUE, kernel=TRUE){ |
|
347 |
+ mx.diff=TRUE, tau=1, kernel=TRUE, verbose=TRUE){ |
|
344 | 348 |
num_genes <- nrow(expr) |
345 | 349 |
if (verbose) { |
346 | 350 |
if (kernel) { |
... | ... |
@@ -389,7 +393,7 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
389 | 393 |
cl <- makeCl(parallel.sz, type = parallel.type) |
390 | 394 |
clEvalQ(cl, library(GSVA)) |
391 | 395 |
if (verbose) { |
392 |
- cat("Evaluating parallel ks-tests...\n") |
|
396 |
+ cat("Estimating enrichment scores in parallel\n") |
|
393 | 397 |
if(mx.diff) { |
394 | 398 |
cat("Taking diff of max KS.\n") |
395 | 399 |
} else{ |
... | ... |
@@ -400,7 +404,7 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
400 | 404 |
m <- t(parSapp(cl, gset.idx.list, ks_test_m, |
401 | 405 |
gene.density=rank.scores, |
402 | 406 |
sort.idxs=sort.sgn.idxs, |
403 |
- mx.diff=mx.diff, verbose=FALSE)) |
|
407 |
+ mx.diff=mx.diff, tau=tau, verbose=FALSE)) |
|
404 | 408 |
if(verbose) |
405 | 409 |
cat("Cleaning up\n") |
406 | 410 |
stopCl(cl) |
... | ... |
@@ -425,7 +429,7 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
425 | 429 |
m <- mclapp(gset.idx.list, GSVA:::ks_test_m, |
426 | 430 |
gene.density=rank.scores, |
427 | 431 |
sort.idxs=sort.sgn.idxs, |
428 |
- mx.diff=mx.diff, verbose=verbose) |
|
432 |
+ mx.diff=mx.diff, tau=tau, verbose=verbose) |
|
429 | 433 |
m <- do.call("rbind", m) |
430 | 434 |
colnames(m) <- colnames(expr) |
431 | 435 |
|
... | ... |
@@ -436,7 +440,7 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
436 | 440 |
|
437 | 441 |
} else { |
438 | 442 |
if (verbose) { |
439 |
- cat("Evaluating ks-tests\n") |
|
443 |
+ cat("Estimating enrichment scores\n") |
|
440 | 444 |
if (mx.diff) { |
441 | 445 |
cat("Taking diff of max KS.\n") |
442 | 446 |
} else{ |
... | ... |
@@ -451,7 +455,7 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
451 | 455 |
} |
452 | 456 |
|
453 | 457 |
m <- t(sapply(gset.idx.list, ks_test_m, rank.scores, sort.sgn.idxs, |
454 |
- mx.diff=mx.diff, verbose=verbose)) |
|
458 |
+ mx.diff=mx.diff, tau=tau, verbose=verbose)) |
|
455 | 459 |
|
456 | 460 |
if (verbose) { |
457 | 461 |
close(get("progressBar", envir=globalenv())) |
... | ... |
@@ -461,7 +465,7 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
461 | 465 |
} |
462 | 466 |
|
463 | 467 |
|
464 |
-ks_test_m <- function(gset_idxs, gene.density, sort.idxs, tau.factor=1, mx.diff=TRUE, verbose=TRUE){ |
|
468 |
+ks_test_m <- function(gset_idxs, gene.density, sort.idxs, mx.diff=TRUE, tau=1, verbose=TRUE){ |
|
465 | 469 |
|
466 | 470 |
n.genes <- nrow(gene.density) |
467 | 471 |
n.samples <- ncol(gene.density) |
... | ... |
@@ -474,7 +478,7 @@ ks_test_m <- function(gset_idxs, gene.density, sort.idxs, tau.factor=1, mx.diff= |
474 | 478 |
n.genes, |
475 | 479 |
as.integer(gset_idxs), |
476 | 480 |
n.geneset, |
477 |
- as.double(tau.factor), |
|
481 |
+ as.double(tau), |
|
478 | 482 |
n.samples, |
479 | 483 |
as.integer(mx.diff))$R |
480 | 484 |
|
... | ... |
@@ -489,12 +493,12 @@ ks_test_m <- function(gset_idxs, gene.density, sort.idxs, tau.factor=1, mx.diff= |
489 | 493 |
|
490 | 494 |
|
491 | 495 |
## ks-test in R code - testing only |
492 |
-ks_test_Rcode <- function(gene.density, gset_idxs, tau.factor=1, make.plot=FALSE){ |
|
496 |
+ks_test_Rcode <- function(gene.density, gset_idxs, tau=1, make.plot=FALSE){ |
|
493 | 497 |
|
494 | 498 |
n.genes = length(gene.density) |
495 | 499 |
n.gset = length(gset_idxs) |
496 | 500 |
|
497 |
- sum.gset <- sum(abs(gene.density[gset_idxs])^tau.factor) |
|
501 |
+ sum.gset <- sum(abs(gene.density[gset_idxs])^tau) |
|
498 | 502 |
|
499 | 503 |
dec = 1 / (n.genes - n.gset) |
500 | 504 |
|
... | ... |
@@ -505,7 +509,7 @@ ks_test_Rcode <- function(gene.density, gset_idxs, tau.factor=1, make.plot=FALSE |
505 | 509 |
values <- rep(NaN, length(gset_idxs)) |
506 | 510 |
current = 0 |
507 | 511 |
for(i in seq_along(offsets)){ |
508 |
- current = current + abs(gene.density[sort.idxs[offsets[i]]])^tau.factor / sum.gset - dec * (offsets[i]-last.idx-1) |
|
512 |
+ current = current + abs(gene.density[sort.idxs[offsets[i]]])^tau / sum.gset - dec * (offsets[i]-last.idx-1) |
|
509 | 513 |
|
510 | 514 |
values[i] = current |
511 | 515 |
last.idx = offsets[i] |
... | ... |
@@ -588,7 +592,7 @@ ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz, parallel.type, verbose) |
588 | 592 |
} |
589 | 593 |
geneRanking <- order(R[, j], decreasing=TRUE) |
590 | 594 |
es_sample <- NA |
591 |
- if (!haveParallel && !haveSnow) |
|
595 |
+ if (is.na(cl) && !haveParallel) |
|
592 | 596 |
es_sample <- sapply(geneSets, rndWalk, geneRanking, j, R, alpha) |
593 | 597 |
else { |
594 | 598 |
if (is.na(cl)) |
... | ... |
@@ -668,7 +672,7 @@ zscore <- function(X, geneSets, parallel.sz, parallel.type, verbose) { |
668 | 672 |
get("iSample", envir=globalenv()) / get("nSamples", envir=globalenv())) |
669 | 673 |
} |
670 | 674 |
es_sample <- NA |
671 |
- if (!haveParallel && !haveSnow) |
|
675 |
+ if (is.na(cl) && !haveParallel) |
|
672 | 676 |
es_sample <- sapply(geneSets, combinez, j, Z) |
673 | 677 |
else { |
674 | 678 |
if (is.na(cl)) |
... | ... |
@@ -759,7 +763,7 @@ setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGen |
759 | 763 |
setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGenes="ExpressionSet"), |
760 | 764 |
function(gSets, uniqGenes, min.sz=1, max.sz=Inf) { |
761 | 765 |
## map gene identifiers of the gene sets to the features in the chip |
762 |
- gSets <- mapIdentifiers(gSets, AnnotationIdentifier(annotation(uniqGenes))) |
|
766 |
+ gSets <- GSEABase::mapIdentifiers(gSets, GSEABase::AnnoOrEntrezIdentifier(annotation(uniqGenes))) |
|
763 | 767 |
|
764 | 768 |
uniqGenes <- Biobase::featureNames(uniqGenes) |
765 | 769 |
|
... | ... |
@@ -242,7 +242,7 @@ limit \Rfunction{gsva()} in the number of core processors that is should use |
242 | 242 |
we can do it by specifying such a value in the \Robject{parallel.sz} argument. |
243 | 243 |
|
244 | 244 |
The other two functions of the \Rpackage{GSVA} package are |
245 |
-\Rfunction{filterGeneSets()} and \Rfunction{computeGeneSetOverlaps()} that |
|
245 |
+\Rfunction{filterGeneSets()} and \Rfunction{computeGeneSetsOverlaps()} that |
|
246 | 246 |
serve to explicitly filter out gene sets by size and by pairwise overlap, |
247 | 247 |
respectively. Note that the size filter can be also applied within the |
248 | 248 |
\Rfunction{gsva()} function call. |
... | ... |
@@ -20,12 +20,12 @@ Estimates GSVA enrichment scores. |
20 | 20 |
max.sz=Inf, |
21 | 21 |
no.bootstraps=0, |
22 | 22 |
bootstrap.percent = .632, |
23 |
- alpha=0.25, |
|
24 | 23 |
parallel.sz=0, |
25 | 24 |
parallel.type="SOCK", |
26 |
- verbose=TRUE, |
|
27 | 25 |
mx.diff=TRUE, |
28 |
- kernel=TRUE) |
|
26 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
27 |
+ kernel=TRUE, |
|
28 |
+ verbose=TRUE) |
|
29 | 29 |
\S4method{gsva}{ExpressionSet,GeneSetCollection,missing}(expr, gset.idx.list, annotation=NULL, |
30 | 30 |
method=c("gsva", "ssgsea", "zscore"), |
31 | 31 |
rnaseq=FALSE, |
... | ... |
@@ -34,12 +34,12 @@ Estimates GSVA enrichment scores. |
34 | 34 |
max.sz=Inf, |
35 | 35 |
no.bootstraps=0, |
36 | 36 |
bootstrap.percent = .632, |
37 |
- alpha=0.25, |
|
38 | 37 |
parallel.sz=0, |
39 | 38 |
parallel.type="SOCK", |
40 |
- verbose=TRUE, |
|
41 | 39 |
mx.diff=TRUE, |
42 |
- kernel=TRUE) |
|
40 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
41 |
+ kernel=TRUE, |
|
42 |
+ verbose=TRUE) |
|
43 | 43 |
\S4method{gsva}{matrix,GeneSetCollection,character}(expr, gset.idx.list, annotation=NA, |
44 | 44 |
method=c("gsva", "ssgsea", "zscore"), |
45 | 45 |
rnaseq=FALSE, |
... | ... |
@@ -48,12 +48,12 @@ Estimates GSVA enrichment scores. |
48 | 48 |
max.sz=Inf, |
49 | 49 |
no.bootstraps=0, |
50 | 50 |
bootstrap.percent = .632, |
51 |
- alpha=0.25, |
|
52 | 51 |
parallel.sz=0, |
53 | 52 |
parallel.type="SOCK", |
54 |
- verbose=TRUE, |
|
55 | 53 |
mx.diff=TRUE, |
56 |
- kernel=TRUE) |
|
54 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
55 |
+ kernel=TRUE, |
|
56 |
+ verbose=TRUE) |
|
57 | 57 |
\S4method{gsva}{matrix,list,missing}(expr, gset.idx.list, annotation=NULL, |
58 | 58 |
method=c("gsva", "ssgsea", "zscore"), |
59 | 59 |
rnaseq=FALSE, |
... | ... |
@@ -62,12 +62,12 @@ Estimates GSVA enrichment scores. |
62 | 62 |
max.sz=Inf, |
63 | 63 |
no.bootstraps=0, |
64 | 64 |
bootstrap.percent = .632, |
65 |
- alpha=0.25, |
|
66 | 65 |
parallel.sz=0, |
67 | 66 |
parallel.type="SOCK", |
68 |
- verbose=TRUE, |
|
69 | 67 |
mx.diff=TRUE, |
70 |
- kernel=TRUE) |
|
68 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
69 |
+ kernel=TRUE, |
|
70 |
+ verbose=TRUE) |
|
71 | 71 |
} |
72 | 72 |
\arguments{ |
73 | 73 |
\item{expr}{Gene expression data which can be given either as an \code{ExpressionSet} |
... | ... |
@@ -97,8 +97,6 @@ Estimates GSVA enrichment scores. |
97 | 97 |
\item{max.sz}{Maximum size of the resulting gene sets.} |
98 | 98 |
\item{no.bootstraps}{Number of bootstrap iterations to perform.} |
99 | 99 |
\item{bootstrap.percent}{.632 is the ideal percent samples bootstrapped.} |
100 |
- \item{alpha}{Exponent defining the weight in the random walk of the \code{ssgsea} method (Barbie et al., 2009). |
|
101 |
- Its default value (0.25) corresponds to the one used in the publication.} |
|
102 | 100 |
\item{parallel.sz}{Number of processors to use when doing the calculations in parallel. |
103 | 101 |
This requires to previously load either the \code{parallel} or the |
104 | 102 |
\code{snow} library. If \code{parallel} is loaded and this argument |
... | ... |
@@ -108,16 +106,20 @@ Estimates GSVA enrichment scores. |
108 | 106 |
to a positive integer number that specifies the number of processors to |
109 | 107 |
employ in the parallel calculation.} |
110 | 108 |
\item{parallel.type}{Type of cluster architecture when using \code{snow}.} |
111 |
- \item{verbose}{Gives information about each calculation step. Default: \code{FALSE}.} |
|
112 | 109 |
\item{mx.diff}{Offers two approaches to calculate the enrichment statistic (ES) |
113 | 110 |
from the KS random walk statistic. \code{mx.diff=FALSE}: ES is calculated as |
114 | 111 |
the maximum distance of the random walk from 0. \code{mx.diff=TRUE} (default): ES |
115 | 112 |
is calculated as the magnitude difference between the largest positive |
116 | 113 |
and negative random walk deviations.} |
114 |
+ \item{tau}{Exponent defining the weight of the tail in the random walk performed by both the \code{gsva} |
|
115 |
+ (H\"anzelmann et al., submitted) and the \code{ssgsea} (Barbie et al., 2009) methods. By default, |
|
116 |
+ this \code{tau=1} when \code{method="gsva"} and \code{tau=0.25} when \code{method="ssgsea"} just |
|
117 |
+ as specified by Barbie et al. (2009) where this parameter is called \code{alpha}.} |
|
117 | 118 |
\item{kernel}{Logical, set to \code{TRUE} when the GSVA method employes a kernel non-parametric |
118 | 119 |
estimation of the empirical cumulative distribution function (default) and \code{FALSE} |
119 | 120 |
when this function is directly estimated from the observed data. This last option is |
120 | 121 |
justified in the limit of the size of the sample by the so-called Glivenko-Cantelli theorem.} |
122 |
+ \item{verbose}{Gives information about each calculation step. Default: \code{FALSE}.} |
|
121 | 123 |
} |
122 | 124 |
|
123 | 125 |
\details{ |