git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GSVA@96422 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,13 +1,12 @@ |
1 | 1 |
Package: GSVA |
2 |
-Version: 1.15.0 |
|
3 |
-Date: 2014-02-21 |
|
2 |
+Version: 1.15.1 |
|
4 | 3 |
Title: Gene Set Variation Analysis for microarray and RNA-seq data |
5 | 4 |
Author: Justin Guinney with contributions from Robert Castelo |
6 | 5 |
Maintainer: Justin Guinney <justin.guinney@sagebase.org> |
7 |
-Depends: R (>= 2.13.0), methods, GSEABase (>= 1.17.4) |
|
8 |
-Imports: methods, BiocGenerics, Biobase, GSEABase |
|
9 |
-Enhances: snow, parallel |
|
10 |
-Suggests: limma, RColorBrewer, genefilter, mclust, edgeR, GSVAdata |
|
6 |
+Depends: R (>= 2.13.0) |
|
7 |
+Imports: methods, BiocGenerics, Biobase, GSEABase (>= 1.17.4) |
|
8 |
+Suggests: limma, RColorBrewer, genefilter, mclust, |
|
9 |
+ edgeR, snow, parallel, GSVAdata |
|
11 | 10 |
Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner. |
12 | 11 |
License: GPL (>= 2) |
13 | 12 |
LazyLoad: yes |
... | ... |
@@ -1,3 +1,14 @@ |
1 |
+CHANGES IN VERSION 1.14 |
|
2 |
+----------------------- |
|
3 |
+ |
|
4 |
+USER VISIBLE CHANGES |
|
5 |
+ |
|
6 |
+ o added an argument 'ssgsea.norm' to the 'gsva()' function to enable disabling the default score normalization of the original SSGSEA method by Barbie et al. (2009). |
|
7 |
+ |
|
8 |
+BUG FIXES |
|
9 |
+ |
|
10 |
+ o Better error handling of the situation when no gene identifiers match between gene sets and expression data. |
|
11 |
+ |
|
1 | 12 |
CHANGES IN VERSION 1.4 |
2 | 13 |
---------------------- |
3 | 14 |
|
... | ... |
@@ -3,10 +3,10 @@ |
3 | 3 |
## purpose: main function of the package which estimates activity |
4 | 4 |
## scores for each given gene-set |
5 | 5 |
|
6 |
-setGeneric("gsva", function(expr, gset.idx.list, annotation=NULL, ...) standardGeneric("gsva")) |
|
6 |
+setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva")) |
|
7 | 7 |
|
8 |
-setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotation="missing"), |
|
9 |
- function(expr, gset.idx.list, annotation=NULL, |
|
8 |
+setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"), |
|
9 |
+ function(expr, gset.idx.list, annotation, |
|
10 | 10 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
11 | 11 |
rnaseq=FALSE, |
12 | 12 |
abs.ranking=FALSE, |
... | ... |
@@ -19,16 +19,20 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati |
19 | 19 |
mx.diff=TRUE, |
20 | 20 |
tau=switch(method, gsva=1, ssgsea=0.25, NA), |
21 | 21 |
kernel=TRUE, |
22 |
+ ssgsea.norm=TRUE, |
|
22 | 23 |
verbose=TRUE) |
23 | 24 |
{ |
24 | 25 |
method <- match.arg(method) |
25 | 26 |
|
26 | 27 |
## filter out genes with constant expression values |
27 | 28 |
sdGenes <- Biobase::esApply(expr, 1, sd) |
28 |
- if (any(sdGenes == 0)) { |
|
29 |
- if (verbose) |
|
30 |
- cat("Filtering out", sum(sdGenes == 0), "genes with constant expression values throuhgout the samples\n") |
|
31 |
- expr <- expr[sdGenes > 0, ] |
|
29 |
+ if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
30 |
+ warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
31 |
+ "genes with constant expression values throuhgout the samples.") |
|
32 |
+ if (method != "ssgsea") { |
|
33 |
+ warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
34 |
+ expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
35 |
+ } |
|
32 | 36 |
} |
33 | 37 |
|
34 | 38 |
if (nrow(expr) < 2) |
... | ... |
@@ -39,15 +43,18 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati |
39 | 43 |
function(x, y) na.omit(match(x, y)), |
40 | 44 |
featureNames(expr)) |
41 | 45 |
|
46 |
+ if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
47 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
48 |
+ |
|
42 | 49 |
## remove gene sets from the analysis for which no features are available |
43 | 50 |
## and meet the minimum and maximum gene-set size specified by the user |
44 | 51 |
mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
45 | 52 |
min.sz=max(1, min.sz), |
46 | 53 |
max.sz=max.sz) |
47 | 54 |
|
48 |
- eSco <- GSVA:::.gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking, |
|
49 |
- no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
50 |
- mx.diff, tau, kernel, verbose) |
|
55 |
+ eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking, |
|
56 |
+ no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
57 |
+ mx.diff, tau, kernel, ssgsea.norm, verbose) |
|
51 | 58 |
|
52 | 59 |
if (method != "gsva") |
53 | 60 |
eSco <- list(es.obs=eSco, bootstrap=NULL, p.vals.sign=NULL) |
... | ... |
@@ -60,8 +67,8 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati |
60 | 67 |
p.vals.sign=eSco$p.vals.sign)) |
61 | 68 |
}) |
62 | 69 |
|
63 |
-setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollection", annotation="missing"), |
|
64 |
- function(expr, gset.idx.list, annotation=NULL, |
|
70 |
+setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollection"), |
|
71 |
+ function(expr, gset.idx.list, annotation, |
|
65 | 72 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
66 | 73 |
rnaseq=FALSE, |
67 | 74 |
abs.ranking=FALSE, |
... | ... |
@@ -74,16 +81,20 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
74 | 81 |
mx.diff=TRUE, |
75 | 82 |
tau=switch(method, gsva=1, ssgsea=0.25, NA), |
76 | 83 |
kernel=TRUE, |
84 |
+ ssgsea.norm=TRUE, |
|
77 | 85 |
verbose=TRUE) |
78 | 86 |
{ |
79 | 87 |
method <- match.arg(method) |
80 | 88 |
|
81 | 89 |
## filter out genes with constant expression values |
82 | 90 |
sdGenes <- Biobase::esApply(expr, 1, sd) |
83 |
- if (any(sdGenes == 0)) { |
|
84 |
- if (verbose) |
|
85 |
- cat("Filtering out", sum(sdGenes == 0), "genes with constant expression values throuhgout the samples\n") |
|
86 |
- expr <- expr[sdGenes > 0, ] |
|
91 |
+ if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
92 |
+ warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
93 |
+ "genes with constant expression values throuhgout the samples.") |
|
94 |
+ if (method != "ssgsea") { |
|
95 |
+ warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
96 |
+ expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
97 |
+ } |
|
87 | 98 |
} |
88 | 99 |
|
89 | 100 |
if (nrow(expr) < 2) |
... | ... |
@@ -94,7 +105,7 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
94 | 105 |
|
95 | 106 |
## map gene identifiers of the gene sets to the features in the chip |
96 | 107 |
mapped.gset.idx.list <- GSEABase::mapIdentifiers(gset.idx.list, |
97 |
- GSEABase::AnnoOrEntrezIdentifier(annotation(expr))) |
|
108 |
+ GSEABase::AnnoOrEntrezIdentifier(Biobase::annotation(expr))) |
|
98 | 109 |
|
99 | 110 |
## map to the actual features for which expression data is available |
100 | 111 |
tmp <- lapply(geneIds(mapped.gset.idx.list), |
... | ... |
@@ -107,9 +118,9 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
107 | 118 |
min.sz=max(1, min.sz), |
108 | 119 |
max.sz=max.sz) |
109 | 120 |
|
110 |
- eSco <- GSVA:::.gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking, |
|
111 |
- no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
112 |
- mx.diff, tau, kernel, verbose) |
|
121 |
+ eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking, |
|
122 |
+ no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
123 |
+ mx.diff, tau, kernel, ssgsea.norm, verbose) |
|
113 | 124 |
|
114 | 125 |
if (method != "gsva") |
115 | 126 |
eSco <- list(es.obs=eSco, bootstrap=NULL, p.vals.sign=NULL) |
... | ... |
@@ -122,8 +133,8 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
122 | 133 |
p.vals.sign=eSco$p.vals.sign)) |
123 | 134 |
}) |
124 | 135 |
|
125 |
-setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", annotation="character"), |
|
126 |
- function(expr, gset.idx.list, annotation=NA, |
|
136 |
+setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"), |
|
137 |
+ function(expr, gset.idx.list, annotation, |
|
127 | 138 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
128 | 139 |
rnaseq=FALSE, |
129 | 140 |
abs.ranking=FALSE, |
... | ... |
@@ -136,16 +147,20 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an |
136 | 147 |
mx.diff=TRUE, |
137 | 148 |
tau=switch(method, gsva=1, ssgsea=0.25, NA), |
138 | 149 |
kernel=TRUE, |
150 |
+ ssgsea.norm=TRUE, |
|
139 | 151 |
verbose=TRUE) |
140 | 152 |
{ |
141 | 153 |
method <- match.arg(method) |
142 | 154 |
|
143 | 155 |
## filter out genes with constant expression values |
144 | 156 |
sdGenes <- apply(expr, 1, sd) |
145 |
- if (any(sdGenes == 0)) { |
|
146 |
- if (verbose) |
|
147 |
- cat("Filtering out", sum(sdGenes == 0), "genes with constant expression values throuhgout the samples\n") |
|
148 |
- expr <- expr[sdGenes > 0, ] |
|
157 |
+ if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
158 |
+ warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
159 |
+ "genes with constant expression values throuhgout the samples.") |
|
160 |
+ if (method != "ssgsea") { |
|
161 |
+ warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
162 |
+ expr <- expr[sdGenes > 0 & !is.na(sdGenes), , drop=FALSE] |
|
163 |
+ } |
|
149 | 164 |
} |
150 | 165 |
|
151 | 166 |
if (nrow(expr) < 2) |
... | ... |
@@ -153,7 +168,7 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an |
153 | 168 |
|
154 | 169 |
## map gene identifiers of the gene sets to the features in the matrix |
155 | 170 |
mapped.gset.idx.list <- gset.idx.list |
156 |
- if (!is.na(annotation)) { |
|
171 |
+ if (!missing(annotation)) { |
|
157 | 172 |
if (verbose) |
158 | 173 |
cat("Mapping identifiers between gene sets and feature names\n") |
159 | 174 |
|
... | ... |
@@ -167,19 +182,22 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an |
167 | 182 |
rownames(expr)) |
168 | 183 |
names(tmp) <- names(mapped.gset.idx.list) |
169 | 184 |
|
185 |
+ if (length(unlist(tmp, use.names=FALSE)) == 0) |
|
186 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
187 |
+ |
|
170 | 188 |
## remove gene sets from the analysis for which no features are available |
171 | 189 |
## and meet the minimum and maximum gene-set size specified by the user |
172 | 190 |
mapped.gset.idx.list <- filterGeneSets(tmp, |
173 | 191 |
min.sz=max(1, min.sz), |
174 | 192 |
max.sz=max.sz) |
175 | 193 |
|
176 |
- GSVA:::.gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, |
|
177 |
- no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
178 |
- mx.diff, tau, kernel, verbose) |
|
194 |
+ .gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, |
|
195 |
+ no.bootstraps, bootstrap.percent, parallel.sz, parallel.type, |
|
196 |
+ mx.diff, tau, kernel, ssgsea.norm, verbose) |
|
179 | 197 |
}) |
180 | 198 |
|
181 |
-setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="missing"), |
|
182 |
- function(expr, gset.idx.list, annotation=NULL, |
|
199 |
+setMethod("gsva", signature(expr="matrix", gset.idx.list="list"), |
|
200 |
+ function(expr, gset.idx.list, annotation, |
|
183 | 201 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
184 | 202 |
rnaseq=FALSE, |
185 | 203 |
abs.ranking=FALSE, |
... | ... |
@@ -192,16 +210,20 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
192 | 210 |
mx.diff=TRUE, |
193 | 211 |
tau=switch(method, gsva=1, ssgsea=0.25, NA), |
194 | 212 |
kernel=TRUE, |
213 |
+ ssgsea.norm=TRUE, |
|
195 | 214 |
verbose=TRUE) |
196 | 215 |
{ |
197 | 216 |
method <- match.arg(method) |
198 | 217 |
|
199 | 218 |
## filter out genes with constant expression values |
200 | 219 |
sdGenes <- apply(expr, 1, sd) |
201 |
- if (any(sdGenes == 0)) { |
|
202 |
- if (verbose) |
|
203 |
- cat("Filtering out", sum(sdGenes == 0), "genes with constant expression values throuhgout the samples\n") |
|
204 |
- expr <- expr[sdGenes > 0, ] |
|
220 |
+ if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
221 |
+ warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
222 |
+ "genes with constant expression values throuhgout the samples.") |
|
223 |
+ if (method != "ssgsea") { |
|
224 |
+ warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
225 |
+ expr <- expr[sdGenes > 0 & !is.na(sdGenes), , drop=FALSE] |
|
226 |
+ } |
|
205 | 227 |
} |
206 | 228 |
|
207 | 229 |
if (nrow(expr) < 2) |
... | ... |
@@ -211,15 +233,18 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
211 | 233 |
function(x ,y) na.omit(match(x, y)), |
212 | 234 |
rownames(expr)) |
213 | 235 |
|
236 |
+ if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
237 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
238 |
+ |
|
214 | 239 |
## remove gene sets from the analysis for which no features are available |
215 | 240 |
## and meet the minimum and maximum gene-set size specified by the user |
216 | 241 |
mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
217 | 242 |
min.sz=max(1, min.sz), |
218 | 243 |
max.sz=max.sz) |
219 | 244 |
|
220 |
- GSVA:::.gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, no.bootstraps, |
|
221 |
- bootstrap.percent, parallel.sz, parallel.type, |
|
222 |
- mx.diff, tau, kernel, verbose) |
|
245 |
+ .gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, no.bootstraps, |
|
246 |
+ bootstrap.percent, parallel.sz, parallel.type, |
|
247 |
+ mx.diff, tau, kernel, ssgsea.norm, verbose) |
|
223 | 248 |
}) |
224 | 249 |
|
225 | 250 |
.gsva <- function(expr, gset.idx.list, |
... | ... |
@@ -233,6 +258,7 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
233 | 258 |
mx.diff=TRUE, |
234 | 259 |
tau=1, |
235 | 260 |
kernel=TRUE, |
261 |
+ ssgsea.norm=TRUE, |
|
236 | 262 |
verbose=TRUE) |
237 | 263 |
{ |
238 | 264 |
if(length(gset.idx.list) == 0){ |
... | ... |
@@ -242,9 +268,10 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
242 | 268 |
if (method == "ssgsea") { |
243 | 269 |
if(verbose) |
244 | 270 |
cat("Estimating ssGSEA scores for", length(gset.idx.list),"gene sets.\n") |
245 |
- |
|
271 |
+ |
|
246 | 272 |
return(ssgsea(expr, gset.idx.list, alpha=tau, parallel.sz=parallel.sz, |
247 |
- parallel.type=parallel.type, verbose=verbose)) |
|
273 |
+ parallel.type=parallel.type, normalization=ssgsea.norm, |
|
274 |
+ verbose=verbose)) |
|
248 | 275 |
} |
249 | 276 |
|
250 | 277 |
if (method == "zscore") { |
... | ... |
@@ -299,12 +326,13 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis |
299 | 326 |
if(verbose) cat("Computing bootstrap enrichment scores\n") |
300 | 327 |
bootstrap.nsamples <- floor(bootstrap.percent * n.samples) |
301 | 328 |
|
302 |
- p.vals.sign <- matrix(NaN, n.gset, n.samples,dimnames=list(names(gset.idx.list),colnames(expr))) |
|
329 |
+ p.vals.sign <- matrix(NaN, n.gset, n.samples, |
|
330 |
+ dimnames=list(names(gset.idx.list), colnames(expr))) |
|
303 | 331 |
|
304 | 332 |
es.bootstraps <- array(NaN, c(n.gset, n.samples, no.bootstraps)) |
305 | 333 |
if(parallel.sz > 0){ |
306 | 334 |
|
307 |
- if(!GSVA:::.isPackageLoaded("snow")) { |
|
335 |
+ if(!.isPackageLoaded("snow")) { |
|
308 | 336 |
stop("Please load the 'snow' library") |
309 | 337 |
} |
310 | 338 |
## copying ShortRead's strategy, the calls to the 'get()' are |
... | ... |
@@ -382,7 +410,7 @@ compute.gene.density <- function(expr, sample.idxs, rnaseq=FALSE, kernel=TRUE){ |
382 | 410 |
gene.density <- NA |
383 | 411 |
if (kernel) { |
384 | 412 |
A = .C("matrix_density_R", |
385 |
- as.double(t(expr[,sample.idxs])), |
|
413 |
+ as.double(t(expr[ ,sample.idxs, drop=FALSE])), |
|
386 | 414 |
as.double(t(expr)), |
387 | 415 |
R = double(n.test.samples * n.genes), |
388 | 416 |
n.density.samples, |
... | ... |
@@ -432,8 +460,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
432 | 460 |
|
433 | 461 |
rank.scores <- apply(sort.sgn.idxs, 2, compute_rank_score) |
434 | 462 |
|
435 |
- haveParallel <- GSVA:::.isPackageLoaded("parallel") |
|
436 |
- haveSnow <- GSVA:::.isPackageLoaded("snow") |
|
463 |
+ haveParallel <- .isPackageLoaded("parallel") |
|
464 |
+ haveSnow <- .isPackageLoaded("snow") |
|
437 | 465 |
|
438 | 466 |
if (parallel.sz > 1 || haveParallel) { |
439 | 467 |
if (!haveParallel && !haveSnow) { |
... | ... |
@@ -486,7 +514,7 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE, |
486 | 514 |
assign("iGeneSet", 0, envir=globalenv()) |
487 | 515 |
} |
488 | 516 |
|
489 |
- m <- mclapp(gset.idx.list, GSVA:::ks_test_m, |
|
517 |
+ m <- mclapp(gset.idx.list, ks_test_m, |
|
490 | 518 |
gene.density=rank.scores, |
491 | 519 |
sort.idxs=sort.sgn.idxs, |
492 | 520 |
mx.diff=mx.diff, tau=tau, verbose=verbose) |
... | ... |
@@ -602,7 +630,8 @@ rndWalk <- function(gSetIdx, geneRanking, j, R, alpha) { |
602 | 630 |
sum(walkStat) |
603 | 631 |
} |
604 | 632 |
|
605 |
-ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz, parallel.type, verbose) { |
|
633 |
+ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz, |
|
634 |
+ parallel.type, normalization=TRUE, verbose) { |
|
606 | 635 |
|
607 | 636 |
p <- nrow(X) |
608 | 637 |
n <- ncol(X) |
... | ... |
@@ -615,8 +644,8 @@ ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz, parallel.type, verbose) |
615 | 644 |
|
616 | 645 |
R <- apply(X, 2, function(x,p) as.integer(rank(x)), p) |
617 | 646 |
|
618 |
- haveParallel <- GSVA:::.isPackageLoaded("parallel") |
|
619 |
- haveSnow <- GSVA:::.isPackageLoaded("snow") |
|
647 |
+ haveParallel <- .isPackageLoaded("parallel") |
|
648 |
+ haveSnow <- .isPackageLoaded("snow") |
|
620 | 649 |
|
621 | 650 |
cl <- makeCl <- parSapp <- stopCl <- mclapp <- detCor <- nCores <- NA |
622 | 651 |
if (parallel.sz > 1 || haveParallel) { |
... | ... |
@@ -670,9 +699,11 @@ ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz, parallel.type, verbose) |
670 | 699 |
if (length(geneSets) == 1) |
671 | 700 |
es <- matrix(es, nrow=1) |
672 | 701 |
|
673 |
- ## normalize enrichment scores by using the entire data set, as indicated |
|
674 |
- ## by Barbie et al., 2009, online methods, pg. 2 |
|
675 |
- es <- apply(es, 2, function(x, es) x / (range(es)[2] - range(es)[1]), es) |
|
702 |
+ if (normalization) { |
|
703 |
+ ## normalize enrichment scores by using the entire data set, as indicated |
|
704 |
+ ## by Barbie et al., 2009, online methods, pg. 2 |
|
705 |
+ es <- apply(es, 2, function(x, es) x / (range(es)[2] - range(es)[1]), es) |
|
706 |
+ } |
|
676 | 707 |
|
677 | 708 |
if (length(geneSets) == 1) |
678 | 709 |
es <- matrix(es, nrow=1) |
... | ... |
@@ -706,8 +737,8 @@ zscore <- function(X, geneSets, parallel.sz, parallel.type, verbose) { |
706 | 737 |
|
707 | 738 |
Z <- t(apply(X, 1, function(x) (x-mean(x))/sd(x))) |
708 | 739 |
|
709 |
- haveParallel <- GSVA:::.isPackageLoaded("parallel") |
|
710 |
- haveSnow <- GSVA:::.isPackageLoaded("snow") |
|
740 |
+ haveParallel <- .isPackageLoaded("parallel") |
|
741 |
+ haveSnow <- .isPackageLoaded("snow") |
|
711 | 742 |
|
712 | 743 |
cl <- makeCl <- parSapp <- stopCl <- mclapp <- detCor <- nCores <- NA |
713 | 744 |
if (parallel.sz > 1 || haveParallel) { |
... | ... |
@@ -792,8 +823,8 @@ plage <- function(X, geneSets, parallel.sz, parallel.type, verbose) { |
792 | 823 |
|
793 | 824 |
Z <- t(apply(X, 1, function(x) (x-mean(x))/sd(x))) |
794 | 825 |
|
795 |
- haveParallel <- GSVA:::.isPackageLoaded("parallel") |
|
796 |
- haveSnow <- GSVA:::.isPackageLoaded("snow") |
|
826 |
+ haveParallel <- .isPackageLoaded("parallel") |
|
827 |
+ haveSnow <- .isPackageLoaded("snow") |
|
797 | 828 |
|
798 | 829 |
## the masterDescriptor() calls are disabled since they are not available in windows |
799 | 830 |
## they would help to report progress by just one of the processors. now all processors |
... | ... |
@@ -917,7 +948,7 @@ setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="character |
917 | 948 |
members <- cbind(unlist(gSets, use.names=FALSE), rep(1:totalGsets, times=lenGsets)) |
918 | 949 |
gSetsMembershipMatrix[members] <- 1 |
919 | 950 |
|
920 |
- GSVA:::.computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz) |
|
951 |
+ .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz) |
|
921 | 952 |
}) |
922 | 953 |
|
923 | 954 |
setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="ExpressionSet"), |
... | ... |
@@ -936,7 +967,7 @@ setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="Expressio |
936 | 967 |
members <- cbind(unlist(gSets, use.names=FALSE), rep(1:totalGsets, times=lenGsets)) |
937 | 968 |
gSetsMembershipMatrix[members] <- 1 |
938 | 969 |
|
939 |
- GSVA:::.computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz) |
|
970 |
+ .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz) |
|
940 | 971 |
}) |
941 | 972 |
|
942 | 973 |
setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGenes="character"), |
... | ... |
@@ -945,20 +976,20 @@ setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGen |
945 | 976 |
gSetsMembershipMatrix <- incidence(gSets) |
946 | 977 |
gSetsMembershipMatrix <- t(gSetsMembershipMatrix[, colnames(gSetsMembershipMatrix) %in% uniqGenes]) |
947 | 978 |
|
948 |
- GSVA:::.computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz) |
|
979 |
+ .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz) |
|
949 | 980 |
}) |
950 | 981 |
|
951 | 982 |
setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGenes="ExpressionSet"), |
952 | 983 |
function(gSets, uniqGenes, min.sz=1, max.sz=Inf) { |
953 | 984 |
## map gene identifiers of the gene sets to the features in the chip |
954 |
- gSets <- GSEABase::mapIdentifiers(gSets, GSEABase::AnnoOrEntrezIdentifier(annotation(uniqGenes))) |
|
985 |
+ gSets <- GSEABase::mapIdentifiers(gSets, GSEABase::AnnoOrEntrezIdentifier(Biobase::annotation(uniqGenes))) |
|
955 | 986 |
|
956 | 987 |
uniqGenes <- Biobase::featureNames(uniqGenes) |
957 | 988 |
|
958 | 989 |
gSetsMembershipMatrix <- incidence(gSets) |
959 | 990 |
gSetsMembershipMatrix <- t(gSetsMembershipMatrix[, colnames(gSetsMembershipMatrix) %in% uniqGenes]) |
960 | 991 |
|
961 |
- GSVA:::.computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz) |
|
992 |
+ .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz) |
|
962 | 993 |
}) |
963 | 994 |
|
964 | 995 |
.computeGeneSetsOverlap <- function(gSetsMembershipMatrix, min.sz=1, max.sz=Inf) { |
... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
\name{gsva} |
2 | 2 |
\alias{gsva} |
3 |
-\alias{gsva,ExpressionSet,list,missing-method} |
|
4 |
-\alias{gsva,ExpressionSet,GeneSetCollection,missing-method} |
|
5 |
-\alias{gsva,matrix,GeneSetCollection,character-method} |
|
6 |
-\alias{gsva,matrix,list,missing-method} |
|
3 |
+\alias{gsva,ExpressionSet,list-method} |
|
4 |
+\alias{gsva,ExpressionSet,GeneSetCollection-method} |
|
5 |
+\alias{gsva,matrix,GeneSetCollection-method} |
|
6 |
+\alias{gsva,matrix,list-method} |
|
7 | 7 |
|
8 | 8 |
\encoding{latin1} |
9 | 9 |
|
... | ... |
@@ -14,7 +14,7 @@ Gene Set Variation Analysis |
14 | 14 |
Estimates GSVA enrichment scores. |
15 | 15 |
} |
16 | 16 |
\usage{ |
17 |
-\S4method{gsva}{ExpressionSet,list,missing}(expr, gset.idx.list, annotation=NULL, |
|
17 |
+\S4method{gsva}{ExpressionSet,list}(expr, gset.idx.list, annotation, |
|
18 | 18 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
19 | 19 |
rnaseq=FALSE, |
20 | 20 |
abs.ranking=FALSE, |
... | ... |
@@ -27,8 +27,9 @@ Estimates GSVA enrichment scores. |
27 | 27 |
mx.diff=TRUE, |
28 | 28 |
tau=switch(method, gsva=1, ssgsea=0.25, NA), |
29 | 29 |
kernel=TRUE, |
30 |
+ ssgsea.norm=TRUE, |
|
30 | 31 |
verbose=TRUE) |
31 |
-\S4method{gsva}{ExpressionSet,GeneSetCollection,missing}(expr, gset.idx.list, annotation=NULL, |
|
32 |
+\S4method{gsva}{ExpressionSet,GeneSetCollection}(expr, gset.idx.list, annotation, |
|
32 | 33 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
33 | 34 |
rnaseq=FALSE, |
34 | 35 |
abs.ranking=FALSE, |
... | ... |
@@ -41,8 +42,9 @@ Estimates GSVA enrichment scores. |
41 | 42 |
mx.diff=TRUE, |
42 | 43 |
tau=switch(method, gsva=1, ssgsea=0.25, NA), |
43 | 44 |
kernel=TRUE, |
45 |
+ ssgsea.norm=TRUE, |
|
44 | 46 |
verbose=TRUE) |
45 |
-\S4method{gsva}{matrix,GeneSetCollection,character}(expr, gset.idx.list, annotation=NA, |
|
47 |
+\S4method{gsva}{matrix,GeneSetCollection}(expr, gset.idx.list, annotation, |
|
46 | 48 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
47 | 49 |
rnaseq=FALSE, |
48 | 50 |
abs.ranking=FALSE, |
... | ... |
@@ -55,8 +57,9 @@ Estimates GSVA enrichment scores. |
55 | 57 |
mx.diff=TRUE, |
56 | 58 |
tau=switch(method, gsva=1, ssgsea=0.25, NA), |
57 | 59 |
kernel=TRUE, |
60 |
+ ssgsea.norm=TRUE, |
|
58 | 61 |
verbose=TRUE) |
59 |
-\S4method{gsva}{matrix,list,missing}(expr, gset.idx.list, annotation=NULL, |
|
62 |
+\S4method{gsva}{matrix,list}(expr, gset.idx.list, annotation, |
|
60 | 63 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
61 | 64 |
rnaseq=FALSE, |
62 | 65 |
abs.ranking=FALSE, |
... | ... |
@@ -69,6 +72,7 @@ Estimates GSVA enrichment scores. |
69 | 72 |
mx.diff=TRUE, |
70 | 73 |
tau=switch(method, gsva=1, ssgsea=0.25, NA), |
71 | 74 |
kernel=TRUE, |
75 |
+ ssgsea.norm=TRUE, |
|
72 | 76 |
verbose=TRUE) |
73 | 77 |
} |
74 | 78 |
\arguments{ |
... | ... |
@@ -81,9 +85,9 @@ Estimates GSVA enrichment scores. |
81 | 85 |
and gene sets as a \code{GeneSetCollection} object, the \code{annotation} argument |
82 | 86 |
can be used to supply the name of the Bioconductor package that contains |
83 | 87 |
annotations for the class of gene identifiers occurring in the row names of |
84 |
- the expression data matrix. By default \code{annotation=NULL} which implies that |
|
85 |
- \code{gsva()} will try to match the identifiers in \code{expr} to the identifiers |
|
86 |
- in \code{gset.idx.list} just as they are.} |
|
88 |
+ the expression data matrix. By default \code{gsva()} will try to match the |
|
89 |
+ identifiers in \code{expr} to the identifiers in \code{gset.idx.list} just as |
|
90 |
+ they are, unless the \code{annotation} argument is set.} |
|
87 | 91 |
\item{method}{Method to employ in the estimation of gene-set enrichment scores per sample. By default |
88 | 92 |
this is set to \code{gsva} (\enc{H�nzelmann}{Hanzelmann} et al, 2013) and other options are |
89 | 93 |
\code{ssgsea} (Barbie et al, 2009), \code{zscore} (Lee et al, 2008) or \code{plage} |
... | ... |
@@ -125,6 +129,10 @@ Estimates GSVA enrichment scores. |
125 | 129 |
estimation of the empirical cumulative distribution function (default) and \code{FALSE} |
126 | 130 |
when this function is directly estimated from the observed data. This last option is |
127 | 131 |
justified in the limit of the size of the sample by the so-called Glivenko-Cantelli theorem.} |
132 |
+ \item{ssgsea.norm}{Logical, set to \code{TRUE} (default) with \code{method="ssgsea"} runs the SSGSEA method |
|
133 |
+ from Barbie et al. (2009) normalizing the scores by the absolute difference between |
|
134 |
+ the minimum and the maximum, as described in their paper. When \code{ssgsea.norm=FALSE} |
|
135 |
+ this last normalization step is skipped.} |
|
128 | 136 |
\item{verbose}{Gives information about each calculation step. Default: \code{FALSE}.} |
129 | 137 |
} |
130 | 138 |
|