Browse code

Fixed error handling when no genes in gene sets match genes in expression data.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GSVA@96422 bc3139a8-67e5-0310-9ffc-ced21a209358

Robert Castelo authored on 04/11/2014 13:13:04
Showing 4 changed files

... ...
@@ -1,13 +1,12 @@
1 1
 Package: GSVA
2
-Version: 1.15.0
3
-Date: 2014-02-21
2
+Version: 1.15.1
4 3
 Title: Gene Set Variation Analysis for microarray and RNA-seq data
5 4
 Author: Justin Guinney with contributions from Robert Castelo
6 5
 Maintainer: Justin Guinney <justin.guinney@sagebase.org>
7
-Depends: R (>= 2.13.0), methods, GSEABase (>= 1.17.4)
8
-Imports: methods, BiocGenerics, Biobase, GSEABase
9
-Enhances: snow, parallel
10
-Suggests: limma, RColorBrewer, genefilter, mclust, edgeR, GSVAdata
6
+Depends: R (>= 2.13.0)
7
+Imports: methods, BiocGenerics, Biobase, GSEABase (>= 1.17.4)
8
+Suggests: limma, RColorBrewer, genefilter, mclust,
9
+          edgeR, snow, parallel, GSVAdata
11 10
 Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner.
12 11
 License: GPL (>= 2)
13 12
 LazyLoad: yes
... ...
@@ -1,3 +1,14 @@
1
+CHANGES IN VERSION 1.14
2
+-----------------------
3
+
4
+USER VISIBLE CHANGES
5
+
6
+    o added an argument 'ssgsea.norm' to the 'gsva()' function to enable disabling the default score normalization of the original SSGSEA method by Barbie et al. (2009).
7
+
8
+BUG FIXES
9
+
10
+    o Better error handling of the situation when no gene identifiers match between gene sets and expression data.
11
+
1 12
 CHANGES IN VERSION 1.4
2 13
 ----------------------
3 14
 
... ...
@@ -3,10 +3,10 @@
3 3
 ## purpose: main function of the package which estimates activity
4 4
 ##          scores for each given gene-set
5 5
 
6
-setGeneric("gsva", function(expr, gset.idx.list, annotation=NULL, ...) standardGeneric("gsva"))
6
+setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva"))
7 7
 
8
-setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotation="missing"),
9
-          function(expr, gset.idx.list, annotation=NULL,
8
+setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
9
+          function(expr, gset.idx.list, annotation,
10 10
   method=c("gsva", "ssgsea", "zscore", "plage"),
11 11
   rnaseq=FALSE,
12 12
   abs.ranking=FALSE,
... ...
@@ -19,16 +19,20 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati
19 19
   mx.diff=TRUE,
20 20
   tau=switch(method, gsva=1, ssgsea=0.25, NA),
21 21
   kernel=TRUE,
22
+  ssgsea.norm=TRUE,
22 23
   verbose=TRUE)
23 24
 {
24 25
   method <- match.arg(method)
25 26
 
26 27
   ## filter out genes with constant expression values
27 28
   sdGenes <- Biobase::esApply(expr, 1, sd)
28
-  if (any(sdGenes == 0)) {
29
-    if (verbose)
30
-      cat("Filtering out", sum(sdGenes == 0), "genes with constant expression values throuhgout the samples\n")
31
-    expr <- expr[sdGenes > 0, ]
29
+  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
30
+    warning(sum(sdGenes == 0 | is.na(sdGenes)),
31
+            "genes with constant expression values throuhgout the samples.")
32
+    if (method != "ssgsea") {
33
+      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
34
+      expr <- expr[sdGenes > 0 & !is.na(sdGenes), ]
35
+    }
32 36
   } 
33 37
 
34 38
   if (nrow(expr) < 2)
... ...
@@ -39,15 +43,18 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati
39 43
                                  function(x, y) na.omit(match(x, y)),
40 44
                                  featureNames(expr))
41 45
 
46
+  if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0)
47
+    stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
48
+
42 49
   ## remove gene sets from the analysis for which no features are available
43 50
   ## and meet the minimum and maximum gene-set size specified by the user
44 51
   mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
45 52
                                          min.sz=max(1, min.sz),
46 53
                                          max.sz=max.sz)
47 54
 
48
-  eSco <- GSVA:::.gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking,
49
-                       no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
50
-                       mx.diff, tau, kernel, verbose)
55
+  eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking,
56
+                no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
57
+                mx.diff, tau, kernel, ssgsea.norm, verbose)
51 58
 
52 59
   if (method != "gsva")
53 60
     eSco <- list(es.obs=eSco, bootstrap=NULL, p.vals.sign=NULL)
... ...
@@ -60,8 +67,8 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati
60 67
               p.vals.sign=eSco$p.vals.sign))
61 68
 })
62 69
 
63
-setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollection", annotation="missing"),
64
-          function(expr, gset.idx.list, annotation=NULL,
70
+setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollection"),
71
+          function(expr, gset.idx.list, annotation,
65 72
   method=c("gsva", "ssgsea", "zscore", "plage"),
66 73
   rnaseq=FALSE,
67 74
   abs.ranking=FALSE,
... ...
@@ -74,16 +81,20 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
74 81
   mx.diff=TRUE,
75 82
   tau=switch(method, gsva=1, ssgsea=0.25, NA),
76 83
   kernel=TRUE,
84
+  ssgsea.norm=TRUE,
77 85
   verbose=TRUE)
78 86
 {
79 87
   method <- match.arg(method)
80 88
 
81 89
   ## filter out genes with constant expression values
82 90
   sdGenes <- Biobase::esApply(expr, 1, sd)
83
-  if (any(sdGenes == 0)) {
84
-    if (verbose)
85
-      cat("Filtering out", sum(sdGenes == 0), "genes with constant expression values throuhgout the samples\n")
86
-    expr <- expr[sdGenes > 0, ]
91
+  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
92
+    warning(sum(sdGenes == 0 | is.na(sdGenes)),
93
+            "genes with constant expression values throuhgout the samples.")
94
+    if (method != "ssgsea") {
95
+      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
96
+      expr <- expr[sdGenes > 0 & !is.na(sdGenes), ]
97
+    }
87 98
   } 
88 99
 
89 100
   if (nrow(expr) < 2)
... ...
@@ -94,7 +105,7 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
94 105
 
95 106
   ## map gene identifiers of the gene sets to the features in the chip
96 107
   mapped.gset.idx.list <- GSEABase::mapIdentifiers(gset.idx.list,
97
-                                                   GSEABase::AnnoOrEntrezIdentifier(annotation(expr)))
108
+                                                   GSEABase::AnnoOrEntrezIdentifier(Biobase::annotation(expr)))
98 109
   
99 110
   ## map to the actual features for which expression data is available
100 111
   tmp <- lapply(geneIds(mapped.gset.idx.list),
... ...
@@ -107,9 +118,9 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
107 118
                                          min.sz=max(1, min.sz),
108 119
                                          max.sz=max.sz)
109 120
 
110
-  eSco <- GSVA:::.gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking,
111
-                       no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
112
-                       mx.diff, tau, kernel, verbose)
121
+  eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking,
122
+                no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
123
+                mx.diff, tau, kernel, ssgsea.norm, verbose)
113 124
 
114 125
   if (method != "gsva")
115 126
     eSco <- list(es.obs=eSco, bootstrap=NULL, p.vals.sign=NULL)
... ...
@@ -122,8 +133,8 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
122 133
               p.vals.sign=eSco$p.vals.sign))
123 134
 })
124 135
 
125
-setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", annotation="character"),
126
-          function(expr, gset.idx.list, annotation=NA,
136
+setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"),
137
+          function(expr, gset.idx.list, annotation,
127 138
   method=c("gsva", "ssgsea", "zscore", "plage"),
128 139
   rnaseq=FALSE,
129 140
   abs.ranking=FALSE,
... ...
@@ -136,16 +147,20 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an
136 147
   mx.diff=TRUE,
137 148
   tau=switch(method, gsva=1, ssgsea=0.25, NA),
138 149
   kernel=TRUE,
150
+  ssgsea.norm=TRUE,
139 151
   verbose=TRUE)
140 152
 {
141 153
   method <- match.arg(method)
142 154
 
143 155
   ## filter out genes with constant expression values
144 156
   sdGenes <- apply(expr, 1, sd)
145
-  if (any(sdGenes == 0)) {
146
-    if (verbose)
147
-      cat("Filtering out", sum(sdGenes == 0), "genes with constant expression values throuhgout the samples\n")
148
-    expr <- expr[sdGenes > 0, ]
157
+  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
158
+    warning(sum(sdGenes == 0 | is.na(sdGenes)),
159
+            "genes with constant expression values throuhgout the samples.")
160
+    if (method != "ssgsea") {
161
+      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
162
+      expr <- expr[sdGenes > 0 & !is.na(sdGenes), , drop=FALSE]
163
+    }
149 164
   } 
150 165
 
151 166
   if (nrow(expr) < 2)
... ...
@@ -153,7 +168,7 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an
153 168
 
154 169
   ## map gene identifiers of the gene sets to the features in the matrix
155 170
   mapped.gset.idx.list <- gset.idx.list
156
-  if (!is.na(annotation)) {
171
+  if (!missing(annotation)) {
157 172
     if (verbose)
158 173
       cat("Mapping identifiers between gene sets and feature names\n")
159 174
 
... ...
@@ -167,19 +182,22 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an
167 182
                                  rownames(expr))
168 183
   names(tmp) <- names(mapped.gset.idx.list)
169 184
 
185
+  if (length(unlist(tmp, use.names=FALSE)) == 0)
186
+    stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
187
+
170 188
   ## remove gene sets from the analysis for which no features are available
171 189
   ## and meet the minimum and maximum gene-set size specified by the user
172 190
   mapped.gset.idx.list <- filterGeneSets(tmp,
173 191
                                          min.sz=max(1, min.sz),
174 192
                                          max.sz=max.sz)
175 193
 
176
-  GSVA:::.gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking,
177
-               no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
178
-               mx.diff, tau, kernel, verbose)
194
+  .gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking,
195
+        no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
196
+        mx.diff, tau, kernel, ssgsea.norm, verbose)
179 197
 })
180 198
 
181
-setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="missing"),
182
-          function(expr, gset.idx.list, annotation=NULL,
199
+setMethod("gsva", signature(expr="matrix", gset.idx.list="list"),
200
+          function(expr, gset.idx.list, annotation,
183 201
   method=c("gsva", "ssgsea", "zscore", "plage"),
184 202
   rnaseq=FALSE,
185 203
   abs.ranking=FALSE,
... ...
@@ -192,16 +210,20 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis
192 210
   mx.diff=TRUE,
193 211
   tau=switch(method, gsva=1, ssgsea=0.25, NA),
194 212
   kernel=TRUE,
213
+  ssgsea.norm=TRUE,
195 214
   verbose=TRUE)
196 215
 {
197 216
   method <- match.arg(method)
198 217
 
199 218
   ## filter out genes with constant expression values
200 219
   sdGenes <- apply(expr, 1, sd)
201
-  if (any(sdGenes == 0)) {
202
-    if (verbose)
203
-      cat("Filtering out", sum(sdGenes == 0), "genes with constant expression values throuhgout the samples\n")
204
-    expr <- expr[sdGenes > 0, ]
220
+  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
221
+    warning(sum(sdGenes == 0 | is.na(sdGenes)),
222
+            "genes with constant expression values throuhgout the samples.")
223
+    if (method != "ssgsea") {
224
+      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
225
+      expr <- expr[sdGenes > 0 & !is.na(sdGenes), , drop=FALSE]
226
+    }
205 227
   } 
206 228
 
207 229
   if (nrow(expr) < 2)
... ...
@@ -211,15 +233,18 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis
211 233
                                  function(x ,y) na.omit(match(x, y)),
212 234
                                  rownames(expr))
213 235
 
236
+  if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0)
237
+    stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
238
+
214 239
   ## remove gene sets from the analysis for which no features are available
215 240
   ## and meet the minimum and maximum gene-set size specified by the user
216 241
   mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
217 242
                                          min.sz=max(1, min.sz),
218 243
                                          max.sz=max.sz)
219 244
 
220
-  GSVA:::.gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, no.bootstraps,
221
-               bootstrap.percent, parallel.sz, parallel.type,
222
-               mx.diff, tau, kernel, verbose)
245
+  .gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, no.bootstraps,
246
+        bootstrap.percent, parallel.sz, parallel.type,
247
+        mx.diff, tau, kernel, ssgsea.norm, verbose)
223 248
 })
224 249
 
225 250
 .gsva <- function(expr, gset.idx.list,
... ...
@@ -233,6 +258,7 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis
233 258
   mx.diff=TRUE,
234 259
   tau=1,
235 260
   kernel=TRUE,
261
+  ssgsea.norm=TRUE,
236 262
   verbose=TRUE)
237 263
 {
238 264
 	if(length(gset.idx.list) == 0){
... ...
@@ -242,9 +268,10 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis
242 268
   if (method == "ssgsea") {
243 269
 	  if(verbose)
244 270
 		  cat("Estimating ssGSEA scores for", length(gset.idx.list),"gene sets.\n")
245
-	
271
+
246 272
     return(ssgsea(expr, gset.idx.list, alpha=tau, parallel.sz=parallel.sz,
247
-                  parallel.type=parallel.type, verbose=verbose))
273
+                  parallel.type=parallel.type, normalization=ssgsea.norm,
274
+                  verbose=verbose))
248 275
   }
249 276
 
250 277
   if (method == "zscore") {
... ...
@@ -299,12 +326,13 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis
299 326
 		if(verbose) cat("Computing bootstrap enrichment scores\n")
300 327
 		bootstrap.nsamples <- floor(bootstrap.percent * n.samples)
301 328
 		
302
-		p.vals.sign <- matrix(NaN, n.gset, n.samples,dimnames=list(names(gset.idx.list),colnames(expr)))
329
+		p.vals.sign <- matrix(NaN, n.gset, n.samples,
330
+                          dimnames=list(names(gset.idx.list), colnames(expr)))
303 331
 		
304 332
 		es.bootstraps <- array(NaN, c(n.gset, n.samples, no.bootstraps))
305 333
 		if(parallel.sz > 0){
306 334
 			
307
-		  if(!GSVA:::.isPackageLoaded("snow")) {
335
+		  if(!.isPackageLoaded("snow")) {
308 336
 			  stop("Please load the 'snow' library")
309 337
 		  }
310 338
       ## copying ShortRead's strategy, the calls to the 'get()' are
... ...
@@ -382,7 +410,7 @@ compute.gene.density <- function(expr, sample.idxs, rnaseq=FALSE, kernel=TRUE){
382 410
   gene.density <- NA
383 411
   if (kernel) {
384 412
 	  A = .C("matrix_density_R",
385
-			as.double(t(expr[,sample.idxs])),
413
+			as.double(t(expr[ ,sample.idxs, drop=FALSE])),
386 414
 			as.double(t(expr)),
387 415
 			R = double(n.test.samples * n.genes),
388 416
 			n.density.samples,
... ...
@@ -432,8 +460,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE,
432 460
 	
433 461
 	rank.scores <- apply(sort.sgn.idxs, 2, compute_rank_score)
434 462
 	
435
-	haveParallel <- GSVA:::.isPackageLoaded("parallel")
436
-	haveSnow <- GSVA:::.isPackageLoaded("snow")
463
+	haveParallel <- .isPackageLoaded("parallel")
464
+	haveSnow <- .isPackageLoaded("snow")
437 465
 	
438 466
 	if (parallel.sz > 1 || haveParallel) {
439 467
 		if (!haveParallel && !haveSnow) {
... ...
@@ -486,7 +514,7 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE,
486 514
         assign("iGeneSet", 0, envir=globalenv())
487 515
       }
488 516
 
489
-      m <- mclapp(gset.idx.list, GSVA:::ks_test_m,
517
+      m <- mclapp(gset.idx.list, ks_test_m,
490 518
                   gene.density=rank.scores,
491 519
                   sort.idxs=sort.sgn.idxs,
492 520
                   mx.diff=mx.diff, tau=tau, verbose=verbose)
... ...
@@ -602,7 +630,8 @@ rndWalk <- function(gSetIdx, geneRanking, j, R, alpha) {
602 630
   sum(walkStat) 
603 631
 }
604 632
 
605
-ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz, parallel.type, verbose) {
633
+ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz,
634
+                   parallel.type, normalization=TRUE, verbose) {
606 635
 
607 636
   p <- nrow(X)
608 637
   n <- ncol(X)
... ...
@@ -615,8 +644,8 @@ ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz, parallel.type, verbose)
615 644
 
616 645
   R <- apply(X, 2, function(x,p) as.integer(rank(x)), p)
617 646
 
618
-	haveParallel <- GSVA:::.isPackageLoaded("parallel")
619
-	haveSnow <- GSVA:::.isPackageLoaded("snow")
647
+	haveParallel <- .isPackageLoaded("parallel")
648
+	haveSnow <- .isPackageLoaded("snow")
620 649
 	
621 650
   cl <- makeCl <- parSapp <- stopCl <- mclapp <- detCor <- nCores <- NA
622 651
 	if (parallel.sz > 1 || haveParallel) {
... ...
@@ -670,9 +699,11 @@ ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz, parallel.type, verbose)
670 699
   if (length(geneSets) == 1)
671 700
     es <- matrix(es, nrow=1)
672 701
 
673
-  ## normalize enrichment scores by using the entire data set, as indicated
674
-  ## by Barbie et al., 2009, online methods, pg. 2
675
-  es <- apply(es, 2, function(x, es) x / (range(es)[2] - range(es)[1]), es)
702
+  if (normalization) {
703
+    ## normalize enrichment scores by using the entire data set, as indicated
704
+    ## by Barbie et al., 2009, online methods, pg. 2
705
+    es <- apply(es, 2, function(x, es) x / (range(es)[2] - range(es)[1]), es)
706
+  }
676 707
 
677 708
   if (length(geneSets) == 1)
678 709
     es <- matrix(es, nrow=1)
... ...
@@ -706,8 +737,8 @@ zscore <- function(X, geneSets, parallel.sz, parallel.type, verbose) {
706 737
 
707 738
   Z <- t(apply(X, 1, function(x) (x-mean(x))/sd(x)))
708 739
 
709
-	haveParallel <- GSVA:::.isPackageLoaded("parallel")
710
-	haveSnow <- GSVA:::.isPackageLoaded("snow")
740
+	haveParallel <- .isPackageLoaded("parallel")
741
+	haveSnow <- .isPackageLoaded("snow")
711 742
 	
712 743
   cl <- makeCl <- parSapp <- stopCl <- mclapp <- detCor <- nCores <- NA
713 744
 	if (parallel.sz > 1 || haveParallel) {
... ...
@@ -792,8 +823,8 @@ plage <- function(X, geneSets, parallel.sz, parallel.type, verbose) {
792 823
 
793 824
   Z <- t(apply(X, 1, function(x) (x-mean(x))/sd(x)))
794 825
 
795
-	haveParallel <- GSVA:::.isPackageLoaded("parallel")
796
-	haveSnow <- GSVA:::.isPackageLoaded("snow")
826
+	haveParallel <- .isPackageLoaded("parallel")
827
+	haveSnow <- .isPackageLoaded("snow")
797 828
 	
798 829
   ## the masterDescriptor() calls are disabled since they are not available in windows
799 830
   ## they would help to report progress by just one of the processors. now all processors
... ...
@@ -917,7 +948,7 @@ setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="character
917 948
   members <- cbind(unlist(gSets, use.names=FALSE), rep(1:totalGsets, times=lenGsets))
918 949
   gSetsMembershipMatrix[members] <- 1
919 950
 
920
-  GSVA:::.computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
951
+  .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
921 952
 })
922 953
 
923 954
 setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="ExpressionSet"),
... ...
@@ -936,7 +967,7 @@ setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="Expressio
936 967
   members <- cbind(unlist(gSets, use.names=FALSE), rep(1:totalGsets, times=lenGsets))
937 968
   gSetsMembershipMatrix[members] <- 1
938 969
 
939
-  GSVA:::.computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
970
+  .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
940 971
 })
941 972
 
942 973
 setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGenes="character"),
... ...
@@ -945,20 +976,20 @@ setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGen
945 976
   gSetsMembershipMatrix <- incidence(gSets)
946 977
   gSetsMembershipMatrix <- t(gSetsMembershipMatrix[, colnames(gSetsMembershipMatrix) %in% uniqGenes])
947 978
 
948
-  GSVA:::.computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
979
+  .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
949 980
 })
950 981
 
951 982
 setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGenes="ExpressionSet"),
952 983
           function(gSets, uniqGenes, min.sz=1, max.sz=Inf) {
953 984
   ## map gene identifiers of the gene sets to the features in the chip
954
-  gSets <- GSEABase::mapIdentifiers(gSets, GSEABase::AnnoOrEntrezIdentifier(annotation(uniqGenes)))
985
+  gSets <- GSEABase::mapIdentifiers(gSets, GSEABase::AnnoOrEntrezIdentifier(Biobase::annotation(uniqGenes)))
955 986
   
956 987
   uniqGenes <- Biobase::featureNames(uniqGenes)
957 988
 
958 989
   gSetsMembershipMatrix <- incidence(gSets)
959 990
   gSetsMembershipMatrix <- t(gSetsMembershipMatrix[, colnames(gSetsMembershipMatrix) %in% uniqGenes])
960 991
 
961
-  GSVA:::.computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
992
+  .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
962 993
 })
963 994
 
964 995
 .computeGeneSetsOverlap <- function(gSetsMembershipMatrix, min.sz=1, max.sz=Inf) {
... ...
@@ -1,9 +1,9 @@
1 1
 \name{gsva}
2 2
 \alias{gsva}
3
-\alias{gsva,ExpressionSet,list,missing-method}
4
-\alias{gsva,ExpressionSet,GeneSetCollection,missing-method}
5
-\alias{gsva,matrix,GeneSetCollection,character-method}
6
-\alias{gsva,matrix,list,missing-method}
3
+\alias{gsva,ExpressionSet,list-method}
4
+\alias{gsva,ExpressionSet,GeneSetCollection-method}
5
+\alias{gsva,matrix,GeneSetCollection-method}
6
+\alias{gsva,matrix,list-method}
7 7
 
8 8
 \encoding{latin1}
9 9
 
... ...
@@ -14,7 +14,7 @@ Gene Set Variation Analysis
14 14
 Estimates GSVA enrichment scores.
15 15
 }
16 16
 \usage{
17
-\S4method{gsva}{ExpressionSet,list,missing}(expr, gset.idx.list, annotation=NULL,
17
+\S4method{gsva}{ExpressionSet,list}(expr, gset.idx.list, annotation,
18 18
     method=c("gsva", "ssgsea", "zscore", "plage"),
19 19
     rnaseq=FALSE,
20 20
     abs.ranking=FALSE,
... ...
@@ -27,8 +27,9 @@ Estimates GSVA enrichment scores.
27 27
     mx.diff=TRUE,
28 28
     tau=switch(method, gsva=1, ssgsea=0.25, NA),
29 29
     kernel=TRUE,
30
+    ssgsea.norm=TRUE,
30 31
     verbose=TRUE)
31
-\S4method{gsva}{ExpressionSet,GeneSetCollection,missing}(expr, gset.idx.list, annotation=NULL,
32
+\S4method{gsva}{ExpressionSet,GeneSetCollection}(expr, gset.idx.list, annotation,
32 33
     method=c("gsva", "ssgsea", "zscore", "plage"),
33 34
     rnaseq=FALSE,
34 35
     abs.ranking=FALSE,
... ...
@@ -41,8 +42,9 @@ Estimates GSVA enrichment scores.
41 42
     mx.diff=TRUE,
42 43
     tau=switch(method, gsva=1, ssgsea=0.25, NA),
43 44
     kernel=TRUE,
45
+    ssgsea.norm=TRUE,
44 46
     verbose=TRUE)
45
-\S4method{gsva}{matrix,GeneSetCollection,character}(expr, gset.idx.list, annotation=NA,
47
+\S4method{gsva}{matrix,GeneSetCollection}(expr, gset.idx.list, annotation,
46 48
     method=c("gsva", "ssgsea", "zscore", "plage"),
47 49
     rnaseq=FALSE,
48 50
     abs.ranking=FALSE,
... ...
@@ -55,8 +57,9 @@ Estimates GSVA enrichment scores.
55 57
     mx.diff=TRUE,
56 58
     tau=switch(method, gsva=1, ssgsea=0.25, NA),
57 59
     kernel=TRUE,
60
+    ssgsea.norm=TRUE,
58 61
     verbose=TRUE)
59
-\S4method{gsva}{matrix,list,missing}(expr, gset.idx.list, annotation=NULL,
62
+\S4method{gsva}{matrix,list}(expr, gset.idx.list, annotation,
60 63
     method=c("gsva", "ssgsea", "zscore", "plage"),
61 64
     rnaseq=FALSE,
62 65
     abs.ranking=FALSE,
... ...
@@ -69,6 +72,7 @@ Estimates GSVA enrichment scores.
69 72
     mx.diff=TRUE,
70 73
     tau=switch(method, gsva=1, ssgsea=0.25, NA),
71 74
     kernel=TRUE,
75
+    ssgsea.norm=TRUE,
72 76
     verbose=TRUE)
73 77
 }
74 78
 \arguments{
... ...
@@ -81,9 +85,9 @@ Estimates GSVA enrichment scores.
81 85
                     and gene sets as a \code{GeneSetCollection} object, the \code{annotation} argument
82 86
                     can be used to supply the name of the Bioconductor package that contains
83 87
                     annotations for the class of gene identifiers occurring in the row names of
84
-                    the expression data matrix. By default \code{annotation=NULL} which implies that
85
-                    \code{gsva()} will try to match the identifiers in \code{expr} to the identifiers
86
-                    in \code{gset.idx.list} just as they are.}
88
+                    the expression data matrix. By default \code{gsva()} will try to match the
89
+                    identifiers in \code{expr} to the identifiers in \code{gset.idx.list} just as
90
+                    they are, unless the \code{annotation} argument is set.}
87 91
   \item{method}{Method to employ in the estimation of gene-set enrichment scores per sample. By default
88 92
                 this is set to \code{gsva} (\enc{H�nzelmann}{Hanzelmann} et al, 2013) and other options are
89 93
                 \code{ssgsea} (Barbie et al, 2009), \code{zscore} (Lee et al, 2008) or \code{plage}
... ...
@@ -125,6 +129,10 @@ Estimates GSVA enrichment scores.
125 129
                 estimation of the empirical cumulative distribution function (default) and \code{FALSE}
126 130
                 when this function is directly estimated from the observed data. This last option is
127 131
                 justified in the limit of the size of the sample by the so-called Glivenko-Cantelli theorem.}
132
+  \item{ssgsea.norm}{Logical, set to \code{TRUE} (default) with \code{method="ssgsea"} runs the SSGSEA method
133
+                     from Barbie et al. (2009) normalizing the scores by the absolute difference between
134
+                     the minimum and the maximum, as described in their paper. When \code{ssgsea.norm=FALSE}
135
+                     this last normalization step is skipped.}
128 136
   \item{verbose}{Gives information about each calculation step. Default: \code{FALSE}.}
129 137
 }
130 138