Browse code

Added methods to take expression data in a SummarizedExperiment object.

Robert Castelo authored on 16/12/2019 11:46:25
Showing 5 changed files

... ...
@@ -5,10 +5,10 @@ Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.gui
5 5
              person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"),
6 6
              person("Joan", "Fernandez", role="ctb", email="joanfernandez1331@gmail.com"))
7 7
 Depends: R (>= 3.5.0)
8
-Imports: methods, BiocGenerics, Biobase, SummarizedExperiment,
9
-         SingleCellExperiment, GSEABase (>= 1.17.4), parallel,
10
-         BiocParallel, shiny, shinythemes
11
-Suggests: limma, RColorBrewer, genefilter, edgeR, snow, GSVAdata
8
+Imports: methods, stats, utils, graphics, BiocGenerics, S4Vectors,
9
+         Biobase, SummarizedExperiment, GSEABase,
10
+         parallel, BiocParallel, shiny, shinythemes
11
+Suggests: limma, RColorBrewer, genefilter, edgeR, GSVAdata
12 12
 Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner.
13 13
 License: GPL (>= 2)
14 14
 LazyLoad: yes
... ...
@@ -6,16 +6,20 @@ import(shiny)
6 6
 
7 7
 importClassesFrom(Biobase, ExpressionSet)
8 8
 importClassesFrom(SummarizedExperiment, SummarizedExperiment)
9
-importClassesFrom(SingleCellExperiment, SingleCellExperiment)
10 9
 importClassesFrom(GSEABase, GeneSetCollection)
11 10
 
12 11
 importMethodsFrom(Biobase, featureNames,
13 12
                            phenoData,
14 13
                            experimentData,
15
-                           esApply,
16 14
                            exprs,
17 15
                            annotation)
18 16
 
17
+importMethodsFrom(S4Vectors, metadata,
18
+                             "metadata<-")
19
+
20
+importMethodsFrom(SummarizedExperiment, assays,
21
+                                        colData)
22
+
19 23
 importMethodsFrom(GSEABase, geneIds,
20 24
                             incidence)
21 25
 
... ...
@@ -27,13 +31,16 @@ importMethodsFrom(BiocParallel, bpiterate,
27 31
 importFrom(graphics, plot)
28 32
 importFrom(stats, ecdf,
29 33
                   na.omit)
30
-importFrom(utils, setTxtProgressBar,
34
+importFrom(utils, installed.packages,
35
+                  setTxtProgressBar,
31 36
                   txtProgressBar,
32 37
                   read.csv,
33 38
                   write.csv)
39
+importFrom(S4Vectors, SimpleList)
34 40
 importFrom(GSEABase, AnnoOrEntrezIdentifier,
35 41
                      mapIdentifiers,
36 42
                      getGmt)
43
+importFrom(SummarizedExperiment, SummarizedExperiment)
37 44
 importFrom(parallel, splitIndices)
38 45
 importFrom(BiocParallel, SerialParam,
39 46
                          MulticoreParam,
... ...
@@ -5,7 +5,7 @@
5 5
 
6 6
 setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva"))
7 7
 
8
-setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
8
+setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="GeneSetCollection"),
9 9
           function(expr, gset.idx.list, annotation,
10 10
   method=c("gsva", "ssgsea", "zscore", "plage"),
11 11
   kcdf=c("Gaussian", "Poisson", "none"),
... ...
@@ -22,16 +22,125 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
22 22
   method <- match.arg(method)
23 23
   kcdf <- match.arg(kcdf)
24 24
 
25
-  ## filter out genes with constant expression values
26
-  sdGenes <- esApply(expr, 1, sd)
27
-  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
28
-    warning(sum(sdGenes == 0 | is.na(sdGenes)),
29
-            " genes with constant expression values throuhgout the samples.")
30
-    if (method != "ssgsea") {
31
-      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
32
-      expr <- expr[sdGenes > 0 & !is.na(sdGenes), ]
25
+  if (length(assays(expr)) == 0L)
26
+    stop("The input SummarizedExperiment object has no assay data.")
27
+
28
+  if (missing(annotation))
29
+    annotation <- names(assays(se))[1]
30
+  else {
31
+    if (!is.character(annotation))
32
+      stop("The 'annotation' argument must contain a character string.")
33
+    annotation <- annotation[1]
34
+
35
+    if (!annotation %in% names(assays(se)))
36
+      stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation))
37
+  }
38
+
39
+  se <- expr
40
+  expr <- assays(se)[[annotation]]
41
+
42
+  ## filter genes according to verious criteria,
43
+  ## e.g., constant expression
44
+  expr <- .filterFeatures(expr, method)
45
+
46
+  if (nrow(expr) < 2)
47
+    stop("Less than two genes in the input ExpressionSet object\n")
48
+
49
+  annotpkg <- metadata(se)$annotation
50
+  if (!is.null(annotpkg) && length(annotpkg) > 0 && is.character(annotpkg) && annotpkg != "") {
51
+    if (!annotpkg %in% installed.packages())
52
+      stop(sprintf("Please install the nnotation package %s", annotpkg))
53
+
54
+    if (verbose)
55
+      cat("Mapping identifiers between gene sets and feature names\n")
56
+
57
+    ## map gene identifiers of the gene sets to the features in the chip
58
+    ## Biobase::annotation() is necessary to disambiguate from the
59
+    ## 'annotation' argument
60
+    mapped.gset.idx.list <- mapIdentifiers(gset.idx.list,
61
+                                           AnnoOrEntrezIdentifier(annotpkg))
62
+    mapped.gset.idx.list <- geneIds(mapped.gset.idx.list) 
63
+  } else {
64
+    mapped.gset.idx.list <- gset.idx.list
65
+    if (verbose) {
66
+      cat("No annotation package name available in the input 'SummarizedExperiment' object 'expr'.",
67
+          "Attempting to directly match identifiers in 'expr' to gene sets.", sep="\n")
33 68
     }
34
-  } 
69
+  }
70
+
71
+  ## map to the actual features for which expression data is available
72
+  mapped.gset.idx.list <- lapply(mapped.gset.idx.list,
73
+                                 function(x, y) na.omit(match(x, y)),
74
+                                 rownames(se))
75
+
76
+  if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0)
77
+    stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
78
+
79
+  ## remove gene sets from the analysis for which no features are available
80
+  ## and meet the minimum and maximum gene-set size specified by the user
81
+  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
82
+                                         min.sz=max(1, min.sz),
83
+                                         max.sz=max.sz)
84
+
85
+  if (!missing(kcdf)) {
86
+    if (kcdf == "Gaussian") {
87
+      rnaseq <- FALSE
88
+      kernel <- TRUE
89
+    } else if (kcdf == "Poisson") {
90
+      rnaseq <- TRUE
91
+      kernel <- TRUE
92
+    } else
93
+      kernel <- FALSE
94
+  }
95
+
96
+  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
97
+                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) 
98
+
99
+  rval <- SummarizedExperiment(assays=SimpleList(es=eSco),
100
+                               colData=colData(se),
101
+                               metadata=metadata(se))
102
+  metadata(rval)$annotation <- NULL
103
+
104
+  rval
105
+})
106
+
107
+setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="list"),
108
+          function(expr, gset.idx.list, annotation,
109
+  method=c("gsva", "ssgsea", "zscore", "plage"),
110
+  kcdf=c("Gaussian", "Poisson", "none"),
111
+  abs.ranking=FALSE,
112
+  min.sz=1,
113
+  max.sz=Inf,
114
+  parallel.sz=1L, 
115
+  mx.diff=TRUE,
116
+  tau=switch(method, gsva=1, ssgsea=0.25, NA),
117
+  ssgsea.norm=TRUE,
118
+  verbose=TRUE,
119
+  BPPARAM=SerialParam(progressbar=verbose))
120
+{
121
+  method <- match.arg(method)
122
+  kcdf <- match.arg(kcdf)
123
+
124
+  if (length(assays(expr)) == 0L)
125
+    stop("The input SummarizedExperiment object has no assay data.")
126
+
127
+  if (missing(annotation))
128
+    annotation <- names(assays(se))[1]
129
+  else {
130
+    if (!is.character(annotation))
131
+      stop("The 'annotation' argument must contain a character string.")
132
+    annotation <- annotation[1]
133
+
134
+    if (!annotation %in% names(assays(se)))
135
+      stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation))
136
+  }
137
+
138
+  se <- expr
139
+  expr <- assays(se)[[annotation]]
140
+
141
+  ## filter genes according to verious criteria,
142
+  ## e.g., constant expression
143
+  expr <- .filterFeatures(expr, method)
35 144
 
36 145
   if (nrow(expr) < 2)
37 146
     stop("Less than two genes in the input ExpressionSet object\n")
... ...
@@ -39,7 +148,7 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
39 148
   ## map to the actual features for which expression data is available
40 149
   mapped.gset.idx.list <- lapply(gset.idx.list,
41 150
                                  function(x, y) na.omit(match(x, y)),
42
-                                 featureNames(expr))
151
+                                 rownames(se))
43 152
 
44 153
   if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0)
45 154
     stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
... ...
@@ -61,13 +170,73 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
61 170
       kernel <- FALSE
62 171
   }
63 172
 
64
-  eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
173
+  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
65 174
                 parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) 
66 175
 
67
-  eScoEset <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(expr),
68
-                  experimentData=experimentData(expr), annotation="")
176
+  rval <- SummarizedExperiment(assays=SimpleList(es=eSco),
177
+                               colData=colData(se),
178
+                               metadata=metadata(se))
179
+  metadata(rval)$annotation <- NULL
69 180
 
70
-  rval <- eScoEset
181
+  rval
182
+})
183
+
184
+setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
185
+          function(expr, gset.idx.list, annotation,
186
+  method=c("gsva", "ssgsea", "zscore", "plage"),
187
+  kcdf=c("Gaussian", "Poisson", "none"),
188
+  abs.ranking=FALSE,
189
+  min.sz=1,
190
+  max.sz=Inf,
191
+  parallel.sz=1L, 
192
+  mx.diff=TRUE,
193
+  tau=switch(method, gsva=1, ssgsea=0.25, NA),
194
+  ssgsea.norm=TRUE,
195
+  verbose=TRUE,
196
+  BPPARAM=SerialParam(progressbar=verbose))
197
+{
198
+  method <- match.arg(method)
199
+  kcdf <- match.arg(kcdf)
200
+
201
+  eset <- expr
202
+  expr <- exprs(eset)
203
+  ## filter genes according to verious criteria,
204
+  ## e.g., constant expression
205
+  expr <- .filterFeatures(expr, method)
206
+
207
+  if (nrow(expr) < 2)
208
+    stop("Less than two genes in the input ExpressionSet object\n")
209
+
210
+  ## map to the actual features for which expression data is available
211
+  mapped.gset.idx.list <- lapply(gset.idx.list,
212
+                                 function(x, y) na.omit(match(x, y)),
213
+                                 featureNames(eset))
214
+
215
+  if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0)
216
+    stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
217
+
218
+  ## remove gene sets from the analysis for which no features are available
219
+  ## and meet the minimum and maximum gene-set size specified by the user
220
+  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
221
+                                         min.sz=max(1, min.sz),
222
+                                         max.sz=max.sz)
223
+
224
+  if (!missing(kcdf)) {
225
+    if (kcdf == "Gaussian") {
226
+      rnaseq <- FALSE
227
+      kernel <- TRUE
228
+    } else if (kcdf == "Poisson") {
229
+      rnaseq <- TRUE
230
+      kernel <- TRUE
231
+    } else
232
+      kernel <- FALSE
233
+  }
234
+
235
+  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
236
+                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) 
237
+
238
+  rval <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(eset),
239
+              experimentData=experimentData(eset), annotation="")
71 240
 
72 241
   rval
73 242
 })
... ...
@@ -89,34 +258,43 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
89 258
   method <- match.arg(method)
90 259
   kcdf <- match.arg(kcdf)
91 260
 
92
-  ## filter out genes with constant expression values
93
-  sdGenes <- esApply(expr, 1, sd)
94
-  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
95
-    warning(sum(sdGenes == 0 | is.na(sdGenes)),
96
-            " genes with constant expression values throuhgout the samples.")
97
-    if (method != "ssgsea") {
98
-      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
99
-      expr <- expr[sdGenes > 0 & !is.na(sdGenes), ]
100
-    }
101
-  } 
261
+  eset <- expr
262
+  expr <- exprs(eset)
263
+  ## filter genes according to verious criteria,
264
+  ## e.g., constant expression
265
+  expr <- .filterFeatures(expr, method)
102 266
 
103 267
   if (nrow(expr) < 2)
104 268
     stop("Less than two genes in the input ExpressionSet object\n")
105 269
 
106
-  if (verbose)
107
-    cat("Mapping identifiers between gene sets and feature names\n")
270
+  annotpkg <- Biobase::annotation(eset)
271
+  if (length(annotpkg) > 0 && annotpkg != "") {
272
+    if (!annotpkg %in% installed.packages())
273
+      stop(sprintf("Please install the nnotation package %s", annotpkg))
274
+
275
+    if (verbose)
276
+      cat("Mapping identifiers between gene sets and feature names\n")
277
+
278
+    ## map gene identifiers of the gene sets to the features in the chip
279
+    ## Biobase::annotation() is necessary to disambiguate from the
280
+    ## 'annotation' argument
281
+    mapped.gset.idx.list <- mapIdentifiers(gset.idx.list,
282
+                                           AnnoOrEntrezIdentifier(annotpkg))
283
+    mapped.gset.idx.list <- geneIds(mapped.gset.idx.list) 
284
+  } else {
285
+    mapped.gset.idx.list <- gset.idx.list
286
+    if (verbose) {
287
+      cat("No annotation package name available in the input 'ExpressionSet' object 'expr'.",
288
+        "Attempting to directly match identifiers in 'expr' to gene sets.", sep="\n")
289
+    }
290
+  }
108 291
 
109
-  ## map gene identifiers of the gene sets to the features in the chip
110
-  ## Biobase::annotation() is necessary to disambiguate from the
111
-  ## 'annotation' argument
112
-  mapped.gset.idx.list <- mapIdentifiers(gset.idx.list,
113
-                                         AnnoOrEntrezIdentifier(Biobase::annotation(expr)))
114
-  
115 292
   ## map to the actual features for which expression data is available
116
-  tmp <- lapply(geneIds(mapped.gset.idx.list),
117
-                                 function(x, y) na.omit(match(x, y)),
118
-                                 featureNames(expr))
293
+  tmp <- lapply(mapped.gset.idx.list,
294
+                function(x, y) na.omit(match(x, y)),
295
+                featureNames(eset))
119 296
   names(tmp) <- names(mapped.gset.idx.list)
297
+
120 298
   ## remove gene sets from the analysis for which no features are available
121 299
   ## and meet the minimum and maximum gene-set size specified by the user
122 300
   mapped.gset.idx.list <- filterGeneSets(tmp,
... ...
@@ -134,13 +312,11 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
134 312
       kernel <- FALSE
135 313
   }
136 314
 
137
-  eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
315
+  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
138 316
                 parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM)
139 317
 
140
-  eScoEset <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(expr),
141
-                  experimentData=experimentData(expr), annotation="")
142
-
143
-  rval <- eScoEset
318
+  rval <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(eset),
319
+              experimentData=experimentData(eset), annotation="")
144 320
 
145 321
   rval
146 322
 })
... ...
@@ -162,16 +338,9 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"),
162 338
   method <- match.arg(method)
163 339
   kcdf <- match.arg(kcdf)
164 340
 
165
-  ## filter out genes with constant expression values
166
-  sdGenes <- apply(expr, 1, sd)
167
-  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
168
-    warning(sum(sdGenes == 0 | is.na(sdGenes)),
169
-            " genes with constant expression values throuhgout the samples.")
170
-    if (method != "ssgsea") {
171
-      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
172
-      expr <- expr[sdGenes > 0 & !is.na(sdGenes), , drop=FALSE]
173
-    }
174
-  } 
341
+  ## filter genes according to verious criteria,
342
+  ## e.g., constant expression
343
+  expr <- .filterFeatures(expr, method)
175 344
 
176 345
   if (nrow(expr) < 2)
177 346
     stop("Less than two genes in the input expression data matrix\n")
... ...
@@ -236,16 +405,9 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list"),
236 405
   method <- match.arg(method)
237 406
   kcdf <- match.arg(kcdf)
238 407
 
239
-  ## filter out genes with constant expression values
240
-  sdGenes <- apply(expr, 1, sd)
241
-  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
242
-    warning(sum(sdGenes == 0 | is.na(sdGenes)),
243
-            " genes with constant expression values throuhgout the samples.")
244
-    if (method != "ssgsea") {
245
-      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
246
-      expr <- expr[sdGenes > 0 & !is.na(sdGenes), , drop=FALSE]
247
-    }
248
-  } 
408
+  ## filter genes according to verious criteria,
409
+  ## e.g., constant expression
410
+  expr <- .filterFeatures(expr, method)
249 411
 
250 412
   if (nrow(expr) < 2)
251 413
     stop("Less than two genes in the input expression data matrix\n")
252 414
new file mode 100644
... ...
@@ -0,0 +1,15 @@
1
+.filterFeatures <- function(expr, method) {
2
+
3
+  ## filter out genes with constant expression values
4
+  sdGenes <- apply(expr, 1, sd)
5
+  if (any(sdGenes == 0) || any(is.na(sdGenes))) {
6
+    warning(sum(sdGenes == 0 | is.na(sdGenes)),
7
+            " genes with constant expression values throuhgout the samples.")
8
+    if (method != "ssgsea") {
9
+      warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.")
10
+      expr <- expr[sdGenes > 0 & !is.na(sdGenes), ]
11
+    }
12
+  } 
13
+
14
+  expr
15
+}
... ...
@@ -1,5 +1,7 @@
1 1
 \name{gsva}
2 2
 \alias{gsva}
3
+\alias{gsva,SummarizedExperiment,list-method}
4
+\alias{gsva,SummarizedExperiment,GeneSetCollection-method}
3 5
 \alias{gsva,ExpressionSet,list-method}
4 6
 \alias{gsva,ExpressionSet,GeneSetCollection-method}
5 7
 \alias{gsva,matrix,GeneSetCollection-method}
... ...
@@ -14,6 +16,30 @@ Gene Set Variation Analysis
14 16
 Estimates GSVA enrichment scores.
15 17
 }
16 18
 \usage{
19
+\S4method{gsva}{SummarizedExperiment,GeneSetCollection}(expr, gset.idx.list, annotation,
20
+    method=c("gsva", "ssgsea", "zscore", "plage"),
21
+    kcdf=c("Gaussian", "Poisson", "none"),
22
+    abs.ranking=FALSE,
23
+    min.sz=1,
24
+    max.sz=Inf,
25
+    parallel.sz=1L,
26
+    mx.diff=TRUE,
27
+    tau=switch(method, gsva=1, ssgsea=0.25, NA),
28
+    ssgsea.norm=TRUE,
29
+    verbose=TRUE,
30
+    BPPARAM=SerialParam(progressbar=verbose))
31
+\S4method{gsva}{SummarizedExperiment,list}(expr, gset.idx.list, annotation,
32
+    method=c("gsva", "ssgsea", "zscore", "plage"),
33
+    kcdf=c("Gaussian", "Poisson", "none"),
34
+    abs.ranking=FALSE,
35
+    min.sz=1,
36
+    max.sz=Inf,
37
+    parallel.sz=1L,
38
+    mx.diff=TRUE,
39
+    tau=switch(method, gsva=1, ssgsea=0.25, NA),
40
+    ssgsea.norm=TRUE,
41
+    verbose=TRUE,
42
+    BPPARAM=SerialParam(progressbar=verbose))
17 43
 \S4method{gsva}{ExpressionSet,list}(expr, gset.idx.list, annotation,
18 44
     method=c("gsva", "ssgsea", "zscore", "plage"),
19 45
     kcdf=c("Gaussian", "Poisson", "none"),
... ...
@@ -64,18 +90,26 @@ Estimates GSVA enrichment scores.
64 90
     BPPARAM=SerialParam(progressbar=verbose))
65 91
 }
66 92
 \arguments{
67
-  \item{expr}{Gene expression data which can be given either as an \code{ExpressionSet}
68
-              object or as a matrix of expression values where rows correspond
69
-              to genes and columns correspond to samples.}
93
+  \item{expr}{Gene expression data which can be given either as a
94
+              \code{SummarizedExperiment} or \code{ExpressionSet} object,
95
+              or as a matrix of expression values where rows correspond to genes
96
+              and columns correspond to samples.}
70 97
   \item{gset.idx.list}{Gene sets provided either as a \code{list} object or as a
71 98
                        \code{GeneSetCollection} object.}
72
-  \item{annotation}{In the case of calling \code{gsva()} with expression data in a \code{matrix}
73
-                    and gene sets as a \code{GeneSetCollection} object, the \code{annotation} argument
74
-                    can be used to supply the name of the Bioconductor package that contains
75
-                    annotations for the class of gene identifiers occurring in the row names of
76
-                    the expression data matrix. By default \code{gsva()} will try to match the
77
-                    identifiers in \code{expr} to the identifiers in \code{gset.idx.list} just as
78
-                    they are, unless the \code{annotation} argument is set.}
99
+  \item{annotation}{In the case of calling \code{gsva()} on a
100
+                    \code{SummarizedExperiment} object, the \code{annotation}
101
+                    argument can be used to select the assay containing the
102
+                    molecular data we want as input to the \code{gsva()}
103
+                    function, otherwise the first assay is selected.
104
+                    In the case of calling \code{gsva()} with expression data in
105
+                    a \code{matrix} and gene sets as a \code{GeneSetCollection}
106
+                    object, the \code{annotation} argument can be used to supply
107
+                    the name of the Bioconductor package that contains
108
+                    annotations for the class of gene identifiers occurring in
109
+                    the row names of the expression data matrix.
110
+                    In the case of calling \code{gsva()} on a
111
+                    \code{ExpressionSet} object, the \code{annotation} argument
112
+                    is ignored. See details information below.}
79 113
   \item{method}{Method to employ in the estimation of gene-set enrichment scores per sample. By default
80 114
                 this is set to \code{gsva} (\enc{H�nzelmann}{Hanzelmann} et al, 2013) and other options are
81 115
                 \code{ssgsea} (Barbie et al, 2009), \code{zscore} (Lee et al, 2008) or \code{plage}
... ...
@@ -126,11 +160,32 @@ gene expression matrix into a g-geneset by n-sample pathway enrichment matrix.
126 160
 This facilitates many forms of statistical analysis in the 'space' of pathways
127 161
 rather than genes, providing a higher level of interpretability.
128 162
 
129
-The \code{gsva()} function first maps the identifiers in the gene sets to the
130
-identifiers in the input expression data leading to a filtered collection of
131
-gene sets. This collection can be further filtered to require a minimun and/or
132
-maximum size of the gene sets for which we want to calculate GSVA enrichment
133
-scores, by using the arguments \code{min.sz} and \code{max.sz}.
163
+By default, \code{gsva()} will try to match the identifiers in \code{expr} to
164
+the identifiers in \code{gset.idx.list} just as they are, unless the \code{annotation} argument is set.
165
+
166
+The \code{gsva()} function first maps the identifiers in the gene sets in
167
+\code{gset.idx.list} to the identifiers in the input expression data \code{expr}.
168
+When the input gene sets in \code{gset.idx.list} is provided as a \code{list}
169
+object, \code{gsva()} will try to match the identifiers in \code{expr} directly
170
+to the identifiers in \code{gset.idx.list} just as they are. Because unmatching
171
+identifiers will be discarded in both, \code{expr} and \code{gset.idx.list},
172
+\code{gsva()} may prompt an error if no identifiers can be matched as in the case
173
+of different types of identifiers (e.g., gene symbols vs Entrez identitifers).
174
+
175
+However, then the input gene sets in \code{gset.idx.list} is provided as a
176
+\code{GeneSetCollection} object, \code{gsva()} will try to automatically convert
177
+those identifiers to the type of identifier in the input expression data \code{expr}.
178
+Such an automatic conversion, however, will only occur in three scenarios: 1. when
179
+\code{expr} is an \code{ExpressionSet} object with an appropriately set
180
+\code{annotation} slot; 2. when \code{expr} is a \code{SummarizedExperiment} object
181
+with an appropriately set \code{annotation} slot in the metadata of \code{expr};
182
+3. when \code{expr} is a \code{matrix} and the \code{annotation} argument of the
183
+\code{gsva()} function is set to the name of the annotation package that provides
184
+the relationships between the type of identifiers in \code{expr} and \code{gset.idx.list}.
185
+
186
+The collection of gene sets resulting from the previous identifier matching,
187
+can be further filtered to require a minimun and/or maximum size by using the
188
+arguments \code{min.sz} and \code{max.sz}.
134 189
 }
135 190
 \value{
136 191
 A gene-set by sample matrix of GSVA enrichment scores.