... | ... |
@@ -5,10 +5,10 @@ Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.gui |
5 | 5 |
person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"), |
6 | 6 |
person("Joan", "Fernandez", role="ctb", email="joanfernandez1331@gmail.com")) |
7 | 7 |
Depends: R (>= 3.5.0) |
8 |
-Imports: methods, BiocGenerics, Biobase, SummarizedExperiment, |
|
9 |
- SingleCellExperiment, GSEABase (>= 1.17.4), parallel, |
|
10 |
- BiocParallel, shiny, shinythemes |
|
11 |
-Suggests: limma, RColorBrewer, genefilter, edgeR, snow, GSVAdata |
|
8 |
+Imports: methods, stats, utils, graphics, BiocGenerics, S4Vectors, |
|
9 |
+ Biobase, SummarizedExperiment, GSEABase, |
|
10 |
+ parallel, BiocParallel, shiny, shinythemes |
|
11 |
+Suggests: limma, RColorBrewer, genefilter, edgeR, GSVAdata |
|
12 | 12 |
Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner. |
13 | 13 |
License: GPL (>= 2) |
14 | 14 |
LazyLoad: yes |
... | ... |
@@ -6,16 +6,20 @@ import(shiny) |
6 | 6 |
|
7 | 7 |
importClassesFrom(Biobase, ExpressionSet) |
8 | 8 |
importClassesFrom(SummarizedExperiment, SummarizedExperiment) |
9 |
-importClassesFrom(SingleCellExperiment, SingleCellExperiment) |
|
10 | 9 |
importClassesFrom(GSEABase, GeneSetCollection) |
11 | 10 |
|
12 | 11 |
importMethodsFrom(Biobase, featureNames, |
13 | 12 |
phenoData, |
14 | 13 |
experimentData, |
15 |
- esApply, |
|
16 | 14 |
exprs, |
17 | 15 |
annotation) |
18 | 16 |
|
17 |
+importMethodsFrom(S4Vectors, metadata, |
|
18 |
+ "metadata<-") |
|
19 |
+ |
|
20 |
+importMethodsFrom(SummarizedExperiment, assays, |
|
21 |
+ colData) |
|
22 |
+ |
|
19 | 23 |
importMethodsFrom(GSEABase, geneIds, |
20 | 24 |
incidence) |
21 | 25 |
|
... | ... |
@@ -27,13 +31,16 @@ importMethodsFrom(BiocParallel, bpiterate, |
27 | 31 |
importFrom(graphics, plot) |
28 | 32 |
importFrom(stats, ecdf, |
29 | 33 |
na.omit) |
30 |
-importFrom(utils, setTxtProgressBar, |
|
34 |
+importFrom(utils, installed.packages, |
|
35 |
+ setTxtProgressBar, |
|
31 | 36 |
txtProgressBar, |
32 | 37 |
read.csv, |
33 | 38 |
write.csv) |
39 |
+importFrom(S4Vectors, SimpleList) |
|
34 | 40 |
importFrom(GSEABase, AnnoOrEntrezIdentifier, |
35 | 41 |
mapIdentifiers, |
36 | 42 |
getGmt) |
43 |
+importFrom(SummarizedExperiment, SummarizedExperiment) |
|
37 | 44 |
importFrom(parallel, splitIndices) |
38 | 45 |
importFrom(BiocParallel, SerialParam, |
39 | 46 |
MulticoreParam, |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
|
6 | 6 |
setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva")) |
7 | 7 |
|
8 |
-setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"), |
|
8 |
+setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="GeneSetCollection"), |
|
9 | 9 |
function(expr, gset.idx.list, annotation, |
10 | 10 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
11 | 11 |
kcdf=c("Gaussian", "Poisson", "none"), |
... | ... |
@@ -22,16 +22,125 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"), |
22 | 22 |
method <- match.arg(method) |
23 | 23 |
kcdf <- match.arg(kcdf) |
24 | 24 |
|
25 |
- ## filter out genes with constant expression values |
|
26 |
- sdGenes <- esApply(expr, 1, sd) |
|
27 |
- if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
28 |
- warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
29 |
- " genes with constant expression values throuhgout the samples.") |
|
30 |
- if (method != "ssgsea") { |
|
31 |
- warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
32 |
- expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
25 |
+ if (length(assays(expr)) == 0L) |
|
26 |
+ stop("The input SummarizedExperiment object has no assay data.") |
|
27 |
+ |
|
28 |
+ if (missing(annotation)) |
|
29 |
+ annotation <- names(assays(se))[1] |
|
30 |
+ else { |
|
31 |
+ if (!is.character(annotation)) |
|
32 |
+ stop("The 'annotation' argument must contain a character string.") |
|
33 |
+ annotation <- annotation[1] |
|
34 |
+ |
|
35 |
+ if (!annotation %in% names(assays(se))) |
|
36 |
+ stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation)) |
|
37 |
+ } |
|
38 |
+ |
|
39 |
+ se <- expr |
|
40 |
+ expr <- assays(se)[[annotation]] |
|
41 |
+ |
|
42 |
+ ## filter genes according to verious criteria, |
|
43 |
+ ## e.g., constant expression |
|
44 |
+ expr <- .filterFeatures(expr, method) |
|
45 |
+ |
|
46 |
+ if (nrow(expr) < 2) |
|
47 |
+ stop("Less than two genes in the input ExpressionSet object\n") |
|
48 |
+ |
|
49 |
+ annotpkg <- metadata(se)$annotation |
|
50 |
+ if (!is.null(annotpkg) && length(annotpkg) > 0 && is.character(annotpkg) && annotpkg != "") { |
|
51 |
+ if (!annotpkg %in% installed.packages()) |
|
52 |
+ stop(sprintf("Please install the nnotation package %s", annotpkg)) |
|
53 |
+ |
|
54 |
+ if (verbose) |
|
55 |
+ cat("Mapping identifiers between gene sets and feature names\n") |
|
56 |
+ |
|
57 |
+ ## map gene identifiers of the gene sets to the features in the chip |
|
58 |
+ ## Biobase::annotation() is necessary to disambiguate from the |
|
59 |
+ ## 'annotation' argument |
|
60 |
+ mapped.gset.idx.list <- mapIdentifiers(gset.idx.list, |
|
61 |
+ AnnoOrEntrezIdentifier(annotpkg)) |
|
62 |
+ mapped.gset.idx.list <- geneIds(mapped.gset.idx.list) |
|
63 |
+ } else { |
|
64 |
+ mapped.gset.idx.list <- gset.idx.list |
|
65 |
+ if (verbose) { |
|
66 |
+ cat("No annotation package name available in the input 'SummarizedExperiment' object 'expr'.", |
|
67 |
+ "Attempting to directly match identifiers in 'expr' to gene sets.", sep="\n") |
|
33 | 68 |
} |
34 |
- } |
|
69 |
+ } |
|
70 |
+ |
|
71 |
+ ## map to the actual features for which expression data is available |
|
72 |
+ mapped.gset.idx.list <- lapply(mapped.gset.idx.list, |
|
73 |
+ function(x, y) na.omit(match(x, y)), |
|
74 |
+ rownames(se)) |
|
75 |
+ |
|
76 |
+ if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
77 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
78 |
+ |
|
79 |
+ ## remove gene sets from the analysis for which no features are available |
|
80 |
+ ## and meet the minimum and maximum gene-set size specified by the user |
|
81 |
+ mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
82 |
+ min.sz=max(1, min.sz), |
|
83 |
+ max.sz=max.sz) |
|
84 |
+ |
|
85 |
+ if (!missing(kcdf)) { |
|
86 |
+ if (kcdf == "Gaussian") { |
|
87 |
+ rnaseq <- FALSE |
|
88 |
+ kernel <- TRUE |
|
89 |
+ } else if (kcdf == "Poisson") { |
|
90 |
+ rnaseq <- TRUE |
|
91 |
+ kernel <- TRUE |
|
92 |
+ } else |
|
93 |
+ kernel <- FALSE |
|
94 |
+ } |
|
95 |
+ |
|
96 |
+ eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
97 |
+ parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) |
|
98 |
+ |
|
99 |
+ rval <- SummarizedExperiment(assays=SimpleList(es=eSco), |
|
100 |
+ colData=colData(se), |
|
101 |
+ metadata=metadata(se)) |
|
102 |
+ metadata(rval)$annotation <- NULL |
|
103 |
+ |
|
104 |
+ rval |
|
105 |
+}) |
|
106 |
+ |
|
107 |
+setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="list"), |
|
108 |
+ function(expr, gset.idx.list, annotation, |
|
109 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
110 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
111 |
+ abs.ranking=FALSE, |
|
112 |
+ min.sz=1, |
|
113 |
+ max.sz=Inf, |
|
114 |
+ parallel.sz=1L, |
|
115 |
+ mx.diff=TRUE, |
|
116 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
117 |
+ ssgsea.norm=TRUE, |
|
118 |
+ verbose=TRUE, |
|
119 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
120 |
+{ |
|
121 |
+ method <- match.arg(method) |
|
122 |
+ kcdf <- match.arg(kcdf) |
|
123 |
+ |
|
124 |
+ if (length(assays(expr)) == 0L) |
|
125 |
+ stop("The input SummarizedExperiment object has no assay data.") |
|
126 |
+ |
|
127 |
+ if (missing(annotation)) |
|
128 |
+ annotation <- names(assays(se))[1] |
|
129 |
+ else { |
|
130 |
+ if (!is.character(annotation)) |
|
131 |
+ stop("The 'annotation' argument must contain a character string.") |
|
132 |
+ annotation <- annotation[1] |
|
133 |
+ |
|
134 |
+ if (!annotation %in% names(assays(se))) |
|
135 |
+ stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation)) |
|
136 |
+ } |
|
137 |
+ |
|
138 |
+ se <- expr |
|
139 |
+ expr <- assays(se)[[annotation]] |
|
140 |
+ |
|
141 |
+ ## filter genes according to verious criteria, |
|
142 |
+ ## e.g., constant expression |
|
143 |
+ expr <- .filterFeatures(expr, method) |
|
35 | 144 |
|
36 | 145 |
if (nrow(expr) < 2) |
37 | 146 |
stop("Less than two genes in the input ExpressionSet object\n") |
... | ... |
@@ -39,7 +148,7 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"), |
39 | 148 |
## map to the actual features for which expression data is available |
40 | 149 |
mapped.gset.idx.list <- lapply(gset.idx.list, |
41 | 150 |
function(x, y) na.omit(match(x, y)), |
42 |
- featureNames(expr)) |
|
151 |
+ rownames(se)) |
|
43 | 152 |
|
44 | 153 |
if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
45 | 154 |
stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
... | ... |
@@ -61,13 +170,73 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"), |
61 | 170 |
kernel <- FALSE |
62 | 171 |
} |
63 | 172 |
|
64 |
- eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
173 |
+ eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
65 | 174 |
parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) |
66 | 175 |
|
67 |
- eScoEset <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(expr), |
|
68 |
- experimentData=experimentData(expr), annotation="") |
|
176 |
+ rval <- SummarizedExperiment(assays=SimpleList(es=eSco), |
|
177 |
+ colData=colData(se), |
|
178 |
+ metadata=metadata(se)) |
|
179 |
+ metadata(rval)$annotation <- NULL |
|
69 | 180 |
|
70 |
- rval <- eScoEset |
|
181 |
+ rval |
|
182 |
+}) |
|
183 |
+ |
|
184 |
+setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"), |
|
185 |
+ function(expr, gset.idx.list, annotation, |
|
186 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
187 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
188 |
+ abs.ranking=FALSE, |
|
189 |
+ min.sz=1, |
|
190 |
+ max.sz=Inf, |
|
191 |
+ parallel.sz=1L, |
|
192 |
+ mx.diff=TRUE, |
|
193 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
194 |
+ ssgsea.norm=TRUE, |
|
195 |
+ verbose=TRUE, |
|
196 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
197 |
+{ |
|
198 |
+ method <- match.arg(method) |
|
199 |
+ kcdf <- match.arg(kcdf) |
|
200 |
+ |
|
201 |
+ eset <- expr |
|
202 |
+ expr <- exprs(eset) |
|
203 |
+ ## filter genes according to verious criteria, |
|
204 |
+ ## e.g., constant expression |
|
205 |
+ expr <- .filterFeatures(expr, method) |
|
206 |
+ |
|
207 |
+ if (nrow(expr) < 2) |
|
208 |
+ stop("Less than two genes in the input ExpressionSet object\n") |
|
209 |
+ |
|
210 |
+ ## map to the actual features for which expression data is available |
|
211 |
+ mapped.gset.idx.list <- lapply(gset.idx.list, |
|
212 |
+ function(x, y) na.omit(match(x, y)), |
|
213 |
+ featureNames(eset)) |
|
214 |
+ |
|
215 |
+ if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
216 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
217 |
+ |
|
218 |
+ ## remove gene sets from the analysis for which no features are available |
|
219 |
+ ## and meet the minimum and maximum gene-set size specified by the user |
|
220 |
+ mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
221 |
+ min.sz=max(1, min.sz), |
|
222 |
+ max.sz=max.sz) |
|
223 |
+ |
|
224 |
+ if (!missing(kcdf)) { |
|
225 |
+ if (kcdf == "Gaussian") { |
|
226 |
+ rnaseq <- FALSE |
|
227 |
+ kernel <- TRUE |
|
228 |
+ } else if (kcdf == "Poisson") { |
|
229 |
+ rnaseq <- TRUE |
|
230 |
+ kernel <- TRUE |
|
231 |
+ } else |
|
232 |
+ kernel <- FALSE |
|
233 |
+ } |
|
234 |
+ |
|
235 |
+ eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
236 |
+ parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) |
|
237 |
+ |
|
238 |
+ rval <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(eset), |
|
239 |
+ experimentData=experimentData(eset), annotation="") |
|
71 | 240 |
|
72 | 241 |
rval |
73 | 242 |
}) |
... | ... |
@@ -89,34 +258,43 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
89 | 258 |
method <- match.arg(method) |
90 | 259 |
kcdf <- match.arg(kcdf) |
91 | 260 |
|
92 |
- ## filter out genes with constant expression values |
|
93 |
- sdGenes <- esApply(expr, 1, sd) |
|
94 |
- if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
95 |
- warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
96 |
- " genes with constant expression values throuhgout the samples.") |
|
97 |
- if (method != "ssgsea") { |
|
98 |
- warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
99 |
- expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
100 |
- } |
|
101 |
- } |
|
261 |
+ eset <- expr |
|
262 |
+ expr <- exprs(eset) |
|
263 |
+ ## filter genes according to verious criteria, |
|
264 |
+ ## e.g., constant expression |
|
265 |
+ expr <- .filterFeatures(expr, method) |
|
102 | 266 |
|
103 | 267 |
if (nrow(expr) < 2) |
104 | 268 |
stop("Less than two genes in the input ExpressionSet object\n") |
105 | 269 |
|
106 |
- if (verbose) |
|
107 |
- cat("Mapping identifiers between gene sets and feature names\n") |
|
270 |
+ annotpkg <- Biobase::annotation(eset) |
|
271 |
+ if (length(annotpkg) > 0 && annotpkg != "") { |
|
272 |
+ if (!annotpkg %in% installed.packages()) |
|
273 |
+ stop(sprintf("Please install the nnotation package %s", annotpkg)) |
|
274 |
+ |
|
275 |
+ if (verbose) |
|
276 |
+ cat("Mapping identifiers between gene sets and feature names\n") |
|
277 |
+ |
|
278 |
+ ## map gene identifiers of the gene sets to the features in the chip |
|
279 |
+ ## Biobase::annotation() is necessary to disambiguate from the |
|
280 |
+ ## 'annotation' argument |
|
281 |
+ mapped.gset.idx.list <- mapIdentifiers(gset.idx.list, |
|
282 |
+ AnnoOrEntrezIdentifier(annotpkg)) |
|
283 |
+ mapped.gset.idx.list <- geneIds(mapped.gset.idx.list) |
|
284 |
+ } else { |
|
285 |
+ mapped.gset.idx.list <- gset.idx.list |
|
286 |
+ if (verbose) { |
|
287 |
+ cat("No annotation package name available in the input 'ExpressionSet' object 'expr'.", |
|
288 |
+ "Attempting to directly match identifiers in 'expr' to gene sets.", sep="\n") |
|
289 |
+ } |
|
290 |
+ } |
|
108 | 291 |
|
109 |
- ## map gene identifiers of the gene sets to the features in the chip |
|
110 |
- ## Biobase::annotation() is necessary to disambiguate from the |
|
111 |
- ## 'annotation' argument |
|
112 |
- mapped.gset.idx.list <- mapIdentifiers(gset.idx.list, |
|
113 |
- AnnoOrEntrezIdentifier(Biobase::annotation(expr))) |
|
114 |
- |
|
115 | 292 |
## map to the actual features for which expression data is available |
116 |
- tmp <- lapply(geneIds(mapped.gset.idx.list), |
|
117 |
- function(x, y) na.omit(match(x, y)), |
|
118 |
- featureNames(expr)) |
|
293 |
+ tmp <- lapply(mapped.gset.idx.list, |
|
294 |
+ function(x, y) na.omit(match(x, y)), |
|
295 |
+ featureNames(eset)) |
|
119 | 296 |
names(tmp) <- names(mapped.gset.idx.list) |
297 |
+ |
|
120 | 298 |
## remove gene sets from the analysis for which no features are available |
121 | 299 |
## and meet the minimum and maximum gene-set size specified by the user |
122 | 300 |
mapped.gset.idx.list <- filterGeneSets(tmp, |
... | ... |
@@ -134,13 +312,11 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti |
134 | 312 |
kernel <- FALSE |
135 | 313 |
} |
136 | 314 |
|
137 |
- eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
315 |
+ eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
138 | 316 |
parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) |
139 | 317 |
|
140 |
- eScoEset <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(expr), |
|
141 |
- experimentData=experimentData(expr), annotation="") |
|
142 |
- |
|
143 |
- rval <- eScoEset |
|
318 |
+ rval <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(eset), |
|
319 |
+ experimentData=experimentData(eset), annotation="") |
|
144 | 320 |
|
145 | 321 |
rval |
146 | 322 |
}) |
... | ... |
@@ -162,16 +338,9 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"), |
162 | 338 |
method <- match.arg(method) |
163 | 339 |
kcdf <- match.arg(kcdf) |
164 | 340 |
|
165 |
- ## filter out genes with constant expression values |
|
166 |
- sdGenes <- apply(expr, 1, sd) |
|
167 |
- if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
168 |
- warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
169 |
- " genes with constant expression values throuhgout the samples.") |
|
170 |
- if (method != "ssgsea") { |
|
171 |
- warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
172 |
- expr <- expr[sdGenes > 0 & !is.na(sdGenes), , drop=FALSE] |
|
173 |
- } |
|
174 |
- } |
|
341 |
+ ## filter genes according to verious criteria, |
|
342 |
+ ## e.g., constant expression |
|
343 |
+ expr <- .filterFeatures(expr, method) |
|
175 | 344 |
|
176 | 345 |
if (nrow(expr) < 2) |
177 | 346 |
stop("Less than two genes in the input expression data matrix\n") |
... | ... |
@@ -236,16 +405,9 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list"), |
236 | 405 |
method <- match.arg(method) |
237 | 406 |
kcdf <- match.arg(kcdf) |
238 | 407 |
|
239 |
- ## filter out genes with constant expression values |
|
240 |
- sdGenes <- apply(expr, 1, sd) |
|
241 |
- if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
242 |
- warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
243 |
- " genes with constant expression values throuhgout the samples.") |
|
244 |
- if (method != "ssgsea") { |
|
245 |
- warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
246 |
- expr <- expr[sdGenes > 0 & !is.na(sdGenes), , drop=FALSE] |
|
247 |
- } |
|
248 |
- } |
|
408 |
+ ## filter genes according to verious criteria, |
|
409 |
+ ## e.g., constant expression |
|
410 |
+ expr <- .filterFeatures(expr, method) |
|
249 | 411 |
|
250 | 412 |
if (nrow(expr) < 2) |
251 | 413 |
stop("Less than two genes in the input expression data matrix\n") |
252 | 414 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,15 @@ |
1 |
+.filterFeatures <- function(expr, method) { |
|
2 |
+ |
|
3 |
+ ## filter out genes with constant expression values |
|
4 |
+ sdGenes <- apply(expr, 1, sd) |
|
5 |
+ if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
6 |
+ warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
7 |
+ " genes with constant expression values throuhgout the samples.") |
|
8 |
+ if (method != "ssgsea") { |
|
9 |
+ warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
10 |
+ expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
11 |
+ } |
|
12 |
+ } |
|
13 |
+ |
|
14 |
+ expr |
|
15 |
+} |
... | ... |
@@ -1,5 +1,7 @@ |
1 | 1 |
\name{gsva} |
2 | 2 |
\alias{gsva} |
3 |
+\alias{gsva,SummarizedExperiment,list-method} |
|
4 |
+\alias{gsva,SummarizedExperiment,GeneSetCollection-method} |
|
3 | 5 |
\alias{gsva,ExpressionSet,list-method} |
4 | 6 |
\alias{gsva,ExpressionSet,GeneSetCollection-method} |
5 | 7 |
\alias{gsva,matrix,GeneSetCollection-method} |
... | ... |
@@ -14,6 +16,30 @@ Gene Set Variation Analysis |
14 | 16 |
Estimates GSVA enrichment scores. |
15 | 17 |
} |
16 | 18 |
\usage{ |
19 |
+\S4method{gsva}{SummarizedExperiment,GeneSetCollection}(expr, gset.idx.list, annotation, |
|
20 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
21 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
22 |
+ abs.ranking=FALSE, |
|
23 |
+ min.sz=1, |
|
24 |
+ max.sz=Inf, |
|
25 |
+ parallel.sz=1L, |
|
26 |
+ mx.diff=TRUE, |
|
27 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
28 |
+ ssgsea.norm=TRUE, |
|
29 |
+ verbose=TRUE, |
|
30 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
31 |
+\S4method{gsva}{SummarizedExperiment,list}(expr, gset.idx.list, annotation, |
|
32 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
33 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
34 |
+ abs.ranking=FALSE, |
|
35 |
+ min.sz=1, |
|
36 |
+ max.sz=Inf, |
|
37 |
+ parallel.sz=1L, |
|
38 |
+ mx.diff=TRUE, |
|
39 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
40 |
+ ssgsea.norm=TRUE, |
|
41 |
+ verbose=TRUE, |
|
42 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
17 | 43 |
\S4method{gsva}{ExpressionSet,list}(expr, gset.idx.list, annotation, |
18 | 44 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
19 | 45 |
kcdf=c("Gaussian", "Poisson", "none"), |
... | ... |
@@ -64,18 +90,26 @@ Estimates GSVA enrichment scores. |
64 | 90 |
BPPARAM=SerialParam(progressbar=verbose)) |
65 | 91 |
} |
66 | 92 |
\arguments{ |
67 |
- \item{expr}{Gene expression data which can be given either as an \code{ExpressionSet} |
|
68 |
- object or as a matrix of expression values where rows correspond |
|
69 |
- to genes and columns correspond to samples.} |
|
93 |
+ \item{expr}{Gene expression data which can be given either as a |
|
94 |
+ \code{SummarizedExperiment} or \code{ExpressionSet} object, |
|
95 |
+ or as a matrix of expression values where rows correspond to genes |
|
96 |
+ and columns correspond to samples.} |
|
70 | 97 |
\item{gset.idx.list}{Gene sets provided either as a \code{list} object or as a |
71 | 98 |
\code{GeneSetCollection} object.} |
72 |
- \item{annotation}{In the case of calling \code{gsva()} with expression data in a \code{matrix} |
|
73 |
- and gene sets as a \code{GeneSetCollection} object, the \code{annotation} argument |
|
74 |
- can be used to supply the name of the Bioconductor package that contains |
|
75 |
- annotations for the class of gene identifiers occurring in the row names of |
|
76 |
- the expression data matrix. By default \code{gsva()} will try to match the |
|
77 |
- identifiers in \code{expr} to the identifiers in \code{gset.idx.list} just as |
|
78 |
- they are, unless the \code{annotation} argument is set.} |
|
99 |
+ \item{annotation}{In the case of calling \code{gsva()} on a |
|
100 |
+ \code{SummarizedExperiment} object, the \code{annotation} |
|
101 |
+ argument can be used to select the assay containing the |
|
102 |
+ molecular data we want as input to the \code{gsva()} |
|
103 |
+ function, otherwise the first assay is selected. |
|
104 |
+ In the case of calling \code{gsva()} with expression data in |
|
105 |
+ a \code{matrix} and gene sets as a \code{GeneSetCollection} |
|
106 |
+ object, the \code{annotation} argument can be used to supply |
|
107 |
+ the name of the Bioconductor package that contains |
|
108 |
+ annotations for the class of gene identifiers occurring in |
|
109 |
+ the row names of the expression data matrix. |
|
110 |
+ In the case of calling \code{gsva()} on a |
|
111 |
+ \code{ExpressionSet} object, the \code{annotation} argument |
|
112 |
+ is ignored. See details information below.} |
|
79 | 113 |
\item{method}{Method to employ in the estimation of gene-set enrichment scores per sample. By default |
80 | 114 |
this is set to \code{gsva} (\enc{H�nzelmann}{Hanzelmann} et al, 2013) and other options are |
81 | 115 |
\code{ssgsea} (Barbie et al, 2009), \code{zscore} (Lee et al, 2008) or \code{plage} |
... | ... |
@@ -126,11 +160,32 @@ gene expression matrix into a g-geneset by n-sample pathway enrichment matrix. |
126 | 160 |
This facilitates many forms of statistical analysis in the 'space' of pathways |
127 | 161 |
rather than genes, providing a higher level of interpretability. |
128 | 162 |
|
129 |
-The \code{gsva()} function first maps the identifiers in the gene sets to the |
|
130 |
-identifiers in the input expression data leading to a filtered collection of |
|
131 |
-gene sets. This collection can be further filtered to require a minimun and/or |
|
132 |
-maximum size of the gene sets for which we want to calculate GSVA enrichment |
|
133 |
-scores, by using the arguments \code{min.sz} and \code{max.sz}. |
|
163 |
+By default, \code{gsva()} will try to match the identifiers in \code{expr} to |
|
164 |
+the identifiers in \code{gset.idx.list} just as they are, unless the \code{annotation} argument is set. |
|
165 |
+ |
|
166 |
+The \code{gsva()} function first maps the identifiers in the gene sets in |
|
167 |
+\code{gset.idx.list} to the identifiers in the input expression data \code{expr}. |
|
168 |
+When the input gene sets in \code{gset.idx.list} is provided as a \code{list} |
|
169 |
+object, \code{gsva()} will try to match the identifiers in \code{expr} directly |
|
170 |
+to the identifiers in \code{gset.idx.list} just as they are. Because unmatching |
|
171 |
+identifiers will be discarded in both, \code{expr} and \code{gset.idx.list}, |
|
172 |
+\code{gsva()} may prompt an error if no identifiers can be matched as in the case |
|
173 |
+of different types of identifiers (e.g., gene symbols vs Entrez identitifers). |
|
174 |
+ |
|
175 |
+However, then the input gene sets in \code{gset.idx.list} is provided as a |
|
176 |
+\code{GeneSetCollection} object, \code{gsva()} will try to automatically convert |
|
177 |
+those identifiers to the type of identifier in the input expression data \code{expr}. |
|
178 |
+Such an automatic conversion, however, will only occur in three scenarios: 1. when |
|
179 |
+\code{expr} is an \code{ExpressionSet} object with an appropriately set |
|
180 |
+\code{annotation} slot; 2. when \code{expr} is a \code{SummarizedExperiment} object |
|
181 |
+with an appropriately set \code{annotation} slot in the metadata of \code{expr}; |
|
182 |
+3. when \code{expr} is a \code{matrix} and the \code{annotation} argument of the |
|
183 |
+\code{gsva()} function is set to the name of the annotation package that provides |
|
184 |
+the relationships between the type of identifiers in \code{expr} and \code{gset.idx.list}. |
|
185 |
+ |
|
186 |
+The collection of gene sets resulting from the previous identifier matching, |
|
187 |
+can be further filtered to require a minimun and/or maximum size by using the |
|
188 |
+arguments \code{min.sz} and \code{max.sz}. |
|
134 | 189 |
} |
135 | 190 |
\value{ |
136 | 191 |
A gene-set by sample matrix of GSVA enrichment scores. |