... | ... |
@@ -8,7 +8,7 @@ Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.gui |
8 | 8 |
Depends: R (>= 3.5.0) |
9 | 9 |
Imports: methods, stats, utils, graphics, BiocGenerics, S4Vectors, |
10 | 10 |
Biobase, SummarizedExperiment, GSEABase, |
11 |
- parallel, BiocParallel, fastmatch |
|
11 |
+ parallel, BiocParallel, fastmatch, SingleCellExperiment |
|
12 | 12 |
Suggests: RUnit, limma, RColorBrewer, genefilter, edgeR, GSVAdata, |
13 | 13 |
shiny, shinythemes, ggplot2, data.table, plotly |
14 | 14 |
Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner. |
... | ... |
@@ -6,6 +6,7 @@ import(BiocGenerics) |
6 | 6 |
importClassesFrom(Biobase, ExpressionSet) |
7 | 7 |
importClassesFrom(SummarizedExperiment, SummarizedExperiment) |
8 | 8 |
importClassesFrom(GSEABase, GeneSetCollection) |
9 |
+importClassesFrom(SingleCellExperiment, SingleCellExperiment) |
|
9 | 10 |
|
10 | 11 |
importMethodsFrom(Biobase, featureNames, |
11 | 12 |
phenoData, |
... | ... |
@@ -45,6 +46,7 @@ importFrom(BiocParallel, SerialParam, |
45 | 46 |
MulticoreParam, |
46 | 47 |
multicoreWorkers, |
47 | 48 |
bpnworkers) |
49 |
+importFrom(SingleCellExperiment, SingleCellExperiment) |
|
48 | 50 |
|
49 | 51 |
exportMethods(gsva, |
50 | 52 |
filterGeneSets, |
... | ... |
@@ -5,6 +5,83 @@ |
5 | 5 |
|
6 | 6 |
setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva")) |
7 | 7 |
|
8 |
+setMethod("gsva", signature(expr="SingleCellExperiment", gset.idx.list="list"), |
|
9 |
+ function(expr, gset.idx.list, annotation, |
|
10 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
11 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
12 |
+ abs.ranking=FALSE, |
|
13 |
+ min.sz=1, |
|
14 |
+ max.sz=Inf, |
|
15 |
+ parallel.sz=1L, |
|
16 |
+ mx.diff=TRUE, |
|
17 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
18 |
+ ssgsea.norm=TRUE, |
|
19 |
+ verbose=TRUE, |
|
20 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
21 |
+{ |
|
22 |
+ method <- match.arg(method) |
|
23 |
+ kcdf <- match.arg(kcdf) |
|
24 |
+ |
|
25 |
+ if (length(assays(expr)) == 0L) |
|
26 |
+ stop("The input SummarizedExperiment object has no assay data.") |
|
27 |
+ |
|
28 |
+ se <- expr |
|
29 |
+ |
|
30 |
+ if (missing(annotation)) |
|
31 |
+ annotation <- names(assays(se))[1] |
|
32 |
+ else { |
|
33 |
+ if (!is.character(annotation)) |
|
34 |
+ stop("The 'annotation' argument must contain a character string.") |
|
35 |
+ annotation <- annotation[1] |
|
36 |
+ |
|
37 |
+ if (!annotation %in% names(assays(se))) |
|
38 |
+ stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation)) |
|
39 |
+ } |
|
40 |
+ expr <- assays(se)[[annotation]] |
|
41 |
+ |
|
42 |
+ ## filter genes according to verious criteria, |
|
43 |
+ ## e.g., constant expression |
|
44 |
+ expr <- .filterFeatures(expr, method) |
|
45 |
+ |
|
46 |
+ if (nrow(expr) < 2) |
|
47 |
+ stop("Less than two genes in the input ExpressionSet object\n") |
|
48 |
+ |
|
49 |
+ ## map to the actual features for which expression data is available |
|
50 |
+ mapped.gset.idx.list <- lapply(gset.idx.list, |
|
51 |
+ function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
52 |
+ rownames(expr)) |
|
53 |
+ |
|
54 |
+ if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
55 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
56 |
+ |
|
57 |
+ ## remove gene sets from the analysis for which no features are available |
|
58 |
+ ## and meet the minimum and maximum gene-set size specified by the user |
|
59 |
+ mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
60 |
+ min.sz=max(1, min.sz), |
|
61 |
+ max.sz=max.sz) |
|
62 |
+ |
|
63 |
+ if (!missing(kcdf)) { |
|
64 |
+ if (kcdf == "Gaussian") { |
|
65 |
+ rnaseq <- FALSE |
|
66 |
+ kernel <- TRUE |
|
67 |
+ } else if (kcdf == "Poisson") { |
|
68 |
+ rnaseq <- TRUE |
|
69 |
+ kernel <- TRUE |
|
70 |
+ } else |
|
71 |
+ kernel <- FALSE |
|
72 |
+ } |
|
73 |
+ |
|
74 |
+ eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
75 |
+ parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) |
|
76 |
+ |
|
77 |
+ rval <- SingleCellExperiment(assays=SimpleList(es=eSco), |
|
78 |
+ colData=colData(se), |
|
79 |
+ metadata=metadata(se)) |
|
80 |
+ metadata(rval)$annotation <- NULL |
|
81 |
+ |
|
82 |
+ rval |
|
83 |
+ }) |
|
84 |
+ |
|
8 | 85 |
setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="GeneSetCollection"), |
9 | 86 |
function(expr, gset.idx.list, annotation, |
10 | 87 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
... | ... |
@@ -1,5 +1,6 @@ |
1 | 1 |
\name{gsva} |
2 | 2 |
\alias{gsva} |
3 |
+\alias{gsva,SingleCellExperiment,list-method} |
|
3 | 4 |
\alias{gsva,SummarizedExperiment,list-method} |
4 | 5 |
\alias{gsva,SummarizedExperiment,GeneSetCollection-method} |
5 | 6 |
\alias{gsva,ExpressionSet,list-method} |
... | ... |
@@ -16,6 +17,18 @@ Gene Set Variation Analysis |
16 | 17 |
Estimates GSVA enrichment scores. |
17 | 18 |
} |
18 | 19 |
\usage{ |
20 |
+\S4method{gsva}{SingleCellExperiment,list}(expr, gset.idx.list, annotation, |
|
21 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
22 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
23 |
+ abs.ranking=FALSE, |
|
24 |
+ min.sz=1, |
|
25 |
+ max.sz=Inf, |
|
26 |
+ parallel.sz=1L, |
|
27 |
+ mx.diff=TRUE, |
|
28 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
29 |
+ ssgsea.norm=TRUE, |
|
30 |
+ verbose=TRUE, |
|
31 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
19 | 32 |
\S4method{gsva}{SummarizedExperiment,GeneSetCollection}(expr, gset.idx.list, annotation, |
20 | 33 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
21 | 34 |
kcdf=c("Gaussian", "Poisson", "none"), |
... | ... |
@@ -91,13 +104,13 @@ Estimates GSVA enrichment scores. |
91 | 104 |
} |
92 | 105 |
\arguments{ |
93 | 106 |
\item{expr}{Gene expression data which can be given either as a |
94 |
- \code{SummarizedExperiment} or \code{ExpressionSet} object, |
|
95 |
- or as a matrix of expression values where rows correspond to genes |
|
96 |
- and columns correspond to samples.} |
|
107 |
+ \code{SingleCellExperiment}, \code{SummarizedExperiment} or |
|
108 |
+ \code{ExpressionSet} object, or as a matrix of expression |
|
109 |
+ values where rows correspond to genes and columns correspond to samples.} |
|
97 | 110 |
\item{gset.idx.list}{Gene sets provided either as a \code{list} object or as a |
98 | 111 |
\code{GeneSetCollection} object.} |
99 |
- \item{annotation}{In the case of calling \code{gsva()} on a |
|
100 |
- \code{SummarizedExperiment} object, the \code{annotation} |
|
112 |
+ \item{annotation}{In the case of calling \code{gsva()} on a \code{SingleCellExperiment} |
|
113 |
+ or a \code{SummarizedExperiment} object, the \code{annotation} |
|
101 | 114 |
argument can be used to select the assay containing the |
102 | 115 |
molecular data we want as input to the \code{gsva()} |
103 | 116 |
function, otherwise the first assay is selected. |
... | ... |
@@ -111,7 +124,7 @@ Estimates GSVA enrichment scores. |
111 | 124 |
\code{ExpressionSet} object, the \code{annotation} argument |
112 | 125 |
is ignored. See details information below.} |
113 | 126 |
\item{method}{Method to employ in the estimation of gene-set enrichment scores per sample. By default |
114 |
- this is set to \code{gsva} (\enc{H�nzelmann}{Hanzelmann} et al, 2013) and other options are |
|
127 |
+ this is set to \code{gsva} (\enc{H?nzelmann}{Hanzelmann} et al, 2013) and other options are |
|
115 | 128 |
\code{ssgsea} (Barbie et al, 2009), \code{zscore} (Lee et al, 2008) or \code{plage} |
116 | 129 |
(Tomfohr et al, 2005). The latter two standardize first expression profiles into z-scores |
117 | 130 |
over the samples and, in the case of \code{zscore}, it combines them together as their sum |
... | ... |
@@ -142,7 +155,7 @@ Estimates GSVA enrichment scores. |
142 | 155 |
is calculated as the magnitude difference between the largest positive |
143 | 156 |
and negative random walk deviations.} |
144 | 157 |
\item{tau}{Exponent defining the weight of the tail in the random walk performed by both the \code{gsva} |
145 |
- (\enc{H�nzelmann}{Hanzelmann} et al., 2013) and the \code{ssgsea} (Barbie et al., 2009) methods. By default, |
|
158 |
+ (\enc{H?nzelmann}{Hanzelmann} et al., 2013) and the \code{ssgsea} (Barbie et al., 2009) methods. By default, |
|
146 | 159 |
this \code{tau=1} when \code{method="gsva"} and \code{tau=0.25} when \code{method="ssgsea"} just |
147 | 160 |
as specified by Barbie et al. (2009) where this parameter is called \code{alpha}.} |
148 | 161 |
\item{ssgsea.norm}{Logical, set to \code{TRUE} (default) with \code{method="ssgsea"} runs the SSGSEA method |
... | ... |
@@ -194,7 +207,7 @@ A gene-set by sample matrix of GSVA enrichment scores. |
194 | 207 |
Barbie, D.A. et al. Systematic RNA interference reveals that oncogenic KRAS-driven |
195 | 208 |
cancers require TBK1. \emph{Nature}, 462(5):108-112, 2009. |
196 | 209 |
|
197 |
-\enc{H�nzelmann}{Hanzelmann}, S., Castelo, R. and Guinney, J. |
|
210 |
+\enc{H?nzelmann}{Hanzelmann}, S., Castelo, R. and Guinney, J. |
|
198 | 211 |
GSVA: Gene set variation analysis for microarray and RNA-Seq data. |
199 | 212 |
\emph{BMC Bioinformatics}, 14:7, 2013. |
200 | 213 |
|