... | ... |
@@ -7,7 +7,7 @@ Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.gui |
7 | 7 |
person("Pablo Sebastian", "Rodriguez", role="ctb", email="pablosebastian.rodriguez@upf.edu")) |
8 | 8 |
Depends: R (>= 3.5.0) |
9 | 9 |
Imports: methods, stats, utils, graphics, BiocGenerics, S4Vectors, |
10 |
- Biobase, SummarizedExperiment, GSEABase, |
|
10 |
+ Biobase, SummarizedExperiment, GSEABase, Matrix, |
|
11 | 11 |
parallel, BiocParallel, fastmatch, SingleCellExperiment |
12 | 12 |
Suggests: RUnit, limma, RColorBrewer, genefilter, edgeR, GSVAdata, |
13 | 13 |
shiny, shinythemes, ggplot2, data.table, plotly |
... | ... |
@@ -7,6 +7,7 @@ importClassesFrom(Biobase, ExpressionSet) |
7 | 7 |
importClassesFrom(SummarizedExperiment, SummarizedExperiment) |
8 | 8 |
importClassesFrom(GSEABase, GeneSetCollection) |
9 | 9 |
importClassesFrom(SingleCellExperiment, SingleCellExperiment) |
10 |
+importClassesFrom(Matrix, dgCMatrix) |
|
10 | 11 |
|
11 | 12 |
importMethodsFrom(Biobase, featureNames, |
12 | 13 |
phenoData, |
... | ... |
@@ -81,6 +81,61 @@ setMethod("gsva", signature(expr="SingleCellExperiment", gset.idx.list="list"), |
81 | 81 |
|
82 | 82 |
rval |
83 | 83 |
}) |
84 |
+ |
|
85 |
+setMethod("gsva", signature(expr="dgCMatrix", gset.idx.list="list"), |
|
86 |
+ function(expr, gset.idx.list, annotation, |
|
87 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
88 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
89 |
+ abs.ranking=FALSE, |
|
90 |
+ min.sz=1, |
|
91 |
+ max.sz=Inf, |
|
92 |
+ parallel.sz=1L, |
|
93 |
+ mx.diff=TRUE, |
|
94 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
95 |
+ ssgsea.norm=TRUE, |
|
96 |
+ verbose=TRUE, |
|
97 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
98 |
+{ |
|
99 |
+ method <- match.arg(method) |
|
100 |
+ kcdf <- match.arg(kcdf) |
|
101 |
+ |
|
102 |
+ ## filter genes according to verious criteria, |
|
103 |
+ ## e.g., constant expression |
|
104 |
+ expr <- .filterFeaturesSparse(expr, method) |
|
105 |
+ |
|
106 |
+ if (nrow(expr) < 2) |
|
107 |
+ stop("Less than two genes in the input assay object\n") |
|
108 |
+ |
|
109 |
+ ## map to the actual features for which expression data is available |
|
110 |
+ mapped.gset.idx.list <- lapply(gset.idx.list, |
|
111 |
+ function(x, y) na.omit(fastmatch::fmatch(x, y)), |
|
112 |
+ rownames(expr)) |
|
113 |
+ |
|
114 |
+ if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0) |
|
115 |
+ stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
116 |
+ |
|
117 |
+ ## remove gene sets from the analysis for which no features are available |
|
118 |
+ ## and meet the minimum and maximum gene-set size specified by the user |
|
119 |
+ mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list, |
|
120 |
+ min.sz=max(1, min.sz), |
|
121 |
+ max.sz=max.sz) |
|
122 |
+ |
|
123 |
+ if (!missing(kcdf)) { |
|
124 |
+ if (kcdf == "Gaussian") { |
|
125 |
+ rnaseq <- FALSE |
|
126 |
+ kernel <- TRUE |
|
127 |
+ } else if (kcdf == "Poisson") { |
|
128 |
+ rnaseq <- TRUE |
|
129 |
+ kernel <- TRUE |
|
130 |
+ } else |
|
131 |
+ kernel <- FALSE |
|
132 |
+ } |
|
133 |
+ |
|
134 |
+ rval <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, |
|
135 |
+ parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) |
|
136 |
+ |
|
137 |
+ rval |
|
138 |
+}) |
|
84 | 139 |
|
85 | 140 |
setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="GeneSetCollection"), |
86 | 141 |
function(expr, gset.idx.list, annotation, |
... | ... |
@@ -13,3 +13,42 @@ |
13 | 13 |
|
14 | 14 |
expr |
15 | 15 |
} |
16 |
+ |
|
17 |
+## transforms a dgCMatrix into a list of its |
|
18 |
+## non-zero values by MARGIN (1 for row, 2 for column) |
|
19 |
+.sparseToList <-function(dgCMat, MARGIN){ |
|
20 |
+ MARGIN <- as.integer(MARGIN) |
|
21 |
+ J <- rep(1:ncol(dgCMat), diff(dgCMat@p)) |
|
22 |
+ I <- dgCMat@i + 1 |
|
23 |
+ x <- dgCMat@x |
|
24 |
+ if (MARGIN == 1L) { |
|
25 |
+ result <- split(x, I) |
|
26 |
+ names(result) <- rownames(dgCMat)[as.numeric(names(result))] |
|
27 |
+ } else if (MARGIN == 2L) { |
|
28 |
+ result <- split(x, J) |
|
29 |
+ names(result) <- colnames(dgCMat)[as.numeric(names(result))] |
|
30 |
+ } |
|
31 |
+ else { |
|
32 |
+ warning("invalid MARGIN; return NULL") |
|
33 |
+ result <- NULL |
|
34 |
+ } |
|
35 |
+ result |
|
36 |
+} |
|
37 |
+ |
|
38 |
+## filter out genes which non-zero values have |
|
39 |
+## constant expression values |
|
40 |
+.filterFeaturesSparse <- function(expr, method) { |
|
41 |
+ |
|
42 |
+ sdGenes <- sapply(.sparseToList(expr, 1), sd) |
|
43 |
+ |
|
44 |
+ if (any(sdGenes == 0) || any(is.na(sdGenes))) { |
|
45 |
+ warning(sum(sdGenes == 0 | is.na(sdGenes)), |
|
46 |
+ " genes with constant expression values throuhgout the samples.") |
|
47 |
+ if (method != "ssgsea") { |
|
48 |
+ warning("Since argument method!=\"ssgsea\", genes with constant expression values are discarded.") |
|
49 |
+ expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] |
|
50 |
+ } |
|
51 |
+ } |
|
52 |
+ |
|
53 |
+ expr |
|
54 |
+} |
|
16 | 55 |
\ No newline at end of file |
... | ... |
@@ -1,6 +1,7 @@ |
1 | 1 |
\name{gsva} |
2 | 2 |
\alias{gsva} |
3 | 3 |
\alias{gsva,SingleCellExperiment,list-method} |
4 |
+\alias{gsva,dgCMatrix,list-method} |
|
4 | 5 |
\alias{gsva,SummarizedExperiment,list-method} |
5 | 6 |
\alias{gsva,SummarizedExperiment,GeneSetCollection-method} |
6 | 7 |
\alias{gsva,ExpressionSet,list-method} |
... | ... |
@@ -29,6 +30,18 @@ Estimates GSVA enrichment scores. |
29 | 30 |
ssgsea.norm=TRUE, |
30 | 31 |
verbose=TRUE, |
31 | 32 |
BPPARAM=SerialParam(progressbar=verbose)) |
33 |
+\S4method{gsva}{dgCMatrix,list}(expr, gset.idx.list, annotation, |
|
34 |
+ method=c("gsva", "ssgsea", "zscore", "plage"), |
|
35 |
+ kcdf=c("Gaussian", "Poisson", "none"), |
|
36 |
+ abs.ranking=FALSE, |
|
37 |
+ min.sz=1, |
|
38 |
+ max.sz=Inf, |
|
39 |
+ parallel.sz=1L, |
|
40 |
+ mx.diff=TRUE, |
|
41 |
+ tau=switch(method, gsva=1, ssgsea=0.25, NA), |
|
42 |
+ ssgsea.norm=TRUE, |
|
43 |
+ verbose=TRUE, |
|
44 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
32 | 45 |
\S4method{gsva}{SummarizedExperiment,GeneSetCollection}(expr, gset.idx.list, annotation, |
33 | 46 |
method=c("gsva", "ssgsea", "zscore", "plage"), |
34 | 47 |
kcdf=c("Gaussian", "Poisson", "none"), |
... | ... |
@@ -106,7 +119,8 @@ Estimates GSVA enrichment scores. |
106 | 119 |
\item{expr}{Gene expression data which can be given either as a |
107 | 120 |
\code{SummarizedExperiment}, \code{SingleCellExperiment} |
108 | 121 |
\code{ExpressionSet} object, or as a matrix of expression |
109 |
- values where rows correspond to genes and columns correspond to samples.} |
|
122 |
+ values where rows correspond to genes and columns correspond to samples. |
|
123 |
+ This matrix can be also in a sparse format, as a \code{dgCMatrix}.} |
|
110 | 124 |
\item{gset.idx.list}{Gene sets provided either as a \code{list} object or as a |
111 | 125 |
\code{GeneSetCollection} object.} |
112 | 126 |
\item{annotation}{In the case of calling \code{gsva()} on a |
... | ... |
@@ -190,10 +204,11 @@ However, then the input gene sets in \code{gset.idx.list} is provided as a |
190 | 204 |
those identifiers to the type of identifier in the input expression data \code{expr}. |
191 | 205 |
Such an automatic conversion, however, will only occur in three scenarios: 1. when |
192 | 206 |
\code{expr} is an \code{ExpressionSet} object with an appropriately set |
193 |
-\code{annotation} slot; 2. when \code{expr} is a \code{SummarizedExperiment} object |
|
194 |
-with an appropriately set \code{annotation} slot in the metadata of \code{expr}; |
|
195 |
-3. when \code{expr} is a \code{matrix} and the \code{annotation} argument of the |
|
196 |
-\code{gsva()} function is set to the name of the annotation package that provides |
|
207 |
+\code{annotation} slot; 2. when \code{expr} is a \code{SummarizedExperiment} or a |
|
208 |
+\code{SingleCellExperiment} object with an appropriately set \code{annotation} slot |
|
209 |
+in the metadata of \code{expr}; 3. when \code{expr} is a \code{matrix} or a |
|
210 |
+\code{dgCMatrix} and the \code{annotation} argument of the \code{gsva()} function |
|
211 |
+is set to the name of the annotation package that provides |
|
197 | 212 |
the relationships between the type of identifiers in \code{expr} and \code{gset.idx.list}. |
198 | 213 |
|
199 | 214 |
The collection of gene sets resulting from the previous identifier matching, |
... | ... |
@@ -201,7 +216,8 @@ can be further filtered to require a minimun and/or maximum size by using the |
201 | 216 |
arguments \code{min.sz} and \code{max.sz}. |
202 | 217 |
} |
203 | 218 |
\value{ |
204 |
-A gene-set by sample matrix of GSVA enrichment scores. |
|
219 |
+A gene-set by sample matrix (of \code{matrix} or \code{dgCMatrix} type, |
|
220 |
+depending on the input) of GSVA enrichment scores. |
|
205 | 221 |
} |
206 | 222 |
\references{ |
207 | 223 |
Barbie, D.A. et al. Systematic RNA interference reveals that oncogenic KRAS-driven |