... | ... |
@@ -8,7 +8,8 @@ Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.gui |
8 | 8 |
Depends: R (>= 3.5.0) |
9 | 9 |
Imports: methods, stats, utils, graphics, BiocGenerics, S4Vectors, |
10 | 10 |
Biobase, SummarizedExperiment, GSEABase, Matrix, |
11 |
- parallel, BiocParallel, fastmatch, SingleCellExperiment |
|
11 |
+ parallel, BiocParallel, fastmatch, SingleCellExperiment, |
|
12 |
+ BiocSingular |
|
12 | 13 |
Suggests: RUnit, limma, RColorBrewer, genefilter, edgeR, GSVAdata, |
13 | 14 |
shiny, shinythemes, ggplot2, data.table, plotly |
14 | 15 |
Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner. |
... | ... |
@@ -41,7 +41,12 @@ setMethod("gsva", signature(expr="SingleCellExperiment", gset.idx.list="list"), |
41 | 41 |
|
42 | 42 |
## filter genes according to verious criteria, |
43 | 43 |
## e.g., constant expression |
44 |
- expr <- .filterFeatures(expr, method) |
|
44 |
+ if(is(expr, "dgCMatrix")){ |
|
45 |
+ expr <- .filterFeaturesSparse(expr, method) |
|
46 |
+ } else { |
|
47 |
+ expr <- .filterFeatures(expr, method) |
|
48 |
+ } |
|
49 |
+ |
|
45 | 50 |
|
46 | 51 |
if (nrow(expr) < 2) |
47 | 52 |
stop("Less than two genes in the input ExpressionSet object\n") |
... | ... |
@@ -918,29 +923,48 @@ zscore <- function(X, geneSets, parallel.sz, verbose=TRUE, |
918 | 923 |
} |
919 | 924 |
|
920 | 925 |
rightsingularsvdvectorgset <- function(gSetIdx, Z) { |
926 |
+ if(is(Z, "dgCMatrix")){ |
|
927 |
+ s <- BiocSingular::runExactSVD(Z[gSetIdx, ]) |
|
928 |
+ } else { |
|
921 | 929 |
s <- svd(Z[gSetIdx, ]) |
930 |
+ } |
|
922 | 931 |
s$v[, 1] |
923 | 932 |
} |
924 | 933 |
|
925 | 934 |
plage <- function(X, geneSets, parallel.sz, verbose=TRUE, |
926 | 935 |
BPPARAM=SerialParam(progressbar=verbose)) { |
927 |
- |
|
928 |
- p <- nrow(X) |
|
929 |
- n <- ncol(X) |
|
930 |
- |
|
931 |
- Z <- t(scale(t(X))) |
|
932 |
- |
|
933 |
- es <- bplapply(geneSets, rightsingularsvdvectorgset, Z, |
|
934 |
- BPPARAM=BPPARAM) |
|
935 |
- |
|
936 |
- es <- do.call(rbind, es) |
|
937 |
- |
|
938 |
- if (length(geneSets) == 1) |
|
939 |
- es <- matrix(es, nrow=1) |
|
940 |
- |
|
941 |
- rownames(es) <- names(geneSets) |
|
942 |
- colnames(es) <- colnames(X) |
|
943 |
- |
|
936 |
+ if(is(X, "dgCMatrix")){ |
|
937 |
+ message("Please bear in mind that this method first scale the values of the gene expression data. |
|
938 |
+ In order to take advantage of the sparse Matrix type, the scaling will only be applied |
|
939 |
+ to the non-zero values of the data.") |
|
940 |
+ |
|
941 |
+ Z <- Matrix::t(X) |
|
942 |
+ Z <- .dgCapply(Z, scale, 2) |
|
943 |
+ Z <- Matrix::t(Z) |
|
944 |
+ |
|
945 |
+ es <- bplapply(geneSets, rightsingularsvdvectorgset, Z, |
|
946 |
+ BPPARAM=BPPARAM) |
|
947 |
+ |
|
948 |
+ es <- do.call(rbind, es) |
|
949 |
+ |
|
950 |
+ es <- as(es, "dgCMatrix") |
|
951 |
+ |
|
952 |
+ } else { |
|
953 |
+ |
|
954 |
+ Z <- t(scale(t(X))) |
|
955 |
+ |
|
956 |
+ es <- bplapply(geneSets, rightsingularsvdvectorgset, Z, |
|
957 |
+ BPPARAM=BPPARAM) |
|
958 |
+ |
|
959 |
+ es <- do.call(rbind, es) |
|
960 |
+ |
|
961 |
+ if (length(geneSets) == 1) |
|
962 |
+ es <- matrix(es, nrow=1) |
|
963 |
+ |
|
964 |
+ rownames(es) <- names(geneSets) |
|
965 |
+ colnames(es) <- colnames(X) |
|
966 |
+ } |
|
967 |
+ |
|
944 | 968 |
es |
945 | 969 |
} |
946 | 970 |
|
... | ... |
@@ -35,6 +35,12 @@ |
35 | 35 |
result |
36 | 36 |
} |
37 | 37 |
|
38 |
+.dgCapply<-function(m,f, MARGIN){ |
|
39 |
+ x <- lapply(.sparseToList(m, MARGIN), f) |
|
40 |
+ m@x <- unlist(x, use.names=FALSE) |
|
41 |
+ m |
|
42 |
+} |
|
43 |
+ |
|
38 | 44 |
## filter out genes which non-zero values have |
39 | 45 |
## constant expression values |
40 | 46 |
.filterFeaturesSparse <- function(expr, method) { |