Browse code

adding dgCMatrix support to method plage

pablo-rodr-bio2 authored on 09/11/2020 11:11:55
Showing 4 changed files

... ...
@@ -8,7 +8,8 @@ Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.gui
8 8
 Depends: R (>= 3.5.0)
9 9
 Imports: methods, stats, utils, graphics, BiocGenerics, S4Vectors,
10 10
          Biobase, SummarizedExperiment, GSEABase, Matrix,
11
-         parallel, BiocParallel, fastmatch, SingleCellExperiment
11
+         parallel, BiocParallel, fastmatch, SingleCellExperiment,
12
+         BiocSingular
12 13
 Suggests: RUnit, limma, RColorBrewer, genefilter, edgeR, GSVAdata,
13 14
           shiny, shinythemes, ggplot2, data.table, plotly
14 15
 Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner.
... ...
@@ -29,6 +29,8 @@ importMethodsFrom(BiocParallel, bpiterate,
29 29
                                 bplapply,
30 30
                                 "bpprogressbar<-")
31 31
 
32
+importMethodsFrom(BiocSingular, runExactSVD)
33
+
32 34
 importFrom(graphics, plot)
33 35
 importFrom(stats, ecdf,
34 36
                   na.omit)
... ...
@@ -41,7 +41,12 @@ setMethod("gsva", signature(expr="SingleCellExperiment", gset.idx.list="list"),
41 41
   
42 42
   ## filter genes according to verious criteria,
43 43
   ## e.g., constant expression
44
-  expr <- .filterFeatures(expr, method)
44
+  if(is(expr, "dgCMatrix")){
45
+    expr <- .filterFeaturesSparse(expr, method)
46
+  } else {
47
+    expr <- .filterFeatures(expr, method)
48
+  }
49
+  
45 50
   
46 51
   if (nrow(expr) < 2)
47 52
     stop("Less than two genes in the input ExpressionSet object\n")
... ...
@@ -918,29 +923,48 @@ zscore <- function(X, geneSets, parallel.sz, verbose=TRUE,
918 923
 }
919 924
 
920 925
 rightsingularsvdvectorgset <- function(gSetIdx, Z) {
926
+  if(is(Z, "dgCMatrix")){
927
+    s <- BiocSingular::runExactSVD(Z[gSetIdx, ])
928
+  } else {
921 929
     s <- svd(Z[gSetIdx, ])
930
+  }
922 931
   s$v[, 1]
923 932
 }
924 933
 
925 934
 plage <- function(X, geneSets, parallel.sz, verbose=TRUE,
926 935
                   BPPARAM=SerialParam(progressbar=verbose)) {
927
-
928
-  p <- nrow(X)
929
-  n <- ncol(X)
930
-
931
-  Z <- t(scale(t(X)))
932
-
933
-  es <- bplapply(geneSets, rightsingularsvdvectorgset, Z,
934
-                 BPPARAM=BPPARAM)
935
-
936
-  es <- do.call(rbind, es)
937
-
938
-  if (length(geneSets) == 1)
939
-    es <- matrix(es, nrow=1)
940
-
941
-  rownames(es) <- names(geneSets)
942
-  colnames(es) <- colnames(X)
943
-
936
+  if(is(X, "dgCMatrix")){
937
+    message("Please bear in mind that this method first scale the values of the gene expression data.
938
+            In order to take advantage of the sparse Matrix type, the scaling will only be applied
939
+            to the non-zero values of the data.")
940
+    
941
+    Z <- Matrix::t(X)
942
+    Z <- .dgCapply(Z, scale, 2)
943
+    Z <- Matrix::t(Z)
944
+    
945
+    es <- bplapply(geneSets, rightsingularsvdvectorgset, Z,
946
+                   BPPARAM=BPPARAM)
947
+    
948
+    es <- do.call(rbind, es)
949
+    
950
+    es <- as(es, "dgCMatrix")
951
+    
952
+  } else {
953
+    
954
+    Z <- t(scale(t(X)))
955
+    
956
+    es <- bplapply(geneSets, rightsingularsvdvectorgset, Z,
957
+                   BPPARAM=BPPARAM)
958
+    
959
+    es <- do.call(rbind, es)
960
+    
961
+    if (length(geneSets) == 1)
962
+      es <- matrix(es, nrow=1)
963
+    
964
+    rownames(es) <- names(geneSets)
965
+    colnames(es) <- colnames(X)
966
+  }
967
+  
944 968
   es
945 969
 }
946 970
 
... ...
@@ -35,6 +35,12 @@
35 35
   result
36 36
 }
37 37
 
38
+.dgCapply<-function(m,f, MARGIN){
39
+  x <- lapply(.sparseToList(m, MARGIN), f)
40
+  m@x <- unlist(x, use.names=FALSE)
41
+  m
42
+}
43
+
38 44
 ## filter out genes which non-zero values have
39 45
 ## constant expression values
40 46
 .filterFeaturesSparse <- function(expr, method) {