Browse code

adding SingleCellExperiment support

pablo-rodr-bio2 authored on 29/10/2020 11:43:14
Showing 4 changed files

... ...
@@ -8,7 +8,7 @@ Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.gui
8 8
 Depends: R (>= 3.5.0)
9 9
 Imports: methods, stats, utils, graphics, BiocGenerics, S4Vectors,
10 10
          Biobase, SummarizedExperiment, GSEABase,
11
-         parallel, BiocParallel, fastmatch
11
+         parallel, BiocParallel, fastmatch, SingleCellExperiment
12 12
 Suggests: RUnit, limma, RColorBrewer, genefilter, edgeR, GSVAdata,
13 13
           shiny, shinythemes, ggplot2, data.table, plotly
14 14
 Description: Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method for estimating variation of gene set enrichment through the samples of a expression data set. GSVA performs a change in coordinate systems, transforming the data from a gene by sample matrix to a gene-set by sample matrix, thereby allowing the evaluation of pathway enrichment for each sample. This new matrix of GSVA enrichment scores facilitates applying standard analytical methods like functional enrichment, survival analysis, clustering, CNV-pathway analysis or cross-tissue pathway analysis, in a pathway-centric manner.
... ...
@@ -6,6 +6,7 @@ import(BiocGenerics)
6 6
 importClassesFrom(Biobase, ExpressionSet)
7 7
 importClassesFrom(SummarizedExperiment, SummarizedExperiment)
8 8
 importClassesFrom(GSEABase, GeneSetCollection)
9
+importClassesFrom(SingleCellExperiment, SingleCellExperiment)
9 10
 
10 11
 importMethodsFrom(Biobase, featureNames,
11 12
                            phenoData,
... ...
@@ -45,6 +46,7 @@ importFrom(BiocParallel, SerialParam,
45 46
                          MulticoreParam,
46 47
                          multicoreWorkers,
47 48
                          bpnworkers)
49
+importFrom(SingleCellExperiment, SingleCellExperiment)
48 50
 
49 51
 exportMethods(gsva,
50 52
               filterGeneSets,
... ...
@@ -5,6 +5,83 @@
5 5
 
6 6
 setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva"))
7 7
 
8
+setMethod("gsva", signature(expr="SingleCellExperiment", gset.idx.list="list"),
9
+          function(expr, gset.idx.list, annotation,
10
+  method=c("gsva", "ssgsea", "zscore", "plage"),
11
+  kcdf=c("Gaussian", "Poisson", "none"),
12
+  abs.ranking=FALSE,
13
+  min.sz=1,
14
+  max.sz=Inf,
15
+  parallel.sz=1L, 
16
+  mx.diff=TRUE,
17
+  tau=switch(method, gsva=1, ssgsea=0.25, NA),
18
+  ssgsea.norm=TRUE,
19
+  verbose=TRUE,
20
+  BPPARAM=SerialParam(progressbar=verbose))
21
+{
22
+  method <- match.arg(method)
23
+  kcdf <- match.arg(kcdf)
24
+  
25
+  if (length(assays(expr)) == 0L)
26
+    stop("The input SummarizedExperiment object has no assay data.")
27
+  
28
+  se <- expr
29
+  
30
+  if (missing(annotation))
31
+    annotation <- names(assays(se))[1]
32
+  else {
33
+    if (!is.character(annotation))
34
+      stop("The 'annotation' argument must contain a character string.")
35
+    annotation <- annotation[1]
36
+    
37
+    if (!annotation %in% names(assays(se)))
38
+      stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation))
39
+  }
40
+  expr <- assays(se)[[annotation]]
41
+  
42
+  ## filter genes according to verious criteria,
43
+  ## e.g., constant expression
44
+  expr <- .filterFeatures(expr, method)
45
+  
46
+  if (nrow(expr) < 2)
47
+    stop("Less than two genes in the input ExpressionSet object\n")
48
+  
49
+  ## map to the actual features for which expression data is available
50
+  mapped.gset.idx.list <- lapply(gset.idx.list,
51
+                                 function(x, y) na.omit(fastmatch::fmatch(x, y)),
52
+                                 rownames(expr))
53
+  
54
+  if (length(unlist(mapped.gset.idx.list, use.names=FALSE)) == 0)
55
+    stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
56
+  
57
+  ## remove gene sets from the analysis for which no features are available
58
+  ## and meet the minimum and maximum gene-set size specified by the user
59
+  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
60
+                                         min.sz=max(1, min.sz),
61
+                                         max.sz=max.sz)
62
+  
63
+  if (!missing(kcdf)) {
64
+    if (kcdf == "Gaussian") {
65
+      rnaseq <- FALSE
66
+      kernel <- TRUE
67
+    } else if (kcdf == "Poisson") {
68
+      rnaseq <- TRUE
69
+      kernel <- TRUE
70
+    } else
71
+      kernel <- FALSE
72
+  }
73
+  
74
+  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
75
+                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) 
76
+  
77
+  rval <- SingleCellExperiment(assays=SimpleList(es=eSco),
78
+                               colData=colData(se),
79
+                               metadata=metadata(se))
80
+  metadata(rval)$annotation <- NULL
81
+  
82
+  rval
83
+          })
84
+
8 85
 setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="GeneSetCollection"),
9 86
           function(expr, gset.idx.list, annotation,
10 87
   method=c("gsva", "ssgsea", "zscore", "plage"),
... ...
@@ -1,5 +1,6 @@
1 1
 \name{gsva}
2 2
 \alias{gsva}
3
+\alias{gsva,SingleCellExperiment,list-method}
3 4
 \alias{gsva,SummarizedExperiment,list-method}
4 5
 \alias{gsva,SummarizedExperiment,GeneSetCollection-method}
5 6
 \alias{gsva,ExpressionSet,list-method}
... ...
@@ -16,6 +17,18 @@ Gene Set Variation Analysis
16 17
 Estimates GSVA enrichment scores.
17 18
 }
18 19
 \usage{
20
+\S4method{gsva}{SingleCellExperiment,list}(expr, gset.idx.list, annotation,
21
+    method=c("gsva", "ssgsea", "zscore", "plage"),
22
+    kcdf=c("Gaussian", "Poisson", "none"),
23
+    abs.ranking=FALSE,
24
+    min.sz=1,
25
+    max.sz=Inf,
26
+    parallel.sz=1L,
27
+    mx.diff=TRUE,
28
+    tau=switch(method, gsva=1, ssgsea=0.25, NA),
29
+    ssgsea.norm=TRUE,
30
+    verbose=TRUE,
31
+    BPPARAM=SerialParam(progressbar=verbose))
19 32
 \S4method{gsva}{SummarizedExperiment,GeneSetCollection}(expr, gset.idx.list, annotation,
20 33
     method=c("gsva", "ssgsea", "zscore", "plage"),
21 34
     kcdf=c("Gaussian", "Poisson", "none"),
... ...
@@ -91,13 +104,13 @@ Estimates GSVA enrichment scores.
91 104
 }
92 105
 \arguments{
93 106
   \item{expr}{Gene expression data which can be given either as a
94
-              \code{SummarizedExperiment} or \code{ExpressionSet} object,
95
-              or as a matrix of expression values where rows correspond to genes
96
-              and columns correspond to samples.}
107
+              \code{SingleCellExperiment}, \code{SummarizedExperiment} or
108
+              \code{ExpressionSet} object, or as a matrix of expression 
109
+              values where rows correspond to genes and columns correspond to samples.}
97 110
   \item{gset.idx.list}{Gene sets provided either as a \code{list} object or as a
98 111
                        \code{GeneSetCollection} object.}
99
-  \item{annotation}{In the case of calling \code{gsva()} on a
100
-                    \code{SummarizedExperiment} object, the \code{annotation}
112
+  \item{annotation}{In the case of calling \code{gsva()} on a \code{SingleCellExperiment}
113
+                    or a \code{SummarizedExperiment} object, the \code{annotation}
101 114
                     argument can be used to select the assay containing the
102 115
                     molecular data we want as input to the \code{gsva()}
103 116
                     function, otherwise the first assay is selected.
... ...
@@ -111,7 +124,7 @@ Estimates GSVA enrichment scores.
111 124
                     \code{ExpressionSet} object, the \code{annotation} argument
112 125
                     is ignored. See details information below.}
113 126
   \item{method}{Method to employ in the estimation of gene-set enrichment scores per sample. By default
114
-                this is set to \code{gsva} (\enc{H�nzelmann}{Hanzelmann} et al, 2013) and other options are
127
+                this is set to \code{gsva} (\enc{H?nzelmann}{Hanzelmann} et al, 2013) and other options are
115 128
                 \code{ssgsea} (Barbie et al, 2009), \code{zscore} (Lee et al, 2008) or \code{plage}
116 129
                 (Tomfohr et al, 2005). The latter two standardize first expression profiles into z-scores
117 130
                 over the samples and, in the case of \code{zscore}, it combines them together as their sum
... ...
@@ -142,7 +155,7 @@ Estimates GSVA enrichment scores.
142 155
                  is calculated as the magnitude difference between the largest positive
143 156
                  and negative random walk deviations.}
144 157
   \item{tau}{Exponent defining the weight of the tail in the random walk performed by both the \code{gsva}
145
-             (\enc{H�nzelmann}{Hanzelmann} et al., 2013) and the \code{ssgsea} (Barbie et al., 2009) methods. By default,
158
+             (\enc{H?nzelmann}{Hanzelmann} et al., 2013) and the \code{ssgsea} (Barbie et al., 2009) methods. By default,
146 159
              this \code{tau=1} when \code{method="gsva"} and \code{tau=0.25} when \code{method="ssgsea"} just
147 160
              as specified by Barbie et al. (2009) where this parameter is called \code{alpha}.}
148 161
   \item{ssgsea.norm}{Logical, set to \code{TRUE} (default) with \code{method="ssgsea"} runs the SSGSEA method
... ...
@@ -194,7 +207,7 @@ A gene-set by sample matrix of GSVA enrichment scores.
194 207
 Barbie, D.A. et al. Systematic RNA interference reveals that oncogenic KRAS-driven
195 208
 cancers require TBK1. \emph{Nature}, 462(5):108-112, 2009.
196 209
 
197
-\enc{H�nzelmann}{Hanzelmann}, S., Castelo, R. and Guinney, J.
210
+\enc{H?nzelmann}{Hanzelmann}, S., Castelo, R. and Guinney, J.
198 211
 GSVA: Gene set variation analysis for microarray and RNA-Seq data.
199 212
 \emph{BMC Bioinformatics}, 14:7, 2013.
200 213