Browse code

Arguments rnaseq and kernel deprecated and replaced by new argument kcdf.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GSVA@131287 bc3139a8-67e5-0310-9ffc-ced21a209358

Robert Castelo authored on 17/07/2017 17:11:21
Showing 5 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: GSVA
2
-Version: 1.25.3
2
+Version: 1.25.4
3 3
 Title: Gene Set Variation Analysis for microarray and RNA-seq data
4 4
 Authors@R: c(person("Justin", "Guinney", role=c("aut", "cre"), email="justin.guinney@sagebase.org"),
5 5
              person("Robert", "Castelo", role="aut", email="robert.castelo@upf.edu"),
... ...
@@ -8,6 +8,7 @@ setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva"))
8 8
 setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
9 9
           function(expr, gset.idx.list, annotation,
10 10
   method=c("gsva", "ssgsea", "zscore", "plage"),
11
+  kcdf=c("Gaussian", "Poisson", "none"),
11 12
   rnaseq=FALSE,
12 13
   abs.ranking=FALSE,
13 14
   min.sz=1,
... ...
@@ -23,6 +24,13 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
23 24
   verbose=TRUE)
24 25
 {
25 26
   method <- match.arg(method)
27
+  kcdf <- match.arg(kcdf)
28
+
29
+  if (!missing(rnaseq))
30
+    warning("The argument 'rnaseq' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.")
31
+
32
+  if (!missing(kernel))
33
+    warning("The argument 'kernel' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.")
26 34
 
27 35
   ## filter out genes with constant expression values
28 36
   sdGenes <- Biobase::esApply(expr, 1, sd)
... ...
@@ -52,7 +60,18 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
52 60
                                          min.sz=max(1, min.sz),
53 61
                                          max.sz=max.sz)
54 62
 
55
-  eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking,
63
+  if (!missing(kcdf)) {
64
+    if (kcdf == "Gaussian") {
65
+      rnaseq <- FALSE
66
+      kernel <- TRUE
67
+    } else if (kcdf == "Poisson") {
68
+      rnaseq <- TRUE
69
+      kernel <- TRUE
70
+    } else
71
+      kernel <- FALSE
72
+  }
73
+
74
+  eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
56 75
                 no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
57 76
                 mx.diff, tau, kernel, ssgsea.norm, verbose)
58 77
 
... ...
@@ -70,6 +89,7 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
70 89
 setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollection"),
71 90
           function(expr, gset.idx.list, annotation,
72 91
   method=c("gsva", "ssgsea", "zscore", "plage"),
92
+  kcdf=c("Gaussian", "Poisson", "none"),
73 93
   rnaseq=FALSE,
74 94
   abs.ranking=FALSE,
75 95
   min.sz=1,
... ...
@@ -85,6 +105,13 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
85 105
   verbose=TRUE)
86 106
 {
87 107
   method <- match.arg(method)
108
+  kcdf <- match.arg(kcdf)
109
+
110
+  if (!missing(rnaseq))
111
+    warning("The argument 'rnaseq' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.")
112
+
113
+  if (!missing(kernel))
114
+    warning("The argument 'kernel' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.")
88 115
 
89 116
   ## filter out genes with constant expression values
90 117
   sdGenes <- Biobase::esApply(expr, 1, sd)
... ...
@@ -118,7 +145,18 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
118 145
                                          min.sz=max(1, min.sz),
119 146
                                          max.sz=max.sz)
120 147
 
121
-  eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, rnaseq, abs.ranking,
148
+  if (!missing(kcdf)) {
149
+    if (kcdf == "Gaussian") {
150
+      rnaseq <- FALSE
151
+      kernel <- TRUE
152
+    } else if (kcdf == "Poisson") {
153
+      rnaseq <- TRUE
154
+      kernel <- TRUE
155
+    } else
156
+      kernel <- FALSE
157
+  }
158
+
159
+  eSco <- .gsva(exprs(expr), mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
122 160
                 no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
123 161
                 mx.diff, tau, kernel, ssgsea.norm, verbose)
124 162
 
... ...
@@ -136,6 +174,7 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
136 174
 setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"),
137 175
           function(expr, gset.idx.list, annotation,
138 176
   method=c("gsva", "ssgsea", "zscore", "plage"),
177
+  kcdf=c("Gaussian", "Poisson", "none"),
139 178
   rnaseq=FALSE,
140 179
   abs.ranking=FALSE,
141 180
   min.sz=1,
... ...
@@ -151,6 +190,13 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"),
151 190
   verbose=TRUE)
152 191
 {
153 192
   method <- match.arg(method)
193
+  kcdf <- match.arg(kcdf)
194
+
195
+  if (!missing(rnaseq))
196
+    warning("The argument 'rnaseq' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.")
197
+
198
+  if (!missing(kernel))
199
+    warning("The argument 'kernel' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.")
154 200
 
155 201
   ## filter out genes with constant expression values
156 202
   sdGenes <- apply(expr, 1, sd)
... ...
@@ -191,7 +237,18 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"),
191 237
                                          min.sz=max(1, min.sz),
192 238
                                          max.sz=max.sz)
193 239
 
194
-  .gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking,
240
+  if (!missing(kcdf)) {
241
+    if (kcdf == "Gaussian") {
242
+      rnaseq <- FALSE
243
+      kernel <- TRUE
244
+    } else if (kcdf == "Poisson") {
245
+      rnaseq <- TRUE
246
+      kernel <- TRUE
247
+    } else
248
+      kernel <- FALSE
249
+  }
250
+
251
+  .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
195 252
         no.bootstraps, bootstrap.percent, parallel.sz, parallel.type,
196 253
         mx.diff, tau, kernel, ssgsea.norm, verbose)
197 254
 })
... ...
@@ -199,6 +256,7 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"),
199 256
 setMethod("gsva", signature(expr="matrix", gset.idx.list="list"),
200 257
           function(expr, gset.idx.list, annotation,
201 258
   method=c("gsva", "ssgsea", "zscore", "plage"),
259
+  kcdf=c("Gaussian", "Poisson", "none"),
202 260
   rnaseq=FALSE,
203 261
   abs.ranking=FALSE,
204 262
   min.sz=1,
... ...
@@ -214,6 +272,13 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list"),
214 272
   verbose=TRUE)
215 273
 {
216 274
   method <- match.arg(method)
275
+  kcdf <- match.arg(kcdf)
276
+
277
+  if (!missing(rnaseq))
278
+    warning("The argument 'rnaseq' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.")
279
+
280
+  if (!missing(kernel))
281
+    warning("The argument 'kernel' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.")
217 282
 
218 283
   ## filter out genes with constant expression values
219 284
   sdGenes <- apply(expr, 1, sd)
... ...
@@ -242,13 +307,25 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list"),
242 307
                                          min.sz=max(1, min.sz),
243 308
                                          max.sz=max.sz)
244 309
 
245
-  .gsva(expr, mapped.gset.idx.list, method, rnaseq, abs.ranking, no.bootstraps,
310
+  if (!missing(kcdf)) {
311
+    if (kcdf == "Gaussian") {
312
+      rnaseq <- FALSE
313
+      kernel <- TRUE
314
+    } else if (kcdf == "Poisson") {
315
+      rnaseq <- TRUE
316
+      kernel <- TRUE
317
+    } else
318
+      kernel <- FALSE
319
+  }
320
+
321
+  .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking, no.bootstraps,
246 322
         bootstrap.percent, parallel.sz, parallel.type,
247 323
         mx.diff, tau, kernel, ssgsea.norm, verbose)
248 324
 })
249 325
 
250 326
 .gsva <- function(expr, gset.idx.list,
251 327
   method=c("gsva", "ssgsea", "zscore", "plage"),
328
+  kcdf=c("Gaussian", "Poisson", "none"),
252 329
   rnaseq=FALSE,
253 330
   abs.ranking=FALSE,
254 331
   no.bootstraps=0, 
... ...
@@ -261,6 +338,9 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list"),
261 338
   ssgsea.norm=TRUE,
262 339
   verbose=TRUE)
263 340
 {
341
+  if (no.bootstraps > 0)
342
+    warning("The 'no.bootstraps' and 'bootstrap.percent' arguments are experimental and will be deprecated and will dissapear in the following release of GSVA.")
343
+
264 344
 	if(length(gset.idx.list) == 0){
265 345
 		stop("The gene set list is empty!  Filter may be too stringent.")
266 346
 	}
... ...
@@ -98,16 +98,13 @@ argumentsDataInput <- function(id) {
98 98
           12,
99 99
           selectInput("method", "Choose method:",
100 100
                       c("gsva","ssgsea","zscore","plage")),
101
-          radioButtons("rnaseq", "Rnaseq:",
102
-                       c("False" = FALSE,
103
-                         "True" = TRUE)),
101
+          selectInput("kcdf", "Choose kcdf:",
102
+                      c("Gaussian","Poisson","none")),
104 103
           radioButtons("absRanking", "abs.ranking:",
105 104
                        c("False" = FALSE,
106 105
                          "True" = TRUE)),
107 106
           numericInput("minSz","min.sz:",value = 1),
108 107
           numericInput("maxSz","max.sz (Write 0 for infinite):",value = 0),
109
-          numericInput("noBootstraps","no.bootstraps:",value = 0),
110
-          numericInput("bootstrapPercent","bootstrap.percent:",value = .632),
111 108
           numericInput("parallelSz","parallel.sz:",value = 0),
112 109
           selectInput("parallelType", "parallel.type:",
113 110
                       c("SOCK","MPI","NWS")),
... ...
@@ -125,9 +122,6 @@ argumentsDataInput <- function(id) {
125 122
           conditionalPanel(
126 123
             condition = "input.method == 'zscore' || input.method == 'plage'"
127 124
           ),
128
-          radioButtons("kernel", "kernel:",
129
-                       c("True" = TRUE,
130
-                         "False" = FALSE)),
131 125
           radioButtons("ssgseaNorm", "ssgsea.norm:",
132 126
                        c("True" = TRUE,
133 127
                          "False" = FALSE)),
... ...
@@ -255,10 +249,10 @@ gsva_generation <- function(input, output, session, newY, genes,varMaxsz) {
255 249
   #GSVA Generation
256 250
   withProgress(message = 'Runing GSVA', value = 0, {
257 251
     incProgress(1, detail = "This may take a while...")
258
-    generated_gsva <<- gsva(newY, genes, method = input$method, rnaseq = as.logical(input$rnaseq), abs.ranking = as.logical(input$absRanking),
259
-                            min.sz = input$minSz, max.sz = varMaxsz, no.bootstraps = input$noBootstraps, bootstrap.percent = input$bootstrapPercent, 
260
-                            parallel.sz = input$parallelSz, parallel.type = input$parallelType, mx.diff = as.logical(input$mxDiff), tau = selectedTau, kernel = as.logical(input$kernel),
261
-                            ssgsea.norm = as.logical(input$ssgseaNorm), verbose = as.logical(input$verbose)) #Result asignation
252
+    generated_gsva <<- gsva(newY, genes, method=input$method, kcdf=input$kcdf, abs.ranking=as.logical(input$absRanking),
253
+                            min.sz=input$minSz, max.sz=varMaxsz, parallel.sz=input$parallelSz, parallel.type=input$parallelType,
254
+                            mx.diff=as.logical(input$mxDiff), tau=selectedTau, ssgsea.norm=as.logical(input$ssgseaNorm),
255
+                            verbose=as.logical(input$verbose))
262 256
   })
263 257
 }
264 258
 
... ...
@@ -16,6 +16,7 @@ Estimates GSVA enrichment scores.
16 16
 \usage{
17 17
 \S4method{gsva}{ExpressionSet,list}(expr, gset.idx.list, annotation,
18 18
     method=c("gsva", "ssgsea", "zscore", "plage"),
19
+    kcdf=c("Gaussian", "Poisson", "none"),
19 20
     rnaseq=FALSE,
20 21
     abs.ranking=FALSE,
21 22
     min.sz=1,
... ...
@@ -31,6 +32,7 @@ Estimates GSVA enrichment scores.
31 32
     verbose=TRUE)
32 33
 \S4method{gsva}{ExpressionSet,GeneSetCollection}(expr, gset.idx.list, annotation,
33 34
     method=c("gsva", "ssgsea", "zscore", "plage"),
35
+    kcdf=c("Gaussian", "Poisson", "none"),
34 36
     rnaseq=FALSE,
35 37
     abs.ranking=FALSE,
36 38
     min.sz=1,
... ...
@@ -46,6 +48,7 @@ Estimates GSVA enrichment scores.
46 48
     verbose=TRUE)
47 49
 \S4method{gsva}{matrix,GeneSetCollection}(expr, gset.idx.list, annotation,
48 50
     method=c("gsva", "ssgsea", "zscore", "plage"),
51
+    kcdf=c("Gaussian", "Poisson", "none"),
49 52
     rnaseq=FALSE,
50 53
     abs.ranking=FALSE,
51 54
     min.sz=1,
... ...
@@ -61,6 +64,7 @@ Estimates GSVA enrichment scores.
61 64
     verbose=TRUE)
62 65
 \S4method{gsva}{matrix,list}(expr, gset.idx.list, annotation,
63 66
     method=c("gsva", "ssgsea", "zscore", "plage"),
67
+    kcdf=c("Gaussian", "Poisson", "none"),
64 68
     rnaseq=FALSE,
65 69
     abs.ranking=FALSE,
66 70
     min.sz=1,
... ...
@@ -97,12 +101,15 @@ Estimates GSVA enrichment scores.
97 101
                 while in the case of \code{plage} they are used to calculate the singular value decomposition
98 102
                 (SVD) over the genes in the gene set and use the coefficients of the first right-singular vector
99 103
                 as pathway activity profile.}
100
-  \item{rnaseq}{Logical flag set by default to \code{rnaseq=FALSE} to inform whether the input gene
101
-                expression data are continues values, such as fluorescent units in logarithmic scale
102
-                from microarray experiments or some other kind of continuous value derived from
103
-                RNA-seq counts such as log-CPMs, log-RPKMs or log-TPMs. This flag should be set to
104
-                \code{rnaseq=TRUE} only when the values of the input gene expression data are integer
105
-                counts.}
104
+  \item{kcdf}{Character string denoting the kernel to use during the non-parametric estimation of the
105
+              cumulative distribution function of expression levels across samples when \code{method="gsva"}.
106
+              By default, \code{kcdf="Gaussian"} which is suitable when input expression values are continuous,
107
+              such as microarray fluorescent units in logarithmic scale, RNA-seq log-CPMs, log-RPKMs or log-TPMs.
108
+              When input expression values are integer counts, such as those derived from RNA-seq experiments,
109
+              then this argument should be set to \code{kcdf="Poisson"}. This argument supersedes arguments
110
+              \code{rnaseq} and \code{kernel}, which are deprecated and will be removed in the next release.}
111
+  \item{rnaseq}{This argument has been deprecated and will be removed in the next release. Please use the
112
+                argument \code{kcdf} instead.}
106 113
   \item{abs.ranking}{Flag used only when \code{mx.diff=TRUE}. When \code{abs.ranking=FALSE} (default)
107 114
             a modified Kuiper statistic is used to calculate enrichment scores, taking the magnitude
108 115
             difference between the largest positive and negative random walk deviations. When
... ...
@@ -131,10 +138,8 @@ Estimates GSVA enrichment scores.
131 138
              (\enc{H�nzelmann}{Hanzelmann} et al., 2013) and the \code{ssgsea} (Barbie et al., 2009) methods. By default,
132 139
              this \code{tau=1} when \code{method="gsva"} and \code{tau=0.25} when \code{method="ssgsea"} just
133 140
              as specified by Barbie et al. (2009) where this parameter is called \code{alpha}.}
134
-  \item{kernel}{Logical, set to \code{TRUE} when the GSVA method employes a kernel non-parametric
135
-                estimation of the empirical cumulative distribution function (default) and \code{FALSE}
136
-                when this function is directly estimated from the observed data. This last option is
137
-                justified in the limit of the size of the sample by the so-called Glivenko-Cantelli theorem.}
141
+  \item{kernel}{This argument has been deprecated and will be removed in the next release. Please use the
142
+                argument \code{kcdf} instead.}
138 143
   \item{ssgsea.norm}{Logical, set to \code{TRUE} (default) with \code{method="ssgsea"} runs the SSGSEA method
139 144
                      from Barbie et al. (2009) normalizing the scores by the absolute difference between
140 145
                      the minimum and the maximum, as described in their paper. When \code{ssgsea.norm=FALSE}
... ...
@@ -154,16 +159,6 @@ identifiers in the input expression data leading to a filtered collection of
154 159
 gene sets. This collection can be further filtered to require a minimun and/or
155 160
 maximum size of the gene sets for which we want to calculate GSVA enrichment
156 161
 scores, by using the arguments \code{min.sz} and \code{max.sz}.
157
-
158
-The name of the argument \code{rnaseq} can be misleading. When set to \code{rnaseq=FALSE}, the
159
-nonparametric estimation of the cumulative density function of the expression profile of
160
-each gene across samples is done using Gaussian kernels suited for continuous values. These were
161
-initially thought to be only microarray fluorescent units in logarithmic scale but nowadays these
162
-may also correspond to continuous values derived from RNA-seq experiments such as log-CPMs,
163
-log-RPKMs or log-TPMs. When \code{rnaseq=TRUE}, Poisson kernels are used instead and therefore,
164
-this option is only suitable when the input gene expression matrix is formed by integer counts.
165
-This implies that \code{rnaseq=FALSE} may also be used even when the expression data comes from
166
-a RNA-seq experiment. The name of this argument may change in the future to avoid this confusion.
167 162
 }
168 163
 \value{
169 164
 A gene-set by sample matrix of GSVA enrichment scores.
... ...
@@ -841,15 +841,18 @@ canonicalC2BroadSets <- GeneSetCollection(c(canonicalC2BroadSets, MSY, XiE))
841 841
 canonicalC2BroadSets
842 842
 @
843 843
 We calculate now GSVA enrichment scores for these gene sets using first the microarray
844
-data and then the RNA-seq data. Note that the only requirement to do the latter is to
845
-set the argument \Robject{rnaseq=TRUE} which is \Robject{FALSE} by default.
844
+data and then the RNA-seq integer count data. Note that the only requirement to do the
845
+latter is to set the argument \Robject{kcdf="Poisson"} which is \Robject{"Gaussian"} by
846
+default. Note, however, that if our RNA-seq derived expression levels would be continous,
847
+such as log-CPMs, log-RPKMs or log-TPMs, the the default value of the \Robject{kcdf}
848
+argument should remain unchanged.
846 849
 
847 850
 <<<>>=
848 851
 esmicro <- gsva(huangArrayRMAnoBatchCommon_eset, canonicalC2BroadSets, min.sz=5, max.sz=500,
849
-                mx.diff=TRUE, verbose=FALSE, rnaseq=FALSE, parallel.sz=1)$es.obs
852
+                mx.diff=TRUE, verbose=FALSE, parallel.sz=1)$es.obs
850 853
 dim(esmicro)
851 854
 esrnaseq <- gsva(pickrellCountsArgonneCQNcommon_eset, canonicalC2BroadSets, min.sz=5, max.sz=500,
852
-                 mx.diff=TRUE, verbose=FALSE, rnaseq=TRUE, parallel.sz=1)$es.obs
855
+                 kcdf="Poisson", mx.diff=TRUE, verbose=FALSE, parallel.sz=1)$es.obs
853 856
 dim(esrnaseq)
854 857
 @
855 858
 To compare expression values from both technologies we are going to transform