Browse code

Bugfixes when a gene had constant expression values, and when the annotation signature was character (thanks to Luca Beltrame). Updated documentation.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GSVA@72582 bc3139a8-67e5-0310-9ffc-ced21a209358

Robert Castelo authored on 16/01/2013 15:17:11
Showing 6 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: GSVA
2
-Version: 1.7.4
3
-Date: 2013-1-7
2
+Version: 1.7.5
3
+Date: 2013-1-16
4 4
 Title: Gene Set Variation Analysis for microarray and RNA-seq data
5 5
 Author: Justin Guinney <justin.guinney@sagebase.org> (with contributions from Robert Castelo <robert.castelo@upf.edu> and Sonja Haenzelmann <shanzelmann@imim.es>)
6 6
 Maintainer: Justin Guinney <justin.guinney@sagebase.org>
... ...
@@ -13,4 +13,4 @@ License: GPL (>= 2)
13 13
 LazyLoad: yes
14 14
 biocViews: Microarray, Pathways, GeneSetEnrichment
15 15
 URL: http://www.sagebase.org
16
-
16
+Encoding: latin1
... ...
@@ -23,6 +23,17 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list", annotati
23 23
 {
24 24
   method <- match.arg(method)
25 25
 
26
+  ## filter out genes with constant expression values
27
+  sdGenes <- Biobase::esApply(expr, 1, sd)
28
+  if (any(sdGenes == 0)) {
29
+    if (verbose)
30
+      cat("Filtering out ", sum(sdGenes), " genes with constant expression values throuhgout the samples\n")
31
+    expr <- expr[sdGenes > 0, ]
32
+  } 
33
+
34
+  if (nrow(expr) < 2)
35
+    stop("Less than two genes in the input ExpressionSet object\n")
36
+
26 37
   ## map to the actual features for which expression data is available
27 38
   mapped.gset.idx.list <- lapply(gset.idx.list,
28 39
                                  function(x, y) na.omit(match(x, y)),
... ...
@@ -66,6 +77,17 @@ setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollecti
66 77
 {
67 78
   method <- match.arg(method)
68 79
 
80
+  ## filter out genes with constant expression values
81
+  sdGenes <- Biobase::esApply(expr, 1, sd)
82
+  if (any(sdGenes == 0)) {
83
+    if (verbose)
84
+      cat("Filtering out ", sum(sdGenes), " genes with constant expression values throuhgout the samples\n")
85
+    expr <- expr[sdGenes > 0, ]
86
+  } 
87
+
88
+  if (nrow(expr) < 2)
89
+    stop("Less than two genes in the input ExpressionSet object\n")
90
+
69 91
   if (verbose)
70 92
     cat("Mapping identifiers between gene sets and feature names\n")
71 93
 
... ...
@@ -116,6 +138,17 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an
116 138
 {
117 139
   method <- match.arg(method)
118 140
 
141
+  ## filter out genes with constant expression values
142
+  sdGenes <- apply(expr, 1, sd)
143
+  if (any(sdGenes == 0)) {
144
+    if (verbose)
145
+      cat("Filtering out ", sum(sdGenes), " genes with constant expression values throuhgout the samples\n")
146
+    expr <- expr[sdGenes > 0, ]
147
+  } 
148
+
149
+  if (nrow(expr) < 2)
150
+    stop("Less than two genes in the input expression data matrix\n")
151
+
119 152
   ## map gene identifiers of the gene sets to the features in the matrix
120 153
   mapped.gset.idx.list <- gset.idx.list
121 154
   if (!is.na(annotation)) {
... ...
@@ -123,7 +156,7 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection", an
123 156
       cat("Mapping identifiers between gene sets and feature names\n")
124 157
 
125 158
     mapped.gset.idx.list <- GSEABase::mapIdentifiers(gset.idx.list,
126
-                                                     GSEABase::AnnoOrEntrezIdentifier(Biobase::annotation))
159
+                                                     GSEABase::AnnoOrEntrezIdentifier(annotation))
127 160
   }
128 161
   
129 162
   ## map to the actual features for which expression data is available
... ...
@@ -161,6 +194,17 @@ setMethod("gsva", signature(expr="matrix", gset.idx.list="list", annotation="mis
161 194
 {
162 195
   method <- match.arg(method)
163 196
 
197
+  ## filter out genes with constant expression values
198
+  sdGenes <- apply(expr, 1, sd)
199
+  if (any(sdGenes == 0)) {
200
+    if (verbose)
201
+      cat("Filtering out ", sum(sdGenes), " genes with constant expression values throuhgout the samples\n")
202
+    expr <- expr[sdGenes > 0, ]
203
+  } 
204
+
205
+  if (nrow(expr) < 2)
206
+    stop("Less than two genes in the input expression data matrix\n")
207
+
164 208
   mapped.gset.idx.list <- lapply(gset.idx.list,
165 209
                                  function(x ,y) na.omit(match(x, y)),
166 210
                                  rownames(expr))
... ...
@@ -1,13 +1,17 @@
1 1
 citHeader("To cite package 'GSVA' in publications use:")
2 2
 
3 3
 citEntry(entry="Article",
4
-  author = personList(as.person("Sonja H\"anzelmann"),
4
+  title = "{GSVA}: gene set variation analysis for microarray and {RNA-Seq} data",
5
+  author = personList(as.person("Sonja H{\\\"a}nzelmann"),
5 6
                       as.person("Robert Castelo"),
6 7
                       as.person("Justin Guinney")),
7
-  title = "GSVA: Gene Set Variation Analysis for microarray and RNA-Seq data",
8
-  journal = "BMC Bioinformatics, in press",
8
+  journal = "BMC Bioinformatics",
9
+  volume = "14",
10
+  pages = "7",
9 11
   year = "2013",
10
-  textVersion = paste("Hanzelmann, S., Castelo, R. and Guinney, A.",
11
-                      "GSVA: Gene Set Variation Analysis for microarray and RNA-seq data",
12
-                      "BMC Bioinformatics, in press, 2013.")
12
+  url = "http://www.biomedcentral.com/1471-2105/14/7",
13
+  doi = "10.1186/1471-2105-14-7",
14
+  textVersion = paste("Hänzelmann, S., Castelo, R. and Guinney, A.",
15
+                      "GSVA: gene set variation analysis for microarray and RNA-seq data.",
16
+                      "BMC Bioinformatics, 14:7, 2013.")
13 17
 )
... ...
@@ -5,6 +5,8 @@
5 5
 \alias{computeGeneSetsOverlap,GeneSetCollection,character-method}
6 6
 \alias{computeGeneSetsOverlap,GeneSetCollection,ExpressionSet-method}
7 7
 
8
+\encoding{latin1}
9
+
8 10
 \title{
9 11
 Compute gene-sets overlap
10 12
 }
... ...
@@ -38,8 +40,9 @@ size of the two gene sets.
38 40
 A gene-set by gene-set matrix of the overlap among every pair of gene sets.
39 41
 }
40 42
 \references{
41
-H\"anzelmann, S., Castelo, R. and Guinney, J.
42
-GSVA: Gene Set Variation Analysis for microarray and RNA-Seq data, \emph{BMC Bioinformatics, in press, 2013.}
43
+\enc{Hänzelmann}{Hanzelmann}, S., Castelo, R. and Guinney, J.
44
+GSVA: Gene set variation analysis for microarray and RNA-Seq data.
45
+\emph{BMC Bioinformatics}, 14:7, 2013.
43 46
 }
44 47
 \author{J. Guinney}
45 48
 \seealso{
... ...
@@ -3,6 +3,8 @@
3 3
 \alias{filterGeneSets,list-method}
4 4
 \alias{filterGeneSets,GeneSetCollection-method}
5 5
 
6
+\encoding{latin1}
7
+
6 8
 \title{
7 9
 Filter gene sets
8 10
 }
... ...
@@ -26,8 +28,9 @@ This function filters the input gene sets according to a given minimum and maxim
26 28
 A collection of gene sets that meet the given minimum and maximum set size.
27 29
 }
28 30
 \references{
29
-H\"anzelmann, S., Castelo, R. and Guinney, J.
30
-GSVA: Gene Set Variation Analysis for microarray and RNA-Seq data, \emph{BMC Bioinformatics, in press, 2013.}
31
+\enc{Hänzelmann}{Hanzelmann}, S., Castelo, R. and Guinney, J.
32
+GSVA: Gene set variation analysis for microarray and RNA-Seq data.
33
+\emph{BMC Bioinformatics}, 14:7, 2013.
31 34
 }
32 35
 \author{J. Guinney}
33 36
 \seealso{
... ...
@@ -5,6 +5,8 @@
5 5
 \alias{gsva,matrix,GeneSetCollection,character-method}
6 6
 \alias{gsva,matrix,list,missing-method}
7 7
 
8
+\encoding{latin1}
9
+
8 10
 \title{
9 11
 Gene Set Variation Analysis
10 12
 }
... ...
@@ -146,8 +148,9 @@ A gene-set by sample matrix of GSVA enrichment scores.
146 148
 Barbie, D.A. et al. Systematic RNA interference reveals that oncogenic KRAS-driven
147 149
 cancers require TBK1. \emph{Nature}, 462(5):108-112, 2009.
148 150
 
149
-H\"anzelmann, S., Castelo, R. and Guinney, J.
150
-GSVA: Gene Set Variation Analysis for microarray and RNA-Seq data, \emph{BMC Bioinformatics, in press, 2013.}
151
+\enc{Hänzelmann}{Hanzelmann}, S., Castelo, R. and Guinney, J.
152
+GSVA: Gene set variation analysis for microarray and RNA-Seq data.
153
+\emph{BMC Bioinformatics}, 14:7, 2013.
151 154
 
152 155
 Lee, E. et al. Inferring pathway activity toward precise disease classification.
153 156
 \emph{PLoS Comp Biol}, 4(11):e1000217, 2008.