Browse code

LJ: Updated version for bioC 2.8 release. Changes include:

o Added support of NCI networks through the NCIgraph package.
o Updated the man page of the Loi2008 data.



git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/DEGraph@53977 bc3139a8-67e5-0310-9ffc-ced21a209358

Laurent Jacob authored on 23/03/2011 22:42:35
Showing 9 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: DEGraph
2 2
 Title: Two-sample tests on a graph
3
-Version: 1.1.0
4
-Date: 2010-10-14
3
+Version: 1.2.0
4
+Date: 2011-03-23
5 5
 Author: Laurent Jacob, Pierre Neuvial and Sandrine Dudoit
6 6
 Maintainer: Laurent Jacob <laurent.jacob@gmail.com>
7 7
 Description: DEGraph implements recent hypothesis testing methods
... ...
@@ -16,7 +16,7 @@ Description: DEGraph implements recent hypothesis testing methods
16 16
   and tools to visualize the results.
17 17
 License: GPL-3
18 18
 LazyLoad: yes
19
-Imports: graph, KEGGgraph, lattice, mvtnorm, R.methodsS3, RBGL, Rgraphviz, rrcov
20
-Suggests: corpcor, fields, graph, KEGGgraph, lattice, marray, RBGL, rrcov, Rgraphviz
19
+Imports: graph, KEGGgraph, lattice, mvtnorm, NCIgraph, R.methodsS3, RBGL, Rgraphviz, rrcov
20
+Suggests: corpcor, fields, graph, KEGGgraph, lattice, marray, NCIgraph, RBGL, rrcov, Rgraphviz
21 21
 Depends: R (>= 2.10.0), R.utils
22 22
 biocViews: Microarray, Bioinformatics, DifferentialExpression, GraphsAndNetworks
... ...
@@ -25,4 +25,4 @@ importFrom(rrcov, T2.test)
25 25
 importFrom(mvtnorm, rmvnorm)
26 26
 importFrom(RBGL,connectedComp)
27 27
 importFrom(lattice, level.colors)
28
-
28
+importFrom(NCIgraph, is.NCIgraph)
... ...
@@ -88,11 +88,14 @@ getSignedGraph <- function(graph, positiveInteractionLabels=c("activation", "exp
88 88
     cat <- R.utils::cat
89 89
     pushState(verbose)
90 90
     on.exit(popState(verbose))
91
-  } 
91
+  }
92 92
 
93 93
   
94
-  ## retrieve edge types
95
-  st <- getSubtype(graph)  ## FIXME: does not work for undirected graphs
94
+  ## retrieve edge types (method detects whether or not graph is a NCIgraph)
95
+  ##  if(is.NCIgraph(graph))
96
+  ##    st <- getSubtype.NCIgraph(graph)
97
+  ##  else
98
+  st <- getSubtype(graph) ## FIXME: does not work for undirected graphs
96 99
   edgeNames <- names(st)
97 100
   intNames <- sapply(st, FUN=function(x) {
98 101
     x$subtype@name
... ...
@@ -134,7 +137,7 @@ getSignedGraph <- function(graph, positiveInteractionLabels=c("activation", "exp
134 137
 
135 138
     etrNames <- edgeNames[-c(idxPos, idxNeg)]
136 139
     ftNames <- sapply(etrNames, FUN=function(edge) {
137
-      unlist(strsplit(edge, "~"))
140
+      unlist(strsplit(edge, "~|\\|"))
138 141
     })
139 142
 
140 143
     oldGraph <- graph
... ...
@@ -161,7 +164,7 @@ getSignedGraph <- function(graph, positiveInteractionLabels=c("activation", "exp
161 164
   if (length(negNames)) {
162 165
     ## set negative edges to -1
163 166
     ftNames <- sapply(negNames, FUN=function(edge) {
164
-      unlist(strsplit(edge, "~"))
167
+      unlist(strsplit(edge, "~|\\|"))
165 168
     })
166 169
     nodeNames <- colnames(signMat)
167 170
     for (ii in seq(length=ncol(ftNames))) {
... ...
@@ -180,7 +183,7 @@ getSignedGraph <- function(graph, positiveInteractionLabels=c("activation", "exp
180 183
   if (length(posNames)) {
181 184
     ## check that positive edges are 1
182 185
     ftNames <- sapply(posNames, FUN=function(edge) {
183
-      unlist(strsplit(edge, "~"))
186
+      unlist(strsplit(edge, "~|\\|"))
184 187
     })
185 188
     nodeNames <- colnames(signMat)
186 189
     for (ii in seq(length=ncol(ftNames))) {
... ...
@@ -201,6 +204,8 @@ getSignedGraph <- function(graph, positiveInteractionLabels=c("activation", "exp
201 204
 
202 205
 ############################################################################
203 206
 ## HISTORY:
207
+## 2011-03-06
208
+## o Dealing with NCI graphs
204 209
 ## 2010-10-08
205 210
 ## o Now validating argument 'verbose'.
206 211
 ## o Updated arguments.
... ...
@@ -71,6 +71,7 @@ graph.T2.test <- function(X1, X2, G=NULL, lfA=NULL, ..., k=ncol(X1))
71 71
     }    
72 72
 
73 73
   U <- lfA$U
74
+  ## print(U)
74 75
   egVal <- lfA$l
75 76
   kIdx <- (egVal <= max(egVal[k],tol)) #lfA$kIdx
76 77
   rk <- max(which(kIdx)) ## "round" the number of kept eigenvectors to take into account eigenvalue multiplicity
... ...
@@ -179,11 +179,26 @@ plotValuedGraph <- function(graph, values=NULL, nodeLabels=nodes(graph), qMax=0.
179 179
     if (symmetrizeArrows) {
180 180
       edgeRenderInfo(graph) <- list(arrowtail=eArrowhead)
181 181
     }
182
-  } else {
183
-    ## graphAM
184
-    if (inherits(graph, "graphAM")) {
185
-     ahd <- rep("normal", length(ed))
186
-     names(ahd) <- names(ed)
182
+  }
183
+  else
184
+    if(is.NCIgraph(graph)) ## NCIgraph
185
+      {
186
+        eTypeDictionnary <- c('normal','tee')
187
+        eColDictionnary <- c('red','blue')
188
+        names(eTypeDictionnary) <- names(eColDictionnary) <- c('activation','inhibition')
189
+        eTypes <- unlist(lapply(graph@edgeData@data,FUN=function(e) tolower(e$edgeType)))
190
+        ERIarrowhead =eTypeDictionnary[eTypes]
191
+        ERIcol =eColDictionnary[eTypes]
192
+        eNames <- sapply(names(graph@edgeData@data),FUN=function(e) gsub(e,pattern='\\|',replacement='~'))
193
+        names(ERIarrowhead) <- names(ERIcol) <- eNames
194
+        edgeRenderInfo(graph) <- list(arrowhead=ERIarrowhead,col=ERIcol)
195
+      }
196
+    else
197
+      {
198
+        ## graphAM
199
+        if (inherits(graph, "graphAM")) {
200
+          ahd <- rep("normal", length(ed))
201
+          names(ahd) <- names(ed)
187 202
      adjMat <- graph@adjMat
188 203
      ## not useful: adjacency matrix is not signed...
189 204
    }
... ...
@@ -100,7 +100,13 @@ testOneGraph <- function(graph, data, classes, useInteractionSigns=TRUE, ..., ve
100 100
   ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
101 101
   verbose && enter(verbose, "Keeping genes in the graph *and* the expression data set")
102 102
   dataGN <- rownames(data)
103
-  graphGN <- translateKEGGID2GeneID(nodes(graph))
103
+  if(is.NCIgraph(graph))
104
+    {
105
+      graphGN <- translateNCI2GeneID(graph)
106
+      names(graphGN) <- NULL
107
+    }
108
+  else
109
+    graphGN <- translateKEGGID2GeneID(nodes(graph))
104 110
   commonGN <- intersect(dataGN, graphGN)
105 111
 
106 112
   mm <- match(graphGN, commonGN)
... ...
@@ -129,7 +135,13 @@ testOneGraph <- function(graph, data, classes, useInteractionSigns=TRUE, ..., ve
129 135
   data <- data[mm, ]
130 136
 
131 137
   ## sanity check (ordering should be the same now)
132
-  graphGN <- translateKEGGID2GeneID(nodes(graph))
138
+  if(is.NCIgraph(graph))
139
+    {
140
+      graphGN <- translateNCI2GeneID(graph)
141
+      names(graphGN) <- NULL
142
+    }
143
+  else
144
+    graphGN <- translateKEGGID2GeneID(nodes(graph))
133 145
   dataGN <- rownames(data)
134 146
   stopifnot(all.equal(dataGN, graphGN))
135 147
 
... ...
@@ -180,6 +192,8 @@ testOneGraph <- function(graph, data, classes, useInteractionSigns=TRUE, ..., ve
180 192
 
181 193
 ############################################################################
182 194
 ## HISTORY:
195
+## 2011-03-06
196
+## o Dealing with NCI graphs
183 197
 ## 2010-10-08
184 198
 ## o Now validating argument 'verbose'.
185 199
 ############################################################################
... ...
@@ -2,7 +2,8 @@
2 2
 ## Test all (locally stored) pathways from KEGG on Loi's data
3 3
 ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4 4
 
5
-library("R.utils")
5
+library('R.utils')
6
+library('xtable')
6 7
 
7 8
 verbose <- Arguments$getVerbose(-8, timestamp=TRUE)
8 9
 
... ...
@@ -14,12 +15,22 @@ sourceDirectory(path)
14 15
 path <- system.file("demoScripts", package="DEGraph")
15 16
 sourceDirectory(path)
16 17
 
18
+## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
19
+## get all NCI pathways
20
+## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
21
+
22
+library('NCIgraph')
23
+path <- system.file("downloadScripts", package="NCIgraph")
24
+sourceDirectory(path)
25
+load(file.path('rawNCINetworks','NCI-cyList.RData'))
26
+
27
+grList <- getNCIPathways(cyList=NCI.cyList, verbose=verbose)$pList
17 28
 
18 29
 ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
19 30
 ## get all KEGG pathways
20 31
 ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
21
-##grList <- getKEGGPathways(organism="hsa", metaTag="non-metabolic", verbose=verbose)
22
-grList <- getKEGGPathways(organism="hsa", metaTag="non-metabolic", patt="04060", verbose=verbose)
32
+## grList <- getKEGGPathways(organism="hsa", metaTag="non-metabolic", verbose=verbose)
33
+## grList <- getKEGGPathways(organism="hsa", metaTag="non-metabolic", patt="04060", verbose=verbose)
23 34
 
24 35
 ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
25 36
 ## test all KEGG pathways
... ...
@@ -33,34 +44,51 @@ res <- apply(exprData, 1, FUN=function(x) {
33 44
 ttpv <- res["p.value", ]
34 45
 tts <- res["statistic", ]
35 46
 
36
-names(ttpv) <- translateGeneID2KEGGID(names(ttpv))
37
-names(tts) <- translateGeneID2KEGGID(names(tts))
47
+if(!is.NCIgraph(grList[[1]]))
48
+  {
49
+    names(ttpv) <- translateGeneID2KEGGID(names(ttpv))
50
+    names(tts) <- translateGeneID2KEGGID(names(tts))
51
+  }
38 52
 
39 53
 prop <- 0.2 ## proportion of the spectrum to be retained for T2-Fourier 
40 54
 
41 55
 ## Multivariate tests
42 56
 resList <- NULL
43
-##for (ii in 1:40) {
57
+## for (ii in 1:5) {
44 58
 for (ii in seq(along=grList)) {
45 59
   verbose && cat(verbose, ii)
46 60
   gr <- grList[[ii]]
47
-  res <- testOneGraph(gr, exprData, classData, verbose=verbose, prop=prop)
48
-  if(!is.null(res))
61
+  if(min(length(nodes(gr)),length(gr@edgeData@data))>0) {
62
+    res <- testOneGraph(gr, exprData, classData, verbose=verbose, prop=prop)
63
+  } else {
64
+    res <- NULL
65
+  }
66
+  if(!is.null(res)) {
49 67
     res <- lapply(res, FUN=function(x) {
50
-      if(!is.null(x))
51
-        {
52
-          ht <- hyper.test(ttpv, nodes(x$graph), thr=0.001)
68
+      if(!is.null(x)) {
69
+          if(is.NCIgraph(x$graph)) {
70
+            geneIDs <- translateNCI2GeneID(x$graph)
71
+          } else {
72
+            geneIDs <- translateKEGGID2GeneID(nodes(x$graph))
73
+          }
74
+          ht <- hyper.test(ttpv, geneIDs, thr=0.001)
53 75
           x$p.value <- c(x$p.value, ht$p.value)
54 76
           names(x$p.value)[length(names(x$p.value))] <- "Hypergeometric test"
55 77
           return(x)
78
+        } else {
79
+          return(NULL)
56 80
         }
57
-      else
58
-        return(NULL)
59 81
     })
82
+  }
60 83
   resList <- c(resList, list(res))
61 84
 }
62 85
 resNames <- names(grList)
63
-pLabels <- sapply(grList, attr, "label")
86
+
87
+if(is.NCIgraph(grList[[1]])) {
88
+  pLabels <- names(grList)
89
+} else {
90
+  pLabels <- sapply(grList, attr, "label")
91
+}
64 92
 
65 93
 ## get rid of NULL results (no connected component of size > 1)
66 94
 isNULL <- sapply(resList, is.null)
... ...
@@ -99,7 +127,8 @@ for (res in resList) {
99 127
   pp <- sapply(res, FUN=function(x) {
100 128
     x$p.value
101 129
   })
102
-  pKEGG <- cbind(pKEGG, pp)
130
+  if(length(pp))
131
+    pKEGG <- cbind(pKEGG, pp)
103 132
 }
104 133
 colnames(pKEGG) <- graphNames
105 134
 rn <- rownames(pKEGG)
... ...
@@ -159,7 +188,7 @@ title(sprintf("Number of significant genes\n with the three BY-corrected tests a
159 188
 
160 189
 ##pathwayNames[graphNames[order(pHG)[[1]]]]
161 190
 
162
-oo <- order(pF/pH)[1:5]
191
+oo <- order(pF/pH)[1:25]
163 192
 
164 193
 gIdx <- oo[1]
165 194
 
... ...
@@ -173,11 +202,19 @@ if (require(marray)) {
173 202
 shift <- tts # Plot t-statistics
174 203
 ##shift <- -getMeanShift(exprData, classData) # Plot mean shifts
175 204
 
176
-dn <- getDisplayName(gr, shortLabel=TRUE)
177
-mm <- match(translateKEGGID2GeneID(nodes(gr)), rownames(annData))
205
+## dn <- getDisplayName(gr, shortLabel=TRUE)
206
+
207
+if(is.NCIgraph(gr)) {
208
+  mm <- match(translateNCI2GeneID(gr), rownames(annData))
209
+  nodes(gr) <- translateNCI2GeneID(gr)
210
+} else {
211
+  mm <- match(translateKEGGID2GeneID(nodes(gr)), rownames(annData))
212
+}
178 213
 dn <- annData[mm, "NCBI.gene.symbol"]
179
-  
180
-res <- plotValuedGraph(gr, values=shift, nodeLabels=dn, qMax=0.95, colorPalette=pal, height=40, lwd=1, verbose=verbose)
214
+
215
+##pdf("Loi2008-leukocyte.pdf",width=10,height=10)
216
+res <- plotValuedGraph(gr, values=shift, nodeLabels=dn, qMax=0.95, colorPalette=pal, height=40, lwd=1, cex=0.7, verbose=verbose)
217
+##devDone()
181 218
 title(paste(pathwayNames[graphNames[gIdx]],"pF=",as.character(signif(pF[gIdx],4)),"pH=",as.character(signif(pH[gIdx],4))))
182 219
 
183 220
 ## mm <- match(translateKEGGID2GeneID(nodes(gr)), names(shift))
... ...
@@ -188,18 +225,28 @@ title(paste(pathwayNames[graphNames[gIdx]],"pF=",as.character(signif(pF[gIdx],4)
188 225
 ## step.
189 226
 
190 227
 fSignif <- which(BHpKEGG[2,]<0.05)
191
-##fSignif <- fSignif[order(BHpKEGG[1,fSignif],decreasing=TRUE)]
192
-fSignif <- fSignif[order(BHpKEGG[2,fSignif],decreasing=FALSE)] # Order by increasing pF
228
+fSignif <- fSignif[order(BHpKEGG[1,fSignif],decreasing=TRUE)]
229
+##fSignif <- fSignif[order(BHpKEGG[2,fSignif],decreasing=FALSE)] # Order by increasing pF
193 230
 
194 231
 
195 232
 ## Plot all graphs
196 233
 ##dev.new()
197
-pdf("MI-smoothGraphs-loi.pdf")
234
+##printFileName <- "Tech-MI-smoothGraphs-loi-dec"
235
+printFileName <- "Loi-NCI"
236
+printStarted <- FALSE
237
+pdf(paste(printFileName,".pdf",sep=""))
198 238
 for (gIdx in fSignif) {
199 239
   ## Get graph
200 240
   gr <- graphList[[gIdx]]
201
-  mm <- match(translateKEGGID2GeneID(nodes(gr)), rownames(annData))
241
+
242
+  if(is.NCIgraph(gr)) {
243
+    mm <- match(translateNCI2GeneID(gr), rownames(annData))
244
+    nodes(gr) <- translateNCI2GeneID(gr)
245
+  } else {
246
+    mm <- match(translateKEGGID2GeneID(nodes(gr)), rownames(annData))
247
+  }
202 248
   dn <- annData[mm, "NCBI.gene.symbol"]
249
+
203 250
   ## Try to plot
204 251
   tt <- try(res <- plotValuedGraph(gr, values=shift, nodeLabels=dn, qMax=0.95, colorPalette=pal, height=40, lwd=1, cex=0.3, verbose=verbose))
205 252
   ## Add legend
... ...
@@ -210,7 +257,14 @@ for (gIdx in fSignif) {
210 257
     txt2 <- paste("p(T2F[", ndims[gIdx], "])=", ps[2], sep="")
211 258
     txt <- paste(txt1, txt2, sep="\n")
212 259
     stext(side=3, pos=1, txt)
213
-    image.plot(legend.only=TRUE, zlim=range(res$breaks), col=pal, legend.shrink=0.3, legend.width=0.8, legend.lab="t-scores", legend.mar=3.3) 
260
+    image.plot(legend.only=TRUE, zlim=range(res$breaks), col=pal, legend.shrink=0.3, legend.width=0.8, legend.lab="t-scores", legend.mar=3.3)
261
+    ## Now print gene table in a tex file
262
+    cIdx <- match(nodes(gr),names(shift)) #which(names(shift)%in% nodes(gr))
263
+    gTable <- cbind(dn,signif(shift[cIdx],2),signif(ttpv[cIdx],2))
264
+    colnames(gTable) <- c("Gene symbol","t-stat","p-value")
265
+    xt <- xtable(gTable,caption=sprintf("%s, p(Hotelling)=%g, p(netHotelling)=%g",pathwayNames[gIdx], ps[1], ps[2]))
266
+    print(xt,file=paste(printFileName,"-Tables.tex",sep=""),tabular.environment='longtable',floating=FALSE, append=printStarted)
267
+    printStarted <- TRUE
214 268
   } else {
215 269
     warning("check gIdx=", gIdx)
216 270
   }
... ...
@@ -221,5 +275,3 @@ devDone()
221 275
 ts <- format(Sys.time(), "%Y-%m-%d,%X")
222 276
 filename <- sprintf("pKEGG,signed,normalized,%s.rda", ts)
223 277
 save(resList, pKEGG, graphList, pathwayNames, ndims, file=filename)
224
-
225
-
... ...
@@ -1,6 +1,10 @@
1 1
 Package: DEGraph
2 2
 ===================
3 3
 
4
+Version: 1.2.0 [2011-03-23]
5
+o Added support of NCI networks through the NCIgraph package.
6
+o Updated the man page of the Loi2008 data.
7
+
4 8
 Version: 0.3.3 [2010-10-08]
5 9
 o Now validating 'verbose' argument in all functions.
6 10
 
... ...
@@ -16,6 +16,20 @@ data("Loi2008_DEGraphVignette")
16 16
 dim(exprLoi2008)
17 17
 head(exprLoi2008)
18 18
 }
19
+
20
+\details{
21
+  The original data set corresponds to data processed by RMA and median-centered
22
+  as available from the GSE6532 GEO archive:
23
+  http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE6532.
24
+  
25
+  These data were summarized from the probe set level to the gene level as 
26
+  follows. The expression level of a gene was defined as the expression level 
27
+  of the probe set with largest alignment score among all probe sets mapping 
28
+  to this gene according to the annotation in GSE6532. When the largest 
29
+  alignment score was achieved by several probe sets, the median expression 
30
+  level of those probe sets was taken.
31
+}
32
+
19 33
 \source{
20 34
   Loi et al.,
21 35
   \emph{Predicting prognosis using molecular profiling in estrogen