Browse code

bugfix for downloading KEGG data ( trunk)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ROntoTools@97968 bc3139a8-67e5-0310-9ffc-ced21a209358

Calin Voichita authored on 02/01/2015 20:47:08
Showing 6 changed files

... ...
@@ -1,16 +1,31 @@
1 1
 Package: ROntoTools
2 2
 Type: Package
3 3
 Title: R Onto-Tools suite
4
-Version: 1.7.0
5
-Author: Calin Voichita <calin@wayne.edu> and Sorin Draghici
6
-        <sorin@wayne.edu>
4
+Version: 1.7.1
5
+Author: Calin Voichita <calin@wayne.edu> and Sorin Draghici <sorin@wayne.edu>
7 6
 Maintainer: Calin Voichita <calin@wayne.edu>
8 7
 Description: Suite of tools for functional analysis
8
+biocViews: NetworkAnalysis, Microarray, GraphsAndNetworks
9 9
 License: GPL (>= 3)
10
-Depends: methods, graph, boot, KEGGREST, KEGGgraph, Rgraphviz
11
-Suggests: RUnit, BiocGenerics
12
-Collate: 'pathwayExpress.R' 'utils.R' 'graphWeights.R' 'keggDataREST.R'
13
-        'AllClasses.R' 'AllGenerics.R' 'nodeWeights-methods.R'
14
-        'pePathway-utils.R' 'Summary-methods.R' 'plot-methods.R'
15
-        'renderInfo-methods.R'
16
-biocViews: Network, Microarray, GraphAndNetwork
10
+Depends:
11
+    methods,
12
+    graph,
13
+    boot,
14
+    KEGGREST,
15
+    KEGGgraph,
16
+    Rgraphviz
17
+Suggests:
18
+    RUnit,
19
+    BiocGenerics
20
+Collate:
21
+    'pathwayExpress.R'
22
+    'utils.R'
23
+    'graphWeights.R'
24
+    'keggDataREST.R'
25
+    'AllClasses.R'
26
+    'AllGenerics.R'
27
+    'nodeWeights-methods.R'
28
+    'pePathway-utils.R'
29
+    'Summary-methods.R'
30
+    'plot-methods.R'
31
+    'renderInfo-methods.R'
... ...
@@ -23,17 +23,11 @@ loadKEGGpathwayDataREST <- function(organism = "hsa",
23 23
     
24 24
     allPathwayInfo <- lapply(names(pathList), 
25 25
                              function(pathID) {
26
-                               
27 26
                                p <- keggGet(pathID, "kgml")
28
-                               
29 27
                                pKgml <- file.path(tmpDir, paste(strsplit(pathID, ":")[[1]][2], ".kgml", sep = ""))
30
-                               
31 28
                                write(p, pKgml)
32
-                               
33 29
                                pathData <- parseKGML(pKgml)
34
-                               
35 30
                                file.remove(pKgml)
36
-                               
37 31
                                if(verbose)
38 32
                                  setTxtProgressBar(pb, getTxtProgressBar(pb) + 1)
39 33
                                return(pathData)
... ...
@@ -98,7 +92,7 @@ keggPathwayGraphs <- function(organism = "hsa",
98 92
                               verbose = TRUE)
99 93
 {
100 94
   defaultParameters <- FALSE
101
-  if ((organism == "hsa") & all(targRelTypes == c("GErel","PCrel","PPrel")) &
95
+  if ((organism == "hsa") & all(targRelTypes %in% c("GErel","PCrel","PPrel")) &
102 96
         (relPercThresh == 0.9) & (nodeOnlyGraphs == FALSE))
103 97
     defaultParameters <- TRUE  
104 98
   
... ...
@@ -124,8 +118,10 @@ keggPathwayGraphs <- function(organism = "hsa",
124 118
   counts <- do.call(rbind,lapply(l, function(x) as.vector(x[allRelTypes])))
125 119
   colnames(counts) <- allRelTypes
126 120
   
121
+  counts[is.na(counts)] <- 0
122
+  
127 123
   accIndex <- rowSums(counts[,targRelTypes], na.rm=T) / rowSums(counts, na.rm=T) >= relPercThresh
128
-  accIndex[is.na(accIndex)] <- FALSE  
124
+  accIndex[is.na(accIndex)] <- nodeOnlyGraphs
129 125
   allPathwayInfo <- allPathwayInfo[accIndex]
130 126
   
131 127
   names(allPathwayInfo) <- sapply(allPathwayInfo, getName)
... ...
@@ -138,24 +134,45 @@ keggPathwayGraphs <- function(organism = "hsa",
138 134
   
139 135
   pathwayGraphs <- lapply(allPathwayInfo, function(g) 
140 136
   {
141
-    g <- KEGGgraph::KEGGpathway2Graph(g)
142
-    kg <- new("graphNEL", nodes(g), edges(g), edgemode = "directed")
143 137
     
138
+#     tryCatch(
139
+      g <- KEGGpathway2Graph(g, expandGenes=TRUE)
140
+#       , error = function(e) {})    
141
+    if (class(g) != "graphNEL") {
142
+      return(NULL)
143
+    }
144
+    
145
+    kg <- new("graphNEL", nodes(g), edges(g), edgemode = "directed")
146
+
144 147
     if (length(getKEGGedgeData(g)) == 0)
145 148
     {
146 149
       if (verbose)
147 150
         setTxtProgressBar(pb, getTxtProgressBar(pb) + 1)
148
-      return(NULL)
151
+      if (nodeOnlyGraphs)
152
+        return(kg)
153
+      else
154
+        return(NULL)
149 155
     }
150
-    edgeDataDefaults(kg, "subtype") <- NA  
151
-        
152
-    relGeneTable <- data.frame(cbind(
156
+    edgeDataDefaults(kg, "subtype") <- NA
157
+    
158
+
159
+    cf <- cbind(
153 160
       do.call(rbind, strsplit(names(getKEGGedgeData(g)), '~')),
154 161
       sapply(getKEGGedgeData(g), function(e) 
155 162
         paste(lapply(getSubtype(e), getName), collapse=","))
156
-    ), stringsAsFactors = FALSE)
163
+    );
164
+
165
+    if (nrow(cf) < 2) {
166
+      ucf <- cf
167
+    } else {
168
+      ucf <- cf[unique(rownames(cf)),]
169
+      ucf[,3] <- tapply(cf[,3], rownames(cf), function(ll) return(paste( unique(ll), collapse = ',')))[rownames(ucf)]
170
+    }
171
+    
172
+
173
+    relGeneTable <- data.frame(ucf, stringsAsFactors = FALSE)
157 174
     names(relGeneTable) <- c("from","to","subtype")
158
-        
175
+
159 176
     edgeData(kg, relGeneTable$from, relGeneTable$to, "subtype") <- relGeneTable$subtype
160 177
 
161 178
     if (verbose)
... ...
@@ -213,3 +230,86 @@ keggPathwayNames <- function(organism = "hsa",
213 230
   return(allNames)
214 231
 }
215 232
 
233
+#' Modified version of the same function from KEGGgraph
234
+#' 
235
+#' @keywords internal
236
+KEGGpathway2Graph <- function (pathway, genesOnly = TRUE, expandGenes = TRUE) 
237
+{
238
+  stopifnot(is(pathway, "KEGGPathway"))
239
+  pathway <- splitKEGGgroup(pathway)
240
+  if (expandGenes) {
241
+    pathway <- expandKEGGPathway(pathway)
242
+  }
243
+  knodes <- nodes(pathway)
244
+  kedges <- unique(edges(pathway))
245
+  node.entryIDs <- getEntryID(knodes)
246
+  edge.entryIDs <- getEntryID(kedges)
247
+  V <- node.entryIDs
248
+  edL <- vector("list", length = length(V))
249
+  names(edL) <- V
250
+  if (is.null(nrow(edge.entryIDs))) {
251
+    for (i in seq(along = edL)) {
252
+      edL[[i]] <- list()
253
+    }
254
+  }
255
+  else {
256
+    for (i in 1:length(V)) {
257
+      id <- node.entryIDs[i]
258
+      hasRelation <- id == edge.entryIDs[, "Entry1ID"]
259
+      if (!any(hasRelation)) {
260
+        edL[[i]] <- list(edges = NULL)
261
+      }
262
+      else {
263
+        entry2 <- unname(edge.entryIDs[hasRelation, "Entry2ID"])
264
+        edL[[i]] <- list(edges = entry2)
265
+      }
266
+    }
267
+  }
268
+  gR <- new("graphNEL", nodes = V, edgeL = edL, edgemode = "directed")
269
+  names(kedges) <- sapply(kedges, function(x) paste(getEntryID(x), 
270
+                                                    collapse = "~"))
271
+  env.node <- new.env()
272
+  env.edge <- new.env()
273
+  assign("nodes", knodes, envir = env.node)
274
+  assign("edges", kedges, envir = env.edge)
275
+  nodeDataDefaults(gR, "KEGGNode") <- env.node
276
+  edgeDataDefaults(gR, "KEGGEdge") <- env.edge
277
+  if (genesOnly) {    
278
+    gR <- subGraphByNodeType(gR, "gene")
279
+  }
280
+  return(gR)
281
+}
282
+
283
+#' Modified version of the same function from KEGGgraph
284
+#' 
285
+#' @keywords internal
286
+subGraphByNodeType <- function (graph, type = "gene") 
287
+{
288
+  kegg.node.data <- getKEGGnodeData(graph)
289
+  types <- sapply(kegg.node.data, getType)
290
+  isType <- grep(type, types)
291
+  if (!any(isType)) {
292
+    stop("No '", type, "' type found in the file, maybe it is a map file. Please try parsing the file with 'genesOnly=FALSE'\n")
293
+  }
294
+  new.nodes <- names(types[isType])
295
+  new.edges <- edges(graph)
296
+  new.edges <- new.edges[names(new.edges) %in% new.nodes]
297
+  new.edges <- lapply(new.edges, function(eL) return(eL[eL %in% new.nodes]))
298
+  
299
+  subgraph <- new("graphNEL", new.nodes, new.edges, edgemode = "directed")
300
+    
301
+  subnodes <- new.nodes
302
+  subedges <- getRgraphvizEdgeNames(subgraph)
303
+  keggnodes <- get("nodes", nodeDataDefaults(graph, "KEGGNode"))
304
+  keggedges <- get("edges", edgeDataDefaults(graph, "KEGGEdge"))
305
+  subkeggnodes <- keggnodes[subnodes]
306
+  subkeggedges <- keggedges[subedges]
307
+  env.node <- new.env()
308
+  env.edge <- new.env()
309
+  assign("nodes", subkeggnodes, envir = env.node)
310
+  assign("edges", subkeggedges, envir = env.edge)
311
+  nodeDataDefaults(subgraph, "KEGGNode") <- env.node
312
+  edgeDataDefaults(subgraph, "KEGGEdge") <- env.edge
313
+  
314
+  return(subgraph)
315
+}
... ...
@@ -197,7 +197,7 @@ pe.boot <- function(g, x, ref, nboot, all.genes = F)
197 197
   if (length(pePath@input) == 0)
198 198
     return(NULL)
199 199
   
200
-  if (is.null(inv))
200
+  if (is.null(inv) || (numEdges(g) == 0))
201 201
   {
202 202
     pePath@asGS <- TRUE
203 203
     return(pePath)
... ...
@@ -270,4 +270,4 @@ compute.B <- function(g, non.zero = TRUE)
270 270
   return(B)
271 271
 }
272 272
 
273
-  
273
+  
274 274
\ No newline at end of file
275 275
Binary files a/inst/extdata/KEGGRESTunparsed_hsa.RData and b/inst/extdata/KEGGRESTunparsed_hsa.RData differ
276 276
Binary files a/inst/extdata/kpgDefault.RData and b/inst/extdata/kpgDefault.RData differ
... ...
@@ -1,4 +1,5 @@
1 1
 \name{Summary}
2
+\alias{Summary}
2 3
 \alias{Summary,peRes-method}
3 4
 \alias{Summary.peRes}
4 5
 \title{Summarize the results of a Pathway-Express analysis}