Browse code

Release 0.99.0 Biocunductor submission

amfy10 authored on 10/03/2014 12:16:44
Showing 85 changed files

1 1
new file mode 100755
... ...
@@ -0,0 +1,26 @@
1
+Package: NetPathMiner
2
+Version: 0.99.0
3
+Date: 2014 onwards
4
+Title: NetPathMiner for Biological Network Construction, Path Mining
5
+        and Visualization
6
+Author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>, Tim Hancock
7
+    <timothy.hancock@kuicr.kyoto-u.ac.jp>, Ichigaku Takigawa
8
+    <takigawa@kuicr.kyoto-u.ac.jp>, Nicolas Wicker <nicolas.wicker@unistra.fr>
9
+Maintainer: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>
10
+Description: NetPathMiner is a general framework for network path mining using
11
+    genome-scale networks. It constructs networks from KGML, SBML and BioPAX
12
+    files, providing three network representations, metabolic, reaction and
13
+    gene representations. NetPathMiner finds active paths and applies machine
14
+    learning methods to summarize found paths for easy interpretation. It also
15
+    provides static and interactive visualizations of networks and paths to aid
16
+    manual investigation.
17
+Depends: igraph (>= 0.6)
18
+Suggests: rBiopaxParser, RCurl, RCytoscape
19
+License: GPL (>= 2)
20
+URL: https://github.com/ahmohamed/NetPathMiner
21
+NeedsCompilation: yes
22
+SystemRequurements: libxml2, libSBML (>= 5.5)
23
+biocViews: GraphAndNetwork, Pathways, Network, Clustering,
24
+        Classification
25
+Biarch: TRUE
26
+Packaged: 2014-03-10 09:06:56 UTC; mohamedahmed
0 27
new file mode 100644
... ...
@@ -0,0 +1,45 @@
1
+export(KGML2igraph)
2
+export(NPMdefaults)
3
+export(SBML2igraph)
4
+export(assignEdgeWeights)
5
+export(biopax2igraph)
6
+export(colorVertexByAttr)
7
+export(expandComplexes)
8
+export(extractPathNetwork)
9
+export(fetchAttribute)
10
+export(getAttrNames)
11
+export(getAttrStatus)
12
+export(getAttribute)
13
+export(getGeneSetNetworks)
14
+export(getGeneSets)
15
+export(getPathsAsEIDs)
16
+export(layoutVertexByAttr)
17
+export(makeGeneNetwork)
18
+export(makeReactionNetwork)
19
+export(pathClassifier)
20
+export(pathCluster)
21
+export(pathRanker)
22
+export(pathsToBinary)
23
+export(plotAllNetworks)
24
+export(plotClassifierROC)
25
+export(plotClusterMatrix)
26
+export(plotClusterProbs)
27
+export(plotClusters)
28
+export(plotCytoscape)
29
+export(plotNetwork)
30
+export(plotPathClassifier)
31
+export(plotPathCluster)
32
+export(plotPaths)
33
+export(predictPathClassifier)
34
+export(predictPathCluster)
35
+export(registerMemoryErr)
36
+export(rmAttribute)
37
+export(rmSmallCompounds)
38
+export(samplePaths)
39
+export(setAttribute)
40
+export(simplifyReactionNetwork)
41
+export(stdAttrNames)
42
+export(toGraphNEL)
43
+export(vertexDeleteReconnect)
44
+import(igraph)
45
+useDynLib(NetPathMiner)
0 46
new file mode 100644
1 47
new file mode 100644
... ...
@@ -0,0 +1,102 @@
1
+###############################################################################
2
+#
3
+# NPM-package.R: This file contains the documentation for the package itself, and .onLoad function.
4
+# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>
5
+#
6
+# This is released under GPL-2.
7
+# 
8
+# Documentation was created using roxygen
9
+#
10
+###############################################################################
11
+
12
+#' General framework for network extraction, path mining.
13
+#'
14
+#' NetPathMiner implements a flexible module-based process flow for network path mining and visualization,
15
+#' which can be fully inte-grated with user-customized functions. 
16
+#' NetPathMiner supports construction of various types of genome scale networks from KGML, SBML and BioPAX
17
+#' formats, enabling its utility to most common pathway databases. 
18
+#' NetPathMiner also provides different visualization techniques to facilitate the analysis of even 
19
+#' thousands of output paths. 
20
+#' 
21
+#' @import igraph
22
+#' @useDynLib NetPathMiner
23
+#' @author Ahmed Mohamed \email{mohamed@@kuicr.kyoto-u.ac.jp}
24
+#' @name NetPathMiner-package
25
+#' @aliases NetPathMiner NPM
26
+#' @docType package
27
+#' 
28
+NULL
29
+
30
+#' Biopax example data
31
+#' 
32
+#' A dataset containing Porphyrin metabolism pathway in Biopax Level 3 and parsed with
33
+#' \code{\link[rBiopaxParser]{readBiopax}}.
34
+#' 
35
+#' @docType data
36
+#' @name ex_biopax
37
+#' @examples
38
+#' data(ex_biopax)
39
+#' ex_biopax
40
+#' 
41
+NULL
42
+
43
+#' Singaling network from KGML example
44
+#' 
45
+#' An exmaple igraph object representing Ras and chemokine signaling pathways in human 
46
+#' extracted from KGML files.
47
+#' 
48
+#' @docType data
49
+#' @name ex_kgml_sig
50
+#' @examples
51
+#' data(ex_kgml_sig)
52
+#' plotNetwork(ex_kgml_sig, vertex.color="pathway")
53
+#' 
54
+NULL
55
+
56
+#' Metabolic network from SBML example
57
+#' 
58
+#' An example igraph object representing bipartite metabolic network of Carbohydrate
59
+#' metabolism extracted from SBML file from Reactome database.
60
+#' 
61
+#' @docType data
62
+#' @name ex_sbml
63
+#' @examples
64
+#' data(ex_sbml)
65
+#' plotNetwork(ex_sbml, vertex.color="compartment.name")
66
+#' 
67
+NULL
68
+
69
+#' An microarray data example.
70
+#' 
71
+#' An microarray data example. This is part of the ALL dataset, for demonestration purposes.
72
+#' 
73
+#' @docType data
74
+#' @name ex_microarray
75
+#' @examples
76
+#' data(ex_microarray)
77
+#' 
78
+NULL
79
+
80
+.onLoad<- function(lib, pkg){
81
+    env <- new.env()
82
+    load(file.path(find.package("NetPathMiner"), "extdata", "env_data.RData"), envir=env)
83
+    env$memory.err <- character()
84
+    options(NPM_ENV = env)
85
+}
86
+
87
+#' Internal method to register memery errors.
88
+#' 
89
+#' Internal method to register memery errors, caused by compiled code. This method
90
+#' is used only by the package, and should not be invoked by users. 
91
+#' 
92
+#' @param method The mathod which generated the error. 
93
+#' 
94
+#' @return NULL
95
+#' 
96
+#' @author Ahmed Mohamed
97
+#' @export
98
+#'  
99
+registerMemoryErr <- function(method){
100
+    env <-options("NPM_ENV")[[1]]
101
+    env$memory.err <- c(env$memory.err, method)
102
+}
0 103
new file mode 100644
... ...
@@ -0,0 +1,1077 @@
1
+###############################################################################
2
+#
3
+# dbExtract.R:     This file contains all functions related to pathway file 
4
+# processing into igraph objects.
5
+# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>
6
+#
7
+# This is released under GPL-2.
8
+# 
9
+# Documentation was created using roxygen
10
+#
11
+###############################################################################
12
+
13
+
14
+check.file <- function(filename, ext=".xml"){
15
+    if(is.null(filename)) stop("No file provided!")
16
+    if(length(filename)==1){
17
+        if (!file.exists(filename)) stop("Cannot find file:", filename)
18
+        if(length(list.files(filename))==0)
19
+            return(filename)
20
+        
21
+        fileList = file.path(filename,list.files(filename, ext))
22
+        return(check.file(fileList))
23
+    }else{
24
+        return(unlist(sapply(filename, check.file, USE.NAMES=FALSE)));
25
+    }        
26
+}
27
+
28
+#' Processes KGML files into igraph objects
29
+#' 
30
+#' This function takes KGML files as input, and returns either a metabolic or a signaling
31
+#' network as output. 
32
+#' 
33
+#' Users can specify whether files are processes as metabolic or signaling networks. 
34
+#' 
35
+#' Metabolic networks are given as bipartite graphs, where metabolites and reactions represent
36
+#' vertex types. This is constructed from <reaction> xml node in KGML file, connecting them
37
+#' to their corresponding substrates and products. Each reaction vertex has \code{genes} attribute,
38
+#' listing all genes associated with the reaction. As a general rule, reactions inherit all annotation
39
+#' attributes of its catalyzig genes. 
40
+#' 
41
+#' Signaling network have genes as vertices and edges represent interactions, such as activiation / inhibition. 
42
+#' Genes participating in successive reactions are also connected. Signaling parsing method processes <ECrel>, <PPrel> 
43
+#' and <PCrel> interactions from KGML files.
44
+#' 
45
+#' To generate a genome scale network, simply provide a list of files to be parsed, or put all
46
+#' file in a directory, as pass the directory path as \code{filename}
47
+#' 
48
+#' @param filename A character vector containing the KGML files to be processed. 
49
+#' If a directory path is provided, all *.xml files in it and its subdirectories are included. 
50
+#' @param parse.as Whether to process file into a metabolic or a signaling network.
51
+#' @param expand.complexes Split protein complexes into individual gene nodes. This argument is 
52
+#' ignored if \code{parse.as="metabolic"} 
53
+#' @param verbose Whether to display the progress of the function.
54
+#' 
55
+#' @return An igraph object, representing a metbolic or a signaling network.
56
+#' @author Ahmed Mohamed
57
+#' @family Database extraction methods
58
+#' @export
59
+#' @examples
60
+#' if(is.loaded("readkgmlfile")){ # This is false if libxml2 wasn't available at installation.
61
+#'     filename <- system.file("extdata", "hsa00860.xml", package="NetPathMiner") 
62
+#' 
63
+#'     # Process KGML file as a metabolic network 
64
+#'     g <- KGML2igraph(filename)
65
+#'     plotNetwork(g)
66
+#' 
67
+#'     # Process KGML file as a signaling network
68
+#'     g <- KGML2igraph(filename, parse.as="signaling", expand.complexes=TRUE)
69
+#'     dev.new()
70
+#'     plotNetwork(g)
71
+#' }
72
+#' 
73
+KGML2igraph <- function(filename, parse.as=c("metabolic","signaling"), expand.complexes=FALSE, verbose=TRUE){
74
+    if(!is.loaded("readkgmlfile"))
75
+        stop("KGML2igraph requires libxml2 to be present. Please reinstall NetPathMiner after installing libxml2.")
76
+    if("KGML2igraph" %in% options("NPM_ENV")[[1]]$memory.err)
77
+        stop("KGML2igraph has previously caused a critical memory error. For safety reasons, please restart R.")
78
+    
79
+    fileList = check.file(filename)
80
+    if(length(fileList)==0) stop("No files to process!")
81
+    # Check the parsing method.
82
+    if(!missing(parse.as)){
83
+        if(parse.as=="signaling"){
84
+            return(KGML_signal(fileList, expand.complexes, verbose))
85
+        }else{
86
+            if(parse.as != "metabolic")
87
+                stop("Unknown parsing method:", parse.as)
88
+        }
89
+    }
90
+    
91
+    if(verbose) message("Parsing KGML files as metabolic networks")
92
+    # If a directory is provided, all xml files are processed.
93
+    if(length(fileList)==1){
94
+        zkgml <- .Call("readkgmlfile", FILENAME = fileList, VERBOSE=verbose)
95
+    }else{
96
+        zkgml <- unlist(sapply(fileList, 
97
+                        function(x) .Call("readkgmlfile", FILENAME = x, VERBOSE=verbose)
98
+            , USE.NAMES=FALSE), recursive=FALSE)
99
+        
100
+        #Resolve reactions particiapting in multiple pathways
101
+        dup.rns = lapply(zkgml[duplicated(names(zkgml))], "[[", "miriam.kegg.pathway")
102
+        if(length(dup.rns)>0){
103
+            lapply(1:length(dup.rns), 
104
+                function(x) zkgml[[ names(dup.rns)[x] ]]$miriam.kegg.pathway <<- 
105
+                            unique( c(zkgml[[ names(dup.rns)[x] ]]$miriam.kegg.pathway, dup.rns[[x]] )
106
+                ))            
107
+        }
108
+    }
109
+    
110
+    if(verbose) message("Files processed succefully. Building the igraph object.")
111
+    ######### Make the reaction substrate network ############ 
112
+    edges <-do.call("rbind",lapply(zkgml, 
113
+                    function(x)expand.grid(x$reactants, x$name, x$reactant.stoichiometry))) 
114
+            
115
+    edges <- rbind(
116
+            do.call("rbind",lapply(zkgml, 
117
+                            function(x)expand.grid(x$name,x$products, x$product.stoichiometry)))
118
+            , edges)
119
+    
120
+    names(edges) <- c("from","to", "stoichiometry")
121
+    
122
+    ########## Making the iGraph object ###################
123
+    # contructing a bipartite graph and removing loop edges
124
+    graph = simplify(graph.data.frame(edges), edge.attr.comb="first")
125
+    
126
+    reactions <- V(graph)$name %in% names(zkgml)
127
+    V(graph)$reactions <- reactions
128
+    V(graph)$shape<- ifelse(V(graph)$type==TRUE, "square", "circle")
129
+    V(graph)$color <- ifelse(V(graph)$type==TRUE,"red", "skyblue")
130
+    
131
+    V(graph)[reactions]$attr <- zkgml[V(graph)[reactions]$name]    
132
+    V(graph)[!reactions]$attr = lapply(V(graph)[!reactions]$name, function(x) list(miriam.kegg.compound=x))
133
+    
134
+    graph$source = "KGML"
135
+    graph$type = "MR.graph"
136
+    
137
+    return(graph)
138
+}
139
+
140
+
141
+KGML_signal <- function(fileList, expand.complexes, verbose){
142
+    if(verbose) message("Parsing KGML files as metabolic networks")
143
+    zkgml <- .Call("readkgml_sign", FILENAME = fileList, 
144
+                EXPAND_COMPLEXES = expand.complexes, VERBOSE=verbose)
145
+    
146
+    if(verbose) message("Files processed succefully. Building the igraph object.")
147
+    
148
+    graph = graph.empty() + vertices(names(zkgml[[1]]), attr=zkgml[[1]])
149
+    graph = graph + igraph::edges(zkgml[[2]], attr=zkgml[[3]])
150
+    
151
+    V(graph)$shape<- "circle"
152
+    V(graph)$color <- "blue"
153
+    
154
+    graph$source = "KGML"
155
+    graph$type = "S.graph"
156
+
157
+    return(graph)
158
+}
159
+
160
+#' Processes SBML files into igraph objects
161
+#' 
162
+#' This function takes SBML files as input, and returns either a metabolic or a signaling
163
+#' network as output. 
164
+#' 
165
+#' Users can specify whether files are processes as metabolic or signaling networks. 
166
+#' 
167
+#' Metabolic networks are given as bipartite graphs, where metabolites and reactions represent
168
+#' vertex types. This is constructed from \code{ListOfReactions} in SBML file, connecting them
169
+#' to their corresponding substrates and products (\code{ListOfSpecies}). Each reaction vertex has \code{genes} attribute,
170
+#' listing all \code{modifiers} of this reaction. As a general rule, reactions inherit all annotation
171
+#' attributes of its catalyzig genes.
172
+#' 
173
+#' Signaling network have genes as vertices and edges represent interactions. Since SBML format may 
174
+#' represent singling events as \code{reaction}, all species are assumed to be genes (rather than small
175
+#' molecules). For a simple path \code{S0 -> R1 -> S1}, in signaling network, the path will be
176
+#' \code{S0 -> M(R1) -> S1} where \code{M(R1)} is R1 modifier(s). To ditiguish gene species from small
177
+#' molecules, user can provide \code{gene.attr} (for example: \code{miriam.uniprot} or \code{miriam.ncbigene})
178
+#' where only annotated species are considered genes. 
179
+#' 
180
+#' All annotation attributes written according to MIRIAM guidlines (either \code{urn:miriam:xxx:xxx} or
181
+#' \code{http://identifiers.org/xxx/xxx}) are etxracted by default. Non-conforming attributes can be extracted
182
+#' by specifying \code{miriam.attr}.
183
+#' 
184
+#' To generate a genome scale network, simply provide a list of files to be parsed, or put all
185
+#' file in a directory, as pass the directory path as \code{filename}
186
+#' 
187
+#' Note: This function requires libSBML installed (Please see the installation instructions in the Vignette). 
188
+#' Some SBML level-3 files may requires additional libraries also (An infomative error will be displayed when 
189
+#' parsing such files). Please visit \url{http://sbml.org/Documents/Specifications/SBML_Level_3/Packages} for 
190
+#' more information. 
191
+#' 
192
+#' @param filename A character vector containing the SBML files to be processed. If a directory path 
193
+#' is provided, all *.xml and *.sbml files in it and its subdirectories are included. 
194
+#' @param parse.as Whether to process file into a metabolic or a signaling network.
195
+#' @param miriam.attr A list of annotation attributes to be extracted. If \code{"all"}, then all attibutes
196
+#' written in MIRIAM guidelines (see Details) are extracted (Default). If \code{"none"}, then no attributes
197
+#' are extracted. Otherwise, only attributes matching those specified are extracted. 
198
+#' @param gene.attr An attribute to distinguish \code{species} representing genes from those 
199
+#' representing small molecules (see Details). Ignored if \code{parse.as="metabolic"}.  
200
+#' @param expand.complexes Split protein complexes into individual gene nodes. Ignored if 
201
+#' \code{parse.as="metabolic"}, or when \code{gene.attr} is not provided.
202
+#' @param verbose Whether to display the progress of the function.
203
+#' 
204
+#' @return An igraph object, representing a metbolic or a signaling network.
205
+#' @author Ahmed Mohamed
206
+#' @family Database extraction methods
207
+#' @export
208
+#' @examples
209
+#' if(is.loaded("readsbmlfile")){ # This is false if libSBML wasn't available at installation.
210
+#'     filename <- system.file("extdata", "porphyrin.sbml", package="NetPathMiner") 
211
+#' 
212
+#'     # Process SBML file as a metabolic network 
213
+#'     g <- SBML2igraph(filename)
214
+#'     plotNetwork(g)
215
+#' 
216
+#'     # Process SBML file as a signaling network
217
+#'     g <- SBML2igraph(filename, parse.as="signaling", 
218
+#'                     gene.attr="miriam.uniprot",expand.complexes=TRUE)
219
+#'     dev.new()
220
+#'     plotNetwork(g)
221
+#' }
222
+SBML2igraph <- function(filename, parse.as=c("metabolic","signaling"), miriam.attr="all", 
223
+                    gene.attr, expand.complexes, verbose=TRUE){
224
+    if(!is.loaded("readsbmlfile"))
225
+        stop("SBML2igraph requires libSBML to be present. Please reinstall NetPathMiner after installing libSBML.")
226
+    
227
+    if("SBML2igraph" %in% options("NPM_ENV")[[1]]$memory.err)
228
+        stop("SBML2igraph has previously caused a critical memory error. For safety reasons, please restart R.")
229
+                
230
+    fileList <- check.file(filename, ext=".sbml|.xml")
231
+    if(length(fileList)==0) stop("No files to process!")
232
+    
233
+    if(!missing(parse.as)){
234
+        if(parse.as=="signaling"){
235
+            return(SBML_signal(fileList, miriam.attr,gene.attr,expand.complexes, verbose))
236
+        }else{
237
+            if(parse.as != "metabolic")
238
+                stop("Unknown parsing method:", parse.as)
239
+        }
240
+    }
241
+    if(verbose) message("Parsing SBML files as metabolic networks")
242
+    
243
+    if(length(fileList)==1){
244
+        zsbml <- .Call("readsbmlfile", FILENAME = fileList, ATTR_TERMS = miriam.attr, VERBOSE=verbose)
245
+        names(zsbml) <- c("reactions", "species")
246
+    }else{
247
+        zsbml <- sapply(fileList, function(x) 
248
+                    .Call("readsbmlfile", FILENAME = x, ATTR_TERMS = miriam.attr, VERBOSE=verbose)
249
+                    , USE.NAMES=FALSE)
250
+        
251
+        zsbml <- apply(zsbml, 1, function(x) c(unlist(x, recursive=FALSE)))
252
+        names(zsbml) <- c("reactions", "species")
253
+        
254
+        #Resolve reactions and species particiapting in multiple pathways
255
+        dup.rns = lapply(zsbml$reactions[duplicated(names(zsbml$reactions))], "[[", "pathway")
256
+        if(length(dup.rns)>0){
257
+            lapply(1:length(dup.rns), 
258
+                    function(x) zsbml$reactions[[ names(dup.rns)[x] ]]$pathway <<- 
259
+                                unique( c(zsbml$reactions[[ names(dup.rns)[x] ]]$pathway, dup.rns[[x]] )
260
+                                ))            
261
+        }
262
+    
263
+        zsbml$species <- zsbml$species[!duplicated(names(zsbml$species))]
264
+    }
265
+    if(verbose) message("SBML files processed successfully")
266
+        
267
+    if(verbose) message("Constructing Metabolic Network")
268
+    ######### Make the reaction substrate network ############ 
269
+    edges <-do.call("rbind",lapply(1:length(zsbml$reactions), 
270
+                    function(x)expand.grid(zsbml$reactions[[x]]$reactants,
271
+                                names(zsbml$reactions[x]),
272
+                                zsbml$reactions[[x]]$reactant.stoichiometry))) 
273
+    
274
+    edges <- rbind(
275
+            do.call("rbind",lapply(1:length(zsbml$reactions), 
276
+                            function(x)expand.grid(names(zsbml$reactions[x]),
277
+                                        zsbml$reactions[[x]]$products, 
278
+                                        zsbml$reactions[[x]]$product.stoichiometry)))
279
+            , edges)
280
+    
281
+    names(edges) <- c("from","to", "stoichiometry")    
282
+    
283
+    ########## Making the iGraph object ###################
284
+    # contructing a bipartite graph and removing multiple edges
285
+    graph <- graph.empty() + vertices(names(zsbml$reactions))
286
+    graph <- graph + vertices(names(zsbml$species))
287
+    graph <- graph + igraph::edges(c(t(edges[,1:2])), stoichiometry=edges$stoichiometry)
288
+    
289
+    graph <- simplify(graph.data.frame(edges), edge.attr.comb="first")
290
+    V(graph)$attr <- unlist(zsbml, recursive=FALSE, use.names=FALSE)
291
+    
292
+    # Set graphical attributes
293
+    reactions <- V(graph)$name %in% names(zsbml$reactions)
294
+    V(graph)$reactions <- reactions 
295
+    V(graph)$shape<- ifelse(V(graph)$reactions==TRUE, "square", "circle")
296
+    V(graph)$color <- ifelse(V(graph)$reactions==TRUE,"red", "skyblue")
297
+
298
+    graph$source = "SBML"
299
+    graph$type = "MR.graph"
300
+        
301
+    return(graph)
302
+}
303
+
304
+SBML_signal <- function(fileList, miriam.attr="all", gene.attr, expand.complexes, verbose){
305
+    if(verbose) message("Parsing SBML files as signaling networks")
306
+    zsbml <- .Call("readsbml_sign", FILENAME = fileList, ATTR_TERMS = miriam.attr, VERBOSE=verbose)
307
+    
308
+    if(verbose) message("SBML files processed successfully")
309
+    
310
+    # DeleteReconnect non-gene reactions (translocation, spontaneous)
311
+    # DeleteReconnect all non-gene, max 3 edges (R->s->RN->s->R will be disconnected).
312
+    graph = graph.empty() + vertices(zsbml$vertices, attr=zsbml$attr)
313
+    graph = graph + igraph::edges(zsbml$edges)
314
+    
315
+    if(missing(gene.attr)){
316
+        if(verbose) message("No attributes distiuishing genes were provided.")
317
+        if(verbose) message("All species from the SBML file are included in the network.")
318
+    }else{
319
+        no.gene = which(lapply(lapply(zsbml$attr, "[[", gene.attr), length)==0)
320
+        if(length(no.gene>0))
321
+            graph = vertexDeleteReconnect(graph, no.gene, reconnect.threshold=3)
322
+        
323
+        if(!missing(expand.complexes)&& expand.complexes)
324
+            graph = expandComplexes(graph, gene.attr, keep.parent.attr=miriam.attr)
325
+    }
326
+    
327
+    V(graph)$shape<- "circle"
328
+    V(graph)$color <- "blue"
329
+    
330
+    graph$source = "SBML"
331
+    graph$type = "S.graph"
332
+    
333
+    return(graph)    
334
+}
335
+
336
+# TODO: biopax signaling L2, Biopax metabolic L2 small molecules.
337
+#' Processes BioPAX objects into igraph objects
338
+#' 
339
+#' This function takes BioPAX objects (level 2 or 3) as input, and returns either a metabolic or a signaling
340
+#' network as output. 
341
+#' 
342
+#' This function requires \code{rBiopaxParser} installed. 
343
+#' 
344
+#' Users can specify whether files are processes as metabolic or signaling networks. 
345
+#' 
346
+#' Metabolic networks are given as bipartite graphs, where metabolites and reactions represent
347
+#' vertex types. Reactions are constructed from \code{Conversion} classes, connecting them
348
+#' to their corresponding \code{Left}s and \code{Right}s. Each reaction vertex has \code{genes} attribute,
349
+#' listing all \code{Catalysis} relationships of this reaction. As a general rule, reactions inherit all annotation
350
+#' attributes of its catalyzig genes.
351
+#' 
352
+#' Signaling network have genes as vertices and edges represent interactions, such as activiation / inhibition. 
353
+#' Genes participating in successive reactions are also connected. Signaling interactions are constructed from
354
+#' \code{Control} classes, where edges are drawn from \code{controller} to \code{controlled}.
355
+#' 
356
+#' All annotation attributes are exracted from \code{XRefs} associated with the vertices, and are stored according to
357
+#' MIRIAM guidelines (\code{miraim.db}, where db is the database name).
358
+#' 
359
+#' 
360
+#' @param biopax BioPAX object generated by \code{\link[rBiopaxParser]{readBiopax}}.
361
+#' @param parse.as Whether to process file into a metabolic or a signaling network.
362
+#' @param expand.complexes Split protein complexes into individual gene nodes. Ignored if 
363
+#' \code{parse.as="metabolic"}.
364
+#' @param inc.sm.molecules Include small molecules that are participating in signaling events. Ignored if 
365
+#' \code{parse.as="metabolic"}.
366
+#' @param verbose Whether to display the progress of the function.
367
+#' 
368
+#' @return An igraph object, representing a metbolic or a signaling network.
369
+#' @author Ahmed Mohamed
370
+#' @family Database extraction methods
371
+#' @export
372
+#' @examples
373
+#' if(require(rBiopaxParser)){
374
+#'     data(ex_biopax)
375
+#'     # Process biopax as a metabolic network 
376
+#'     g <- biopax2igraph(ex_biopax)
377
+#'     plotNetwork(g)
378
+#' 
379
+#'     # Process SBML file as a signaling network
380
+#'     g <- biopax2igraph(ex_biopax, parse.as="signaling", expand.complexes=TRUE)
381
+#' }
382
+biopax2igraph <- function(biopax, parse.as=c("metabolic","signaling"), 
383
+                    expand.complexes=FALSE, inc.sm.molecules=FALSE, verbose=TRUE){
384
+    if (!require(rBiopaxParser))
385
+        stop("This functions needs the rBiopaxParser library installed. Check out the installation instructions!")
386
+    if (!("biopax" %in% class(biopax))) 
387
+        stop("Error: pathway2RegulatoryGraph: parameter biopax has to be of class biopax.")
388
+    if(missing(parse.as) || parse.as=="metabolic"){
389
+        if(biopax$biopaxlevel == 3)
390
+            return(bpMetabolicL3(biopax, verbose))
391
+        else
392
+            return(bpMetabolicL2(biopax, verbose))
393
+    }else if(parse.as=="signaling"){
394
+        if(biopax$biopaxlevel == 3)
395
+            return(bpSignalingL3(biopax, expand.complexes, inc.sm.molecules, verbose))
396
+        else
397
+            stop("Parsing singaling networks from BioPAX level 2 files is not currently supported.")
398
+    }
399
+}
400
+
401
+bpMetabolicL3 <- function(biopax, verbose){
402
+    if(verbose) message("Processing BioPAX (level 3) object as a metabolic network", appendLF=FALSE) 
403
+    df <- biopax$df
404
+    df$property = tolower(df$property)
405
+    
406
+    # All conversion events
407
+    lefts <- df[df$property=="left",c(from="property_attr_value", to="id")]
408
+    lefts[,1] <- striph(lefts[,1])
409
+    rights <- df[df$property=="right",c("id","property_attr_value")]
410
+    rights[,2] <- striph(rights[,2])
411
+    
412
+    # Getting distiction between Metabolic and signaling reactions.
413
+    # Metabolic reactions takes only small molecules as substrates/products.
414
+    # The following lines removes any Biochemical reactions with non-small molecules
415
+    #   participants.
416
+    classes <- listInstances(biopax,lefts[,1])
417
+    sig.reactions <- lefts[match(classes$id, lefts[,1])[classes$class !="SmallMolecule"],2]
418
+    classes <- listInstances(biopax,rights[,2])
419
+    sig.reactions <- unlist(list(sig.reactions, 
420
+                    rights[ match(classes$id, rights[,2])[classes$class !="SmallMolecule"], 1]))
421
+    
422
+    lefts <- lefts[ !lefts[,2] %in% sig.reactions, ]
423
+    rights <- rights[ !rights[,1] %in% sig.reactions, ]
424
+    
425
+    # Putting the edges into an igraph object.
426
+    graph <- graph.data.frame( rbind(lefts, setNames(rights, names(lefts))) )
427
+    reactions <- V(graph)$name %in% unique(as.character(lefts[,2], rights[,1]))
428
+    
429
+    if(sum(reactions)==0) 
430
+        stop("No metabolic reactions found. Try parsing the file as a signaling netowrk.")
431
+    if(verbose) message(": ", sum(reactions), " reactions found.")
432
+        
433
+    XRefs <- bpGetReferences(biopax, V(graph)$name)
434
+    names(XRefs) <- V(graph)$name
435
+    cat.r <- striph(selectInstances(biopax, class="Catalysis", property="controlled")$property_attr_value)
436
+    cat.gene <- striph(selectInstances(biopax, class="Catalysis", property="controller")$property_attr_value)
437
+    
438
+    gene.xref <- bpGetReferences(biopax, cat.gene)
439
+    names(gene.xref) <- cat.r
440
+    lapply(names(gene.xref), function(x) XRefs[[x]] <<- c(XRefs[[x]],gene.xref[[x]]))
441
+    
442
+    ##############################################################
443
+    # Getting attribute lists (name, compartment, pathway, MIRIAM)
444
+    # For reactions (name, reactants, reactant.stoichiometry, products, product.stoichiomentry,
445
+    # reversible, kinetics, genes, pathway)
446
+    
447
+    # MIRIAM attributes 
448
+    attr <- bpGetAnnFromXRef(df, XRefs[V(graph)$name])
449
+    
450
+    # Name attributes
451
+    v.names <- listInstances(biopax, id=V(graph)$name)
452
+    v.names <- v.names[match(V(graph)$name, v.names$id), "name"]
453
+    
454
+    # Pathway attributes##########################
455
+    ## Get Pathway name and annotations
456
+    pw <- listInstances(biopax, class="pathway")
457
+    pwXRef <- bpGetReferences(biopax, pw$id)
458
+    #pw.ann <- bpGetAnnFromXRef(df, pwXRef)
459
+    
460
+    ## Get pathway components (only reactions are returned)
461
+    pwcomp <- lapply(pw$id, function(x)listPathwayComponents(biopax,x, returnIDonly=TRUE))
462
+    pwcomp <- do.call("rbind", lapply(1:length(pwcomp), 
463
+                    function(x)data.frame(id=x, comp=pwcomp[[x]])))
464
+    
465
+    ## Map pathways to reaction vertices.
466
+    pwcomp <- pwcomp[pwcomp$comp %in% V(graph)[reactions]$name, ]
467
+    pwcomp <- split(pwcomp$id, pwcomp$comp, drop=TRUE)
468
+    
469
+    ## For metabolites, pathway attributes are inherited from reactions they participate in.
470
+    leftright = rbind(lefts, rights)  ## Edges connecting metaboites to reactions
471
+    leftright$id = match(leftright$id, names(pwcomp)) ## Use numerical ids.
472
+    
473
+    pwcomp <-c(pwcomp,  
474
+            lapply(split(pwcomp[leftright$id], leftright$property_attr_value), 
475
+                    function(x) unique(unlist(x)) ) 
476
+    ) # Do the magic :)
477
+    
478
+    pwcomp <- pwcomp[match(V(graph)$name, names(pwcomp))]  # Reorder to match V(graph)$name
479
+    
480
+    pw.ann <- bpGetAnnFromXRef(df,lapply(pwcomp, function(x) unlist(pwXRef[x], use.names=FALSE)))
481
+    ###########################################
482
+    
483
+    # Compartment attributes #######################
484
+    ## Metabolites have compaertment attributes, 
485
+    ## while reactions inherit them from their catalysts.
486
+    
487
+    ## Get Compartment name and annotations
488
+    comp <- listInstances(biopax, class="cellularLocationvocabulary", returnIDonly=TRUE)
489
+    comp.terms <- as.character(bpGetAttrbyID(df, comp, "term")$property_value)
490
+    #compXRef <- bpGetReferences(biopax, comp)
491
+    comp.ann <- bpGetAnnFromXRef(df, bpGetReferences(biopax, comp) )
492
+    
493
+    ## Get Metbolite compartments
494
+    met.loc <- bpGetAttrbyID(df, V(graph)$name, "cellularlocation", "property_attr_value")
495
+    met.loc$property_attr_value <- match(striph(met.loc$property_attr_value), comp)
496
+    met.loc <- split(met.loc$property_attr_value, met.loc$id, drop=TRUE)
497
+    
498
+    ## Get Catalyst compartments
499
+    cat.loc <- bpGetAttrbyID(df, cat.gene, "cellularlocation", "property_attr_value")
500
+    cat.loc$property_attr_value <- match(striph(cat.loc$property_attr_value), comp)
501
+    cat.loc$id <- cat.r[match( cat.loc$id, cat.gene )]
502
+    cat.loc <- split(cat.loc$property_attr_value, cat.loc$id, drop=TRUE)
503
+    
504
+    ## Combine reaction and metbolite comparments, and reorder
505
+    loc <- c(met.loc,cat.loc)
506
+    loc <- loc[ match(V(graph)$name, names(loc)) ]
507
+    ###############################################
508
+    
509
+    # Reaction-Specific attributes#################
510
+    ##Reactants
511
+    reactants <- split(lefts[,1], lefts[,2], drop=TRUE)
512
+    ##Products
513
+    products <- split(rights[,2], rights[,1], drop=TRUE)
514
+    ##Genes
515
+    cat.gene.name <- listInstances(biopax,cat.gene)
516
+    cat.gene.name <- cat.gene.name[match(cat.gene,cat.gene.name$id),"name"]
517
+    genes <- split(cat.gene.name, cat.r, drop=TRUE)
518
+    
519
+    ## Stoichiometry
520
+    st <- bpGetAttrbyID(df, V(graph)[reactions]$name, "participantstoichiometry", "property_attr_value")
521
+    st.met <- bpGetAttrbyID(df, st$property_attr_value, "physicalentity", "property_attr_value")
522
+    st$met <- striph(st.met[match(striph(st$property_attr_value),st.met$id),3])
523
+    st.cf <- bpGetAttrbyID(df, st$property_attr_value, "stoichiometriccoefficient")
524
+    st$cf <- st.cf[match(striph(st$property_attr_value),st.cf$id),3]
525
+    
526
+    edges <- rbind(lefts,rights)
527
+    edges<- cbind(edges, st=NA)
528
+    apply(st[,c(1,4,5)], 1, function(x) edges[edges$id==x[[1]] & edges$property_attr_value==x[[2]],"st"] <<-x[[3]])
529
+    edges$st <- as.numeric(edges$st)
530
+    
531
+    r.stoic <- split(edges[1:nrow(lefts),3], edges[1:nrow(lefts),2], drop=TRUE)
532
+    p.stoic <- split(edges[-c(1:nrow(lefts)),3], edges[-c(1:nrow(lefts)),2], drop=TRUE)
533
+    
534
+    # Reorder our attributes
535
+    reactants <- reactants[ match(V(graph)[reactions]$name, names(reactants)) ]
536
+    products <- products[match(V(graph)[reactions]$name, names(products))]
537
+    genes <- genes[match(V(graph)[reactions]$name, names(genes))]
538
+    r.stoic <- r.stoic[match(V(graph)[reactions]$name, names(r.stoic))]
539
+    p.stoic <- p.stoic[match(V(graph)[reactions]$name, names(p.stoic))]
540
+    #######################################################
541
+    # Putting it together
542
+    
543
+    ## A small hack to distinguish compartment annotations from vertex annotation.        
544
+    names(comp.ann) <- rep("compartment", length(comp.ann))
545
+    
546
+    ## For reactions
547
+    V(graph)[reactions]$attr <- 
548
+            mapply(function(...){ 
549
+                        args=list(...)
550
+                        c(name=args[[1]], 
551
+                            reversible=FALSE,
552
+                            reactants = list(args[[2]]),
553
+                            reactant.stoichiometry= list(args[[3]]),
554
+                            products = list(args[[4]]),
555
+                            product.stoichiometry= list(args[[5]]),
556
+                            kinetics=NULL,
557
+                            genes = list(args[[6]]),
558
+                            compartment=list(comp.terms[args[[7]] ]),
559
+                            unlist(comp.ann[args[[7]] ], recursive=FALSE),
560
+                            pathway=list(pw$name[args[[8]] ]),
561
+                            unlist(args[9],recursive=FALSE),
562
+                            args[[10]]
563
+                        )
564
+                    }, v.names[reactions],
565
+                    reactants, r.stoic, products, p.stoic, genes,
566
+                    loc[reactions], pwcomp[reactions], pathway = pw.ann[reactions],attr[reactions],
567
+                    SIMPLIFY=FALSE)
568
+    
569
+    V(graph)[!reactions]$attr <- 
570
+            mapply(function(...){ 
571
+                        args=list(...)
572
+                        c(name=args[[1]], 
573
+                            compartment=list(comp.terms[args[[2]] ]),
574
+                            unlist(comp.ann[args[[2]] ], recursive=FALSE),
575
+                            pathway=list(pw$name[args[[3]] ]),
576
+                            unlist(args[4],recursive=FALSE),
577
+                            args[[5]]
578
+                        )
579
+                    }, v.names[!reactions],
580
+                    loc[!reactions], pwcomp[!reactions], pathway= pw.ann[!reactions],attr[!reactions],
581
+                    SIMPLIFY=FALSE)    
582
+    
583
+    E(graph)$stoichiometry = edges$st
584
+    
585
+    V(graph)$reactions <- reactions 
586
+    V(graph)$shape<- ifelse(V(graph)$reactions==TRUE, "square", "circle")
587
+    V(graph)$color <- ifelse(V(graph)$reactions==TRUE,"red", "skyblue")
588
+    
589
+    graph$source = "BioPAX_L3"
590
+    graph$type = "MR.graph"
591
+    
592
+    return(graph)
593
+}
594
+
595
+bpMetabolicL2 <- function(biopax, verbose){
596
+    if(verbose) message("Processing BioPAX (level 2) object as a metabolic network", appendLF=FALSE)
597
+    
598
+    df <- biopax$df
599
+    df$property = tolower(df$property)
600
+    
601
+    lefts <- df[df$property=="left",c(from="property_attr_value", to="id")]
602
+    left.mol = bpGetAttrbyID(df,lefts[,1], "physical-entity", "property_attr_value")#
603
+    lefts[,1] = left.mol[match(striph(lefts[,1]), left.mol$id), "property_attr_value"]#
604
+    lefts[,1] <- striph(lefts[,1])
605
+    
606
+    rights <- df[df$property=="right",c("id","property_attr_value")]
607
+    right.mol = bpGetAttrbyID(df,rights[,2], "physical-entity", "property_attr_value")#
608
+    rights[,2] = right.mol[match(striph(rights[,2]), right.mol$id), "property_attr_value"]#
609
+    rights[,2] <- striph(rights[,2])
610
+    
611
+    graph <- graph.data.frame( rbind(lefts, setNames(rights, names(lefts))) )
612
+    reactions <- V(graph)$name %in% unique(as.character(lefts[,2], rights[,1]))
613
+    
614
+    if(verbose) message(": ", sum(reactions), " reactions found.")
615
+    
616
+    XRefs <- bpGetReferences(biopax, V(graph)$name)
617
+    names(XRefs) <- V(graph)$name
618
+    cat.r <- striph(selectInstances(biopax, class="Control", property="controlled")$property_attr_value)
619
+    cat.gene <- striph(selectInstances(biopax, class="Control", property="controller")$property_attr_value)
620
+    
621
+    gene.xref <- bpGetReferences(biopax, cat.gene, followProperties="physical-entity")#
622
+    names(gene.xref) <- cat.r
623
+    lapply(names(gene.xref), function(x) XRefs[[x]] <<- c(XRefs[[x]],gene.xref[[x]]))
624
+    
625
+    ##############################################################
626
+    # Getting attribute lists (name, compartment, pathway, MIRIAM)
627
+    # For reactions (name, reactants, reactant.stoichiometry, products, product.stoichiomentry,
628
+    # reversible, kinetics, genes, pathway)
629
+    
630
+    # MIRIAM attributes 
631
+    attr <- bpGetAnnFromXRef(df, XRefs[V(graph)$name])
632
+    
633
+    # Name attributes
634
+    v.names <- listInstances(biopax, id=V(graph)$name)
635
+    v.names <- v.names[match(V(graph)$name, v.names$id), "name"]
636
+    
637
+    # Pathway attributes##########################
638
+    ## Get Pathway name and annotations
639
+    pw <- listInstances(biopax, class="pathway")
640
+    pwXRef <- bpGetReferences(biopax, pw$id)
641
+    #pw.ann <- bpGetAnnFromXRef(df, pwXRef)
642
+    
643
+    ## Get pathway components (only reactions are returned)
644
+    pwcomp <- lapply(pw$id, function(x)listPathwayComponents(biopax,x, returnIDonly=TRUE))
645
+    pwcomp <- do.call("rbind", lapply(1:length(pwcomp), 
646
+                    function(x)data.frame(id=x, comp=pwcomp[[x]])))
647
+    
648
+    ## Map pathways to reaction vertices.
649
+    pwcomp <- pwcomp[pwcomp$comp %in% V(graph)[reactions]$name, ]
650
+    pwcomp <- split(pwcomp$id, pwcomp$comp, drop=TRUE)
651
+    
652
+    ## For metabolites, pathway attributes are inherited from reactions they participate in.
653
+    leftright = rbind(lefts, rights)  ## Edges connecting metaboites to reactions
654
+    leftright$id = match(leftright$id, names(pwcomp)) ## Use numerical ids.
655
+    
656
+    pwcomp <-c(pwcomp,  
657
+            lapply(split(pwcomp[leftright$id], leftright$property_attr_value), 
658
+                    function(x) unique(unlist(x)) ) 
659
+    ) # Do the magic :)
660
+    
661
+    pwcomp <- pwcomp[match(V(graph)$name, names(pwcomp))]  # Reorder to match V(graph)$name
662
+    
663
+    pw.ann <- bpGetAnnFromXRef(df,lapply(pwcomp, function(x) unlist(pwXRef[x], use.names=FALSE)))
664
+    ###########################################
665
+    
666
+    # Compartment attributes #######################
667
+    ## Metabolites have compaertment attributes, 
668
+    ## while reactions inherit them from their catalysts.
669
+    
670
+    ## Get Pathway name and annotations
671
+    comp <- listInstances(biopax, class="openControlledVocabulary", returnIDonly=TRUE)#
672
+    comp.terms <- as.character(bpGetAttrbyID(df, comp, "term")$property_value)
673
+    compXRef <- bpGetReferences(biopax, comp)#
674
+    #comp.ann <- bpGetAnnFromXRef(df, bpGetReferences(biopax, comp) )
675
+    
676
+    ## Get Metbolite compartments
677
+    left.loc <- bpGetAttrbyID(df, left.mol$id, "cellular-location", "property_attr_value")#
678
+    left.loc$id <- left.mol[match(left.loc$id, left.mol$id),3]#
679
+    right.loc <- bpGetAttrbyID(df, right.mol$id, "cellular-location", "property_attr_value")#
680
+    right.loc$id <- right.mol[match(right.loc$id, right.mol$id),3]#
681
+    
682
+    met.loc <- rbind(left.loc, right.loc)#
683
+    met.loc$property_attr_value <- match(striph(met.loc$property_attr_value), comp)
684
+    met.loc <- split(met.loc$property_attr_value, striph(met.loc$id), drop=TRUE)#
685
+    
686
+    ## Get Catalyst compartments
687
+    cat.loc <- bpGetAttrbyID(df, cat.gene, "cellular-location", "property_attr_value")#
688
+    cat.loc$property_attr_value <- match(striph(cat.loc$property_attr_value), comp)
689
+    cat.loc$id <- cat.r[match( cat.loc$id, cat.gene )]
690
+    cat.loc <- split(cat.loc$property_attr_value, cat.loc$id, drop=TRUE)
691
+    
692
+    ## Combine reaction and metbolite comparments, and reorder
693
+    loc <- c(met.loc,cat.loc)
694
+    loc <- loc[ match(V(graph)$name, names(loc)) ]
695
+    comp.names <- lapply(loc, function(x)unique(unlist(comp.terms[ x ], use.names=FALSE)))
696
+    
697
+    locXRef <- lapply(loc, function(x)unlist(compXRef[ x ], use.names=FALSE))
698
+    comp.ann <- bpGetAnnFromXRef(df, locXRef)
699
+    ###############################################
700
+    
701
+    # Reaction-Specific attributes#################
702
+    ##Reactants
703
+    reactants <- split(lefts[,1], lefts[,2], drop=TRUE)
704
+    ##Products
705
+    products <- split(rights[,2], rights[,1], drop=TRUE)
706
+    ##Genes ##totally different from level3 function
707
+    cat.gene.ref <- bpGetAttrbyID(df,cat.gene, "physical-entity", "property_attr_value")
708
+    cat.gene.name <- listInstances(biopax, cat.gene.ref[,3])
709
+    cat.gene.ref$name <- cat.gene.name$name[match(striph(cat.gene.ref[,3]), cat.gene.name$id)]
710
+    
711
+    cat.gene.ref <- cat.gene.ref[match(cat.gene,cat.gene.ref$id),"name"]    
712
+    genes <- split(cat.gene.ref, cat.r, drop=TRUE)
713
+    
714
+    ## Stoichiometry    
715
+    lefts <- df[df$property=="left",c(from="property_attr_value", to="id")]
716
+    rights <- df[df$property=="right",c("id","property_attr_value")]
717
+    edges <- rbind(lefts,rights)
718
+    edges<- cbind(edges, st=NA)
719
+    
720
+    st <- bpGetAttrbyID(df, edges$property_attr_value, "stoichiometric-coefficient")
721
+    edges$st = st$property_value[match(striph(edges$property_attr_value), st$id)]
722
+    edges$st <- as.numeric(as.character(edges$st))
723
+    
724
+    r.stoic <- split(edges[1:nrow(lefts),3], edges[1:nrow(lefts),2], drop=TRUE)
725
+    p.stoic <- split(edges[-c(1:nrow(lefts)),3], edges[-c(1:nrow(lefts)),2], drop=TRUE)
726
+    
727
+    # Reorder our attributes
728
+    reactants <- reactants[ match(V(graph)[reactions]$name, names(reactants)) ]
729
+    products <- products[match(V(graph)[reactions]$name, names(products))]
730
+    genes <- genes[match(V(graph)[reactions]$name, names(genes))]
731
+    r.stoic <- r.stoic[match(V(graph)[reactions]$name, names(r.stoic))]
732
+    p.stoic <- p.stoic[match(V(graph)[reactions]$name, names(p.stoic))]
733
+    #######################################################
734
+    # Putting it together
735
+    
736
+    ## A small hack to distinguish compartment annotations from vertex annotation.        
737
+    #names(comp.ann) <- rep("compartment", length(comp.ann))
738
+    
739
+    ## For reactions
740
+    V(graph)[reactions]$attr <- 
741
+            mapply(function(...){ 
742
+                        args=list(...)
743
+                        c(name=args[[1]], 
744
+                            reversible=FALSE,
745
+                            reactants = list(args[[2]]),
746
+                            reactant.stoichiometry= list(args[[3]]),
747
+                            products = list(args[[4]]),
748
+                            product.stoichiometry= list(args[[5]]),
749
+                            kinetics=NULL,
750
+                            genes = list(args[[6]]),
751
+                            compartment=list(args[[7]]),
752
+                            unlist(args[8], recursive=FALSE),
753
+                            pathway=list(pw$name[args[[9]] ]),
754
+                            unlist(args[10],recursive=FALSE),
755
+                            args[[11]]
756
+                        )
757
+                    }, v.names[reactions],
758
+                    reactants, r.stoic, products, p.stoic, genes,
759
+                    comp.names[reactions], compartment = comp.ann[reactions],
760
+                    pwcomp[reactions], pathway = pw.ann[reactions],attr[reactions],
761
+                    SIMPLIFY=FALSE)
762
+    
763
+    V(graph)[!reactions]$attr <- 
764
+            mapply(function(...){ 
765
+                    args=list(...)
766
+                    c(name=args[[1]], 
767
+                        compartment=list(args[[2]]),
768
+                        unlist(args[3], recursive=FALSE),
769
+                        pathway=list(pw$name[args[[4]] ]),
770
+                        unlist(args[5],recursive=FALSE),
771
+                        args[[6]]
772
+                    )
773
+                }, v.names[!reactions],
774
+                comp.names[!reactions], compartment = comp.ann[!reactions],
775
+                pwcomp[!reactions], pathway = pw.ann[!reactions],attr[!reactions],
776
+                SIMPLIFY=FALSE)
777
+    
778
+    E(graph)$stoichiometry = edges$st
779
+    
780
+    V(graph)$reactions <- reactions 
781
+    V(graph)$shape<- ifelse(V(graph)$reactions==TRUE, "square", "circle")
782
+    V(graph)$color <- ifelse(V(graph)$reactions==TRUE,"red", "skyblue")
783
+    
784
+    graph$source = "BioPAX_L2"
785
+    graph$type = "MR.graph"
786
+    
787
+    return(graph)
788
+}
789
+
790
+# TODO: edge attributes in signaling networks
791
+bpSignalingL3 <- function(biopax, expand.complexes=FALSE, inc.sm.molecules=FALSE, verbose=TRUE){
792
+    if(verbose) message("Processing BioPAX (level 3) object as a signaling network", appendLF=FALSE) 
793
+    df <- biopax$df
794
+    df$property = tolower(df$property)
795
+    
796
+    ############### Constructing the signaling graph ##########################
797
+    # All conversion events
798
+    lefts <- df[df$property=="left",c(from="property_attr_value", to="id")]
799
+    lefts[,1] <- striph(lefts[,1])
800
+    rights <- df[df$property=="right",c("id","property_attr_value")]
801
+    rights[,2] <- striph(rights[,2])
802
+    
803
+    # Identifying Metabolic and signaling reactions.
804
+    # Metabolic reactions takes only small molecules as substrates/products.
805
+    # Signaling reactions have at least one non-small-molecule participant.  
806
+    classes.left <- listInstances(biopax,lefts[,1])
807
+    sig.reactions <- lefts[match(classes.left$id, lefts[,1])[classes.left$class !="SmallMolecule"],2]
808
+    classes.right <- listInstances(biopax,rights[,2])
809
+    sig.reactions <- unlist(list(sig.reactions, 
810
+                    rights[ match(classes.right$id, rights[,2])[classes.right$class !="SmallMolecule"], 1]))
811
+    
812
+    
813
+    controlled <- striph(selectInstances(biopax, class="Catalysis", property="controlled")$property_attr_value)
814
+    controller <- striph(selectInstances(biopax, class="Catalysis", property="controller")$property_attr_value)
815
+    
816
+    
817
+    # Metabolic reactions are represented in signaling networks by their "Controller"
818
+    #   where controllers of successive reactions are connected. If no "Controller"
819
+    #   is assigned, no edges are drawn.
820
+    met.left <- lefts[ !lefts[,2] %in% sig.reactions,]
821
+    met.right <- rights[ !rights[,1] %in% sig.reactions,]
822
+    met.left <- do.call("rbind",
823
+            mapply(bpExpand.grid, split(met.left[,1], met.left[,2], drop=TRUE)[controlled], controller,SIMPLIFY=FALSE)
824
+    )
825
+    met.right <- do.call("rbind",
826
+            mapply(bpExpand.grid, controller, split(met.right[,2], met.right[,1], drop=TRUE)[controlled], SIMPLIFY=FALSE)
827
+    )
828
+    
829
+    graph <- graph.data.frame( na.omit( rbind(met.left, met.right) ) )
830
+    reactions <- V(graph)$name %in% unique(as.character(met.left[,2], met.right[,1]))
831
+    if(sum(reactions)>0){
832
+        V(graph)$reactions <- reactions
833
+        V(graph)$attr <- list(list())
834
+        graph <- makeReactionNetwork(graph)
835
+    }
836
+    
837
+    # Signaling reactions are represeted as Left -> Controller -> Right
838
+    # If no "Controller" is assigned, it's represented as Left -> Right
839
+    if(inc.sm.molecules){
840
+        sig.left <- lefts[ lefts[,2] %in% sig.reactions, ]
841
+        sig.right <- rights[ rights[,1] %in% sig.reactions, ]
842
+    }else{
843
+        sig.left <- lefts[ lefts[,1] %in% classes.left[classes.left$class!="SmallMolecule","id"] , ]
844
+        sig.right <- rights[ rights[,2] %in% classes.right[classes.right$class!="SmallMolecule","id"], ]
845
+    }
846
+    
847
+    # No Controllers (Left -> Right)
848
+    sig.no.ctrl <- as.character(unique( sig.reactions[ !sig.reactions %in% controlled ] ))
849
+    sig.no.ctrl <- do.call("rbind",
850
+            mapply(bpExpand.grid, split(sig.left[,1], sig.left[,2], drop=TRUE)[sig.no.ctrl], 
851
+                                  split(sig.right[,2], sig.right[,1], drop=TRUE)[sig.no.ctrl]
852
+                    ,SIMPLIFY=FALSE)
853
+    )
854
+    
855
+    # Controllers present (Left -> Controller -> Right)
856
+    sig.left <- do.call("rbind",
857
+            mapply(bpExpand.grid, split(sig.left[,1], sig.left[,2], drop=TRUE)[controlled], controller,SIMPLIFY=FALSE)
858
+    )
859
+    sig.right <- do.call("rbind",
860
+            mapply(bpExpand.grid, controller, split(sig.right[,2], sig.right[,1], drop=TRUE)[controlled], SIMPLIFY=FALSE)
861
+    )
862
+    
863
+    # Putting it all signaling reactions together
864
+    all.sig <- c(t( na.omit(rbind(sig.no.ctrl, sig.left, sig.right)) ))
865
+    sig.vertices <- unique(all.sig)
866
+    
867
+    # Combining the signaling reactions with the metabolic one.
868
+    graph <- graph + vertices(sig.vertices[!sig.vertices %in% V(graph)$name]) + igraph::edges(all.sig) 
869
+    
870
+    if(expand.complexes){
871
+        comsp <- bpSplitComplex(biopax,V(graph)$name, inc.sm.molecules)
872
+        graph <- setAttribute(graph,"complex", comsp)
873
+        graph <- igraph::simplify( expandComplexes(graph, "complex", NULL, "normal", "keep") )
874
+    }
875
+    
876
+    if(ecount(graph)==0) 
877
+        stop("No Singling interaction were found.")
878
+    if(verbose) message(": ", ecount(graph), " interaction found.")
879
+    
880
+    ##################################################################
881
+    ############ Getting attributes for graph vertices ###############
882
+    # Getting attribute lists (name, compartment, pathway, MIRIAM)
883
+    XRefs <- bpGetReferences(biopax, V(graph)$name)
884
+    names(XRefs) <- V(graph)$name
885
+    
886
+    # MIRIAM attributes 
887
+    attr <- bpGetAnnFromXRef(df, XRefs[V(graph)$name])
888
+    
889
+    # Name attributes
890
+    v.names <- listInstances(biopax, id=V(graph)$name)
891
+    v.names <- v.names[match(V(graph)$name, v.names$id), "name"]
892
+    
893
+    ## Get Pathway name and annotations
894
+    pw <- listInstances(biopax, class="pathway")
895
+    pwXRef <- bpGetReferences(biopax, pw$id)
896
+    
897
+    ## Get pathway components (only reactions are returned)
898
+    pwcomp <- lapply(pw$id, function(x)listPathwayComponents(biopax,x, returnIDonly=FALSE))
899
+    pwcomp <- do.call("rbind", lapply(1:length(pwcomp), 
900
+                    function(x)data.frame(id=x, comp=pwcomp[[x]])))
901
+    
902
+    ## Restrict Pathway components to Conversion reactions (Removing PathwayStep, Control).
903
+    pwcomp <- pwcomp[pwcomp$comp.class %in% c("Conversion", getSubClasses("Conversion", biopaxlevel=3)) ,]
904
+    
905
+    pwcomp <- split(pwcomp$id, pwcomp$comp.id, drop=TRUE)
906
+    
907
+    ## Since Conversions (reactions) are no longer present in the network 
908
+    #   (either their controllers or substrate/products are retained), vertices inherit
909
+    #   Pathway annotations from reactions they participate in.
910
+    leftright <- rbind(lefts, rights, cbind(property_attr_value=controller,id=controlled)) # Vertices and reactions they participate in.
911
+    leftright <- leftright[ leftright[,1] %in% V(graph)$name,]
912
+    leftright$id = match(leftright$id, names(pwcomp)) ## Use numerical ids.
913
+    
914
+    pwcomp <-c(pwcomp,  
915
+            lapply(split(pwcomp[leftright$id], leftright$property_attr_value), 
916
+                    function(x) unique(unlist(x)) ) 
917
+    ) # Do the magic :)
918
+    
919
+    pwcomp <- pwcomp[V(graph)$name]  # Reorder to match V(graph)$name    
920
+    pw.ann <- bpGetAnnFromXRef(df,lapply(pwcomp, function(x) unlist(pwXRef[x], use.names=FALSE)))
921
+    ###########################################
922
+    
923
+    ## Compartment attributes #######################
924
+    ### Get Compartment name and annotations
925
+    comp <- listInstances(biopax, class="cellularLocationvocabulary", returnIDonly=TRUE)
926
+    comp.terms <- as.character(bpGetAttrbyID(df, comp, "term")$property_value)
927
+    comp.ann <- bpGetAnnFromXRef(df, bpGetReferences(biopax, comp) )
928
+    
929
+    ## Get Vertices' compartments
930
+    loc <- bpGetAttrbyID(df, V(graph)$name, "cellularlocation", "property_attr_value")
931
+    loc$property_attr_value <- match(striph(loc$property_attr_value), comp)
932
+    loc <- split(loc$property_attr_value, loc$id, drop=TRUE)[ V(graph)$name]
933
+    ###############################################
934
+    ###############################################
935
+    # Putting it together
936
+    
937
+    ## A small hack to distinguish compartment annotations from vertex annotation.        
938
+    names(comp.ann) <- rep("compartment", length(comp.ann))
939
+    
940
+    V(graph)$attr <- 
941
+            mapply(function(...){ 
942
+                        args=list(...)
943
+                        c(name=args[[1]], 
944
+                              compartment=list(comp.terms[ args[[2]] ]),
945
+                            unlist(comp.ann[ args[[2]] ], recursive=FALSE),
946
+                            pathway=list(pw$name[ args[[3]] ]),
947
+                            unlist(args[4],recursive=FALSE),
948
+                            args[[5]]
949
+                        )
950
+                    }, v.names, loc, pwcomp, pathway= pw.ann ,attr,
951
+                    SIMPLIFY=FALSE)    
952
+    
953
+    V(graph)$shape<- "circle"
954
+    V(graph)$color <- "blue"
955
+    
956
+    graph$source = "BioPAX_L3"
957
+    graph$type = "S.graph"
958
+    
959
+    return(graph)
960
+}
961
+############Helper functions to process biopax objects################
962
+bpGetReferences <- function (biopax, id, 
963
+                        followProperties = c("entityreference","component", "memberPhysicalEntity"), 
964
+                        getProperties="xref") 
965
+{
966
+    id.u = unique(striph(id))
967
+    
968
+    isrdfresource = biopax$df$property_attr == "rdf:resource"
969
+    propertysel = tolower(biopax$df$property) %in% tolower(followProperties)
970
+    newIDs = biopax$df[biopax$df$id %in% id.u & isrdfresource & 
971
+                        propertysel, c("id","property_attr_value")]
972
+    
973
+    propertyget = tolower(biopax$df$property) %in% tolower(getProperties)
974
+    attr = biopax$df[biopax$df$id %in% id.u & isrdfresource & 
975
+                    propertyget, c("id","property_attr_value")]
976
+    
977
+    attr.ls = split(as.character(attr[,2]), attr$id, drop=TRUE)
978
+    if(nrow(newIDs)>0){
979
+        recur.ls = bpGetReferences(biopax, newIDs$property_attr_value, followProperties, getProperties)
980
+        recur.sp = split(recur.ls,newIDs$id, drop=TRUE)        
981
+        keys <- unique(c(names(attr.ls), names(recur.sp)))
982
+        attr.ls = setNames(mapply(function(...)unique(unlist(c(...))),
983
+                        attr.ls[keys], recur.sp[keys], SIMPLIFY=FALSE), 
984
+                keys)        
985
+    }
986
+    
987
+    pos = match(striph(id), names(attr.ls))
988
+    return(attr.ls[pos])
989
+}
990
+
991
+bpSplitComplex<-function (biopax, complexid, inc.sm.molecules) 
992
+{
993
+    compname = c("COMPONENTS", "PHYSICAL-ENTITY")
994
+    if (biopax$biopaxlevel == 3) {
995
+        compname = c("memberPhysicalEntity","component")
996
+    }
997
+    if(inc.sm.molecules)
998
+        classes <- c("dna", "rna", "protein", "smallmolecule")
999
+    else
1000
+        classes <- c("dna", "rna", "protein")
1001
+    
1002
+    ref = bpGetReferences(biopax, complexid, followProperties=compname, getProperties=compname)
1003
+    
1004
+    if (is.null(ref))
1005
+        return(NULL)
1006
+    
1007
+    ref = do.call("rbind", lapply( na.omit(names(ref)), function(x) cbind( x, striph(ref[[x]]) ) ) )
1008
+    referenced = listInstances(biopax, id = unique(ref[,2]))
1009
+    sel = referenced[ tolower(referenced$class) %in% classes, "id"]
1010
+    if (length(sel)==0) 
1011
+        return(NULL)
1012
+    
1013
+    ref = ref[ ref[,2] %in% sel, ]
1014
+    return( split(ref[,2], ref[,1])[ complexid ] )
1015
+}
1016
+bpGetAttrbyID <- function(df, id, attr, value="property_value"){
1017
+    id = striph(id)
1018
+    df[df$id %in% id & df$property %in% attr, c("id","property",value) ]
1019
+}
1020
+bpGetAnnFromXRef <- function(df, id.ls, attr){
1021
+    ann <- do.call("rbind",lapply(na.omit(names(id.ls)), 
1022
+                            function(x) cbind(x, if(is.null(id.ls[[x]])) NA else id.ls[[x]])
1023
+                ))
1024
+    
1025
+        
1026
+    id = unique(ann[,2])
1027
+    if(length(na.omit(id))==0)
1028
+        return(list()[match(names(id.ls), NULL)])
1029
+
1030
+    db <- bpGetAttrbyID(df, id, "db", "property_value")
1031
+    dbid <- bpGetAttrbyID(df, id, "id", "property_value")
1032
+    dbid <- cbind(db, value=dbid[,3])
1033
+    
1034
+    dbid$property_value <- paste("miriam.", dbid$property_value, sep="")
1035
+    
1036
+    pos <- match(striph(ann[,2]), dbid$id)
1037
+    # Split the dataframe into annotations belonging to each input id (ann[,1])
1038
+    # Then split each of the daughter dataframe by the attribute name.
1039
+    ann.ls <- lapply(split(dbid[pos,c(3,4)], ann[,1], drop=TRUE), 
1040
+                    function(x) split(as.character(x[,2]), x[,1], drop=TRUE) )
1041
+            
1042
+    
1043
+    return(ann.ls[match(names(id.ls), names(ann.ls))])
1044
+}
1045
+bpGetAnnotation <- function(df, id, attr){
1046
+    ann <- bpGetAttrbyID(df, id, "xref", "property_attr_value")
1047
+    ref <- bpGetAttrbyID(df, id, "entityreference", "property_attr_value")
1048
+    ref.ann <-  bpGetAttrbyID(df, ref$property_attr_value, "xref", "property_attr_value")
1049
+    
1050
+    pos <- match(striph(ref$property_attr_value), ref.ann$id)
1051
+    ann <- rbind(ann[,c(1,3)],data.frame(id=ref$id, property_attr_value=ref.ann$property_attr_value[pos], 
1052
+                    stringsAsFactors = FALSE))
1053
+    
1054
+    db <- bpGetAttrbyID(df, ann$property_attr_value, "db", "property_value")
1055
+    dbid <- bpGetAttrbyID(df, ann$property_attr_value, "id", "property_value")
1056
+    
1057
+    ann.ls <- as.list(as.character(dbid$property_value))
1058
+    names(ann.ls) <- paste("miriam.", db$property_value, sep="")
1059
+    pos <- match(striph(ann$property_attr_value), dbid$id)
1060
+    ann.ls <- split(ann.ls[pos], ann$id, drop=TRUE)
1061
+    
1062
+    
1063
+    return(ann.ls[match(striph(id), names(ann.ls))])
1064
+}
1065
+
1066
+bpExpand.grid <- function(x1,x2){
1067
+    if(is.null(x1))
1068
+        x1=NA
1069
+    if(is.null(x2))
1070
+        x2=NA
1071
+    return(expand.grid(x1,x2))
1072
+}
1073
+
1074
+striph <- function(s){
1075
+    return(sub("^#", "", s))
1076
+}
1077
+
0 1078
new file mode 100644
... ...
@@ -0,0 +1,399 @@
1
+###############################################################################
2
+#
3
+# netProcess.R:     This file contains all functions for network processing.
4
+# Conversion between different network representations, network editing are included 
5
+# here.
6
+#
7
+# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>
8
+#
9
+# This is released under GPL-2.
10
+# 
11
+# Documentation was created using roxygen
12
+#
13
+###############################################################################
14
+
15
+# Neccessary non-sense to pass R CMD check
16
+utils::globalVariables(c("nei", "to", "from", "delete"))
17
+
18
+#' Remove uniquitous compounds from a metabolic network
19
+#' 
20
+#' This function removes uniquitous compounds (metabolites connected to numerous reactions)
21
+#' from a metabolic network.These compounds are reaction cofactors and currency compounds,
22
+#' such as ATP, CO2, etc. A path through these metabolites may not be bioloigcally meaningful.
23
+#' The defualt small compound list is derived from Reactome, containing keeg.compound, pubchem.compound,
24
+#' ChEBI and CAS identifiers.
25
+#'     
26
+#' 
27
+#' @param graph A metabolic network.
28
+#' @param method How to handle small compounds. Either simply delete these vertices \code{"remove"} (default), 
29
+#' or make a separate vertex for each reaction they participate in \code{"duplicate"}.
30
+#' @param small.comp.ls A list of small compounds to be used. 
31
+#' 
32
+#' @return A modified graph, with the small compounds removed or duplicated.
33
+#' 
34
+#' @author Ahmed Mohamed
35
+#' @family Network processing methods
36
+#' @export
37
+#' @examples
38
+#'     data(ex_sbml)
39
+#'     \dontshow{ex_sbml}    
40
+#'     
41
+#'     sbml.removed <- rmSmallCompounds(ex_sbml, method="remove")
42
+#'     \dontshow{sbml.removed}
43
+#' 
44
+rmSmallCompounds <- function(graph, method=c("remove","duplicate"), small.comp.ls=NPMdefaults("small.comp.ls")){
45
+    if(is.null(V(graph)$reactions))
46
+        stop("graph is not a metaboite-reaction netowrk.")
47
+    
48
+    if(is.null(V(graph)$attr))
49
+        stop("graph is not annotated.")
50
+    term.list <- c("miriam.pubchem.compound", "miriam.kegg.compound", "miriam.chebi", "miriam.cas")
51
+    
52
+    std.names <- stdAttrNames(graph, "matches")
53
+    terms <- std.names$standard %in% c("miriam.pubchem.compound", "miriam.kegg.compound", "miriam.chebi")
54
+    if(sum(terms)==0)
55
+        stop("No stuitable compound annotation found.\n
56
+            Current suuported annotations are ", toString(term.list))
57
+    
58
+    attr.ls <- lapply(rownames(std.names)[terms], 
59
+                        function(x) getAttribute(graph, x)[!V(graph)$reactions]
60
+                )
61
+    
62
+    
63
+    res <- sapply(1:length(attr.ls), function(i){
64
+                sapply(attr.ls[[i]], 
65
+                        function(x) sum(x%in%small.comp.ls[,std.names[terms,1][[i]]])>0
66
+                )
67
+            })
68
+    
69
+    vids <- which(apply(res,1,any)) # Rows with at least one match.
70
+    vids <- as.integer(V(graph)[!V(graph)$reactions])[vids] #vertex ids on the graph
71
+    
72
+    if(method=="duplicate"){
73
+        v.names <- V(graph)[vids]$name
74
+        el <- get.edgelist(graph)
75
+        from <- el[,1] %in% v.names
76
+        el[from,1] <- paste(el[from,1], el[from,2], sep="##")
77
+        to <- el[,2] %in% v.names
78
+        el[to,2] <- paste(el[to,2], el[to,1], sep="##")
79
+        
80
+        graph <- graph + vertices(c(el[from,1],el[to,2])) +igraph::edges(el[from|to,])
81
+    }
82
+    return(graph - vertices(vids))
83
+}
84
+
85
+#' Convert metabolic network to reaction network.
86
+#' 
87
+#' This function removes metabolite nodes keeping them as edge attributes. The resulting
88
+#' network contains reaction nodes only, where edges indicate that a metabolite produced
89
+#' by one reaction is consumed by the other.   
90
+#'     
91
+#' 
92
+#' @param graph A metabolic network.
93
+#' @param simplify An option to remove translocation and spontaneous reactions that require
94
+#' no catalyzing genes. Translocation reactions are detected from reaction name (SBML, BioPAX), or
95
+#' by having identical substrates and products.