1 | 1 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+Package: NetPathMiner |
|
2 |
+Version: 0.99.0 |
|
3 |
+Date: 2014 onwards |
|
4 |
+Title: NetPathMiner for Biological Network Construction, Path Mining |
|
5 |
+ and Visualization |
|
6 |
+Author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp>, Tim Hancock |
|
7 |
+ <timothy.hancock@kuicr.kyoto-u.ac.jp>, Ichigaku Takigawa |
|
8 |
+ <takigawa@kuicr.kyoto-u.ac.jp>, Nicolas Wicker <nicolas.wicker@unistra.fr> |
|
9 |
+Maintainer: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp> |
|
10 |
+Description: NetPathMiner is a general framework for network path mining using |
|
11 |
+ genome-scale networks. It constructs networks from KGML, SBML and BioPAX |
|
12 |
+ files, providing three network representations, metabolic, reaction and |
|
13 |
+ gene representations. NetPathMiner finds active paths and applies machine |
|
14 |
+ learning methods to summarize found paths for easy interpretation. It also |
|
15 |
+ provides static and interactive visualizations of networks and paths to aid |
|
16 |
+ manual investigation. |
|
17 |
+Depends: igraph (>= 0.6) |
|
18 |
+Suggests: rBiopaxParser, RCurl, RCytoscape |
|
19 |
+License: GPL (>= 2) |
|
20 |
+URL: https://github.com/ahmohamed/NetPathMiner |
|
21 |
+NeedsCompilation: yes |
|
22 |
+SystemRequurements: libxml2, libSBML (>= 5.5) |
|
23 |
+biocViews: GraphAndNetwork, Pathways, Network, Clustering, |
|
24 |
+ Classification |
|
25 |
+Biarch: TRUE |
|
26 |
+Packaged: 2014-03-10 09:06:56 UTC; mohamedahmed |
0 | 27 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,45 @@ |
1 |
+export(KGML2igraph) |
|
2 |
+export(NPMdefaults) |
|
3 |
+export(SBML2igraph) |
|
4 |
+export(assignEdgeWeights) |
|
5 |
+export(biopax2igraph) |
|
6 |
+export(colorVertexByAttr) |
|
7 |
+export(expandComplexes) |
|
8 |
+export(extractPathNetwork) |
|
9 |
+export(fetchAttribute) |
|
10 |
+export(getAttrNames) |
|
11 |
+export(getAttrStatus) |
|
12 |
+export(getAttribute) |
|
13 |
+export(getGeneSetNetworks) |
|
14 |
+export(getGeneSets) |
|
15 |
+export(getPathsAsEIDs) |
|
16 |
+export(layoutVertexByAttr) |
|
17 |
+export(makeGeneNetwork) |
|
18 |
+export(makeReactionNetwork) |
|
19 |
+export(pathClassifier) |
|
20 |
+export(pathCluster) |
|
21 |
+export(pathRanker) |
|
22 |
+export(pathsToBinary) |
|
23 |
+export(plotAllNetworks) |
|
24 |
+export(plotClassifierROC) |
|
25 |
+export(plotClusterMatrix) |
|
26 |
+export(plotClusterProbs) |
|
27 |
+export(plotClusters) |
|
28 |
+export(plotCytoscape) |
|
29 |
+export(plotNetwork) |
|
30 |
+export(plotPathClassifier) |
|
31 |
+export(plotPathCluster) |
|
32 |
+export(plotPaths) |
|
33 |
+export(predictPathClassifier) |
|
34 |
+export(predictPathCluster) |
|
35 |
+export(registerMemoryErr) |
|
36 |
+export(rmAttribute) |
|
37 |
+export(rmSmallCompounds) |
|
38 |
+export(samplePaths) |
|
39 |
+export(setAttribute) |
|
40 |
+export(simplifyReactionNetwork) |
|
41 |
+export(stdAttrNames) |
|
42 |
+export(toGraphNEL) |
|
43 |
+export(vertexDeleteReconnect) |
|
44 |
+import(igraph) |
|
45 |
+useDynLib(NetPathMiner) |
1 | 47 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,102 @@ |
1 |
+############################################################################### |
|
2 |
+# |
|
3 |
+# NPM-package.R: This file contains the documentation for the package itself, and .onLoad function. |
|
4 |
+# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp> |
|
5 |
+# |
|
6 |
+# This is released under GPL-2. |
|
7 |
+# |
|
8 |
+# Documentation was created using roxygen |
|
9 |
+# |
|
10 |
+############################################################################### |
|
11 |
+ |
|
12 |
+#' General framework for network extraction, path mining. |
|
13 |
+#' |
|
14 |
+#' NetPathMiner implements a flexible module-based process flow for network path mining and visualization, |
|
15 |
+#' which can be fully inte-grated with user-customized functions. |
|
16 |
+#' NetPathMiner supports construction of various types of genome scale networks from KGML, SBML and BioPAX |
|
17 |
+#' formats, enabling its utility to most common pathway databases. |
|
18 |
+#' NetPathMiner also provides different visualization techniques to facilitate the analysis of even |
|
19 |
+#' thousands of output paths. |
|
20 |
+#' |
|
21 |
+#' @import igraph |
|
22 |
+#' @useDynLib NetPathMiner |
|
23 |
+#' @author Ahmed Mohamed \email{mohamed@@kuicr.kyoto-u.ac.jp} |
|
24 |
+#' @name NetPathMiner-package |
|
25 |
+#' @aliases NetPathMiner NPM |
|
26 |
+#' @docType package |
|
27 |
+#' |
|
28 |
+NULL |
|
29 |
+ |
|
30 |
+#' Biopax example data |
|
31 |
+#' |
|
32 |
+#' A dataset containing Porphyrin metabolism pathway in Biopax Level 3 and parsed with |
|
33 |
+#' \code{\link[rBiopaxParser]{readBiopax}}. |
|
34 |
+#' |
|
35 |
+#' @docType data |
|
36 |
+#' @name ex_biopax |
|
37 |
+#' @examples |
|
38 |
+#' data(ex_biopax) |
|
39 |
+#' ex_biopax |
|
40 |
+#' |
|
41 |
+NULL |
|
42 |
+ |
|
43 |
+#' Singaling network from KGML example |
|
44 |
+#' |
|
45 |
+#' An exmaple igraph object representing Ras and chemokine signaling pathways in human |
|
46 |
+#' extracted from KGML files. |
|
47 |
+#' |
|
48 |
+#' @docType data |
|
49 |
+#' @name ex_kgml_sig |
|
50 |
+#' @examples |
|
51 |
+#' data(ex_kgml_sig) |
|
52 |
+#' plotNetwork(ex_kgml_sig, vertex.color="pathway") |
|
53 |
+#' |
|
54 |
+NULL |
|
55 |
+ |
|
56 |
+#' Metabolic network from SBML example |
|
57 |
+#' |
|
58 |
+#' An example igraph object representing bipartite metabolic network of Carbohydrate |
|
59 |
+#' metabolism extracted from SBML file from Reactome database. |
|
60 |
+#' |
|
61 |
+#' @docType data |
|
62 |
+#' @name ex_sbml |
|
63 |
+#' @examples |
|
64 |
+#' data(ex_sbml) |
|
65 |
+#' plotNetwork(ex_sbml, vertex.color="compartment.name") |
|
66 |
+#' |
|
67 |
+NULL |
|
68 |
+ |
|
69 |
+#' An microarray data example. |
|
70 |
+#' |
|
71 |
+#' An microarray data example. This is part of the ALL dataset, for demonestration purposes. |
|
72 |
+#' |
|
73 |
+#' @docType data |
|
74 |
+#' @name ex_microarray |
|
75 |
+#' @examples |
|
76 |
+#' data(ex_microarray) |
|
77 |
+#' |
|
78 |
+NULL |
|
79 |
+ |
|
80 |
+.onLoad<- function(lib, pkg){ |
|
81 |
+ env <- new.env() |
|
82 |
+ load(file.path(find.package("NetPathMiner"), "extdata", "env_data.RData"), envir=env) |
|
83 |
+ env$memory.err <- character() |
|
84 |
+ options(NPM_ENV = env) |
|
85 |
+} |
|
86 |
+ |
|
87 |
+#' Internal method to register memery errors. |
|
88 |
+#' |
|
89 |
+#' Internal method to register memery errors, caused by compiled code. This method |
|
90 |
+#' is used only by the package, and should not be invoked by users. |
|
91 |
+#' |
|
92 |
+#' @param method The mathod which generated the error. |
|
93 |
+#' |
|
94 |
+#' @return NULL |
|
95 |
+#' |
|
96 |
+#' @author Ahmed Mohamed |
|
97 |
+#' @export |
|
98 |
+#' |
|
99 |
+registerMemoryErr <- function(method){ |
|
100 |
+ env <-options("NPM_ENV")[[1]] |
|
101 |
+ env$memory.err <- c(env$memory.err, method) |
|
102 |
+} |
0 | 103 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1077 @@ |
1 |
+############################################################################### |
|
2 |
+# |
|
3 |
+# dbExtract.R: This file contains all functions related to pathway file |
|
4 |
+# processing into igraph objects. |
|
5 |
+# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp> |
|
6 |
+# |
|
7 |
+# This is released under GPL-2. |
|
8 |
+# |
|
9 |
+# Documentation was created using roxygen |
|
10 |
+# |
|
11 |
+############################################################################### |
|
12 |
+ |
|
13 |
+ |
|
14 |
+check.file <- function(filename, ext=".xml"){ |
|
15 |
+ if(is.null(filename)) stop("No file provided!") |
|
16 |
+ if(length(filename)==1){ |
|
17 |
+ if (!file.exists(filename)) stop("Cannot find file:", filename) |
|
18 |
+ if(length(list.files(filename))==0) |
|
19 |
+ return(filename) |
|
20 |
+ |
|
21 |
+ fileList = file.path(filename,list.files(filename, ext)) |
|
22 |
+ return(check.file(fileList)) |
|
23 |
+ }else{ |
|
24 |
+ return(unlist(sapply(filename, check.file, USE.NAMES=FALSE))); |
|
25 |
+ } |
|
26 |
+} |
|
27 |
+ |
|
28 |
+#' Processes KGML files into igraph objects |
|
29 |
+#' |
|
30 |
+#' This function takes KGML files as input, and returns either a metabolic or a signaling |
|
31 |
+#' network as output. |
|
32 |
+#' |
|
33 |
+#' Users can specify whether files are processes as metabolic or signaling networks. |
|
34 |
+#' |
|
35 |
+#' Metabolic networks are given as bipartite graphs, where metabolites and reactions represent |
|
36 |
+#' vertex types. This is constructed from <reaction> xml node in KGML file, connecting them |
|
37 |
+#' to their corresponding substrates and products. Each reaction vertex has \code{genes} attribute, |
|
38 |
+#' listing all genes associated with the reaction. As a general rule, reactions inherit all annotation |
|
39 |
+#' attributes of its catalyzig genes. |
|
40 |
+#' |
|
41 |
+#' Signaling network have genes as vertices and edges represent interactions, such as activiation / inhibition. |
|
42 |
+#' Genes participating in successive reactions are also connected. Signaling parsing method processes <ECrel>, <PPrel> |
|
43 |
+#' and <PCrel> interactions from KGML files. |
|
44 |
+#' |
|
45 |
+#' To generate a genome scale network, simply provide a list of files to be parsed, or put all |
|
46 |
+#' file in a directory, as pass the directory path as \code{filename} |
|
47 |
+#' |
|
48 |
+#' @param filename A character vector containing the KGML files to be processed. |
|
49 |
+#' If a directory path is provided, all *.xml files in it and its subdirectories are included. |
|
50 |
+#' @param parse.as Whether to process file into a metabolic or a signaling network. |
|
51 |
+#' @param expand.complexes Split protein complexes into individual gene nodes. This argument is |
|
52 |
+#' ignored if \code{parse.as="metabolic"} |
|
53 |
+#' @param verbose Whether to display the progress of the function. |
|
54 |
+#' |
|
55 |
+#' @return An igraph object, representing a metbolic or a signaling network. |
|
56 |
+#' @author Ahmed Mohamed |
|
57 |
+#' @family Database extraction methods |
|
58 |
+#' @export |
|
59 |
+#' @examples |
|
60 |
+#' if(is.loaded("readkgmlfile")){ # This is false if libxml2 wasn't available at installation. |
|
61 |
+#' filename <- system.file("extdata", "hsa00860.xml", package="NetPathMiner") |
|
62 |
+#' |
|
63 |
+#' # Process KGML file as a metabolic network |
|
64 |
+#' g <- KGML2igraph(filename) |
|
65 |
+#' plotNetwork(g) |
|
66 |
+#' |
|
67 |
+#' # Process KGML file as a signaling network |
|
68 |
+#' g <- KGML2igraph(filename, parse.as="signaling", expand.complexes=TRUE) |
|
69 |
+#' dev.new() |
|
70 |
+#' plotNetwork(g) |
|
71 |
+#' } |
|
72 |
+#' |
|
73 |
+KGML2igraph <- function(filename, parse.as=c("metabolic","signaling"), expand.complexes=FALSE, verbose=TRUE){ |
|
74 |
+ if(!is.loaded("readkgmlfile")) |
|
75 |
+ stop("KGML2igraph requires libxml2 to be present. Please reinstall NetPathMiner after installing libxml2.") |
|
76 |
+ if("KGML2igraph" %in% options("NPM_ENV")[[1]]$memory.err) |
|
77 |
+ stop("KGML2igraph has previously caused a critical memory error. For safety reasons, please restart R.") |
|
78 |
+ |
|
79 |
+ fileList = check.file(filename) |
|
80 |
+ if(length(fileList)==0) stop("No files to process!") |
|
81 |
+ # Check the parsing method. |
|
82 |
+ if(!missing(parse.as)){ |
|
83 |
+ if(parse.as=="signaling"){ |
|
84 |
+ return(KGML_signal(fileList, expand.complexes, verbose)) |
|
85 |
+ }else{ |
|
86 |
+ if(parse.as != "metabolic") |
|
87 |
+ stop("Unknown parsing method:", parse.as) |
|
88 |
+ } |
|
89 |
+ } |
|
90 |
+ |
|
91 |
+ if(verbose) message("Parsing KGML files as metabolic networks") |
|
92 |
+ # If a directory is provided, all xml files are processed. |
|
93 |
+ if(length(fileList)==1){ |
|
94 |
+ zkgml <- .Call("readkgmlfile", FILENAME = fileList, VERBOSE=verbose) |
|
95 |
+ }else{ |
|
96 |
+ zkgml <- unlist(sapply(fileList, |
|
97 |
+ function(x) .Call("readkgmlfile", FILENAME = x, VERBOSE=verbose) |
|
98 |
+ , USE.NAMES=FALSE), recursive=FALSE) |
|
99 |
+ |
|
100 |
+ #Resolve reactions particiapting in multiple pathways |
|
101 |
+ dup.rns = lapply(zkgml[duplicated(names(zkgml))], "[[", "miriam.kegg.pathway") |
|
102 |
+ if(length(dup.rns)>0){ |
|
103 |
+ lapply(1:length(dup.rns), |
|
104 |
+ function(x) zkgml[[ names(dup.rns)[x] ]]$miriam.kegg.pathway <<- |
|
105 |
+ unique( c(zkgml[[ names(dup.rns)[x] ]]$miriam.kegg.pathway, dup.rns[[x]] ) |
|
106 |
+ )) |
|
107 |
+ } |
|
108 |
+ } |
|
109 |
+ |
|
110 |
+ if(verbose) message("Files processed succefully. Building the igraph object.") |
|
111 |
+ ######### Make the reaction substrate network ############ |
|
112 |
+ edges <-do.call("rbind",lapply(zkgml, |
|
113 |
+ function(x)expand.grid(x$reactants, x$name, x$reactant.stoichiometry))) |
|
114 |
+ |
|
115 |
+ edges <- rbind( |
|
116 |
+ do.call("rbind",lapply(zkgml, |
|
117 |
+ function(x)expand.grid(x$name,x$products, x$product.stoichiometry))) |
|
118 |
+ , edges) |
|
119 |
+ |
|
120 |
+ names(edges) <- c("from","to", "stoichiometry") |
|
121 |
+ |
|
122 |
+ ########## Making the iGraph object ################### |
|
123 |
+ # contructing a bipartite graph and removing loop edges |
|
124 |
+ graph = simplify(graph.data.frame(edges), edge.attr.comb="first") |
|
125 |
+ |
|
126 |
+ reactions <- V(graph)$name %in% names(zkgml) |
|
127 |
+ V(graph)$reactions <- reactions |
|
128 |
+ V(graph)$shape<- ifelse(V(graph)$type==TRUE, "square", "circle") |
|
129 |
+ V(graph)$color <- ifelse(V(graph)$type==TRUE,"red", "skyblue") |
|
130 |
+ |
|
131 |
+ V(graph)[reactions]$attr <- zkgml[V(graph)[reactions]$name] |
|
132 |
+ V(graph)[!reactions]$attr = lapply(V(graph)[!reactions]$name, function(x) list(miriam.kegg.compound=x)) |
|
133 |
+ |
|
134 |
+ graph$source = "KGML" |
|
135 |
+ graph$type = "MR.graph" |
|
136 |
+ |
|
137 |
+ return(graph) |
|
138 |
+} |
|
139 |
+ |
|
140 |
+ |
|
141 |
+KGML_signal <- function(fileList, expand.complexes, verbose){ |
|
142 |
+ if(verbose) message("Parsing KGML files as metabolic networks") |
|
143 |
+ zkgml <- .Call("readkgml_sign", FILENAME = fileList, |
|
144 |
+ EXPAND_COMPLEXES = expand.complexes, VERBOSE=verbose) |
|
145 |
+ |
|
146 |
+ if(verbose) message("Files processed succefully. Building the igraph object.") |
|
147 |
+ |
|
148 |
+ graph = graph.empty() + vertices(names(zkgml[[1]]), attr=zkgml[[1]]) |
|
149 |
+ graph = graph + igraph::edges(zkgml[[2]], attr=zkgml[[3]]) |
|
150 |
+ |
|
151 |
+ V(graph)$shape<- "circle" |
|
152 |
+ V(graph)$color <- "blue" |
|
153 |
+ |
|
154 |
+ graph$source = "KGML" |
|
155 |
+ graph$type = "S.graph" |
|
156 |
+ |
|
157 |
+ return(graph) |
|
158 |
+} |
|
159 |
+ |
|
160 |
+#' Processes SBML files into igraph objects |
|
161 |
+#' |
|
162 |
+#' This function takes SBML files as input, and returns either a metabolic or a signaling |
|
163 |
+#' network as output. |
|
164 |
+#' |
|
165 |
+#' Users can specify whether files are processes as metabolic or signaling networks. |
|
166 |
+#' |
|
167 |
+#' Metabolic networks are given as bipartite graphs, where metabolites and reactions represent |
|
168 |
+#' vertex types. This is constructed from \code{ListOfReactions} in SBML file, connecting them |
|
169 |
+#' to their corresponding substrates and products (\code{ListOfSpecies}). Each reaction vertex has \code{genes} attribute, |
|
170 |
+#' listing all \code{modifiers} of this reaction. As a general rule, reactions inherit all annotation |
|
171 |
+#' attributes of its catalyzig genes. |
|
172 |
+#' |
|
173 |
+#' Signaling network have genes as vertices and edges represent interactions. Since SBML format may |
|
174 |
+#' represent singling events as \code{reaction}, all species are assumed to be genes (rather than small |
|
175 |
+#' molecules). For a simple path \code{S0 -> R1 -> S1}, in signaling network, the path will be |
|
176 |
+#' \code{S0 -> M(R1) -> S1} where \code{M(R1)} is R1 modifier(s). To ditiguish gene species from small |
|
177 |
+#' molecules, user can provide \code{gene.attr} (for example: \code{miriam.uniprot} or \code{miriam.ncbigene}) |
|
178 |
+#' where only annotated species are considered genes. |
|
179 |
+#' |
|
180 |
+#' All annotation attributes written according to MIRIAM guidlines (either \code{urn:miriam:xxx:xxx} or |
|
181 |
+#' \code{http://identifiers.org/xxx/xxx}) are etxracted by default. Non-conforming attributes can be extracted |
|
182 |
+#' by specifying \code{miriam.attr}. |
|
183 |
+#' |
|
184 |
+#' To generate a genome scale network, simply provide a list of files to be parsed, or put all |
|
185 |
+#' file in a directory, as pass the directory path as \code{filename} |
|
186 |
+#' |
|
187 |
+#' Note: This function requires libSBML installed (Please see the installation instructions in the Vignette). |
|
188 |
+#' Some SBML level-3 files may requires additional libraries also (An infomative error will be displayed when |
|
189 |
+#' parsing such files). Please visit \url{http://sbml.org/Documents/Specifications/SBML_Level_3/Packages} for |
|
190 |
+#' more information. |
|
191 |
+#' |
|
192 |
+#' @param filename A character vector containing the SBML files to be processed. If a directory path |
|
193 |
+#' is provided, all *.xml and *.sbml files in it and its subdirectories are included. |
|
194 |
+#' @param parse.as Whether to process file into a metabolic or a signaling network. |
|
195 |
+#' @param miriam.attr A list of annotation attributes to be extracted. If \code{"all"}, then all attibutes |
|
196 |
+#' written in MIRIAM guidelines (see Details) are extracted (Default). If \code{"none"}, then no attributes |
|
197 |
+#' are extracted. Otherwise, only attributes matching those specified are extracted. |
|
198 |
+#' @param gene.attr An attribute to distinguish \code{species} representing genes from those |
|
199 |
+#' representing small molecules (see Details). Ignored if \code{parse.as="metabolic"}. |
|
200 |
+#' @param expand.complexes Split protein complexes into individual gene nodes. Ignored if |
|
201 |
+#' \code{parse.as="metabolic"}, or when \code{gene.attr} is not provided. |
|
202 |
+#' @param verbose Whether to display the progress of the function. |
|
203 |
+#' |
|
204 |
+#' @return An igraph object, representing a metbolic or a signaling network. |
|
205 |
+#' @author Ahmed Mohamed |
|
206 |
+#' @family Database extraction methods |
|
207 |
+#' @export |
|
208 |
+#' @examples |
|
209 |
+#' if(is.loaded("readsbmlfile")){ # This is false if libSBML wasn't available at installation. |
|
210 |
+#' filename <- system.file("extdata", "porphyrin.sbml", package="NetPathMiner") |
|
211 |
+#' |
|
212 |
+#' # Process SBML file as a metabolic network |
|
213 |
+#' g <- SBML2igraph(filename) |
|
214 |
+#' plotNetwork(g) |
|
215 |
+#' |
|
216 |
+#' # Process SBML file as a signaling network |
|
217 |
+#' g <- SBML2igraph(filename, parse.as="signaling", |
|
218 |
+#' gene.attr="miriam.uniprot",expand.complexes=TRUE) |
|
219 |
+#' dev.new() |
|
220 |
+#' plotNetwork(g) |
|
221 |
+#' } |
|
222 |
+SBML2igraph <- function(filename, parse.as=c("metabolic","signaling"), miriam.attr="all", |
|
223 |
+ gene.attr, expand.complexes, verbose=TRUE){ |
|
224 |
+ if(!is.loaded("readsbmlfile")) |
|
225 |
+ stop("SBML2igraph requires libSBML to be present. Please reinstall NetPathMiner after installing libSBML.") |
|
226 |
+ |
|
227 |
+ if("SBML2igraph" %in% options("NPM_ENV")[[1]]$memory.err) |
|
228 |
+ stop("SBML2igraph has previously caused a critical memory error. For safety reasons, please restart R.") |
|
229 |
+ |
|
230 |
+ fileList <- check.file(filename, ext=".sbml|.xml") |
|
231 |
+ if(length(fileList)==0) stop("No files to process!") |
|
232 |
+ |
|
233 |
+ if(!missing(parse.as)){ |
|
234 |
+ if(parse.as=="signaling"){ |
|
235 |
+ return(SBML_signal(fileList, miriam.attr,gene.attr,expand.complexes, verbose)) |
|
236 |
+ }else{ |
|
237 |
+ if(parse.as != "metabolic") |
|
238 |
+ stop("Unknown parsing method:", parse.as) |
|
239 |
+ } |
|
240 |
+ } |
|
241 |
+ if(verbose) message("Parsing SBML files as metabolic networks") |
|
242 |
+ |
|
243 |
+ if(length(fileList)==1){ |
|
244 |
+ zsbml <- .Call("readsbmlfile", FILENAME = fileList, ATTR_TERMS = miriam.attr, VERBOSE=verbose) |
|
245 |
+ names(zsbml) <- c("reactions", "species") |
|
246 |
+ }else{ |
|
247 |
+ zsbml <- sapply(fileList, function(x) |
|
248 |
+ .Call("readsbmlfile", FILENAME = x, ATTR_TERMS = miriam.attr, VERBOSE=verbose) |
|
249 |
+ , USE.NAMES=FALSE) |
|
250 |
+ |
|
251 |
+ zsbml <- apply(zsbml, 1, function(x) c(unlist(x, recursive=FALSE))) |
|
252 |
+ names(zsbml) <- c("reactions", "species") |
|
253 |
+ |
|
254 |
+ #Resolve reactions and species particiapting in multiple pathways |
|
255 |
+ dup.rns = lapply(zsbml$reactions[duplicated(names(zsbml$reactions))], "[[", "pathway") |
|
256 |
+ if(length(dup.rns)>0){ |
|
257 |
+ lapply(1:length(dup.rns), |
|
258 |
+ function(x) zsbml$reactions[[ names(dup.rns)[x] ]]$pathway <<- |
|
259 |
+ unique( c(zsbml$reactions[[ names(dup.rns)[x] ]]$pathway, dup.rns[[x]] ) |
|
260 |
+ )) |
|
261 |
+ } |
|
262 |
+ |
|
263 |
+ zsbml$species <- zsbml$species[!duplicated(names(zsbml$species))] |
|
264 |
+ } |
|
265 |
+ if(verbose) message("SBML files processed successfully") |
|
266 |
+ |
|
267 |
+ if(verbose) message("Constructing Metabolic Network") |
|
268 |
+ ######### Make the reaction substrate network ############ |
|
269 |
+ edges <-do.call("rbind",lapply(1:length(zsbml$reactions), |
|
270 |
+ function(x)expand.grid(zsbml$reactions[[x]]$reactants, |
|
271 |
+ names(zsbml$reactions[x]), |
|
272 |
+ zsbml$reactions[[x]]$reactant.stoichiometry))) |
|
273 |
+ |
|
274 |
+ edges <- rbind( |
|
275 |
+ do.call("rbind",lapply(1:length(zsbml$reactions), |
|
276 |
+ function(x)expand.grid(names(zsbml$reactions[x]), |
|
277 |
+ zsbml$reactions[[x]]$products, |
|
278 |
+ zsbml$reactions[[x]]$product.stoichiometry))) |
|
279 |
+ , edges) |
|
280 |
+ |
|
281 |
+ names(edges) <- c("from","to", "stoichiometry") |
|
282 |
+ |
|
283 |
+ ########## Making the iGraph object ################### |
|
284 |
+ # contructing a bipartite graph and removing multiple edges |
|
285 |
+ graph <- graph.empty() + vertices(names(zsbml$reactions)) |
|
286 |
+ graph <- graph + vertices(names(zsbml$species)) |
|
287 |
+ graph <- graph + igraph::edges(c(t(edges[,1:2])), stoichiometry=edges$stoichiometry) |
|
288 |
+ |
|
289 |
+ graph <- simplify(graph.data.frame(edges), edge.attr.comb="first") |
|
290 |
+ V(graph)$attr <- unlist(zsbml, recursive=FALSE, use.names=FALSE) |
|
291 |
+ |
|
292 |
+ # Set graphical attributes |
|
293 |
+ reactions <- V(graph)$name %in% names(zsbml$reactions) |
|
294 |
+ V(graph)$reactions <- reactions |
|
295 |
+ V(graph)$shape<- ifelse(V(graph)$reactions==TRUE, "square", "circle") |
|
296 |
+ V(graph)$color <- ifelse(V(graph)$reactions==TRUE,"red", "skyblue") |
|
297 |
+ |
|
298 |
+ graph$source = "SBML" |
|
299 |
+ graph$type = "MR.graph" |
|
300 |
+ |
|
301 |
+ return(graph) |
|
302 |
+} |
|
303 |
+ |
|
304 |
+SBML_signal <- function(fileList, miriam.attr="all", gene.attr, expand.complexes, verbose){ |
|
305 |
+ if(verbose) message("Parsing SBML files as signaling networks") |
|
306 |
+ zsbml <- .Call("readsbml_sign", FILENAME = fileList, ATTR_TERMS = miriam.attr, VERBOSE=verbose) |
|
307 |
+ |
|
308 |
+ if(verbose) message("SBML files processed successfully") |
|
309 |
+ |
|
310 |
+ # DeleteReconnect non-gene reactions (translocation, spontaneous) |
|
311 |
+ # DeleteReconnect all non-gene, max 3 edges (R->s->RN->s->R will be disconnected). |
|
312 |
+ graph = graph.empty() + vertices(zsbml$vertices, attr=zsbml$attr) |
|
313 |
+ graph = graph + igraph::edges(zsbml$edges) |
|
314 |
+ |
|
315 |
+ if(missing(gene.attr)){ |
|
316 |
+ if(verbose) message("No attributes distiuishing genes were provided.") |
|
317 |
+ if(verbose) message("All species from the SBML file are included in the network.") |
|
318 |
+ }else{ |
|
319 |
+ no.gene = which(lapply(lapply(zsbml$attr, "[[", gene.attr), length)==0) |
|
320 |
+ if(length(no.gene>0)) |
|
321 |
+ graph = vertexDeleteReconnect(graph, no.gene, reconnect.threshold=3) |
|
322 |
+ |
|
323 |
+ if(!missing(expand.complexes)&& expand.complexes) |
|
324 |
+ graph = expandComplexes(graph, gene.attr, keep.parent.attr=miriam.attr) |
|
325 |
+ } |
|
326 |
+ |
|
327 |
+ V(graph)$shape<- "circle" |
|
328 |
+ V(graph)$color <- "blue" |
|
329 |
+ |
|
330 |
+ graph$source = "SBML" |
|
331 |
+ graph$type = "S.graph" |
|
332 |
+ |
|
333 |
+ return(graph) |
|
334 |
+} |
|
335 |
+ |
|
336 |
+# TODO: biopax signaling L2, Biopax metabolic L2 small molecules. |
|
337 |
+#' Processes BioPAX objects into igraph objects |
|
338 |
+#' |
|
339 |
+#' This function takes BioPAX objects (level 2 or 3) as input, and returns either a metabolic or a signaling |
|
340 |
+#' network as output. |
|
341 |
+#' |
|
342 |
+#' This function requires \code{rBiopaxParser} installed. |
|
343 |
+#' |
|
344 |
+#' Users can specify whether files are processes as metabolic or signaling networks. |
|
345 |
+#' |
|
346 |
+#' Metabolic networks are given as bipartite graphs, where metabolites and reactions represent |
|
347 |
+#' vertex types. Reactions are constructed from \code{Conversion} classes, connecting them |
|
348 |
+#' to their corresponding \code{Left}s and \code{Right}s. Each reaction vertex has \code{genes} attribute, |
|
349 |
+#' listing all \code{Catalysis} relationships of this reaction. As a general rule, reactions inherit all annotation |
|
350 |
+#' attributes of its catalyzig genes. |
|
351 |
+#' |
|
352 |
+#' Signaling network have genes as vertices and edges represent interactions, such as activiation / inhibition. |
|
353 |
+#' Genes participating in successive reactions are also connected. Signaling interactions are constructed from |
|
354 |
+#' \code{Control} classes, where edges are drawn from \code{controller} to \code{controlled}. |
|
355 |
+#' |
|
356 |
+#' All annotation attributes are exracted from \code{XRefs} associated with the vertices, and are stored according to |
|
357 |
+#' MIRIAM guidelines (\code{miraim.db}, where db is the database name). |
|
358 |
+#' |
|
359 |
+#' |
|
360 |
+#' @param biopax BioPAX object generated by \code{\link[rBiopaxParser]{readBiopax}}. |
|
361 |
+#' @param parse.as Whether to process file into a metabolic or a signaling network. |
|
362 |
+#' @param expand.complexes Split protein complexes into individual gene nodes. Ignored if |
|
363 |
+#' \code{parse.as="metabolic"}. |
|
364 |
+#' @param inc.sm.molecules Include small molecules that are participating in signaling events. Ignored if |
|
365 |
+#' \code{parse.as="metabolic"}. |
|
366 |
+#' @param verbose Whether to display the progress of the function. |
|
367 |
+#' |
|
368 |
+#' @return An igraph object, representing a metbolic or a signaling network. |
|
369 |
+#' @author Ahmed Mohamed |
|
370 |
+#' @family Database extraction methods |
|
371 |
+#' @export |
|
372 |
+#' @examples |
|
373 |
+#' if(require(rBiopaxParser)){ |
|
374 |
+#' data(ex_biopax) |
|
375 |
+#' # Process biopax as a metabolic network |
|
376 |
+#' g <- biopax2igraph(ex_biopax) |
|
377 |
+#' plotNetwork(g) |
|
378 |
+#' |
|
379 |
+#' # Process SBML file as a signaling network |
|
380 |
+#' g <- biopax2igraph(ex_biopax, parse.as="signaling", expand.complexes=TRUE) |
|
381 |
+#' } |
|
382 |
+biopax2igraph <- function(biopax, parse.as=c("metabolic","signaling"), |
|
383 |
+ expand.complexes=FALSE, inc.sm.molecules=FALSE, verbose=TRUE){ |
|
384 |
+ if (!require(rBiopaxParser)) |
|
385 |
+ stop("This functions needs the rBiopaxParser library installed. Check out the installation instructions!") |
|
386 |
+ if (!("biopax" %in% class(biopax))) |
|
387 |
+ stop("Error: pathway2RegulatoryGraph: parameter biopax has to be of class biopax.") |
|
388 |
+ if(missing(parse.as) || parse.as=="metabolic"){ |
|
389 |
+ if(biopax$biopaxlevel == 3) |
|
390 |
+ return(bpMetabolicL3(biopax, verbose)) |
|
391 |
+ else |
|
392 |
+ return(bpMetabolicL2(biopax, verbose)) |
|
393 |
+ }else if(parse.as=="signaling"){ |
|
394 |
+ if(biopax$biopaxlevel == 3) |
|
395 |
+ return(bpSignalingL3(biopax, expand.complexes, inc.sm.molecules, verbose)) |
|
396 |
+ else |
|
397 |
+ stop("Parsing singaling networks from BioPAX level 2 files is not currently supported.") |
|
398 |
+ } |
|
399 |
+} |
|
400 |
+ |
|
401 |
+bpMetabolicL3 <- function(biopax, verbose){ |
|
402 |
+ if(verbose) message("Processing BioPAX (level 3) object as a metabolic network", appendLF=FALSE) |
|
403 |
+ df <- biopax$df |
|
404 |
+ df$property = tolower(df$property) |
|
405 |
+ |
|
406 |
+ # All conversion events |
|
407 |
+ lefts <- df[df$property=="left",c(from="property_attr_value", to="id")] |
|
408 |
+ lefts[,1] <- striph(lefts[,1]) |
|
409 |
+ rights <- df[df$property=="right",c("id","property_attr_value")] |
|
410 |
+ rights[,2] <- striph(rights[,2]) |
|
411 |
+ |
|
412 |
+ # Getting distiction between Metabolic and signaling reactions. |
|
413 |
+ # Metabolic reactions takes only small molecules as substrates/products. |
|
414 |
+ # The following lines removes any Biochemical reactions with non-small molecules |
|
415 |
+ # participants. |
|
416 |
+ classes <- listInstances(biopax,lefts[,1]) |
|
417 |
+ sig.reactions <- lefts[match(classes$id, lefts[,1])[classes$class !="SmallMolecule"],2] |
|
418 |
+ classes <- listInstances(biopax,rights[,2]) |
|
419 |
+ sig.reactions <- unlist(list(sig.reactions, |
|
420 |
+ rights[ match(classes$id, rights[,2])[classes$class !="SmallMolecule"], 1])) |
|
421 |
+ |
|
422 |
+ lefts <- lefts[ !lefts[,2] %in% sig.reactions, ] |
|
423 |
+ rights <- rights[ !rights[,1] %in% sig.reactions, ] |
|
424 |
+ |
|
425 |
+ # Putting the edges into an igraph object. |
|
426 |
+ graph <- graph.data.frame( rbind(lefts, setNames(rights, names(lefts))) ) |
|
427 |
+ reactions <- V(graph)$name %in% unique(as.character(lefts[,2], rights[,1])) |
|
428 |
+ |
|
429 |
+ if(sum(reactions)==0) |
|
430 |
+ stop("No metabolic reactions found. Try parsing the file as a signaling netowrk.") |
|
431 |
+ if(verbose) message(": ", sum(reactions), " reactions found.") |
|
432 |
+ |
|
433 |
+ XRefs <- bpGetReferences(biopax, V(graph)$name) |
|
434 |
+ names(XRefs) <- V(graph)$name |
|
435 |
+ cat.r <- striph(selectInstances(biopax, class="Catalysis", property="controlled")$property_attr_value) |
|
436 |
+ cat.gene <- striph(selectInstances(biopax, class="Catalysis", property="controller")$property_attr_value) |
|
437 |
+ |
|
438 |
+ gene.xref <- bpGetReferences(biopax, cat.gene) |
|
439 |
+ names(gene.xref) <- cat.r |
|
440 |
+ lapply(names(gene.xref), function(x) XRefs[[x]] <<- c(XRefs[[x]],gene.xref[[x]])) |
|
441 |
+ |
|
442 |
+ ############################################################## |
|
443 |
+ # Getting attribute lists (name, compartment, pathway, MIRIAM) |
|
444 |
+ # For reactions (name, reactants, reactant.stoichiometry, products, product.stoichiomentry, |
|
445 |
+ # reversible, kinetics, genes, pathway) |
|
446 |
+ |
|
447 |
+ # MIRIAM attributes |
|
448 |
+ attr <- bpGetAnnFromXRef(df, XRefs[V(graph)$name]) |
|
449 |
+ |
|
450 |
+ # Name attributes |
|
451 |
+ v.names <- listInstances(biopax, id=V(graph)$name) |
|
452 |
+ v.names <- v.names[match(V(graph)$name, v.names$id), "name"] |
|
453 |
+ |
|
454 |
+ # Pathway attributes########################## |
|
455 |
+ ## Get Pathway name and annotations |
|
456 |
+ pw <- listInstances(biopax, class="pathway") |
|
457 |
+ pwXRef <- bpGetReferences(biopax, pw$id) |
|
458 |
+ #pw.ann <- bpGetAnnFromXRef(df, pwXRef) |
|
459 |
+ |
|
460 |
+ ## Get pathway components (only reactions are returned) |
|
461 |
+ pwcomp <- lapply(pw$id, function(x)listPathwayComponents(biopax,x, returnIDonly=TRUE)) |
|
462 |
+ pwcomp <- do.call("rbind", lapply(1:length(pwcomp), |
|
463 |
+ function(x)data.frame(id=x, comp=pwcomp[[x]]))) |
|
464 |
+ |
|
465 |
+ ## Map pathways to reaction vertices. |
|
466 |
+ pwcomp <- pwcomp[pwcomp$comp %in% V(graph)[reactions]$name, ] |
|
467 |
+ pwcomp <- split(pwcomp$id, pwcomp$comp, drop=TRUE) |
|
468 |
+ |
|
469 |
+ ## For metabolites, pathway attributes are inherited from reactions they participate in. |
|
470 |
+ leftright = rbind(lefts, rights) ## Edges connecting metaboites to reactions |
|
471 |
+ leftright$id = match(leftright$id, names(pwcomp)) ## Use numerical ids. |
|
472 |
+ |
|
473 |
+ pwcomp <-c(pwcomp, |
|
474 |
+ lapply(split(pwcomp[leftright$id], leftright$property_attr_value), |
|
475 |
+ function(x) unique(unlist(x)) ) |
|
476 |
+ ) # Do the magic :) |
|
477 |
+ |
|
478 |
+ pwcomp <- pwcomp[match(V(graph)$name, names(pwcomp))] # Reorder to match V(graph)$name |
|
479 |
+ |
|
480 |
+ pw.ann <- bpGetAnnFromXRef(df,lapply(pwcomp, function(x) unlist(pwXRef[x], use.names=FALSE))) |
|
481 |
+ ########################################### |
|
482 |
+ |
|
483 |
+ # Compartment attributes ####################### |
|
484 |
+ ## Metabolites have compaertment attributes, |
|
485 |
+ ## while reactions inherit them from their catalysts. |
|
486 |
+ |
|
487 |
+ ## Get Compartment name and annotations |
|
488 |
+ comp <- listInstances(biopax, class="cellularLocationvocabulary", returnIDonly=TRUE) |
|
489 |
+ comp.terms <- as.character(bpGetAttrbyID(df, comp, "term")$property_value) |
|
490 |
+ #compXRef <- bpGetReferences(biopax, comp) |
|
491 |
+ comp.ann <- bpGetAnnFromXRef(df, bpGetReferences(biopax, comp) ) |
|
492 |
+ |
|
493 |
+ ## Get Metbolite compartments |
|
494 |
+ met.loc <- bpGetAttrbyID(df, V(graph)$name, "cellularlocation", "property_attr_value") |
|
495 |
+ met.loc$property_attr_value <- match(striph(met.loc$property_attr_value), comp) |
|
496 |
+ met.loc <- split(met.loc$property_attr_value, met.loc$id, drop=TRUE) |
|
497 |
+ |
|
498 |
+ ## Get Catalyst compartments |
|
499 |
+ cat.loc <- bpGetAttrbyID(df, cat.gene, "cellularlocation", "property_attr_value") |
|
500 |
+ cat.loc$property_attr_value <- match(striph(cat.loc$property_attr_value), comp) |
|
501 |
+ cat.loc$id <- cat.r[match( cat.loc$id, cat.gene )] |
|
502 |
+ cat.loc <- split(cat.loc$property_attr_value, cat.loc$id, drop=TRUE) |
|
503 |
+ |
|
504 |
+ ## Combine reaction and metbolite comparments, and reorder |
|
505 |
+ loc <- c(met.loc,cat.loc) |
|
506 |
+ loc <- loc[ match(V(graph)$name, names(loc)) ] |
|
507 |
+ ############################################### |
|
508 |
+ |
|
509 |
+ # Reaction-Specific attributes################# |
|
510 |
+ ##Reactants |
|
511 |
+ reactants <- split(lefts[,1], lefts[,2], drop=TRUE) |
|
512 |
+ ##Products |
|
513 |
+ products <- split(rights[,2], rights[,1], drop=TRUE) |
|
514 |
+ ##Genes |
|
515 |
+ cat.gene.name <- listInstances(biopax,cat.gene) |
|
516 |
+ cat.gene.name <- cat.gene.name[match(cat.gene,cat.gene.name$id),"name"] |
|
517 |
+ genes <- split(cat.gene.name, cat.r, drop=TRUE) |
|
518 |
+ |
|
519 |
+ ## Stoichiometry |
|
520 |
+ st <- bpGetAttrbyID(df, V(graph)[reactions]$name, "participantstoichiometry", "property_attr_value") |
|
521 |
+ st.met <- bpGetAttrbyID(df, st$property_attr_value, "physicalentity", "property_attr_value") |
|
522 |
+ st$met <- striph(st.met[match(striph(st$property_attr_value),st.met$id),3]) |
|
523 |
+ st.cf <- bpGetAttrbyID(df, st$property_attr_value, "stoichiometriccoefficient") |
|
524 |
+ st$cf <- st.cf[match(striph(st$property_attr_value),st.cf$id),3] |
|
525 |
+ |
|
526 |
+ edges <- rbind(lefts,rights) |
|
527 |
+ edges<- cbind(edges, st=NA) |
|
528 |
+ apply(st[,c(1,4,5)], 1, function(x) edges[edges$id==x[[1]] & edges$property_attr_value==x[[2]],"st"] <<-x[[3]]) |
|
529 |
+ edges$st <- as.numeric(edges$st) |
|
530 |
+ |
|
531 |
+ r.stoic <- split(edges[1:nrow(lefts),3], edges[1:nrow(lefts),2], drop=TRUE) |
|
532 |
+ p.stoic <- split(edges[-c(1:nrow(lefts)),3], edges[-c(1:nrow(lefts)),2], drop=TRUE) |
|
533 |
+ |
|
534 |
+ # Reorder our attributes |
|
535 |
+ reactants <- reactants[ match(V(graph)[reactions]$name, names(reactants)) ] |
|
536 |
+ products <- products[match(V(graph)[reactions]$name, names(products))] |
|
537 |
+ genes <- genes[match(V(graph)[reactions]$name, names(genes))] |
|
538 |
+ r.stoic <- r.stoic[match(V(graph)[reactions]$name, names(r.stoic))] |
|
539 |
+ p.stoic <- p.stoic[match(V(graph)[reactions]$name, names(p.stoic))] |
|
540 |
+ ####################################################### |
|
541 |
+ # Putting it together |
|
542 |
+ |
|
543 |
+ ## A small hack to distinguish compartment annotations from vertex annotation. |
|
544 |
+ names(comp.ann) <- rep("compartment", length(comp.ann)) |
|
545 |
+ |
|
546 |
+ ## For reactions |
|
547 |
+ V(graph)[reactions]$attr <- |
|
548 |
+ mapply(function(...){ |
|
549 |
+ args=list(...) |
|
550 |
+ c(name=args[[1]], |
|
551 |
+ reversible=FALSE, |
|
552 |
+ reactants = list(args[[2]]), |
|
553 |
+ reactant.stoichiometry= list(args[[3]]), |
|
554 |
+ products = list(args[[4]]), |
|
555 |
+ product.stoichiometry= list(args[[5]]), |
|
556 |
+ kinetics=NULL, |
|
557 |
+ genes = list(args[[6]]), |
|
558 |
+ compartment=list(comp.terms[args[[7]] ]), |
|
559 |
+ unlist(comp.ann[args[[7]] ], recursive=FALSE), |
|
560 |
+ pathway=list(pw$name[args[[8]] ]), |
|
561 |
+ unlist(args[9],recursive=FALSE), |
|
562 |
+ args[[10]] |
|
563 |
+ ) |
|
564 |
+ }, v.names[reactions], |
|
565 |
+ reactants, r.stoic, products, p.stoic, genes, |
|
566 |
+ loc[reactions], pwcomp[reactions], pathway = pw.ann[reactions],attr[reactions], |
|
567 |
+ SIMPLIFY=FALSE) |
|
568 |
+ |
|
569 |
+ V(graph)[!reactions]$attr <- |
|
570 |
+ mapply(function(...){ |
|
571 |
+ args=list(...) |
|
572 |
+ c(name=args[[1]], |
|
573 |
+ compartment=list(comp.terms[args[[2]] ]), |
|
574 |
+ unlist(comp.ann[args[[2]] ], recursive=FALSE), |
|
575 |
+ pathway=list(pw$name[args[[3]] ]), |
|
576 |
+ unlist(args[4],recursive=FALSE), |
|
577 |
+ args[[5]] |
|
578 |
+ ) |
|
579 |
+ }, v.names[!reactions], |
|
580 |
+ loc[!reactions], pwcomp[!reactions], pathway= pw.ann[!reactions],attr[!reactions], |
|
581 |
+ SIMPLIFY=FALSE) |
|
582 |
+ |
|
583 |
+ E(graph)$stoichiometry = edges$st |
|
584 |
+ |
|
585 |
+ V(graph)$reactions <- reactions |
|
586 |
+ V(graph)$shape<- ifelse(V(graph)$reactions==TRUE, "square", "circle") |
|
587 |
+ V(graph)$color <- ifelse(V(graph)$reactions==TRUE,"red", "skyblue") |
|
588 |
+ |
|
589 |
+ graph$source = "BioPAX_L3" |
|
590 |
+ graph$type = "MR.graph" |
|
591 |
+ |
|
592 |
+ return(graph) |
|
593 |
+} |
|
594 |
+ |
|
595 |
+bpMetabolicL2 <- function(biopax, verbose){ |
|
596 |
+ if(verbose) message("Processing BioPAX (level 2) object as a metabolic network", appendLF=FALSE) |
|
597 |
+ |
|
598 |
+ df <- biopax$df |
|
599 |
+ df$property = tolower(df$property) |
|
600 |
+ |
|
601 |
+ lefts <- df[df$property=="left",c(from="property_attr_value", to="id")] |
|
602 |
+ left.mol = bpGetAttrbyID(df,lefts[,1], "physical-entity", "property_attr_value")# |
|
603 |
+ lefts[,1] = left.mol[match(striph(lefts[,1]), left.mol$id), "property_attr_value"]# |
|
604 |
+ lefts[,1] <- striph(lefts[,1]) |
|
605 |
+ |
|
606 |
+ rights <- df[df$property=="right",c("id","property_attr_value")] |
|
607 |
+ right.mol = bpGetAttrbyID(df,rights[,2], "physical-entity", "property_attr_value")# |
|
608 |
+ rights[,2] = right.mol[match(striph(rights[,2]), right.mol$id), "property_attr_value"]# |
|
609 |
+ rights[,2] <- striph(rights[,2]) |
|
610 |
+ |
|
611 |
+ graph <- graph.data.frame( rbind(lefts, setNames(rights, names(lefts))) ) |
|
612 |
+ reactions <- V(graph)$name %in% unique(as.character(lefts[,2], rights[,1])) |
|
613 |
+ |
|
614 |
+ if(verbose) message(": ", sum(reactions), " reactions found.") |
|
615 |
+ |
|
616 |
+ XRefs <- bpGetReferences(biopax, V(graph)$name) |
|
617 |
+ names(XRefs) <- V(graph)$name |
|
618 |
+ cat.r <- striph(selectInstances(biopax, class="Control", property="controlled")$property_attr_value) |
|
619 |
+ cat.gene <- striph(selectInstances(biopax, class="Control", property="controller")$property_attr_value) |
|
620 |
+ |
|
621 |
+ gene.xref <- bpGetReferences(biopax, cat.gene, followProperties="physical-entity")# |
|
622 |
+ names(gene.xref) <- cat.r |
|
623 |
+ lapply(names(gene.xref), function(x) XRefs[[x]] <<- c(XRefs[[x]],gene.xref[[x]])) |
|
624 |
+ |
|
625 |
+ ############################################################## |
|
626 |
+ # Getting attribute lists (name, compartment, pathway, MIRIAM) |
|
627 |
+ # For reactions (name, reactants, reactant.stoichiometry, products, product.stoichiomentry, |
|
628 |
+ # reversible, kinetics, genes, pathway) |
|
629 |
+ |
|
630 |
+ # MIRIAM attributes |
|
631 |
+ attr <- bpGetAnnFromXRef(df, XRefs[V(graph)$name]) |
|
632 |
+ |
|
633 |
+ # Name attributes |
|
634 |
+ v.names <- listInstances(biopax, id=V(graph)$name) |
|
635 |
+ v.names <- v.names[match(V(graph)$name, v.names$id), "name"] |
|
636 |
+ |
|
637 |
+ # Pathway attributes########################## |
|
638 |
+ ## Get Pathway name and annotations |
|
639 |
+ pw <- listInstances(biopax, class="pathway") |
|
640 |
+ pwXRef <- bpGetReferences(biopax, pw$id) |
|
641 |
+ #pw.ann <- bpGetAnnFromXRef(df, pwXRef) |
|
642 |
+ |
|
643 |
+ ## Get pathway components (only reactions are returned) |
|
644 |
+ pwcomp <- lapply(pw$id, function(x)listPathwayComponents(biopax,x, returnIDonly=TRUE)) |
|
645 |
+ pwcomp <- do.call("rbind", lapply(1:length(pwcomp), |
|
646 |
+ function(x)data.frame(id=x, comp=pwcomp[[x]]))) |
|
647 |
+ |
|
648 |
+ ## Map pathways to reaction vertices. |
|
649 |
+ pwcomp <- pwcomp[pwcomp$comp %in% V(graph)[reactions]$name, ] |
|
650 |
+ pwcomp <- split(pwcomp$id, pwcomp$comp, drop=TRUE) |
|
651 |
+ |
|
652 |
+ ## For metabolites, pathway attributes are inherited from reactions they participate in. |
|
653 |
+ leftright = rbind(lefts, rights) ## Edges connecting metaboites to reactions |
|
654 |
+ leftright$id = match(leftright$id, names(pwcomp)) ## Use numerical ids. |
|
655 |
+ |
|
656 |
+ pwcomp <-c(pwcomp, |
|
657 |
+ lapply(split(pwcomp[leftright$id], leftright$property_attr_value), |
|
658 |
+ function(x) unique(unlist(x)) ) |
|
659 |
+ ) # Do the magic :) |
|
660 |
+ |
|
661 |
+ pwcomp <- pwcomp[match(V(graph)$name, names(pwcomp))] # Reorder to match V(graph)$name |
|
662 |
+ |
|
663 |
+ pw.ann <- bpGetAnnFromXRef(df,lapply(pwcomp, function(x) unlist(pwXRef[x], use.names=FALSE))) |
|
664 |
+ ########################################### |
|
665 |
+ |
|
666 |
+ # Compartment attributes ####################### |
|
667 |
+ ## Metabolites have compaertment attributes, |
|
668 |
+ ## while reactions inherit them from their catalysts. |
|
669 |
+ |
|
670 |
+ ## Get Pathway name and annotations |
|
671 |
+ comp <- listInstances(biopax, class="openControlledVocabulary", returnIDonly=TRUE)# |
|
672 |
+ comp.terms <- as.character(bpGetAttrbyID(df, comp, "term")$property_value) |
|
673 |
+ compXRef <- bpGetReferences(biopax, comp)# |
|
674 |
+ #comp.ann <- bpGetAnnFromXRef(df, bpGetReferences(biopax, comp) ) |
|
675 |
+ |
|
676 |
+ ## Get Metbolite compartments |
|
677 |
+ left.loc <- bpGetAttrbyID(df, left.mol$id, "cellular-location", "property_attr_value")# |
|
678 |
+ left.loc$id <- left.mol[match(left.loc$id, left.mol$id),3]# |
|
679 |
+ right.loc <- bpGetAttrbyID(df, right.mol$id, "cellular-location", "property_attr_value")# |
|
680 |
+ right.loc$id <- right.mol[match(right.loc$id, right.mol$id),3]# |
|
681 |
+ |
|
682 |
+ met.loc <- rbind(left.loc, right.loc)# |
|
683 |
+ met.loc$property_attr_value <- match(striph(met.loc$property_attr_value), comp) |
|
684 |
+ met.loc <- split(met.loc$property_attr_value, striph(met.loc$id), drop=TRUE)# |
|
685 |
+ |
|
686 |
+ ## Get Catalyst compartments |
|
687 |
+ cat.loc <- bpGetAttrbyID(df, cat.gene, "cellular-location", "property_attr_value")# |
|
688 |
+ cat.loc$property_attr_value <- match(striph(cat.loc$property_attr_value), comp) |
|
689 |
+ cat.loc$id <- cat.r[match( cat.loc$id, cat.gene )] |
|
690 |
+ cat.loc <- split(cat.loc$property_attr_value, cat.loc$id, drop=TRUE) |
|
691 |
+ |
|
692 |
+ ## Combine reaction and metbolite comparments, and reorder |
|
693 |
+ loc <- c(met.loc,cat.loc) |
|
694 |
+ loc <- loc[ match(V(graph)$name, names(loc)) ] |
|
695 |
+ comp.names <- lapply(loc, function(x)unique(unlist(comp.terms[ x ], use.names=FALSE))) |
|
696 |
+ |
|
697 |
+ locXRef <- lapply(loc, function(x)unlist(compXRef[ x ], use.names=FALSE)) |
|
698 |
+ comp.ann <- bpGetAnnFromXRef(df, locXRef) |
|
699 |
+ ############################################### |
|
700 |
+ |
|
701 |
+ # Reaction-Specific attributes################# |
|
702 |
+ ##Reactants |
|
703 |
+ reactants <- split(lefts[,1], lefts[,2], drop=TRUE) |
|
704 |
+ ##Products |
|
705 |
+ products <- split(rights[,2], rights[,1], drop=TRUE) |
|
706 |
+ ##Genes ##totally different from level3 function |
|
707 |
+ cat.gene.ref <- bpGetAttrbyID(df,cat.gene, "physical-entity", "property_attr_value") |
|
708 |
+ cat.gene.name <- listInstances(biopax, cat.gene.ref[,3]) |
|
709 |
+ cat.gene.ref$name <- cat.gene.name$name[match(striph(cat.gene.ref[,3]), cat.gene.name$id)] |
|
710 |
+ |
|
711 |
+ cat.gene.ref <- cat.gene.ref[match(cat.gene,cat.gene.ref$id),"name"] |
|
712 |
+ genes <- split(cat.gene.ref, cat.r, drop=TRUE) |
|
713 |
+ |
|
714 |
+ ## Stoichiometry |
|
715 |
+ lefts <- df[df$property=="left",c(from="property_attr_value", to="id")] |
|
716 |
+ rights <- df[df$property=="right",c("id","property_attr_value")] |
|
717 |
+ edges <- rbind(lefts,rights) |
|
718 |
+ edges<- cbind(edges, st=NA) |
|
719 |
+ |
|
720 |
+ st <- bpGetAttrbyID(df, edges$property_attr_value, "stoichiometric-coefficient") |
|
721 |
+ edges$st = st$property_value[match(striph(edges$property_attr_value), st$id)] |
|
722 |
+ edges$st <- as.numeric(as.character(edges$st)) |
|
723 |
+ |
|
724 |
+ r.stoic <- split(edges[1:nrow(lefts),3], edges[1:nrow(lefts),2], drop=TRUE) |
|
725 |
+ p.stoic <- split(edges[-c(1:nrow(lefts)),3], edges[-c(1:nrow(lefts)),2], drop=TRUE) |
|
726 |
+ |
|
727 |
+ # Reorder our attributes |
|
728 |
+ reactants <- reactants[ match(V(graph)[reactions]$name, names(reactants)) ] |
|
729 |
+ products <- products[match(V(graph)[reactions]$name, names(products))] |
|
730 |
+ genes <- genes[match(V(graph)[reactions]$name, names(genes))] |
|
731 |
+ r.stoic <- r.stoic[match(V(graph)[reactions]$name, names(r.stoic))] |
|
732 |
+ p.stoic <- p.stoic[match(V(graph)[reactions]$name, names(p.stoic))] |
|
733 |
+ ####################################################### |
|
734 |
+ # Putting it together |
|
735 |
+ |
|
736 |
+ ## A small hack to distinguish compartment annotations from vertex annotation. |
|
737 |
+ #names(comp.ann) <- rep("compartment", length(comp.ann)) |
|
738 |
+ |
|
739 |
+ ## For reactions |
|
740 |
+ V(graph)[reactions]$attr <- |
|
741 |
+ mapply(function(...){ |
|
742 |
+ args=list(...) |
|
743 |
+ c(name=args[[1]], |
|
744 |
+ reversible=FALSE, |
|
745 |
+ reactants = list(args[[2]]), |
|
746 |
+ reactant.stoichiometry= list(args[[3]]), |
|
747 |
+ products = list(args[[4]]), |
|
748 |
+ product.stoichiometry= list(args[[5]]), |
|
749 |
+ kinetics=NULL, |
|
750 |
+ genes = list(args[[6]]), |
|
751 |
+ compartment=list(args[[7]]), |
|
752 |
+ unlist(args[8], recursive=FALSE), |
|
753 |
+ pathway=list(pw$name[args[[9]] ]), |
|
754 |
+ unlist(args[10],recursive=FALSE), |
|
755 |
+ args[[11]] |
|
756 |
+ ) |
|
757 |
+ }, v.names[reactions], |
|
758 |
+ reactants, r.stoic, products, p.stoic, genes, |
|
759 |
+ comp.names[reactions], compartment = comp.ann[reactions], |
|
760 |
+ pwcomp[reactions], pathway = pw.ann[reactions],attr[reactions], |
|
761 |
+ SIMPLIFY=FALSE) |
|
762 |
+ |
|
763 |
+ V(graph)[!reactions]$attr <- |
|
764 |
+ mapply(function(...){ |
|
765 |
+ args=list(...) |
|
766 |
+ c(name=args[[1]], |
|
767 |
+ compartment=list(args[[2]]), |
|
768 |
+ unlist(args[3], recursive=FALSE), |
|
769 |
+ pathway=list(pw$name[args[[4]] ]), |
|
770 |
+ unlist(args[5],recursive=FALSE), |
|
771 |
+ args[[6]] |
|
772 |
+ ) |
|
773 |
+ }, v.names[!reactions], |
|
774 |
+ comp.names[!reactions], compartment = comp.ann[!reactions], |
|
775 |
+ pwcomp[!reactions], pathway = pw.ann[!reactions],attr[!reactions], |
|
776 |
+ SIMPLIFY=FALSE) |
|
777 |
+ |
|
778 |
+ E(graph)$stoichiometry = edges$st |
|
779 |
+ |
|
780 |
+ V(graph)$reactions <- reactions |
|
781 |
+ V(graph)$shape<- ifelse(V(graph)$reactions==TRUE, "square", "circle") |
|
782 |
+ V(graph)$color <- ifelse(V(graph)$reactions==TRUE,"red", "skyblue") |
|
783 |
+ |
|
784 |
+ graph$source = "BioPAX_L2" |
|
785 |
+ graph$type = "MR.graph" |
|
786 |
+ |
|
787 |
+ return(graph) |
|
788 |
+} |
|
789 |
+ |
|
790 |
+# TODO: edge attributes in signaling networks |
|
791 |
+bpSignalingL3 <- function(biopax, expand.complexes=FALSE, inc.sm.molecules=FALSE, verbose=TRUE){ |
|
792 |
+ if(verbose) message("Processing BioPAX (level 3) object as a signaling network", appendLF=FALSE) |
|
793 |
+ df <- biopax$df |
|
794 |
+ df$property = tolower(df$property) |
|
795 |
+ |
|
796 |
+ ############### Constructing the signaling graph ########################## |
|
797 |
+ # All conversion events |
|
798 |
+ lefts <- df[df$property=="left",c(from="property_attr_value", to="id")] |
|
799 |
+ lefts[,1] <- striph(lefts[,1]) |
|
800 |
+ rights <- df[df$property=="right",c("id","property_attr_value")] |
|
801 |
+ rights[,2] <- striph(rights[,2]) |
|
802 |
+ |
|
803 |
+ # Identifying Metabolic and signaling reactions. |
|
804 |
+ # Metabolic reactions takes only small molecules as substrates/products. |
|
805 |
+ # Signaling reactions have at least one non-small-molecule participant. |
|
806 |
+ classes.left <- listInstances(biopax,lefts[,1]) |
|
807 |
+ sig.reactions <- lefts[match(classes.left$id, lefts[,1])[classes.left$class !="SmallMolecule"],2] |
|
808 |
+ classes.right <- listInstances(biopax,rights[,2]) |
|
809 |
+ sig.reactions <- unlist(list(sig.reactions, |
|
810 |
+ rights[ match(classes.right$id, rights[,2])[classes.right$class !="SmallMolecule"], 1])) |
|
811 |
+ |
|
812 |
+ |
|
813 |
+ controlled <- striph(selectInstances(biopax, class="Catalysis", property="controlled")$property_attr_value) |
|
814 |
+ controller <- striph(selectInstances(biopax, class="Catalysis", property="controller")$property_attr_value) |
|
815 |
+ |
|
816 |
+ |
|
817 |
+ # Metabolic reactions are represented in signaling networks by their "Controller" |
|
818 |
+ # where controllers of successive reactions are connected. If no "Controller" |
|
819 |
+ # is assigned, no edges are drawn. |
|
820 |
+ met.left <- lefts[ !lefts[,2] %in% sig.reactions,] |
|
821 |
+ met.right <- rights[ !rights[,1] %in% sig.reactions,] |
|
822 |
+ met.left <- do.call("rbind", |
|
823 |
+ mapply(bpExpand.grid, split(met.left[,1], met.left[,2], drop=TRUE)[controlled], controller,SIMPLIFY=FALSE) |
|
824 |
+ ) |
|
825 |
+ met.right <- do.call("rbind", |
|
826 |
+ mapply(bpExpand.grid, controller, split(met.right[,2], met.right[,1], drop=TRUE)[controlled], SIMPLIFY=FALSE) |
|
827 |
+ ) |
|
828 |
+ |
|
829 |
+ graph <- graph.data.frame( na.omit( rbind(met.left, met.right) ) ) |
|
830 |
+ reactions <- V(graph)$name %in% unique(as.character(met.left[,2], met.right[,1])) |
|
831 |
+ if(sum(reactions)>0){ |
|
832 |
+ V(graph)$reactions <- reactions |
|
833 |
+ V(graph)$attr <- list(list()) |
|
834 |
+ graph <- makeReactionNetwork(graph) |
|
835 |
+ } |
|
836 |
+ |
|
837 |
+ # Signaling reactions are represeted as Left -> Controller -> Right |
|
838 |
+ # If no "Controller" is assigned, it's represented as Left -> Right |
|
839 |
+ if(inc.sm.molecules){ |
|
840 |
+ sig.left <- lefts[ lefts[,2] %in% sig.reactions, ] |
|
841 |
+ sig.right <- rights[ rights[,1] %in% sig.reactions, ] |
|
842 |
+ }else{ |
|
843 |
+ sig.left <- lefts[ lefts[,1] %in% classes.left[classes.left$class!="SmallMolecule","id"] , ] |
|
844 |
+ sig.right <- rights[ rights[,2] %in% classes.right[classes.right$class!="SmallMolecule","id"], ] |
|
845 |
+ } |
|
846 |
+ |
|
847 |
+ # No Controllers (Left -> Right) |
|
848 |
+ sig.no.ctrl <- as.character(unique( sig.reactions[ !sig.reactions %in% controlled ] )) |
|
849 |
+ sig.no.ctrl <- do.call("rbind", |
|
850 |
+ mapply(bpExpand.grid, split(sig.left[,1], sig.left[,2], drop=TRUE)[sig.no.ctrl], |
|
851 |
+ split(sig.right[,2], sig.right[,1], drop=TRUE)[sig.no.ctrl] |
|
852 |
+ ,SIMPLIFY=FALSE) |
|
853 |
+ ) |
|
854 |
+ |
|
855 |
+ # Controllers present (Left -> Controller -> Right) |
|
856 |
+ sig.left <- do.call("rbind", |
|
857 |
+ mapply(bpExpand.grid, split(sig.left[,1], sig.left[,2], drop=TRUE)[controlled], controller,SIMPLIFY=FALSE) |
|
858 |
+ ) |
|
859 |
+ sig.right <- do.call("rbind", |
|
860 |
+ mapply(bpExpand.grid, controller, split(sig.right[,2], sig.right[,1], drop=TRUE)[controlled], SIMPLIFY=FALSE) |
|
861 |
+ ) |
|
862 |
+ |
|
863 |
+ # Putting it all signaling reactions together |
|
864 |
+ all.sig <- c(t( na.omit(rbind(sig.no.ctrl, sig.left, sig.right)) )) |
|
865 |
+ sig.vertices <- unique(all.sig) |
|
866 |
+ |
|
867 |
+ # Combining the signaling reactions with the metabolic one. |
|
868 |
+ graph <- graph + vertices(sig.vertices[!sig.vertices %in% V(graph)$name]) + igraph::edges(all.sig) |
|
869 |
+ |
|
870 |
+ if(expand.complexes){ |
|
871 |
+ comsp <- bpSplitComplex(biopax,V(graph)$name, inc.sm.molecules) |
|
872 |
+ graph <- setAttribute(graph,"complex", comsp) |
|
873 |
+ graph <- igraph::simplify( expandComplexes(graph, "complex", NULL, "normal", "keep") ) |
|
874 |
+ } |
|
875 |
+ |
|
876 |
+ if(ecount(graph)==0) |
|
877 |
+ stop("No Singling interaction were found.") |
|
878 |
+ if(verbose) message(": ", ecount(graph), " interaction found.") |
|
879 |
+ |
|
880 |
+ ################################################################## |
|
881 |
+ ############ Getting attributes for graph vertices ############### |
|
882 |
+ # Getting attribute lists (name, compartment, pathway, MIRIAM) |
|
883 |
+ XRefs <- bpGetReferences(biopax, V(graph)$name) |
|
884 |
+ names(XRefs) <- V(graph)$name |
|
885 |
+ |
|
886 |
+ # MIRIAM attributes |
|
887 |
+ attr <- bpGetAnnFromXRef(df, XRefs[V(graph)$name]) |
|
888 |
+ |
|
889 |
+ # Name attributes |
|
890 |
+ v.names <- listInstances(biopax, id=V(graph)$name) |
|
891 |
+ v.names <- v.names[match(V(graph)$name, v.names$id), "name"] |
|
892 |
+ |
|
893 |
+ ## Get Pathway name and annotations |
|
894 |
+ pw <- listInstances(biopax, class="pathway") |
|
895 |
+ pwXRef <- bpGetReferences(biopax, pw$id) |
|
896 |
+ |
|
897 |
+ ## Get pathway components (only reactions are returned) |
|
898 |
+ pwcomp <- lapply(pw$id, function(x)listPathwayComponents(biopax,x, returnIDonly=FALSE)) |
|
899 |
+ pwcomp <- do.call("rbind", lapply(1:length(pwcomp), |
|
900 |
+ function(x)data.frame(id=x, comp=pwcomp[[x]]))) |
|
901 |
+ |
|
902 |
+ ## Restrict Pathway components to Conversion reactions (Removing PathwayStep, Control). |
|
903 |
+ pwcomp <- pwcomp[pwcomp$comp.class %in% c("Conversion", getSubClasses("Conversion", biopaxlevel=3)) ,] |
|
904 |
+ |
|
905 |
+ pwcomp <- split(pwcomp$id, pwcomp$comp.id, drop=TRUE) |
|
906 |
+ |
|
907 |
+ ## Since Conversions (reactions) are no longer present in the network |
|
908 |
+ # (either their controllers or substrate/products are retained), vertices inherit |
|
909 |
+ # Pathway annotations from reactions they participate in. |
|
910 |
+ leftright <- rbind(lefts, rights, cbind(property_attr_value=controller,id=controlled)) # Vertices and reactions they participate in. |
|
911 |
+ leftright <- leftright[ leftright[,1] %in% V(graph)$name,] |
|
912 |
+ leftright$id = match(leftright$id, names(pwcomp)) ## Use numerical ids. |
|
913 |
+ |
|
914 |
+ pwcomp <-c(pwcomp, |
|
915 |
+ lapply(split(pwcomp[leftright$id], leftright$property_attr_value), |
|
916 |
+ function(x) unique(unlist(x)) ) |
|
917 |
+ ) # Do the magic :) |
|
918 |
+ |
|
919 |
+ pwcomp <- pwcomp[V(graph)$name] # Reorder to match V(graph)$name |
|
920 |
+ pw.ann <- bpGetAnnFromXRef(df,lapply(pwcomp, function(x) unlist(pwXRef[x], use.names=FALSE))) |
|
921 |
+ ########################################### |
|
922 |
+ |
|
923 |
+ ## Compartment attributes ####################### |
|
924 |
+ ### Get Compartment name and annotations |
|
925 |
+ comp <- listInstances(biopax, class="cellularLocationvocabulary", returnIDonly=TRUE) |
|
926 |
+ comp.terms <- as.character(bpGetAttrbyID(df, comp, "term")$property_value) |
|
927 |
+ comp.ann <- bpGetAnnFromXRef(df, bpGetReferences(biopax, comp) ) |
|
928 |
+ |
|
929 |
+ ## Get Vertices' compartments |
|
930 |
+ loc <- bpGetAttrbyID(df, V(graph)$name, "cellularlocation", "property_attr_value") |
|
931 |
+ loc$property_attr_value <- match(striph(loc$property_attr_value), comp) |
|
932 |
+ loc <- split(loc$property_attr_value, loc$id, drop=TRUE)[ V(graph)$name] |
|
933 |
+ ############################################### |
|
934 |
+ ############################################### |
|
935 |
+ # Putting it together |
|
936 |
+ |
|
937 |
+ ## A small hack to distinguish compartment annotations from vertex annotation. |
|
938 |
+ names(comp.ann) <- rep("compartment", length(comp.ann)) |
|
939 |
+ |
|
940 |
+ V(graph)$attr <- |
|
941 |
+ mapply(function(...){ |
|
942 |
+ args=list(...) |
|
943 |
+ c(name=args[[1]], |
|
944 |
+ compartment=list(comp.terms[ args[[2]] ]), |
|
945 |
+ unlist(comp.ann[ args[[2]] ], recursive=FALSE), |
|
946 |
+ pathway=list(pw$name[ args[[3]] ]), |
|
947 |
+ unlist(args[4],recursive=FALSE), |
|
948 |
+ args[[5]] |
|
949 |
+ ) |
|
950 |
+ }, v.names, loc, pwcomp, pathway= pw.ann ,attr, |
|
951 |
+ SIMPLIFY=FALSE) |
|
952 |
+ |
|
953 |
+ V(graph)$shape<- "circle" |
|
954 |
+ V(graph)$color <- "blue" |
|
955 |
+ |
|
956 |
+ graph$source = "BioPAX_L3" |
|
957 |
+ graph$type = "S.graph" |
|
958 |
+ |
|
959 |
+ return(graph) |
|
960 |
+} |
|
961 |
+############Helper functions to process biopax objects################ |
|
962 |
+bpGetReferences <- function (biopax, id, |
|
963 |
+ followProperties = c("entityreference","component", "memberPhysicalEntity"), |
|
964 |
+ getProperties="xref") |
|
965 |
+{ |
|
966 |
+ id.u = unique(striph(id)) |
|
967 |
+ |
|
968 |
+ isrdfresource = biopax$df$property_attr == "rdf:resource" |
|
969 |
+ propertysel = tolower(biopax$df$property) %in% tolower(followProperties) |
|
970 |
+ newIDs = biopax$df[biopax$df$id %in% id.u & isrdfresource & |
|
971 |
+ propertysel, c("id","property_attr_value")] |
|
972 |
+ |
|
973 |
+ propertyget = tolower(biopax$df$property) %in% tolower(getProperties) |
|
974 |
+ attr = biopax$df[biopax$df$id %in% id.u & isrdfresource & |
|
975 |
+ propertyget, c("id","property_attr_value")] |
|
976 |
+ |
|
977 |
+ attr.ls = split(as.character(attr[,2]), attr$id, drop=TRUE) |
|
978 |
+ if(nrow(newIDs)>0){ |
|
979 |
+ recur.ls = bpGetReferences(biopax, newIDs$property_attr_value, followProperties, getProperties) |
|
980 |
+ recur.sp = split(recur.ls,newIDs$id, drop=TRUE) |
|
981 |
+ keys <- unique(c(names(attr.ls), names(recur.sp))) |
|
982 |
+ attr.ls = setNames(mapply(function(...)unique(unlist(c(...))), |
|
983 |
+ attr.ls[keys], recur.sp[keys], SIMPLIFY=FALSE), |
|
984 |
+ keys) |
|
985 |
+ } |
|
986 |
+ |
|
987 |
+ pos = match(striph(id), names(attr.ls)) |
|
988 |
+ return(attr.ls[pos]) |
|
989 |
+} |
|
990 |
+ |
|
991 |
+bpSplitComplex<-function (biopax, complexid, inc.sm.molecules) |
|
992 |
+{ |
|
993 |
+ compname = c("COMPONENTS", "PHYSICAL-ENTITY") |
|
994 |
+ if (biopax$biopaxlevel == 3) { |
|
995 |
+ compname = c("memberPhysicalEntity","component") |
|
996 |
+ } |
|
997 |
+ if(inc.sm.molecules) |
|
998 |
+ classes <- c("dna", "rna", "protein", "smallmolecule") |
|
999 |
+ else |
|
1000 |
+ classes <- c("dna", "rna", "protein") |
|
1001 |
+ |
|
1002 |
+ ref = bpGetReferences(biopax, complexid, followProperties=compname, getProperties=compname) |
|
1003 |
+ |
|
1004 |
+ if (is.null(ref)) |
|
1005 |
+ return(NULL) |
|
1006 |
+ |
|
1007 |
+ ref = do.call("rbind", lapply( na.omit(names(ref)), function(x) cbind( x, striph(ref[[x]]) ) ) ) |
|
1008 |
+ referenced = listInstances(biopax, id = unique(ref[,2])) |
|
1009 |
+ sel = referenced[ tolower(referenced$class) %in% classes, "id"] |
|
1010 |
+ if (length(sel)==0) |
|
1011 |
+ return(NULL) |
|
1012 |
+ |
|
1013 |
+ ref = ref[ ref[,2] %in% sel, ] |
|
1014 |
+ return( split(ref[,2], ref[,1])[ complexid ] ) |
|
1015 |
+} |
|
1016 |
+bpGetAttrbyID <- function(df, id, attr, value="property_value"){ |
|
1017 |
+ id = striph(id) |
|
1018 |
+ df[df$id %in% id & df$property %in% attr, c("id","property",value) ] |
|
1019 |
+} |
|
1020 |
+bpGetAnnFromXRef <- function(df, id.ls, attr){ |
|
1021 |
+ ann <- do.call("rbind",lapply(na.omit(names(id.ls)), |
|
1022 |
+ function(x) cbind(x, if(is.null(id.ls[[x]])) NA else id.ls[[x]]) |
|
1023 |
+ )) |
|
1024 |
+ |
|
1025 |
+ |
|
1026 |
+ id = unique(ann[,2]) |
|
1027 |
+ if(length(na.omit(id))==0) |
|
1028 |
+ return(list()[match(names(id.ls), NULL)]) |
|
1029 |
+ |
|
1030 |
+ db <- bpGetAttrbyID(df, id, "db", "property_value") |
|
1031 |
+ dbid <- bpGetAttrbyID(df, id, "id", "property_value") |
|
1032 |
+ dbid <- cbind(db, value=dbid[,3]) |
|
1033 |
+ |
|
1034 |
+ dbid$property_value <- paste("miriam.", dbid$property_value, sep="") |
|
1035 |
+ |
|
1036 |
+ pos <- match(striph(ann[,2]), dbid$id) |
|
1037 |
+ # Split the dataframe into annotations belonging to each input id (ann[,1]) |
|
1038 |
+ # Then split each of the daughter dataframe by the attribute name. |
|
1039 |
+ ann.ls <- lapply(split(dbid[pos,c(3,4)], ann[,1], drop=TRUE), |
|
1040 |
+ function(x) split(as.character(x[,2]), x[,1], drop=TRUE) ) |
|
1041 |
+ |
|
1042 |
+ |
|
1043 |
+ return(ann.ls[match(names(id.ls), names(ann.ls))]) |
|
1044 |
+} |
|
1045 |
+bpGetAnnotation <- function(df, id, attr){ |
|
1046 |
+ ann <- bpGetAttrbyID(df, id, "xref", "property_attr_value") |
|
1047 |
+ ref <- bpGetAttrbyID(df, id, "entityreference", "property_attr_value") |
|
1048 |
+ ref.ann <- bpGetAttrbyID(df, ref$property_attr_value, "xref", "property_attr_value") |
|
1049 |
+ |
|
1050 |
+ pos <- match(striph(ref$property_attr_value), ref.ann$id) |
|
1051 |
+ ann <- rbind(ann[,c(1,3)],data.frame(id=ref$id, property_attr_value=ref.ann$property_attr_value[pos], |
|
1052 |
+ stringsAsFactors = FALSE)) |
|
1053 |
+ |
|
1054 |
+ db <- bpGetAttrbyID(df, ann$property_attr_value, "db", "property_value") |
|
1055 |
+ dbid <- bpGetAttrbyID(df, ann$property_attr_value, "id", "property_value") |
|
1056 |
+ |
|
1057 |
+ ann.ls <- as.list(as.character(dbid$property_value)) |
|
1058 |
+ names(ann.ls) <- paste("miriam.", db$property_value, sep="") |
|
1059 |
+ pos <- match(striph(ann$property_attr_value), dbid$id) |
|
1060 |
+ ann.ls <- split(ann.ls[pos], ann$id, drop=TRUE) |
|
1061 |
+ |
|
1062 |
+ |
|
1063 |
+ return(ann.ls[match(striph(id), names(ann.ls))]) |
|
1064 |
+} |
|
1065 |
+ |
|
1066 |
+bpExpand.grid <- function(x1,x2){ |
|
1067 |
+ if(is.null(x1)) |
|
1068 |
+ x1=NA |
|
1069 |
+ if(is.null(x2)) |
|
1070 |
+ x2=NA |
|
1071 |
+ return(expand.grid(x1,x2)) |
|
1072 |
+} |
|
1073 |
+ |
|
1074 |
+striph <- function(s){ |
|
1075 |
+ return(sub("^#", "", s)) |
|
1076 |
+} |
|
1077 |
+ |
0 | 1078 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,399 @@ |
1 |
+############################################################################### |
|
2 |
+# |
|
3 |
+# netProcess.R: This file contains all functions for network processing. |
|
4 |
+# Conversion between different network representations, network editing are included |
|
5 |
+# here. |
|
6 |
+# |
|
7 |
+# author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp> |
|
8 |
+# |
|
9 |
+# This is released under GPL-2. |
|
10 |
+# |
|
11 |
+# Documentation was created using roxygen |
|
12 |
+# |
|
13 |
+############################################################################### |
|
14 |
+ |
|
15 |
+# Neccessary non-sense to pass R CMD check |
|
16 |
+utils::globalVariables(c("nei", "to", "from", "delete")) |
|
17 |
+ |
|
18 |
+#' Remove uniquitous compounds from a metabolic network |
|
19 |
+#' |
|
20 |
+#' This function removes uniquitous compounds (metabolites connected to numerous reactions) |
|
21 |
+#' from a metabolic network.These compounds are reaction cofactors and currency compounds, |
|
22 |
+#' such as ATP, CO2, etc. A path through these metabolites may not be bioloigcally meaningful. |
|
23 |
+#' The defualt small compound list is derived from Reactome, containing keeg.compound, pubchem.compound, |
|
24 |
+#' ChEBI and CAS identifiers. |
|
25 |
+#' |
|
26 |
+#' |
|
27 |
+#' @param graph A metabolic network. |
|
28 |
+#' @param method How to handle small compounds. Either simply delete these vertices \code{"remove"} (default), |
|
29 |
+#' or make a separate vertex for each reaction they participate in \code{"duplicate"}. |
|
30 |
+#' @param small.comp.ls A list of small compounds to be used. |
|
31 |
+#' |
|
32 |
+#' @return A modified graph, with the small compounds removed or duplicated. |
|
33 |
+#' |
|
34 |
+#' @author Ahmed Mohamed |
|
35 |
+#' @family Network processing methods |
|
36 |
+#' @export |
|
37 |
+#' @examples |
|
38 |
+#' data(ex_sbml) |
|
39 |
+#' \dontshow{ex_sbml} |
|
40 |
+#' |
|
41 |
+#' sbml.removed <- rmSmallCompounds(ex_sbml, method="remove") |
|
42 |
+#' \dontshow{sbml.removed} |
|
43 |
+#' |
|
44 |
+rmSmallCompounds <- function(graph, method=c("remove","duplicate"), small.comp.ls=NPMdefaults("small.comp.ls")){ |
|
45 |
+ if(is.null(V(graph)$reactions)) |
|
46 |
+ stop("graph is not a metaboite-reaction netowrk.") |
|
47 |
+ |
|
48 |
+ if(is.null(V(graph)$attr)) |
|
49 |
+ stop("graph is not annotated.") |
|
50 |
+ term.list <- c("miriam.pubchem.compound", "miriam.kegg.compound", "miriam.chebi", "miriam.cas") |
|
51 |
+ |
|
52 |
+ std.names <- stdAttrNames(graph, "matches") |
|
53 |
+ terms <- std.names$standard %in% c("miriam.pubchem.compound", "miriam.kegg.compound", "miriam.chebi") |
|
54 |
+ if(sum(terms)==0) |
|
55 |
+ stop("No stuitable compound annotation found.\n |
|
56 |
+ Current suuported annotations are ", toString(term.list)) |
|
57 |
+ |
|
58 |
+ attr.ls <- lapply(rownames(std.names)[terms], |
|
59 |
+ function(x) getAttribute(graph, x)[!V(graph)$reactions] |
|
60 |
+ ) |
|
61 |
+ |
|
62 |
+ |
|
63 |
+ res <- sapply(1:length(attr.ls), function(i){ |
|
64 |
+ sapply(attr.ls[[i]], |
|
65 |
+ function(x) sum(x%in%small.comp.ls[,std.names[terms,1][[i]]])>0 |
|
66 |
+ ) |
|
67 |
+ }) |
|
68 |
+ |
|
69 |
+ vids <- which(apply(res,1,any)) # Rows with at least one match. |
|
70 |
+ vids <- as.integer(V(graph)[!V(graph)$reactions])[vids] #vertex ids on the graph |
|
71 |
+ |
|
72 |
+ if(method=="duplicate"){ |
|
73 |
+ v.names <- V(graph)[vids]$name |
|
74 |
+ el <- get.edgelist(graph) |
|
75 |
+ from <- el[,1] %in% v.names |
|
76 |
+ el[from,1] <- paste(el[from,1], el[from,2], sep="##") |
|
77 |
+ to <- el[,2] %in% v.names |
|
78 |
+ el[to,2] <- paste(el[to,2], el[to,1], sep="##") |
|
79 |
+ |
|
80 |
+ graph <- graph + vertices(c(el[from,1],el[to,2])) +igraph::edges(el[from|to,]) |
|
81 |
+ } |
|
82 |
+ return(graph - vertices(vids)) |
|
83 |
+} |
|
84 |
+ |
|
85 |
+#' Convert metabolic network to reaction network. |
|
86 |
+#' |
|
87 |
+#' This function removes metabolite nodes keeping them as edge attributes. The resulting |
|
88 |
+#' network contains reaction nodes only, where edges indicate that a metabolite produced |
|
89 |
+#' by one reaction is consumed by the other. |
|
90 |
+#' |
|
91 |
+#' |
|
92 |
+#' @param graph A metabolic network. |
|
93 |
+#' @param simplify An option to remove translocation and spontaneous reactions that require |
|
94 |
+#' no catalyzing genes. Translocation reactions are detected from reaction name (SBML, BioPAX), or |
|
95 |
+#' by having identical substrates and products. |
|