############################################################################### # # netProcess.R: This file contains all functions for network processing. # Conversion between different network representations, network editing are included # here. # # author: Ahmed Mohamed <mohamed@kuicr.kyoto-u.ac.jp> # # This is released under GPL-2. # # Documentation was created using roxygen # ############################################################################### # Neccessary non-sense to pass R CMD check utils::globalVariables(c("nei", "to", "from", "delete")) #' Remove uniquitous compounds from a metabolic network #' #' This function removes uniquitous compounds (metabolites connected to numerous reactions) #' from a metabolic network.These compounds are reaction cofactors and currency compounds, #' such as ATP, CO2, etc. A path through these metabolites may not be bioloigcally meaningful. #' The defualt small compound list is derived from Reactome, containing keeg.compound, pubchem.compound, #' ChEBI and CAS identifiers. #' #' #' @param graph A metabolic network. #' @param method How to handle small compounds. Either simply delete these vertices \code{"remove"} (default), #' or make a separate vertex for each reaction they participate in \code{"duplicate"}. #' @param small.comp.ls A list of small compounds to be used. #' #' @return A modified graph, with the small compounds removed or duplicated. #' #' @author Ahmed Mohamed #' @family Network processing methods #' @export #' @examples #' data(ex_sbml) #' \dontshow{ex_sbml} #' #' sbml.removed <- rmSmallCompounds(ex_sbml, method="remove") #' \dontshow{sbml.removed} #' rmSmallCompounds <- function(graph, method=c("remove","duplicate"), small.comp.ls=NPMdefaults("small.comp.ls")){ if(graph$type != "MR.graph") stop("graph is not a metaboite-reaction netowrk.") if(is.null(V(graph)$attr)) stop("graph is not annotated.") term.list <- c("miriam.pubchem.compound", "miriam.kegg.compound", "miriam.chebi", "miriam.cas") std.names <- stdAttrNames(graph, "matches") terms <- std.names$standard %in% c("miriam.pubchem.compound", "miriam.kegg.compound", "miriam.chebi") if(sum(terms)==0) stop("No stuitable compound annotation found.\n Current suuported annotations are ", toString(term.list)) attr.ls <- lapply(rownames(std.names)[terms], function(x) getAttribute(graph, x)[!V(graph)$reactions] ) std_names_map = std.names[terms,1] res <- sapply(1:length(attr.ls), function(i){ sapply(attr.ls[[i]], function(x) sum( x %in% small.comp.ls[, std_names_map[[i]]] )>0 ) }) vids <- which(apply(res,1,any)) # Rows with at least one match. vids <- as.integer(V(graph)[!V(graph)$reactions])[vids] #vertex ids on the graph if(method=="duplicate"){ v.names <- V(graph)[vids]$name el <- get.edgelist(graph) from <- el[,1] %in% v.names el[from,1] <- paste(el[from,1], el[from,2], sep="##") to <- el[,2] %in% v.names el[to,2] <- paste(el[to,2], el[to,1], sep="##") graph <- graph + vertices(c(el[from,1],el[to,2])) +igraph::edges(el[from|to,]) } return(graph - vertices(vids)) } #' Convert metabolic network to metabolite network. #' #' This function removes reaction nodes keeping them as edge attributes. The resulting #' network contains metabolite nodes only, where edges indicate that reaction conversions. #' #' #' @param graph A metabolic network. #' #' @return A reaction network. #' #' @author Ahmed Mohamed #' @family Network processing methods #' @export #' @examples #' ## Conver a metabolic network to a metbolite network. #' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism. #' mgraph <- makeMetaboliteNetwork(ex_sbml) #' makeMetaboliteNetwork <- function(graph) { V(graph)$reactions <- !V(graph)$reactions new.graph <- makeReactionNetwork(graph, FALSE) new.graph$type <- "M.graph" return(new.graph) } #' Convert metabolic network to reaction network. #' #' This function removes metabolite nodes keeping them as edge attributes. The resulting #' network contains reaction nodes only, where edges indicate that a metabolite produced #' by one reaction is consumed by the other. #' #' #' @param graph A metabolic network. #' @param simplify An option to remove translocation and spontaneous reactions that require #' no catalyzing genes. Translocation reactions are detected from reaction name (SBML, BioPAX), or #' by having identical substrates and products. #' #' @return A reaction network. #' #' @author Ahmed Mohamed #' @family Network processing methods #' @export #' @examples #' ## Conver a metabolic network to a reaction network. #' data(ex_sbml) # bipartite metabolic network of Carbohydrate metabolism. #' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE) #' makeReactionNetwork <- function(graph, simplify=FALSE){ reactions <- V(graph)$reactions if(is.null(reactions) || class(reactions)!="logical" || sum(reactions)==0) stop("The graph contains no reactions.") edges <- get.edgelist(graph) input <- edges[edges[,2]%in% V(graph)[reactions]$name,] #Reactant-reaction edges output <- edges[edges[,1]%in% V(graph)[reactions]$name,] #Reaction-product edges match1 <- match(output[,2],input[,1]) #Find reactions that produces reactants in input match2 <- match(input[,1],output[,2]) #Find reactions that consumes products in output #Connect matches r.connections <- rbind( cbind(output[,],input[match1,2]), cbind(output[match2,], input[,2]) ) # remove missing and duplicate values r.connections <- r.connections[complete.cases(r.connections),] r.connections <- unique(r.connections) r.connections <- data.frame(r.connections[,c(1,3,2)], I(V(graph)[r.connections[,2]]$attr)) names(r.connections) <- c("from", "to", "compound", "attr") #### Making a reaction network graph from edges##### reaction.graph = simplify(graph.data.frame(r.connections),edge.attr.comb="c") V(reaction.graph)$attr <- V(graph)[V(reaction.graph)$name]$attr if(simplify) reaction.graph <- simplifyReactionNetwork(reaction.graph, remove.missing.genes=FALSE) reaction.graph$source <- graph$source reaction.graph$type <- "R.graph" return(reaction.graph) } #' Removes reactions with no gene annotations #' #' This function removes reaction vertices with no gene annotations as indicated by the parameter #' \code{gene.attr}, and connect their neighbour vertices to preserve graph connectivity. This is #' particularly meaningful when reactions are translocation or spontaneous reactions, #' which are not catalysed by genes. #' #' @param reaction.graph A reaction network. #' @param gene.attr The attribute to be considered as "genes". Reactions missing this annotation, will be removed. #' @param remove.missing.genes If \code{FALSE}, only tranlocation and spontaneous reactions are removed, otherwise #' all rections with no gene annotations are removed. #' @param reconnect.threshold An argument passed to \code{\link{vertexDeleteReconnect}} #' #' @return A simplified reaction network. #' #' @author Ahmed Mohamed #' @family Network processing methods #' @export #' @examples #' data(ex_sbml) #' rgraph <- makeReactionNetwork(ex_sbml, simplify=FALSE) #' #' ## Removes all reaction nodes with no annotated genes. #' rgraph <- simplifyReactionNetwork(rgraph, remove.missing.genes=TRUE) #' simplifyReactionNetwork <- function(reaction.graph, gene.attr="genes", remove.missing.genes=TRUE, reconnect.threshold=vcount(reaction.graph)) { #List of all missing-gene reactions. no.gene <- lapply( lapply(V(reaction.graph)$attr, "[[", gene.attr), length) == 0 if(sum(no.gene)==0) return(reaction.graph) attr.ls <- if(sum(no.gene)==1) list(V(reaction.graph)[no.gene]$attr) else V(reaction.graph)[no.gene]$attr translocation = sapply( attr.ls, function(x) ( grepl("transloc", x[["name"]], ignore.case=TRUE) || identical(x[["reactants"]],x[["products"]]) ) ) spontaneous = sapply( attr.ls, function(x) grepl("spontaneous", x[["name"]], ignore.case=TRUE) ) new.reaction.graph = reaction.graph if(sum(translocation | spontaneous) > 0){ new.reaction.graph = vertexDeleteReconnect(reaction.graph, V(reaction.graph)[no.gene][translocation | spontaneous], copy.attr=list(compound=function(...)paste(..., sep="->"), attr="c") ) } if(remove.missing.genes & length(which(no.gene)[!translocation & !spontaneous] >0)){ new.reaction.graph = vertexDeleteReconnect(new.reaction.graph, V(new.reaction.graph)[no.gene], reconnect.threshold) } return(new.reaction.graph) } #' Network editing: removing vertices and connecting their neighbours #' #' This function removes vertices given as \code{vids} and connects their neighbours as #' long as the shortest path beween the neighbours are below the \code{reconnect.threshold}. #' #' @param graph A reaction network. #' @param vids Vertex ids to be removed. #' @param reconnect.threshold If the shortest path between vertices is larger than this threshold, #' they are not reconnected. #' @param copy.attr A function, or a list of functions, combine edge attributes. Edge attributes #' of new edges (between reconnected neighbours) are obtained by combining original edges attributes #' along the shortest path between reconnected neighbors. #' #' @return A modified graph. #' #' @author Ahmed Mohamed #' @family Network processing methods #' @export #' @examples #' ## Remove all reaction vertices from a bipartite metabolic network #' ## keeping only metabolite vertices. #' data(ex_sbml) #' graph <- vertexDeleteReconnect(ex_sbml, vids=which(V(ex_sbml)$reactions)) #' vertexDeleteReconnect <- function(graph, vids, reconnect.threshold=vcount(graph), copy.attr=NULL){ if(length(vids)==0){return(graph)} V(graph)$delete <- FALSE V(graph)[vids]$delete <- TRUE #A subgraph only including vertices to be deleted and their neighbours. graph.sub <- subgraph.edges(graph, E(graph)[V(graph)[delete] %--% V(graph)[.nei(V(graph)[delete])|delete]]) #Calculate the shortest path length in this network between "neighbours" (retained) vertices. #In this graph, the shortest path will have to go through deleted vertices only. #Path lengths indicate the number of deleted vertices lying between 2 retained vertices. short.paths <- shortest.paths(graph.sub, V(graph.sub)[!delete],V(graph.sub)[!delete], "out") new.edges <- which(short.paths!=0 & short.paths!=Inf & short.paths< reconnect.threshold+1, arr.ind=TRUE) new.edges <- rbind(rownames(new.edges),colnames(short.paths)[new.edges[,2]]) #edges to be added #Get Actual shortest paths for vertices that passed the threshold, to copy edge attributes. attr <- NULL if(!is.null(copy.attr)){ paths <- mapply(function(from, to) igraph::get.shortest.paths(graph.sub, from, to, output="both",mode="out")$vpath, new.edges[1,], new.edges[2,]) if(!is.list(copy.attr)) copy.attr <- sapply(list.edge.attributes(graph.sub), function(x)copy.attr) attr <- sapply(names(copy.attr), function(y) lapply(paths, function(x) do.call( copy.attr[[y]], as.list(get.edge.attribute(graph.sub, y,E(graph.sub, path=x))) ) ) ,simplify=FALSE) } new.graph <- add.edges(graph, new.edges, attr=attr) new.graph <- delete.vertices(new.graph, V(graph)[delete]$name) new.graph <- remove.vertex.attribute(new.graph, "delete") return(new.graph) } #' Expand reactions / complexes into their gene constituents. #' #' These are general functions to expand vertices by their attributes, i.e. create a separate #' vertex for each attribute value. #' #' These functions can be very useful when merging networks constructed from different databases. #' For example, to match a network created from Reactome to a KEGG network, you can expand metabolite #' vertices by "miriam.kegg.compound" attribute. #' #' @param graph An annotated igraph object. #' @param v.attr Name of the attribute which vertices are expanded to. #' @param keep.parent.attr A (List of) \code{\link{regex}} experssions representing attributes to be #' inherited by daughter vertices. If \code{"all"} is passed, all parent attributes are inherited. #' @param expansion.method If \code{"duplicate"}, attribute values sharing more than one parent vertex #' are duplicated for each vertex they participate in. For exmaple, if one gene G1 catalyzes reactions #' R1, R2; then G1##R1, and G1##R2 vertices are created. If \code{"normal"} only one vertex (G1) is created, #' and inherit all R1 and R2 connections and attributes. #' @param missing.method How to deal with vertices with no attribute values. \code{"keep"} retains the parent #' node, \code{"remove"} simply deletes the vertex, and \code{"reconnect"} removes the vertex and connect its #' neighbours to each other (to prevent graph cuts). #' #' @return A new graph with vertices expanded. #' #' @author Ahmed Mohamed #' @family Network processing methods #' @rdname makeGeneNetwork #' @export #' @examples #' ## Make a gene network from a reaction network. #' data(ex_sbml) # A bipartite metbaolic network. #' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE) #' ggraph <- makeGeneNetwork(rgraph) #' #' ## Expand vertices into their contituent genes. #' data(ex_kgml_sig) # Ras and chemokine signaling pathways in human #' ggraph <- expandComplexes(ex_kgml_sig, v.attr = "miriam.ncbigene", #' keep.parent.attr= c("^pathway", "^compartment")) #' #' ## Create a separate vertex for each compartment. This is useful in duplicating #' ## metabolite vertices in a network. #' \dontrun{ #' graph <- expandComplexes(graph, v.attr = "compartment", #' keep.parent.attr = "all", #' expansion.method = "duplicate", #' missing.method = "keep") #' } #' expandComplexes <- function(graph, v.attr, keep.parent.attr= "^pathway", expansion.method=c("normal", "duplicate"), missing.method=c("keep", "remove", "reconnect")) { if(missing(v.attr)) stop("v.attr: Vertex attribute to be used in expansion is not specified.") attr.names <- getAttrNames(graph) if(!v.attr %in% attr.names) stop(v.attr,": Attribute not found in graph.") attr.ls = lapply(V(graph)$attr, "[[", v.attr) if(missing(expansion.method)) expansion.method <- "normal" else if(!expansion.method %in% c("normal", "duplicate")) stop("Unknown expansion method provided: ", expansion.method) if(missing(missing.method)) missing.method <- "remove" else if(!missing.method %in% c("keep", "remove", "reconnect")) stop("Unknown missin method provided: ", missing.method) if(missing.method!="remove"){ no.attr <- lapply(attr.ls, length) ==0 attr.ls[no.attr] <- V(graph)[no.attr]$name } attr_func <- function(...){ l = mapply(function(...) unique(c(...)),...,SIMPLIFY=FALSE) l[!is.na(names(l))] } z = .Call("expand_complexes", ATTR_LS=attr.ls, EL=as.integer(t(get.edgelist(graph, names=FALSE))-1), V = V(graph)$name, EXPAND=expansion.method, MISSING=missing.method) gout = graph.empty() + vertices(z$vertices) gout = gout +igraph::edges(z$edges) if(missing.method=="reconnect"){ no.attr.vids <- which(z$parents %in% which(no.attr)) if(length(no.attr.vids)>0) gout <- vertexDeleteReconnect(gout, no.attr.vids) } if(!is.null(keep.parent.attr)){ if(length(keep.parent.attr)==1 && keep.parent.attr=="all"){ attr_terms = lapply(V(graph)$attr, "[", keep.parent.attr) V(gout)$attr <- lapply(z$parents, function(x) do.call("attr_func", attr_terms[x])) }else{ if(length(keep.parent.attr) > 1) keep.parent.attr <- do.call("paste",as.list(c(keep.parent.attr, sep="|"))) keep.parent.attr <- attr.names[grep(keep.parent.attr, attr.names)] attr_terms = lapply(V(graph)$attr, "[", keep.parent.attr) V(gout)$attr <- lapply(z$parents, function(x) do.call("attr_func", attr_terms[x])) } } gout <- setAttribute(gout, v.attr, V(gout)$name) for(i in list.edge.attributes(graph)){ gout <- set.edge.attribute(gout, i, value=get.edge.attribute(graph, i)[z$e.parents] ) } gout$source <- graph$source return(gout); } #' Replaces current vertex ids with chosen attribute. #' #' This function allows users to replace vertex ids with another attribute, #' calculating connectivities based on the new attribute. #' #' This functions can be very useful when merging networks constructed from different databases. #' For example, to match a network created from Reactome to a KEGG network, you can reindex the #' vertices by "miriam.kegg.compound" attribute. Another usage is to remove duplicated vertices (in case of #' different subcellular compartments, for example). if a network has ATP_membrane & ATP_cytoplasm vertices, #' reindexing by chemical name will collapse them into one `ATP` vertex. #' #' @param graph An annotated igraph object. #' @param v.attr Name of the attribute to use as vertex ids. #' #' @return A new graph with vertices expanded. #' #' @author Ahmed Mohamed #' @family Network processing methods #' @export #' @examples #' ## Make a gene network from a reaction network. #' data(ex_sbml) # A bipartite metbaolic network. #' rgraph <- makeReactionNetwork(ex_sbml, simplify=TRUE) #' ggraph <- makeGeneNetwork(rgraph) #' #' ## Expand vertices into their contituent genes. #' data(ex_kgml_sig) # Ras and chemokine signaling pathways in human #' ggraph <- expandComplexes(ex_kgml_sig, v.attr = "miriam.ncbigene", #' keep.parent.attr= c("^pathway", "^compartment")) #' #' ## Create a separate vertex for each compartment. This is useful in duplicating #' ## metabolite vertices in a network. #' \dontrun{ #' graph <- expandComplexes(graph, v.attr = "compartment", #' keep.parent.attr = "all", #' expansion.method = "duplicate", #' missing.method = "keep") #' } #' reindexNetwork <- function(graph, v.attr) { if(missing(v.attr)) stop("v.attr: Vertex attribute to be used in expansion is not specified.") attr.names <- getAttrNames(graph) if(!v.attr %in% attr.names) stop(v.attr,": Attribute not found in graph.") attr.ls = lapply(V(graph)$attr, "[[", v.attr) expansion.method <- "normal" missing.method <- "remove" attr_func <- function(...){ l = mapply(function(...) unique(c(...)),...,SIMPLIFY=FALSE) l[!is.na(names(l))] } z = .Call("expand_complexes", ATTR_LS=attr.ls, EL=as.integer(t(get.edgelist(graph, names=FALSE))-1), V = V(graph)$name, EXPAND=expansion.method, MISSING=missing.method) gout = graph.empty() + vertices(z$vertices) gout = gout +igraph::edges(z$edges) attr_terms = lapply(V(graph)$attr, "[", attr.names) V(gout)$attr <- lapply(z$parents, function(x) do.call("attr_func", attr_terms[x]) ) gout <- setAttribute(gout, v.attr, V(gout)$name) for(i in list.edge.attributes(graph)){ gout <- set.edge.attribute(gout, i, value=get.edge.attribute(graph, i)[z$e.parents] ) } for(i in list.graph.attributes(graph)){ gout <- set.graph.attribute(gout, i, value=get.graph.attribute(graph, i) ) } first_parent = sapply(z$parents, head,1) for(i in list.vertex.attributes(graph)){ if(!i %in% c("name", "attr")){ gout <- set.vertex.attribute(gout, i, value=get.vertex.attribute(graph, i)[first_parent] ) } } return(gout); } #' @return \code{makeGeneNetwork} returns a graph, where nodes are genes, and edges represent #' participation in succesive reactions. #' #' @rdname makeGeneNetwork #' @export makeGeneNetwork <- function(graph, v.attr="genes", keep.parent.attr= "^pathway", expansion.method="duplicate", missing.method="remove"){ gene.graph <- expandComplexes(graph, v.attr, keep.parent.attr, expansion.method, missing.method) V(gene.graph)$color <- "blue" gene.graph$source <- graph$source gene.graph$type = "G.graph" return(gene.graph) }