#' Get detailed KEGG mapping information for each relation [edge] documented #' in KEGG #' @description Extract relationship information from KGML object and re-map #' based on normalized node information #' #' @param KGML_file An object of formal class KEGGPathway #' @param KEGG_mappings The data.frame object generated by the function #' expand_KEGG_mappings #' #' @return A dataframe object with unique entry information for all edges #' documented in the KEGG pathway. #' Note that each row has a unique combination of values for #' (entry1, entry2, entry1symbol, entry2symbol). #' @export #' @importMethodsFrom KEGGgraph edges #' @examples #' p53_KGML <- get_KGML("hsa04115") #' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML, FALSE) #' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings) expand_KEGG_edges <- function(KGML_file,KEGG_mappings){ num_edges <- length(KGML_file@edges) if (num_edges == 0){ print("No Documented Edges in Pathway") expanded_edges <- data.frame("edgeID"=1, "entry1"= "1", "entry2" = "1", "entry1accession"="dummy", " entry2accession"="dummy", "type"= "dummy", "subtype1"="dummy", "value"="dummy", "subtype2" = "dummy", "value2" = "dummy", "specific_subtype"="dummy", "is_direct"= 0, "entry1type"="dummy", "entry2type"= "dummy", "entry1symbol"= "1", "entry2symbol"="1", stringsAsFactors = FALSE) return(expanded_edges) } map_edge_data<- KGML_file@edges get_edges<- data.frame(edgeID = seq(1:length(map_edge_data ))) for (i in 1:nrow(get_edges)){ get_edges$entry1[i] <- as.numeric(map_edge_data[[i]]@entry1ID) get_edges$entry2[i] <- as.numeric(map_edge_data[[i]]@entry2ID) get_edges$type[i] <- map_edge_data[[i]]@type if (length(map_edge_data[[i]]@subtype) == 1){ get_edges$subtype1[i] <- map_edge_data[[i]]@subtype[[1]]@name if (get_edges$subtype1[i] == "binding/association"){ get_edges$subtype1[i] <- "binding_association" } get_edges$value[i] <- map_edge_data[[i]]@subtype[[1]]@value get_edges$subtype2[i] <- NA get_edges$value2[i] <- NA get_edges$specific_subtype[i] <- get_edges$subtype1[i] get_edges$is_direct[i] <- 1 if (get_edges$subtype1[i] == "indirect effect"){ get_edges$is_direct[i] <- 0 get_edges$subtype1[i] <- "indirect_effect" } } else if (length(map_edge_data[[i]]@subtype) == 2){ get_edges$subtype1[i] <- map_edge_data[[i]]@subtype[[1]]@name if (get_edges$subtype1[i] == "binding/association"){ get_edges$subtype1[i] <- "binding_association" } get_edges$value[i] <- map_edge_data[[i]]@subtype[[1]]@value get_edges$subtype2[i] <- map_edge_data[[i]]@subtype[[2]]@name get_edges$specific_subtype[i] <- paste0(get_edges$subtype1[i], "_", get_edges$subtype2[i]) get_edges$value2[i] <- map_edge_data[[i]]@subtype[[2]]@value get_edges$is_direct[i] <- 1 if (get_edges$subtype2[i] == "indirect effect"){ get_edges$subtype2[i] <- "indirect" get_edges$is_direct[i] <- 0 } } else { get_edges$subtype1[i] <- "Not defined in KEGG" get_edges$value[i] <- "Not defined in KEGG" get_edges$subtype2[i] <- NA get_edges$value2[i] <- NA get_edges$specific_subtype[i] <- NA get_edges$is_direct[i] <- 1 } get_edges$entry1type[i] <- KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == get_edges$entry1[i])][1] get_edges$entry2type[i] <- KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == get_edges$entry2[i])][1] } get_edges <- get_edges[!is.na(get_edges$entry1type) & !is.na(get_edges$entry2type),] if (nrow(get_edges) ==0){ print("No Documented Edges in Pathway for selected cell type; all edges are between non-expressed genes") expanded_edges <- data.frame("edgeID"=1, "entry1"= "1", "entry2" = "1", "entry1accession"="dummy", "entry2accession"="dummy","type"= "dummy", "subtype1"="dummy", "value"="dummy", "subtype2" = "dummy", "value2" = "dummy", "specific_subtype"="dummy", "is_direct"= 0, "entry1type"="dummy", "entry2type"= "dummy","entry1symbol"= "1", "entry2symbol"="1", stringsAsFactors = FALSE) return(expanded_edges) } ##At some point include option not to ungroup edges edges_no_groups <- subset(get_edges, get_edges$entry1type != "group" & get_edges$entry2type != "group", select = -c(1)) edges_with_groups <- subset(get_edges, get_edges$entry1type == "group" | get_edges$entry2type == "group") edges_one_group <- subset(get_edges, get_edges$entry1type == "group" & !get_edges$entry2type == "group" | get_edges$entry2type == "group" & !get_edges$entry1type == "group") edges_two_groups <- subset(get_edges, get_edges$entry1type == "group" & get_edges$entry2type == "group") if (nrow(edges_with_groups) > 0){ if (nrow(edges_one_group) > 0){ edges_one_group$entry1all <- NA edges_one_group$entry2all <- NA for (i in 1:nrow(edges_one_group)){ if (edges_one_group$entry1type[i] == "group"){ edges_one_group$entry1all[i] <- KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == edges_one_group$entry1[i])] l <- length(unlist(edges_one_group$entry1all[i])) edges_one_group$entry2all[i] <- list(as.character(rep(edges_one_group$entry2[i], l))) } else if (edges_one_group$entry2type[i] == "group"){ edges_one_group$entry2all[i] <- KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == edges_one_group$entry2[i])] l = length(unlist(edges_one_group$entry2all[i])) edges_one_group$entry1all[i] <- list(as.character(rep(edges_one_group$entry1[i], l))) } l = length(unlist(edges_one_group$entry1all[i])) edges_one_group$edgeID_list[i] <- list(rep(edges_one_group$edgeID[i],l)) } ewg_temp <- data.frame("edgeID" = unlist(edges_one_group$edgeID_list), "entry1" = unlist(edges_one_group$entry1all), "entry2" = unlist(edges_one_group$entry2all), stringsAsFactors = FALSE) edges_one_group <- edges_one_group[,-c(2:3, 13:15)] edges_one_group <- merge(ewg_temp,edges_one_group, by = "edgeID") edges_one_group <- edges_one_group[,-c(1)] } if (nrow(edges_two_groups) > 0){ edges_two_groups$entry1all <- NA edges_two_groups$entry2all <- NA for (i in 1:nrow(edges_two_groups)){ edges_two_groups$entry1all[i] <- KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == edges_two_groups$entry1[i])] edges_two_groups$entry2all[i] <- KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == edges_two_groups$entry2[i])] x <- length(unlist(edges_two_groups$entry1all[i])) y <- length(unlist(edges_two_groups$entry2all[i])) if (x == 1 & y >1){ edges_two_groups$entry1all[i] <- list(rep(edges_two_groups$entry1all[i], y)) } else if (x > 1 & y == 1){ edges_two_groups$entry2all[i] <- list(rep(edges_two_groups$entry2all[i], x)) } else if (x > 1 & y > 1){ edges_two_groups$entry1all[i] <- list(rep(edges_two_groups$entry1all[i],y)) edges_two_groups$entry1all[i] <- list(unlist(edges_two_groups$entry1all[i])[ sort.list(unlist(edges_two_groups$entry1all[i]))]) edges_two_groups$entry2all[i] <- list(rep(edges_two_groups$entry2all[i],x)) } l <- length(unlist(edges_two_groups$entry1all[i])) edges_two_groups$edgeID_list[i] <- list(rep(edges_two_groups$edgeID[i],l)) } ewg_temp <- data.frame("edgeID" = unlist(edges_two_groups$edgeID_list), "entry1" = unlist(edges_two_groups$entry1all), "entry2" = unlist(edges_two_groups$entry2all), stringsAsFactors = FALSE) edges_two_groups <- edges_two_groups[,-c(2:3, 13:15)] edges_two_groups <- merge(ewg_temp,edges_two_groups, by = "edgeID") edges_two_groups <- edges_two_groups[,-c(1)] } if (nrow(edges_one_group) > 0 & nrow(edges_two_groups) > 0){ all_edges <- rbind(edges_no_groups, edges_one_group, edges_two_groups) } else if (nrow(edges_one_group) > 0){ all_edges <- rbind(edges_no_groups, edges_one_group) } else if (nrow(edges_two_groups) > 0){ all_edges <- rbind(edges_no_groups, edges_two_groups) } } else { all_edges <- edges_no_groups } for (i in 1:nrow(all_edges)){ all_edges$entry1all_accession[i] <- list(KEGG_mappings$entryACCESSION[which(KEGG_mappings$entryID == all_edges$entry1[i])]) all_edges$entry2all_accession[i] <- list(KEGG_mappings$entryACCESSION[which(KEGG_mappings$entryID == all_edges$entry2[i])]) x <- length(unlist(all_edges$entry1all_accession[i])) y <- length(unlist(all_edges$entry2all_accession[i])) if (x == 1 & y >1){ all_edges$entry1all_accession[i] <- list(rep(all_edges$entry1all_accession[i], y)) } else if (x > 1 & y == 1) { all_edges$entry2all_accession[i] <- list(rep(all_edges$entry2all_accession[i], x)) } else if (x > 1 & y > 1) { all_edges$entry1all_accession[i] <- list(rep(all_edges$entry1all_accession[i],y)) all_edges$entry1all_accession[i] <- list(unlist(all_edges$entry1all_accession[i])[sort.list(unlist( all_edges$entry1all_accession[i]))]) all_edges$entry2all_accession[i] <- list(rep(all_edges$entry2all_accession[i],x)) } l <- length(unlist(all_edges$entry1all_accession[i])) all_edges$edgeID[i] = list(rep(i,l)) } for (i in 1:nrow(all_edges)){ all_edges$l1[i] <- length(all_edges$entry1all_accession[[i]]) all_edges$l2[i] <- length(all_edges$entry2all_accession[[i]]) } all_edges <- all_edges[all_edges$l1 >0 & all_edges$l2 > 0, -c(15,16)] expanded_edges <- data.frame("edgeID" = unlist(all_edges$edgeID), "entry1accession" = unlist(all_edges$entry1all_accession), "entry2accession" = unlist(all_edges$entry2all_accession), stringsAsFactors = FALSE) all_edges <- all_edges[,-c(12:14)] all_edges$edgeID <- seq(1:nrow(all_edges)) expanded_edges <- merge(expanded_edges, all_edges, by = "edgeID") for (i in 1:nrow(expanded_edges)){ expanded_edges$entry1type[i] <- KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == expanded_edges$entry1[i])][1] expanded_edges$entry2type[i] <- KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == expanded_edges$entry2[i])][1] if (expanded_edges$entry1type[i] == "gene"| expanded_edges$entry1type[i] == "compound") { expanded_edges$entry1symbol[i] <- KEGG_mappings$entrySYMBOL[which(KEGG_mappings$entryACCESSION == expanded_edges$entry1accession[i])][1] } else { expanded_edges$entry1symbol[i] <- NA } if (expanded_edges$entry2type[i] == "gene" | expanded_edges$entry2type[i] == "compound") { expanded_edges$entry2symbol[i] <- KEGG_mappings$entrySYMBOL[which(KEGG_mappings$entryACCESSION == expanded_edges$entry2accession[i])][1] } else { expanded_edges$entry2symbol[i] <- NA } } expanded_edges$entry1symbol <- unlist(expanded_edges$entry1symbol) expanded_edges$entry2symbol <- unlist(expanded_edges$entry2symbol) expanded_edges$is_direct <- as.numeric(expanded_edges$is_direct) return(expanded_edges) }