git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/KEGGlincs@122278 bc3139a8-67e5-0310-9ffc-ced21a209358
0 | 6 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,23 @@ |
1 |
+Package: KEGGlincs |
|
2 |
+Type: Package |
|
3 |
+Title: Visualize all edges within a KEGG pathway and overlay LINCS data |
|
4 |
+ [option] |
|
5 |
+Version: 0.99.4 |
|
6 |
+Date: 2016-06-02 |
|
7 |
+Author: Shana White |
|
8 |
+Maintainer: Shana White <vandersm@mail.uc.edu> |
|
9 |
+Description: See what is going on 'under the hood' of KEGG pathways by |
|
10 |
+ explicitly re-creating the pathway maps from information |
|
11 |
+ obtained from KGML files. |
|
12 |
+License: GPL-3 |
|
13 |
+LazyData: true |
|
14 |
+RoxygenNote: 5.0.1 |
|
15 |
+Depends: R (>= 3.3), KOdata, hgu133a.db, org.Hs.eg.db (>= 3.3.0) |
|
16 |
+SystemRequirements: Cytoscape (>= 3.3.0), Java (>= 8) |
|
17 |
+Suggests: BiocInstaller (>= 1.20.3), knitr, rmarkdown, graph |
|
18 |
+biocViews: NetworkInference, GeneExpression, DataRepresentation, |
|
19 |
+ ThirdPartyClient,CellBiology,GraphAndNetwork,Pathways,KEGG,Network |
|
20 |
+Imports: |
|
21 |
+ AnnotationDbi,KEGGgraph,igraph,plyr,gtools,httr,RJSONIO,KEGGREST, |
|
22 |
+ methods,graphics,stats,utils |
|
23 |
+VignetteBuilder: knitr |
0 | 24 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,18 @@ |
1 |
+Version: 1.0 |
|
2 |
+ |
|
3 |
+RestoreWorkspace: Default |
|
4 |
+SaveWorkspace: Default |
|
5 |
+AlwaysSaveHistory: Default |
|
6 |
+ |
|
7 |
+EnableCodeIndexing: Yes |
|
8 |
+UseSpacesForTab: Yes |
|
9 |
+NumSpacesForTab: 4 |
|
10 |
+Encoding: UTF-8 |
|
11 |
+ |
|
12 |
+RnwWeave: Sweave |
|
13 |
+LaTeX: pdfLaTeX |
|
14 |
+ |
|
15 |
+BuildType: Package |
|
16 |
+PackageUseDevtools: Yes |
|
17 |
+PackageInstallArgs: --no-multiarch --with-keep.source |
|
18 |
+PackageBuildArgs: --resave-data |
0 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,48 @@ |
1 |
+# Generated by roxygen2: do not edit by hand |
|
2 |
+ |
|
3 |
+export(KEGG_lincs) |
|
4 |
+export(add_edge_data) |
|
5 |
+export(cyto_vis) |
|
6 |
+export(edge_mapping_info) |
|
7 |
+export(expand_KEGG_edges) |
|
8 |
+export(expand_KEGG_mappings) |
|
9 |
+export(generate_mappings) |
|
10 |
+export(get_KGML) |
|
11 |
+export(get_fisher_info) |
|
12 |
+export(get_graph_object) |
|
13 |
+export(node_mapping_info) |
|
14 |
+export(overlap_info) |
|
15 |
+export(path_genes_by_cell_type) |
|
16 |
+export(refine_mappings) |
|
17 |
+export(toCytoscape) |
|
18 |
+import(AnnotationDbi) |
|
19 |
+import(KEGGREST) |
|
20 |
+import(KOdata) |
|
21 |
+import(RJSONIO) |
|
22 |
+import(hgu133a.db) |
|
23 |
+import(httr) |
|
24 |
+import(methods) |
|
25 |
+import(org.Hs.eg.db) |
|
26 |
+importFrom(graphics,barplot) |
|
27 |
+importFrom(graphics,legend) |
|
28 |
+importFrom(graphics,par) |
|
29 |
+importFrom(gtools,smartbind) |
|
30 |
+importFrom(igraph,"V<-") |
|
31 |
+importFrom(igraph,E) |
|
32 |
+importFrom(igraph,V) |
|
33 |
+importFrom(igraph,ecount) |
|
34 |
+importFrom(igraph,edge.attributes) |
|
35 |
+importFrom(igraph,edge_attr) |
|
36 |
+importFrom(igraph,get.edgelist) |
|
37 |
+importFrom(igraph,graph.attributes) |
|
38 |
+importFrom(igraph,list.edge.attributes) |
|
39 |
+importFrom(igraph,list.vertex.attributes) |
|
40 |
+importFrom(igraph,vertex.attributes) |
|
41 |
+importFrom(plyr,rename) |
|
42 |
+importFrom(stats,fisher.test) |
|
43 |
+importFrom(stats,p.adjust) |
|
44 |
+importFrom(stats,p.adjust.methods) |
|
45 |
+importFrom(utils,combn) |
|
46 |
+importFrom(utils,data) |
|
47 |
+importMethodsFrom(KEGGgraph,edges) |
|
48 |
+importMethodsFrom(KEGGgraph,nodes) |
1 | 3 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,149 @@ |
1 |
+#' Combines all other package functions for one-step pathway visualization |
|
2 |
+#' @export |
|
3 |
+#' @param pathwayid A KEGG pathway ID of the form "hsa12345" |
|
4 |
+#' (only human pathways currently) |
|
5 |
+#' @param cell_line If left as NA will generate a pathway map without |
|
6 |
+#' data-dependent attributes (such as edge width). To use in combination |
|
7 |
+#' with LINCS data, choose from the set of cell lines: |
|
8 |
+#' (A375,A549,ASC,HA1E,HCC515,HEK293T,HEKTE,HEPG2,HT29,MCF7,NCIH716,NPC,PC3, |
|
9 |
+#' SHSY5Y,SKL,SW480,VCAP) |
|
10 |
+#' @param refine_by_cell_line A logical indicator |
|
11 |
+#' @param add_L1000_edge_data A logical indicator |
|
12 |
+#' @param data_type Choose from data types: (100_full, 100_bing, 50_lm) |
|
13 |
+#' @param pert_time Choose from (6,24,48,96,120,144,168) |
|
14 |
+#' @param only_mapped A logical indicator; if set to FALSE will return 'de-novo' |
|
15 |
+#' edges that 'exist' in data but are not documented in KEGG |
|
16 |
+#' @param significance_markup A logical indicator; if set to TRUE will color |
|
17 |
+#' edges based on direction and significance of correlation (as determined by |
|
18 |
+#' user-data-analysis) |
|
19 |
+#' @param layered_nodes A logical indicator; if set to TRUE will create a graph |
|
20 |
+#' with 'stacked' nodes that the user can manipulate when multiple nodes are |
|
21 |
+#' mapped to one location |
|
22 |
+#' @param graph_title An optional user-specified graph title |
|
23 |
+#' @param get_data A logical indicator; if set to true, will return the |
|
24 |
+#' 'expanded' edge information for the specified pathway |
|
25 |
+#' @param convert_KEGG_IDs A logical indicator; if set to TRUE KEGG |
|
26 |
+#' compounds will remain labeled via KEGG codes (do not need KEGGREST) |
|
27 |
+#' @return A dynamic map in Cytoscape automatically formatted for convenient |
|
28 |
+#' viewing and, if idicated by user, a data.frame object with detailed |
|
29 |
+#' information for 'expanded' KEGG edges |
|
30 |
+#' @examples \dontrun{ |
|
31 |
+#' |
|
32 |
+#' #Default KEGG pathway with colored edges representing type of relationship: |
|
33 |
+#' KEGG_lincs("hsa04115", convert_KEGG_IDs = FALSE) |
|
34 |
+#' |
|
35 |
+#' #KEGG pathway with edge width and color based on observed experimental data: |
|
36 |
+#' KEGG_lincs("hsa04115", "HA1E") |
|
37 |
+#' |
|
38 |
+#' #Have edge information data.frame to be output to the global environment: |
|
39 |
+#' p53_edge_info <- KEGG_lincs("hsa04115", graph_title = "p53" |
|
40 |
+#' convert_KEGG_IDs = FALSE, get_data = TRUE) |
|
41 |
+#' } |
|
42 |
+ |
|
43 |
+ |
|
44 |
+KEGG_lincs <- |
|
45 |
+function(pathwayid, cell_line = NA, |
|
46 |
+ refine_by_cell_line = NA, |
|
47 |
+ add_L1000_edge_data = TRUE, |
|
48 |
+ significance_markup = TRUE, |
|
49 |
+ data_type = "100_full", |
|
50 |
+ pert_time = 96, |
|
51 |
+ only_mapped = TRUE, |
|
52 |
+ layered_nodes = FALSE, graph_title = "default", |
|
53 |
+ get_data = FALSE, |
|
54 |
+ convert_KEGG_IDs = TRUE){ |
|
55 |
+ if (is.na(refine_by_cell_line)){ |
|
56 |
+ if (is.na(cell_line)){ |
|
57 |
+ refine_by_cell_line <- FALSE |
|
58 |
+ } |
|
59 |
+ else { |
|
60 |
+ refine_by_cell_line <- TRUE |
|
61 |
+ } |
|
62 |
+ } |
|
63 |
+ |
|
64 |
+ KGML <- get_KGML(pathwayid, get_if_no_edges = TRUE) |
|
65 |
+ if(!isS4(KGML)){ |
|
66 |
+ return() |
|
67 |
+ } |
|
68 |
+ KEGG_mappings <- |
|
69 |
+ expand_KEGG_mappings(KGML,convert_KEGG_IDs = convert_KEGG_IDs) |
|
70 |
+ if (refine_by_cell_line) { |
|
71 |
+ full_mappings <- KEGG_mappings |
|
72 |
+ KEGG_mappings <- refine_mappings(KEGG_mappings, cell_line) |
|
73 |
+ for (i in 1:nrow(full_mappings)){ |
|
74 |
+ if(!full_mappings$entryID[i] %in% KEGG_mappings$entryID){ |
|
75 |
+ full_mappings$BGcolor[i] <- "#d3d3d3" |
|
76 |
+ full_mappings$in_relationship <- 0 |
|
77 |
+ } |
|
78 |
+ } |
|
79 |
+ } |
|
80 |
+ expanded_edges <- expand_KEGG_edges(KGML, KEGG_mappings) |
|
81 |
+ if(expanded_edges$type[1] == "dummy"){ |
|
82 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
83 |
+ KGML@pathwayInfo@title, "Cell-Line: ", cell_line, |
|
84 |
+ " *No Edges in Pathway") |
|
85 |
+ } |
|
86 |
+ |
|
87 |
+ if (is.na(cell_line)){ |
|
88 |
+ edge_map <- edge_mapping_info(expanded_edges) |
|
89 |
+ if (graph_title == "default"){ |
|
90 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
91 |
+ KGML@pathwayInfo@title) |
|
92 |
+ } |
|
93 |
+ } |
|
94 |
+ if (!is.na(cell_line) & !add_L1000_edge_data){ |
|
95 |
+ edge_map <- edge_mapping_info(expanded_edges) |
|
96 |
+ if (graph_title == "default"){ |
|
97 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
98 |
+ KGML@pathwayInfo@title, "Refined by Cell-Line: ", |
|
99 |
+ cell_line) |
|
100 |
+ } |
|
101 |
+ } |
|
102 |
+ if (nrow(expanded_edges[expanded_edges$type == "maplink",]) == |
|
103 |
+ nrow(expanded_edges) & only_mapped) { |
|
104 |
+ edge_map <- edge_mapping_info(expanded_edges) |
|
105 |
+ if (graph_title == "default"){ |
|
106 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
107 |
+ KGML@pathwayInfo@title, "Cell-Line: ", |
|
108 |
+ cell_line, " *No Edges in Data") |
|
109 |
+ } |
|
110 |
+ warning("All documented edges are of type 'maplink'; |
|
111 |
+ Overlap data cannot be mapped to selected pathway") |
|
112 |
+ } |
|
113 |
+ if(!is.na(cell_line) & add_L1000_edge_data) { |
|
114 |
+ user_data <- overlap_info(KGML, KEGG_mappings, cell_line, |
|
115 |
+ data_type = data_type, pert_time = pert_time) |
|
116 |
+ if (!is.na(user_data)[1,1]) { |
|
117 |
+ edges_plus_data <- add_edge_data(expanded_edges, KEGG_mappings, |
|
118 |
+ user_data, c(10,12), |
|
119 |
+ only_mapped = only_mapped) |
|
120 |
+ edge_map <- edge_mapping_info(edges_plus_data, data_added = TRUE, |
|
121 |
+ significance_markup = significance_markup) |
|
122 |
+ if (graph_title == "default"){ |
|
123 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
124 |
+ KGML@pathwayInfo@title, ", Cell-Line: ", |
|
125 |
+ cell_line, ", Data type: ", data_type) |
|
126 |
+ } |
|
127 |
+ } |
|
128 |
+ else { |
|
129 |
+ edge_map <- edge_mapping_info(expanded_edges) |
|
130 |
+ if (graph_title == "default"){ |
|
131 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
132 |
+ KGML@pathwayInfo@title, ", Cell-Line: ", |
|
133 |
+ cell_line, " *No Edges in Data") |
|
134 |
+ } |
|
135 |
+ } |
|
136 |
+ } |
|
137 |
+ if(refine_by_cell_line){ |
|
138 |
+ node_map <- node_mapping_info(full_mappings) |
|
139 |
+ } |
|
140 |
+ else{ |
|
141 |
+ node_map <- node_mapping_info(KEGG_mappings) |
|
142 |
+ } |
|
143 |
+ graph_object <- get_graph_object(node_map, edge_map, |
|
144 |
+ layered_nodes = layered_nodes) |
|
145 |
+ cyto_vis(graph_object, title = graph_title) |
|
146 |
+ if(get_data){ |
|
147 |
+ return(edge_map) |
|
148 |
+ } |
|
149 |
+} |
1 | 7 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,266 @@ |
1 |
+#' Annotate KEGG edge mappings with user data |
|
2 |
+#' @description Add data column[s] to object created from function |
|
3 |
+#' expand_KEGG_edges |
|
4 |
+#' @export |
|
5 |
+#' @importFrom gtools smartbind |
|
6 |
+#' @importFrom plyr rename |
|
7 |
+#' @param expanded_edges The data frame object generated via the function |
|
8 |
+#' expand_KEGG_edges |
|
9 |
+#' @param KEGG_mappings KEGG_mappings The data.frame object generated by the |
|
10 |
+#' function expand_KEGG_mappings |
|
11 |
+#' @param user_data A data frame where in which the first two columns contain |
|
12 |
+#' gene symbols representing an edge and any/all other column[s] contain |
|
13 |
+#' corresponding edge data. |
|
14 |
+#' @param data_column_no The column index for desired user data to be added |
|
15 |
+#' @param only_mapped A logical indicator; if set to FALSE will return 'de-novo' |
|
16 |
+#' edges that 'exist' in data but are not documented in KEGG |
|
17 |
+#' @return A data frame object with detailed KEGG edge mappings annotated with |
|
18 |
+#' user data |
|
19 |
+#' @examples |
|
20 |
+#' p53_KGML <- get_KGML('hsa04115') |
|
21 |
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML) |
|
22 |
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings) |
|
23 |
+#' p53_HA1E_data <- overlap_info(p53_KGML, p53_KEGG_mappings, 'HA1E', |
|
24 |
+#' data_type = '100_bing', only_mapped = FALSE) |
|
25 |
+#' |
|
26 |
+#' p53_edges_HA1E_ALL <- add_edge_data(p53_edges, p53_KEGG_mappings, |
|
27 |
+#' p53_HA1E_data, c(3, 10,12)) |
|
28 |
+#' p53_edges_HA1E_MAPPED <- add_edge_data(p53_edges, p53_KEGG_mappings, |
|
29 |
+#' p53_HA1E_data, c(3, 10,12), |
|
30 |
+#' only_mapped = TRUE) |
|
31 |
+ |
|
32 |
+ |
|
33 |
+add_edge_data <- function(expanded_edges, KEGG_mappings, |
|
34 |
+ user_data, data_column_no = 3, only_mapped = FALSE) { |
|
35 |
+ |
|
36 |
+ expanded_edges <- expanded_edges[expanded_edges$type != "maplink", ] |
|
37 |
+ |
|
38 |
+ if ("pre_mapped" %in% names(user_data)) { |
|
39 |
+ user_data <- user_data[, -c(which(colnames(user_data) == "pre_mapped"))] |
|
40 |
+ } |
|
41 |
+ |
|
42 |
+ if (nrow(expanded_edges) > 0) { |
|
43 |
+ |
|
44 |
+ expanded_edges$unique_ID = paste0(expanded_edges$entry1symbol, ",", |
|
45 |
+ expanded_edges$entry2symbol) |
|
46 |
+ expanded_edges$unique_IDR = paste0(expanded_edges$entry2symbol, ",", |
|
47 |
+ expanded_edges$entry1symbol) |
|
48 |
+ user_data$unique_ID = paste0(user_data$knockout1, ",", |
|
49 |
+ user_data$knockout2) |
|
50 |
+ pre_mapped1 <- subset(user_data, user_data$unique_ID %in% |
|
51 |
+ expanded_edges$unique_ID) |
|
52 |
+ pre_mapped2 <- subset(user_data, user_data$unique_ID %in% |
|
53 |
+ expanded_edges$unique_IDR) |
|
54 |
+ pre_mapped2 <- pre_mapped2[, c(2, 1, 3:ncol(pre_mapped2))] |
|
55 |
+ names(pre_mapped2) = names(pre_mapped1) |
|
56 |
+ if (nrow(pre_mapped2) >= 1 & nrow(pre_mapped1) >= 1) { |
|
57 |
+ pre_mapped2$unique_ID <- paste0(pre_mapped2$knockout1, ",", |
|
58 |
+ pre_mapped2$knockout2) |
|
59 |
+ pre_mapped <- rbind(pre_mapped1, pre_mapped2) |
|
60 |
+ } |
|
61 |
+ if (nrow(pre_mapped2) == 0 & (nrow(pre_mapped1) == 0)) { |
|
62 |
+ pre_mapped <- data.frame(unique_ID = NA) |
|
63 |
+ } |
|
64 |
+ if (nrow(pre_mapped1) == 0 & nrow(pre_mapped2) >= 1) { |
|
65 |
+ pre_mapped2$unique_ID <- paste0(pre_mapped2$knockout1, ",", |
|
66 |
+ pre_mapped2$knockout2) |
|
67 |
+ pre_mapped <- pre_mapped2 |
|
68 |
+ } |
|
69 |
+ if (nrow(pre_mapped2) == 0 & nrow(pre_mapped1) >= 1) { |
|
70 |
+ pre_mapped <- pre_mapped1 |
|
71 |
+ } |
|
72 |
+ if (!is.na(pre_mapped[1, 1])) { |
|
73 |
+ expanded_edges_1 <- subset(expanded_edges, expanded_edges$unique_ID |
|
74 |
+ %in% pre_mapped$unique_ID) |
|
75 |
+ expanded_edges_1 <- expanded_edges[expanded_edges$unique_ID %in% |
|
76 |
+ pre_mapped$unique_ID, ] |
|
77 |
+ expanded_edges_2 <- expanded_edges[!expanded_edges$unique_ID %in% |
|
78 |
+ pre_mapped$unique_ID, ] |
|
79 |
+ expanded_edges_1$has_data = 1 |
|
80 |
+ testval <- nrow(expanded_edges_2) |
|
81 |
+ if (testval > 0) { |
|
82 |
+ expanded_edges_2$has_data = 0 |
|
83 |
+ edge_set <- rbind(expanded_edges_1, expanded_edges_2) |
|
84 |
+ } else { |
|
85 |
+ edge_set <- expanded_edges_1 |
|
86 |
+ } |
|
87 |
+ |
|
88 |
+ edge_set <- edge_set[order(edge_set$unique_ID), ] |
|
89 |
+ |
|
90 |
+ data_to_add <- cbind(as.character(pre_mapped[, "unique_ID"]), |
|
91 |
+ pre_mapped[, data_column_no]) |
|
92 |
+ data_to_add <- data.frame(data_to_add, stringsAsFactors = FALSE) |
|
93 |
+ names(data_to_add)[1] <- "unique_ID" |
|
94 |
+ if (ncol(data_to_add) == 2) { |
|
95 |
+ names(data_to_add)[2] <- names(pre_mapped)[data_column_no] |
|
96 |
+ } |
|
97 |
+ |
|
98 |
+ data_to_add <- data_to_add[order(data_to_add$unique_ID), ] |
|
99 |
+ |
|
100 |
+ annotated_edges <- merge(edge_set, data_to_add, "unique_ID", |
|
101 |
+ all.x = TRUE) |
|
102 |
+ drops <- c("unique_ID", "unique_IDR") |
|
103 |
+ annotated_edges <- annotated_edges[, !(names(annotated_edges) %in% |
|
104 |
+ drops)] |
|
105 |
+ cat(paste0("Number of edges documented in selected pathway = ", |
|
106 |
+ nrow(annotated_edges)), "\n") |
|
107 |
+ cat(paste0("Number of edges with corresponding user data = ", |
|
108 |
+ sum(annotated_edges$has_data), "\n")) |
|
109 |
+ cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/ |
|
110 |
+ nrow(annotated_edges) * 100, 2), "%", "\n")) |
|
111 |
+ annotated_edges$premapped <- 1 |
|
112 |
+ un_mapped <- subset(user_data, !user_data$unique_ID %in% |
|
113 |
+ expanded_edges$unique_ID & |
|
114 |
+ !user_data$unique_ID %in% expanded_edges$unique_IDR) |
|
115 |
+ un_mapped_edges <- un_mapped[, c(1:2, data_column_no, |
|
116 |
+ ncol(un_mapped))] |
|
117 |
+ |
|
118 |
+ if (only_mapped) { |
|
119 |
+ return(annotated_edges) |
|
120 |
+ } |
|
121 |
+ } else { |
|
122 |
+ annotated_edges <- expanded_edges |
|
123 |
+ annotated_edges$premapped <- 1 |
|
124 |
+ annotated_edges$has_data <- 0 |
|
125 |
+ un_mapped <- user_data |
|
126 |
+ un_mapped$unique_ID <- paste0(un_mapped$knockout1, |
|
127 |
+ un_mapped$knockout2) |
|
128 |
+ un_mapped_edges <- un_mapped[, c(1:2, data_column_no, |
|
129 |
+ ncol(un_mapped))] |
|
130 |
+ un_mapped_edges$premapped <- 0 |
|
131 |
+ un_mapped_edges$has_data <- 0 |
|
132 |
+ cat(paste0("Number of edges documented in selected pathway = ", |
|
133 |
+ nrow(annotated_edges)), "\n") |
|
134 |
+ cat(paste0("Number of edges with corresponding user data = ", |
|
135 |
+ sum(annotated_edges$has_data), "\n")) |
|
136 |
+ cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/ |
|
137 |
+ nrow(annotated_edges) * 100, 2), "%", "\n")) |
|
138 |
+ if (only_mapped) { |
|
139 |
+ cat(paste0("No documented edges are found in data; |
|
140 |
+ only data for de-novo edges can be mapped \n")) |
|
141 |
+ return(annotated_edges) |
|
142 |
+ } |
|
143 |
+ } |
|
144 |
+ } else if (nrow(expanded_edges) == 0) { |
|
145 |
+ annotated_edges <- expanded_edges |
|
146 |
+ annotated_edges$premapped <- 1 |
|
147 |
+ annotated_edges$has_data <- 0 |
|
148 |
+ un_mapped <- user_data |
|
149 |
+ un_mapped$unique_ID <- paste0(un_mapped$knockout1, un_mapped$knockout2) |
|
150 |
+ un_mapped_edges <- un_mapped[, c(1:2, data_column_no, ncol(un_mapped))] |
|
151 |
+ un_mapped_edges$premapped <- 0 |
|
152 |
+ un_mapped_edges$has_data <- 0 |
|
153 |
+ cat(paste0("Number of edges documented in selected pathway = ", |
|
154 |
+ nrow(annotated_edges)), "\n") |
|
155 |
+ cat(paste0("Number of edges with corresponding user data = ", |
|
156 |
+ sum(annotated_edges$has_data), "\n")) |
|
157 |
+ cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/ |
|
158 |
+ nrow(annotated_edges) * 100, 2), "%", "\n")) |
|
159 |
+ if (only_mapped) { |
|
160 |
+ return(annotated_edges) |
|
161 |
+ } |
|
162 |
+ } |
|
163 |
+ names(un_mapped_edges)[1:2] <- c("entryNAME_1", "entryNAME_2") |
|
164 |
+ for (i in 1:nrow(un_mapped_edges)) { |
|
165 |
+ un_mapped_edges$Source_eid[i] <- |
|
166 |
+ list(KEGG_mappings$entryID[which(KEGG_mappings$entrySYMBOL == |
|
167 |
+ un_mapped_edges$entryNAME_1[i])]) |
|
168 |
+ un_mapped_edges$Target_eid[i] <- |
|
169 |
+ list(KEGG_mappings$entryID[which(KEGG_mappings$entrySYMBOL == |
|
170 |
+ un_mapped_edges$entryNAME_2[i])]) |
|
171 |
+ x <- length(unlist(un_mapped_edges$Source_eid[i])) |
|
172 |
+ y <- length(unlist(un_mapped_edges$Target_eid[i])) |
|
173 |
+ if (x > 1 | y > 1) { |
|
174 |
+ un_mapped_edges$simple[i] <- FALSE |
|
175 |
+ } else { |
|
176 |
+ un_mapped_edges$simple[i] <- TRUE |
|
177 |
+ } |
|
178 |
+ } |
|
179 |
+ simple_un_mapped_edges <- un_mapped_edges[un_mapped_edges$simple == TRUE, |
|
180 |
+ -ncol(un_mapped_edges)] |
|
181 |
+ complex_un_mapped_edges <- un_mapped_edges[un_mapped_edges$simple == FALSE, |
|
182 |
+ -ncol(un_mapped_edges)] |
|
183 |
+ test_val <- nrow(complex_un_mapped_edges) |
|
184 |
+ if (test_val == 0) { |
|
185 |
+ un_mapped_edges <- simple_un_mapped_edges |
|
186 |
+ } else { |
|
187 |
+ keeps <- c("Source_eid", "Target_eid", "unique_ID") |
|
188 |
+ c_temp <- complex_un_mapped_edges[, (names(complex_un_mapped_edges) %in% |
|
189 |
+ keeps)] |
|
190 |
+ |
|
191 |
+ for (i in 1:nrow(c_temp)) { |
|
192 |
+ x <- length(unlist(c_temp$Source_eid[i])) |
|
193 |
+ y <- length(unlist(c_temp$Target_eid[i])) |
|
194 |
+ l <- x * y |
|
195 |
+ |
|
196 |
+ c_temp$Source_eid[i] <- list(sort(unlist(rep(c_temp$Source_eid[i], |
|
197 |
+ y)))) |
|
198 |
+ c_temp$Target_eid[i] <- list(unlist(rep(c_temp$Target_eid[i], x))) |
|
199 |
+ c_temp$unique_ID[i] <- list(rep(c_temp$unique_ID[i], l)) |
|
200 |
+ } |
|
201 |
+ |
|
202 |
+ c_temp <- data.frame(unique_ID = unlist(c_temp$unique_ID), Source_eid = |
|
203 |
+ unlist(c_temp$Source_eid), |
|
204 |
+ Target_eid = unlist(c_temp$Target_eid)) |
|
205 |
+ drops <- c("Source_eid", "Target_eid") |
|
206 |
+ complex_un_mapped_edges <- |
|
207 |
+ complex_un_mapped_edges[, !(names(complex_un_mapped_edges) %in% |
|
208 |
+ drops)] |
|
209 |
+ complex_un_mapped_edges <- merge(complex_un_mapped_edges, c_temp) |
|
210 |
+ un_mapped_edges <- rbind(simple_un_mapped_edges, |
|
211 |
+ complex_un_mapped_edges) |
|
212 |
+ } |
|
213 |
+ un_mapped_edges$Source_eid <- unlist(un_mapped_edges$Source_eid) |
|
214 |
+ un_mapped_edges$Target_eid <- unlist(un_mapped_edges$Target_eid) |
|
215 |
+ un_mapped_edges <- plyr::rename(un_mapped_edges, c(Source_eid = "entry1", |
|
216 |
+ Target_eid = "entry2", entryNAME_1 = "entry1symbol", |
|
217 |
+ entryNAME_2 = "entry2symbol")) |
|
218 |
+ un_mapped_edges$subtype1 <- "de_novo" |
|
219 |
+ |
|
220 |
+ drops <- c("unique_ID") |
|
221 |
+ un_mapped_edges <- un_mapped_edges[, !(names(un_mapped_edges) %in% drops)] |
|
222 |
+ un_mapped_edges$has_data <- 1 |
|
223 |
+ un_mapped_edges$premapped <- 0 |
|
224 |
+ for (i in 1:nrow(un_mapped_edges)) { |
|
225 |
+ un_mapped_edges$entry1accession[i] <- |
|
226 |
+ KEGG_mappings$entryACCESSION[which(KEGG_mappings$entrySYMBOL == |
|
227 |
+ un_mapped_edges$entry1symbol[i])][1] |
|
228 |
+ un_mapped_edges$entry2accession[i] <- |
|
229 |
+ KEGG_mappings$entryACCESSION[which(KEGG_mappings$entrySYMBOL == |
|
230 |
+ un_mapped_edges$entry2symbol[i])][1] |
|
231 |
+ un_mapped_edges$entry1type[i] <- |
|
232 |
+ KEGG_mappings$entryTYPE[which(KEGG_mappings$entrySYMBOL == |
|
233 |
+ un_mapped_edges$entry1symbol[i])][1] |
|
234 |
+ un_mapped_edges$entry2type[i] <- |
|
235 |
+ KEGG_mappings$entryTYPE[which(KEGG_mappings$entrySYMBOL == |
|
236 |
+ un_mapped_edges$entry2symbol[i])][1] |
|
237 |
+ } |
|
238 |
+ |
|
239 |
+ if (nrow(annotated_edges) > 0 & nrow(un_mapped_edges) > 0) { |
|
240 |
+ all_edges <- gtools::smartbind(annotated_edges, un_mapped_edges) |
|
241 |
+ } else if (nrow(annotated_edges) > 0) |
|
242 |
+ all_edges <- annotated_edges else { |
|
243 |
+ all_edges <- un_mapped_edges |
|
244 |
+ all_edges$value <- NA |
|
245 |
+ all_edges$subtype2 <- NA |
|
246 |
+ all_edges$value2 <- NA |
|
247 |
+ all_edges$specific_subtype <- NA |
|
248 |
+ all_edges$type <- NA |
|
249 |
+ all_edges$is_direct <- 1 |
|
250 |
+ all_edges$edgeID <- seq(1:nrow(un_mapped_edges)) |
|
251 |
+ refcols <- c("edgeID", "entry1accession", "entry2accession", "entry1", |
|
252 |
+ "entry2") |
|
253 |
+ all_edges <- all_edges[, c(refcols, setdiff(names(all_edges), refcols))] |
|
254 |
+ cat(paste0("All documented edges are of type maplink; only data for |
|
255 |
+ de-novo edges can be mapped \n")) |
|
256 |
+ } |
|
257 |
+ |
|
258 |
+ for (i in 1:nrow(all_edges)) { |
|
259 |
+ if (all_edges$entry1[i] == all_edges$entry2[i]) { |
|
260 |
+ all_edges$paralogs[i] <- 1 |
|
261 |
+ } else { |
|
262 |
+ all_edges$paralogs[i] <- 0 |
|
263 |
+ } |
|
264 |
+ } |
|
265 |
+ return(all_edges) |
|
266 |
+} |
0 | 267 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,95 @@ |
1 |
+#' Send graph to Cytoscape via CyREST |
|
2 |
+#' @description View the KEGG pathway in Cytoscape. With either the |
|
3 |
+#' 'expanded edges' or 'stacked nodes' layout, users can visualize and interact |
|
4 |
+#' with the graphs [strictly] as they are documented in the most recent KGML |
|
5 |
+#' available from KEGG. |
|
6 |
+#' This function is a modified version of the function send2cy(), |
|
7 |
+#' which is part of the cyREST utility functions. |
|
8 |
+#' @export |
|
9 |
+#' @import methods |
|
10 |
+#' @import httr |
|
11 |
+#' @import RJSONIO |
|
12 |
+#' @param graph_object An igraph object such as the one generated by the |
|
13 |
+#' function \code{\link{get_graph_object}} |
|
14 |
+#' @param title An optional title for the graph when it is in Cytoscape |
|
15 |
+#' @param edge_width_attribute The attribute that will be used for edge width; |
|
16 |
+#' if data is not added or the attribute is not part of the graphing |
|
17 |
+#' information, the edge width will default to 1. |
|
18 |
+#' @param port.number The port address for Cytoscape |
|
19 |
+#' @return A dynamic map in Cytoscape automatically formatted for convenient |
|
20 |
+#' viewing. |
|
21 |
+#' @examples |
|
22 |
+#' p53_KGML <- get_KGML("hsa04115") |
|
23 |
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML, FALSE) |
|
24 |
+#' nodes <- node_mapping_info(p53_KEGG_mappings) |
|
25 |
+#' |
|
26 |
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings) |
|
27 |
+#' edges <- edge_mapping_info(p53_edges) |
|
28 |
+#' |
|
29 |
+#' p53_graph_object <- get_graph_object(nodes, edges) |
|
30 |
+#' |
|
31 |
+#' @examples \dontrun{ |
|
32 |
+#' cyto_vis(p53_graph_object, "Default p53 Graph [no data added]") |
|
33 |
+#' |
|
34 |
+#' #Workflow to visualize graph with data-dependent attributes: |
|
35 |
+#' |
|
36 |
+#' p53_KGML <- get_KGML("hsa04115") |
|
37 |
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML) |
|
38 |
+#' nodes <- node_mapping_info(p53_KEGG_mappings) |
|
39 |
+#' |
|
40 |
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings) |
|
41 |
+#' |
|
42 |
+#' p53_HA1E_data <- overlap_info(p53_KGML, p53_KEGG_mappings, "HA1E", |
|
43 |
+#' data_type = "100_bing") |
|
44 |
+#' p53_edges_plus_data <- add_edge_data(p53_edges, p53_KEGG_mappings, |
|
45 |
+#' p53_HA1E_data, c(3, 10,12), |
|
46 |
+#' only_mapped = TRUE) |
|
47 |
+#' |
|
48 |
+#' edges <- edge_mapping_info(p53_edges_plus_data, data_added = TRUE) |
|
49 |
+#' |
|
50 |
+#' p53_plus_data_graph_object <- get_graph_object(nodes, edges) |
|
51 |
+#' |
|
52 |
+#' cyto_vis(p53_plus_data_graph_object, "p53 Graph: Mapped Edges + HA1E Data", |
|
53 |
+#' edge_width_attribute = "UP") |
|
54 |
+#' } |
|
55 |
+ |
|
56 |
+cyto_vis <- |
|
57 |
+ function(graph_object, title = "Cytoscape Graph Window", |
|
58 |
+ edge_width_attribute = "summary_score", |
|
59 |
+ port.number = 1234) { |
|
60 |
+ base.url = paste("http://localhost:", toString(port.number), "/v1", sep="") |
|
61 |
+ |
|
62 |
+ if (edge_width_attribute %in% names(igraph::edge_attr(graph_object))){ |
|
63 |
+ min.summary_score <- min(abs(igraph::E(graph_object)$summary_score), |
|
64 |
+ na.rm = TRUE) |
|
65 |
+ max.summary_score <- max(abs(igraph::E(graph_object)$summary_score), |
|
66 |
+ na.rm = TRUE) |
|
67 |
+ map_edge_width <- TRUE |
|
68 |
+ } |
|
69 |
+ else { |
|
70 |
+ min.summary_score <- NA |
|
71 |
+ max.summary_score <- NA |
|
72 |
+ map_edge_width <- FALSE |
|
73 |
+ } |
|
74 |
+ |
|
75 |
+ graph_object$name <- title |
|
76 |
+ cygraph <- toCytoscape(graph_object) |
|
77 |
+ |
|
78 |
+ network.url = paste(base.url, "networks", sep="/") |
|
79 |
+ res <- httr::POST(url=network.url, body=cygraph, encode="json") |
|
80 |
+ network.suid = unname(RJSONIO::fromJSON(rawToChar(res$content))) |
|
81 |
+ |
|
82 |
+ style.name = "myKEGGstyle" |
|
83 |
+ mappings <- generate_mappings(style.name, map_edge_width, |
|
84 |
+ edge_width_attribute, |
|
85 |
+ min_score = min.summary_score, |
|
86 |
+ max_score = max.summary_score) |
|
87 |
+ style.url = paste(base.url, "styles", sep="/") |
|
88 |
+ style <- list(title=style.name, defaults = mappings[[1]], |
|
89 |
+ mappings = mappings[[2]]) |
|
90 |
+ style.JSON <- RJSONIO::toJSON(style) |
|
91 |
+ httr::POST(url=style.url, body=style.JSON, encode = "json") |
|
92 |
+ apply.style.url = paste(base.url, "apply/styles", style.name , |
|
93 |
+ toString(network.suid), sep="/") |
|
94 |
+ httr::GET(apply.style.url) |
|
95 |
+ } |
0 | 96 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,150 @@ |
1 |
+#' Prepare edges for mapping |
|
2 |
+#' @description Modify the mapping information for desired look when graphed in |
|
3 |
+#' Cytoscape |
|
4 |
+#' @param expanded_edges The data frame object generated via the function |
|
5 |
+#' expand_KEGG_edges() OR has been modified by the function add_edge_data() |
|
6 |
+#' @param data_added A logical indicator; must be set to TRUE if user data has |
|
7 |
+#' been added (i.e. edges modified by function add_edge_data()) |
|
8 |
+#' @param significance_markup A logical indicator; if set to TRUE will color |
|
9 |
+#' edges based on direction and significance of correlation (as determined by |
|
10 |
+#' user-data-analysis) |
|
11 |
+#' @return A data.frame object for edges that will be passed on to the function |
|
12 |
+#' get_graph_object |
|
13 |
+#' @export |
|
14 |
+#' @examples |
|
15 |
+#' p53_KGML <- get_KGML("hsa04115") |
|
16 |
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML) |
|
17 |
+#' |
|
18 |
+#' #Default; no data added to edges: |
|
19 |
+#' |
|
20 |
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings) |
|
21 |
+#' p53_edge_mapping_info <- edge_mapping_info(p53_edges) |
|
22 |
+#' |
|
23 |
+#' #If data is added to edges as additional attribute[s]: |
|
24 |
+#' |
|
25 |
+#' p53_HA1E_data <- overlap_info(p53_KGML, p53_KEGG_mappings, |
|
26 |
+#' "HA1E", data_type = "100_bing") |
|
27 |
+#' |
|
28 |
+#' p53_edges_HA1E_data_MAPPED <- add_edge_data(p53_edges, p53_KEGG_mappings, |
|
29 |
+#' p53_HA1E_data, c(3, 10,12), |
|
30 |
+#' only_mapped = TRUE) |
|
31 |
+#' |
|
32 |
+#' p53_edge_mapping_HA1E <- edge_mapping_info(p53_edges_HA1E_data_MAPPED, |
|
33 |
+#' data_added = TRUE) |
|
34 |
+#' |
|
35 |
+ |
|
36 |
+edge_mapping_info <- |
|
37 |
+function(expanded_edges, data_added = FALSE, significance_markup = FALSE){ |
|
38 |
+ expanded_edges <- expanded_edges[,-c(1)] |
|
39 |
+ for(i in 1:nrow(expanded_edges)){ |
|
40 |
+ if (expanded_edges$subtype1[i] == "activation"){ |
|
41 |
+ expanded_edges$color[i] = "#b20000" |
|
42 |
+ } |
|
43 |
+ else if (expanded_edges$subtype1[i] =="expression"){ |
|
44 |
+ expanded_edges$color[i] = "#b20000" |
|
45 |
+ } |
|
46 |
+ else if (expanded_edges$subtype1[i] =="inhibition"){ |
|
47 |
+ expanded_edges$color[i] = "#0015b2" |
|
48 |
+ } |
|
49 |
+ else if (expanded_edges$subtype1[i] =="irreversible"){ |
|
50 |
+ expanded_edges$color[i] = "#b20000" |
|
51 |
+ } |
|
52 |
+ else if (expanded_edges$subtype1[i] =="repression"){ |
|
53 |
+ expanded_edges$color[i] = "#0015b2" |
|
54 |
+ } |
|
55 |
+ else if (expanded_edges$subtype1[i] == "compound" | |
|
56 |
+ expanded_edges$subtype1[i] == "indirect_effect" | |
|
57 |
+ expanded_edges$subtype1[i] == "binding_association"){ |
|
58 |
+ expanded_edges$color[i] = "#000000" |
|
59 |
+ } |
|
60 |
+ else if (expanded_edges$subtype1[i] == "de_novo") { |
|
61 |
+ expanded_edges$color[i] = "#808080" |
|
62 |
+ } |
|
63 |
+ else if (expanded_edges$subtype1[i] == "Not defined in KEGG" | |
|
64 |
+ expanded_edges$type[i] == "dummy") { |
|
65 |
+ expanded_edges$color[i] = "#808080" |
|
66 |
+ } |
|
67 |
+ else { |
|
68 |
+ expanded_edges$color[i] = "#1c9900" |
|
69 |
+ } |
|
70 |
+ |
|
71 |
+ if (expanded_edges$color[i] == "#1c9900"){ |
|
72 |
+ expanded_edges$edge_label[i] = expanded_edges$value[i] |
|
73 |
+ } |
|
74 |
+ else { |
|
75 |
+ expanded_edges$edge_label[i] = NA |
|
76 |
+ } |
|
77 |
+ |
|
78 |
+ if (is.na(expanded_edges$subtype2[i]) == FALSE & |
|
79 |
+ expanded_edges$subtype2[i] != "indirect"){ |
|
80 |
+ if (expanded_edges$subtype1[i] == "inhibition"){ |
|
81 |
+ expanded_edges$color[i] = "#690099" |
|
82 |
+ } |
|
83 |
+ else if (expanded_edges$subtype1[i] == "activation"){ |
|
84 |
+ expanded_edges$color[i] = "#FF6600" |
|
85 |
+ } |
|
86 |
+ expanded_edges$edge_label[i] = expanded_edges$value2[i] |
|
87 |
+ } |
|
88 |
+ if(data_added){ |
|
89 |
+ if (expanded_edges$premapped[i] == 1){ |
|
90 |
+ if (expanded_edges$value[i] == "--|"){ |
|
91 |
+ expanded_edges$tooltip[i] <- |
|
92 |
+ paste0(expanded_edges$entry1symbol[i]," --| ", |
|
93 |
+ expanded_edges$entry2symbol[i]) |
|
94 |
+ } |
|
95 |
+ else { |
|
96 |
+ expanded_edges$tooltip[i] <- |
|
97 |
+ paste0(expanded_edges$entry1symbol[i], " --> ", |
|
98 |
+ expanded_edges$entry2symbol[i]) |
|
99 |
+ } |
|
100 |
+ } |
|
101 |
+ else { |
|
102 |
+ expanded_edges$tooltip[i] <- paste0(expanded_edges$entry1symbol[i], |
|
103 |
+ " - ", |
|
104 |
+ expanded_edges$entry2symbol[i]) |
|
105 |
+ } |
|
106 |
+ } |
|
107 |
+ else { |
|
108 |
+ if (expanded_edges$value[i] == "--|"){ |
|
109 |
+ expanded_edges$tooltip[i] <- |
|
110 |
+ paste0(expanded_edges$entry1symbol[i], " --| ", |
|
111 |
+ expanded_edges$entry2symbol[i]) |
|
112 |
+ } |
|
113 |
+ else { |
|
114 |
+ expanded_edges$tooltip <- |
|
115 |
+ paste0(expanded_edges$entry1symbol, " --> ", |
|
116 |
+ expanded_edges$entry2symbol) |
|
117 |
+ } |
|
118 |
+ } |
|
119 |
+ } |
|
120 |
+ |
|
121 |
+ expanded_edges$name <- paste0(expanded_edges$entry1, |
|
122 |
+ " (", expanded_edges$type, ") ", |
|
123 |
+ expanded_edges$entry2) |
|
124 |
+ if (significance_markup){ |
|
125 |
+ for (i in 1:nrow(expanded_edges)){ |
|
126 |
+ if (expanded_edges$has_data[i] == 1){ |
|
127 |
+ if (expanded_edges$summary_score[i] > 0 & |
|
128 |
+ expanded_edges$significant[i] == 1){ |
|
129 |
+ expanded_edges$color[i] <- "#b20000" |
|
130 |
+ } |
|
131 |
+ else if (expanded_edges$summary_score[i] > 0 & |
|
132 |
+ expanded_edges$significant[i] == 0){ |
|
133 |
+ expanded_edges$color[i] <- "#FF6600" |
|
134 |
+ } |
|
135 |
+ else if (expanded_edges$summary_score[i] <= 0 & |
|
136 |
+ expanded_edges$significant[i] == 1){ |
|
137 |
+ expanded_edges$color[i] <- "#0015b2" |
|
138 |
+ } |
|
139 |
+ else { |
|
140 |
+ expanded_edges$color[i] <- "#690099" |
|
141 |
+ } |
|
142 |
+ } |
|
143 |
+ else { |
|
144 |
+ expanded_edges$color[i] = "#808080" |
|
145 |
+ } |
|
146 |
+ } |
|
147 |
+ } |
|
148 |
+ expanded_edges <- expanded_edges[,c(3:4, 1:2, 5:ncol(expanded_edges))] |
|
149 |
+ return(expanded_edges) |
|
150 |
+} |
0 | 151 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,285 @@ |
1 |
+#' Get detailed KEGG mapping information for each relation [edge] documented |
|
2 |
+#' in KEGG |
|
3 |
+#' @description Extract relationship information from KGML object and re-map |
|
4 |
+#' based on normalized node information |
|
5 |
+#' |
|
6 |
+#' @param KGML_file An object of formal class KEGGPathway |
|
7 |
+#' @param KEGG_mappings The data.frame object generated by the function |
|
8 |
+#' expand_KEGG_mappings |
|
9 |
+#' |
|
10 |
+#' @return A dataframe object with unique entry information for all edges |
|
11 |
+#' documented in the KEGG pathway. |
|
12 |
+#' Note that each row has a unique combination of values for |
|
13 |
+#' (entry1, entry2, entry1symbol, entry2symbol). |
|
14 |
+#' @export |
|
15 |
+#' @importMethodsFrom KEGGgraph edges |
|
16 |
+#' @examples |
|
17 |
+#' p53_KGML <- get_KGML("hsa04115") |
|
18 |
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML, FALSE) |
|
19 |
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings) |
|
20 |
+ |
|
21 |
+ |
|
22 |
+expand_KEGG_edges <- |
|
23 |
+function(KGML_file,KEGG_mappings){ |
|
24 |
+ num_edges <- length(KGML_file@edges) |
|
25 |
+ if (num_edges == 0){ |
|
26 |
+ print("No Documented Edges in Pathway") |
|
27 |
+ expanded_edges <- data.frame("edgeID"=1, "entry1"= "1", "entry2" = "1", |
|
28 |
+ "entry1accession"="dummy", " |
|
29 |
+ entry2accession"="dummy", |
|
30 |
+ "type"= "dummy", "subtype1"="dummy", |
|
31 |
+ "value"="dummy", "subtype2" = "dummy", |
|
32 |
+ "value2" = "dummy", |
|
33 |
+ "specific_subtype"="dummy", |
|
34 |
+ "is_direct"= 0, "entry1type"="dummy", |
|
35 |
+ "entry2type"= "dummy", "entry1symbol"= "1", |
|
36 |
+ "entry2symbol"="1", |
|
37 |
+ stringsAsFactors = FALSE) |
|
38 |
+ return(expanded_edges) |
|
39 |
+ } |
|
40 |
+ map_edge_data<- KGML_file@edges |
|
41 |
+ get_edges<- data.frame(edgeID = seq(1:length(map_edge_data ))) |
|
42 |
+ for (i in 1:nrow(get_edges)){ |
|
43 |
+ get_edges$entry1[i] <- as.numeric(map_edge_data[[i]]@entry1ID) |
|
44 |
+ get_edges$entry2[i] <- as.numeric(map_edge_data[[i]]@entry2ID) |
|
45 |
+ get_edges$type[i] <- map_edge_data[[i]]@type |
|
46 |
+ if (length(map_edge_data[[i]]@subtype) == 1){ |
|
47 |
+ get_edges$subtype1[i] <- map_edge_data[[i]]@subtype[[1]]@name |
|
48 |
+ if (get_edges$subtype1[i] == "binding/association"){ |
|
49 |
+ get_edges$subtype1[i] <- "binding_association" |
|
50 |
+ } |
|
51 |
+ get_edges$value[i] <- map_edge_data[[i]]@subtype[[1]]@value |
|
52 |
+ get_edges$subtype2[i] <- NA |
|
53 |
+ get_edges$value2[i] <- NA |
|
54 |
+ get_edges$specific_subtype[i] <- get_edges$subtype1[i] |
|
55 |
+ get_edges$is_direct[i] <- 1 |
|
56 |
+ if (get_edges$subtype1[i] == "indirect effect"){ |
|
57 |
+ get_edges$is_direct[i] <- 0 |
|
58 |
+ get_edges$subtype1[i] <- "indirect_effect" |
|
59 |
+ } |
|
60 |
+ } |
|
61 |
+ else if (length(map_edge_data[[i]]@subtype) == 2){ |
|
62 |
+ get_edges$subtype1[i] <- map_edge_data[[i]]@subtype[[1]]@name |
|
63 |
+ if (get_edges$subtype1[i] == "binding/association"){ |
|
64 |
+ get_edges$subtype1[i] <- "binding_association" |
|
65 |
+ } |
|
66 |
+ get_edges$value[i] <- map_edge_data[[i]]@subtype[[1]]@value |
|
67 |
+ get_edges$subtype2[i] <- map_edge_data[[i]]@subtype[[2]]@name |
|
68 |
+ get_edges$specific_subtype[i] <- paste0(get_edges$subtype1[i], "_", |
|
69 |
+ get_edges$subtype2[i]) |
|
70 |
+ get_edges$value2[i] <- map_edge_data[[i]]@subtype[[2]]@value |
|
71 |
+ get_edges$is_direct[i] <- 1 |
|
72 |
+ if (get_edges$subtype2[i] == "indirect effect"){ |
|
73 |
+ get_edges$subtype2[i] <- "indirect" |
|
74 |
+ get_edges$is_direct[i] <- 0 |
|
75 |
+ } |
|
76 |
+ } |
|
77 |
+ else { |
|
78 |
+ get_edges$subtype1[i] <- "Not defined in KEGG" |
|
79 |
+ get_edges$value[i] <- "Not defined in KEGG" |
|
80 |
+ get_edges$subtype2[i] <- NA |
|
81 |
+ get_edges$value2[i] <- NA |
|
82 |
+ get_edges$specific_subtype[i] <- NA |
|
83 |
+ get_edges$is_direct[i] <- 1 |
|
84 |
+ } |
|
85 |
+ get_edges$entry1type[i] <- |
|
86 |
+ KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == |
|
87 |
+ get_edges$entry1[i])][1] |
|
88 |
+ get_edges$entry2type[i] <- |
|
89 |
+ KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == |
|
90 |
+ get_edges$entry2[i])][1] |
|
91 |
+ } |
|
92 |
+ get_edges <- get_edges[!is.na(get_edges$entry1type) & |
|
93 |
+ !is.na(get_edges$entry2type),] |
|
94 |
+ if (nrow(get_edges) ==0){ |
|
95 |
+ print("No Documented Edges in Pathway for selected cell type; |
|
96 |
+ all edges are between non-expressed genes") |
|
97 |
+ expanded_edges <- data.frame("edgeID"=1, "entry1"= "1", "entry2" = "1", |
|
98 |
+ "entry1accession"="dummy", |
|
99 |
+ "entry2accession"="dummy","type"= "dummy", |
|
100 |
+ "subtype1"="dummy", "value"="dummy", |
|
101 |
+ "subtype2" = "dummy", "value2" = "dummy", |
|
102 |
+ "specific_subtype"="dummy", "is_direct"= 0, |
|
103 |
+ "entry1type"="dummy", |
|
104 |
+ "entry2type"= "dummy","entry1symbol"= "1", |
|
105 |
+ "entry2symbol"="1", |
|
106 |
+ stringsAsFactors = FALSE) |
|
107 |
+ return(expanded_edges) |
|
108 |
+ } |
|
109 |
+ |
|
110 |
+ ##At some point include option not to ungroup edges |
|
111 |
+ edges_no_groups <- subset(get_edges, get_edges$entry1type != "group" |
|
112 |
+ & get_edges$entry2type != "group", select = -c(1)) |
|
113 |
+ edges_with_groups <- subset(get_edges, get_edges$entry1type == "group" | |
|
114 |
+ get_edges$entry2type == "group") |
|
115 |
+ edges_one_group <- subset(get_edges, get_edges$entry1type == "group" & |
|
116 |
+ !get_edges$entry2type == "group" | |
|
117 |
+ get_edges$entry2type == "group" & |
|
118 |
+ !get_edges$entry1type == "group") |
|
119 |
+ edges_two_groups <- subset(get_edges, get_edges$entry1type == "group" & |
|
120 |
+ get_edges$entry2type == "group") |
|
121 |
+ if (nrow(edges_with_groups) > 0){ |
|
122 |
+ if (nrow(edges_one_group) > 0){ |
|
123 |
+ edges_one_group$entry1all <- NA |
|
124 |
+ edges_one_group$entry2all <- NA |
|
125 |
+ for (i in 1:nrow(edges_one_group)){ |
|
126 |
+ if (edges_one_group$entry1type[i] == "group"){ |
|
127 |
+ edges_one_group$entry1all[i] <- |
|
128 |
+ KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == |
|
129 |
+ edges_one_group$entry1[i])] |
|
130 |
+ l <- length(unlist(edges_one_group$entry1all[i])) |
|
131 |
+ edges_one_group$entry2all[i] <- |
|
132 |
+ list(as.character(rep(edges_one_group$entry2[i], l))) |
|
133 |
+ } |
|
134 |
+ else if (edges_one_group$entry2type[i] == "group"){ |
|
135 |
+ edges_one_group$entry2all[i] <- |
|
136 |
+ KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == |
|
137 |
+ edges_one_group$entry2[i])] |
|
138 |
+ l = length(unlist(edges_one_group$entry2all[i])) |
|
139 |
+ edges_one_group$entry1all[i] <- |
|
140 |
+ list(as.character(rep(edges_one_group$entry1[i], l))) |
|
141 |
+ } |
|
142 |
+ l = length(unlist(edges_one_group$entry1all[i])) |
|
143 |
+ edges_one_group$edgeID_list[i] <- |
|
144 |
+ list(rep(edges_one_group$edgeID[i],l)) |
|
145 |
+ } |
|
146 |
+ ewg_temp <- data.frame("edgeID" = |
|
147 |
+ unlist(edges_one_group$edgeID_list), |
|
148 |
+ "entry1" = unlist(edges_one_group$entry1all), |
|
149 |
+ "entry2" = unlist(edges_one_group$entry2all), |
|
150 |
+ stringsAsFactors = FALSE) |
|
151 |
+ edges_one_group <- edges_one_group[,-c(2:3, 13:15)] |
|
152 |
+ edges_one_group <- merge(ewg_temp,edges_one_group, by = "edgeID") |
|
153 |
+ edges_one_group <- edges_one_group[,-c(1)] |
|
154 |
+ } |
|
155 |
+ if (nrow(edges_two_groups) > 0){ |
|
156 |
+ edges_two_groups$entry1all <- NA |
|
157 |
+ edges_two_groups$entry2all <- NA |
|
158 |
+ for (i in 1:nrow(edges_two_groups)){ |
|
159 |
+ edges_two_groups$entry1all[i] <- |
|
160 |
+ KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == |
|
161 |
+ edges_two_groups$entry1[i])] |
|
162 |
+ edges_two_groups$entry2all[i] <- |
|
163 |
+ KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == |
|
164 |
+ edges_two_groups$entry2[i])] |
|
165 |
+ x <- length(unlist(edges_two_groups$entry1all[i])) |
|
166 |
+ y <- length(unlist(edges_two_groups$entry2all[i])) |
|
167 |
+ if (x == 1 & y >1){ |
|
168 |
+ edges_two_groups$entry1all[i] <- |
|
169 |
+ list(rep(edges_two_groups$entry1all[i], y)) |
|
170 |
+ } |
|
171 |
+ else if (x > 1 & y == 1){ |
|
172 |
+ edges_two_groups$entry2all[i] <- |
|
173 |
+ list(rep(edges_two_groups$entry2all[i], x)) |
|
174 |
+ } |
|
175 |
+ else if (x > 1 & y > 1){ |
|
176 |
+ edges_two_groups$entry1all[i] <- |
|
177 |
+ list(rep(edges_two_groups$entry1all[i],y)) |
|
178 |
+ edges_two_groups$entry1all[i] <- |
|
179 |
+ list(unlist(edges_two_groups$entry1all[i])[ |
|
180 |
+ sort.list(unlist(edges_two_groups$entry1all[i]))]) |
|
181 |
+ edges_two_groups$entry2all[i] <- |
|
182 |
+ list(rep(edges_two_groups$entry2all[i],x)) |
|
183 |
+ } |
|
184 |
+ l <- length(unlist(edges_two_groups$entry1all[i])) |
|
185 |
+ edges_two_groups$edgeID_list[i] <- |
|
186 |
+ list(rep(edges_two_groups$edgeID[i],l)) |
|
187 |
+ } |
|
188 |
+ ewg_temp <- data.frame("edgeID" = |
|
189 |
+ unlist(edges_two_groups$edgeID_list), |
|
190 |
+ "entry1" = unlist(edges_two_groups$entry1all), |
|
191 |
+ "entry2" = unlist(edges_two_groups$entry2all), |
|
192 |
+ stringsAsFactors = FALSE) |
|
193 |
+ edges_two_groups <- edges_two_groups[,-c(2:3, 13:15)] |
|
194 |
+ edges_two_groups <- merge(ewg_temp,edges_two_groups, by = "edgeID") |
|
195 |
+ edges_two_groups <- edges_two_groups[,-c(1)] |
|
196 |
+ } |
|
197 |
+ if (nrow(edges_one_group) > 0 & nrow(edges_two_groups) > 0){ |
|
198 |
+ all_edges <- rbind(edges_no_groups, edges_one_group, |
|
199 |
+ edges_two_groups) |
|
200 |
+ } |
|
201 |
+ else if (nrow(edges_one_group) > 0){ |
|
202 |
+ all_edges <- rbind(edges_no_groups, edges_one_group) |
|
203 |
+ } |
|
204 |
+ else if (nrow(edges_two_groups) > 0){ |
|
205 |
+ all_edges <- rbind(edges_no_groups, edges_two_groups) |
|
206 |
+ } |
|
207 |
+ } |
|
208 |
+ else { |
|
209 |
+ all_edges <- edges_no_groups |
|
210 |
+ } |
|
211 |
+ for (i in 1:nrow(all_edges)){ |
|
212 |
+ all_edges$entry1all_accession[i] <- |
|
213 |
+ list(KEGG_mappings$entryACCESSION[which(KEGG_mappings$entryID == |
|
214 |
+ all_edges$entry1[i])]) |
|
215 |
+ all_edges$entry2all_accession[i] <- |
|
216 |
+ list(KEGG_mappings$entryACCESSION[which(KEGG_mappings$entryID == |
|
217 |
+ all_edges$entry2[i])]) |
|
218 |
+ x <- length(unlist(all_edges$entry1all_accession[i])) |
|
219 |
+ y <- length(unlist(all_edges$entry2all_accession[i])) |
|
220 |
+ if (x == 1 & y >1){ |
|
221 |
+ all_edges$entry1all_accession[i] <- |
|
222 |
+ list(rep(all_edges$entry1all_accession[i], y)) |
|
223 |
+ } |
|
224 |
+ else if (x > 1 & y == 1) { |
|
225 |
+ all_edges$entry2all_accession[i] <- |
|
226 |
+ list(rep(all_edges$entry2all_accession[i], x)) |
|
227 |
+ } |
|
228 |
+ else if (x > 1 & y > 1) { |
|
229 |
+ all_edges$entry1all_accession[i] <- |
|
230 |
+ list(rep(all_edges$entry1all_accession[i],y)) |
|
231 |
+ all_edges$entry1all_accession[i] <- |
|
232 |
+ list(unlist(all_edges$entry1all_accession[i])[sort.list(unlist( |
|
233 |
+ all_edges$entry1all_accession[i]))]) |
|
234 |
+ all_edges$entry2all_accession[i] <- |
|
235 |
+ list(rep(all_edges$entry2all_accession[i],x)) |
|
236 |
+ } |
|
237 |
+ l <- length(unlist(all_edges$entry1all_accession[i])) |
|
238 |
+ all_edges$edgeID[i] = list(rep(i,l)) |
|
239 |
+ } |
|
240 |
+ for (i in 1:nrow(all_edges)){ |
|
241 |
+ all_edges$l1[i] <- length(all_edges$entry1all_accession[[i]]) |
|
242 |
+ all_edges$l2[i] <- length(all_edges$entry2all_accession[[i]]) |
|
243 |
+ } |
|
244 |
+ all_edges <- all_edges[all_edges$l1 >0 & all_edges$l2 > 0, -c(15,16)] |
|
245 |
+ expanded_edges <- data.frame("edgeID" = unlist(all_edges$edgeID), |
|
246 |
+ "entry1accession" = |
|
247 |
+ unlist(all_edges$entry1all_accession), |
|
248 |
+ "entry2accession" = |
|
249 |
+ unlist(all_edges$entry2all_accession), |
|
250 |
+ stringsAsFactors = FALSE) |
|
251 |
+ |
|
252 |
+ all_edges <- all_edges[,-c(12:14)] |
|
253 |
+ all_edges$edgeID <- seq(1:nrow(all_edges)) |
|
254 |
+ expanded_edges <- merge(expanded_edges, all_edges, by = "edgeID") |
|
255 |
+ for (i in 1:nrow(expanded_edges)){ |
|
256 |
+ expanded_edges$entry1type[i] <- |
|
257 |
+ KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == |
|
258 |
+ expanded_edges$entry1[i])][1] |
|
259 |
+ expanded_edges$entry2type[i] <- |
|
260 |
+ KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == |
|
261 |
+ expanded_edges$entry2[i])][1] |
|
262 |
+ if (expanded_edges$entry1type[i] == "gene"| |
|
263 |
+ expanded_edges$entry1type[i] == "compound") { |
|
264 |
+ expanded_edges$entry1symbol[i] <- |
|
265 |
+ KEGG_mappings$entrySYMBOL[which(KEGG_mappings$entryACCESSION == |
|
266 |
+ expanded_edges$entry1accession[i])][1] |
|
267 |
+ } |
|
268 |
+ else { |
|
269 |
+ expanded_edges$entry1symbol[i] <- NA |
|
270 |
+ } |
|
271 |
+ if (expanded_edges$entry2type[i] == "gene" | |
|
272 |
+ expanded_edges$entry2type[i] == "compound") { |
|
273 |
+ expanded_edges$entry2symbol[i] <- |
|
274 |
+ KEGG_mappings$entrySYMBOL[which(KEGG_mappings$entryACCESSION == |
|
275 |
+ expanded_edges$entry2accession[i])][1] |
|
276 |
+ } |
|
277 |
+ else { |
|
278 |
+ expanded_edges$entry2symbol[i] <- NA |
|
279 |
+ } |
|
280 |
+ } |
|
281 |
+ expanded_edges$entry1symbol <- unlist(expanded_edges$entry1symbol) |
|
282 |
+ expanded_edges$entry2symbol <- unlist(expanded_edges$entry2symbol) |
|
283 |
+ expanded_edges$is_direct <- as.numeric(expanded_edges$is_direct) |
|
284 |
+ return(expanded_edges) |
|
285 |
+} |
0 | 286 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,269 @@ |
1 |
+#' Get detailed KEGG mapping information for each map entity |
|
2 |
+#' @description Extract mapping information from KGML object and normalize |
|
3 |
+#' mappings based on multi-valued name attribute |
|
4 |
+#' |
|
5 |
+#' @param KGML_file An object of formal class KEGGPathway |
|
6 |
+#' @param convert_KEGG_IDs A logical indicator; if set to FALSE will run faster |
|
7 |
+#' however genes and compounds will remain labeled via KEGG codes (compounds) |
|
8 |
+#' or accession numbers (genes). This option must be taken into account if |
|
9 |
+#' data is being added. For example, the genes in 'KO_data' are identified by |
|
10 |
+#' symbols, thus it is neccessary to retain the default option to convert IDs |
|
11 |
+#' to symbols when planning to add edge data of this type. |
|
12 |
+#' |
|
13 |
+#' @return A dataframe object with unique entry information for all [node] |
|
14 |
+#' objects documented in the KEGG pathway. |
|
15 |
+#' Note that if mutiple objects (i.e. genes or compounds) have the same |
|
16 |
+#' entryID, this indicates that they share the same node [location] |
|
17 |
+#' in the pathway. |
|
18 |
+#' @export |
|
19 |
+#' @import org.Hs.eg.db |
|
20 |
+#' @import KEGGREST |
|
21 |
+#' @import AnnotationDbi |
|
22 |
+#' @importMethodsFrom KEGGgraph edges |
|
23 |
+#' @importMethodsFrom KEGGgraph nodes |
|
24 |
+#' @examples |
|
25 |
+#' p53_KGML <- get_KGML("hsa04115") |
|
26 |
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML, FALSE) |
|
27 |
+ |
|
28 |
+expand_KEGG_mappings <- |
|
29 |
+function (KGML_file, convert_KEGG_IDs = TRUE){ |
|
30 |
+ nodesN <- KEGGgraph::nodes(KGML_file) |
|
31 |
+ entryMAP<- data.frame(entry = seq(1:length(nodesN))) |
|
32 |
+ for (i in 1:nrow(entryMAP)){ |
|
33 |
+ entryMAP$entryID[i] <- nodesN[[i]]@entryID |
|
34 |
+ entryMAP$entryTYPE[i] <- nodesN[[i]]@type |
|
35 |
+ entryMAP$entryIDENTIFIER[i] <- list(nodesN[[i]]@name) |
|
36 |
+ if (entryMAP$entryTYPE[i] == "group"){ |
|
37 |
+ entryMAP$groupMEMBERS[i] <- |
|
38 |
+ list(nodesN[[entryMAP$entryID[i]]]@component) |
|
39 |
+ entryMAP$entryIDENTIFIER[i] <- |
|
40 |
+ list(entryMAP$entryIDENTIFIER[which(entryMAP$entryID %in% |
|
41 |
+ unlist(entryMAP$groupMEMBERS[i]))]) |
|
42 |
+ } |
|
43 |
+ else { |
|
44 |
+ entryMAP$groupMEMBERS[i] <- NA |
|
45 |
+ } |
|
46 |
+ if (entryMAP$entryTYPE[i] == "gene" | |
|
47 |
+ entryMAP$entryTYPE[i] == "compound"| |
|
48 |
+ entryMAP$entryTYPE[i] == "group"){ |
|
49 |
+ entryMAP$entryACCESSION[i]<- list(gsub(".*:", "", |
|
50 |
+ unlist(entryMAP$entryIDENTIFIER[i]))) |
|
51 |
+ } |
|
52 |
+ else { |
|
53 |
+ entryMAP$entryACCESSION[i] <- NA |
|
54 |
+ } |
|
55 |
+ entryMAP$entryTYPE[i] <- nodesN[[i]]@type |
|
56 |
+ entryMAP$entryNAMES[i] <- nodesN[[i]]@graphics@name |
|
57 |
+ entryMAP$FGcolor[i] <- nodesN[[i]]@graphics@fgcolor |
|
58 |
+ entryMAP$BGcolor[i] <- nodesN[[i]]@graphics@bgcolor |
|
59 |
+ entryMAP$shape[i] <- nodesN[[i]]@graphics@type |
|
60 |
+ entryMAP$Xcoord[i] <- nodesN[[i]]@graphics@x |
|
61 |
+ entryMAP$Ycoord[i] <- nodesN[[i]]@graphics@y |
|
62 |
+ entryMAP$width[i] <- nodesN[[i]]@graphics@width |
|
63 |
+ entryMAP$height[i] <- nodesN[[i]]@graphics@height |
|
64 |
+ if(is.na(entryMAP$entryNAMES[i])){ |
|
65 |
+ entryMAP$entryNAMES[i] <- "NoNameInKGML" |
|
66 |
+ } |
|
67 |
+ } |
|
68 |
+ entryMAP <- entryMAP[,-c(1,4)] |
|
69 |
+ entryMAP_groupings <- entryMAP[entryMAP$entryTYPE == "group",] |
|
70 |
+ entryMAP_extra <- entryMAP[entryMAP$entryTYPE == "map",] |
|
71 |
+ entryMAP_extra$entrySYMBOL <- NA |
|
72 |
+ entryMAP_dictionary <- entryMAP[entryMAP$entryTYPE == "gene" | |
|
73 |
+ entryMAP$entryTYPE == "compound",] |
|
74 |
+ for (i in 1:nrow(entryMAP_dictionary)){ |
|
75 |
+ if (entryMAP_dictionary$entryTYPE[i] == "compound"){ |
|
76 |
+ entryMAP_dictionary$entryACCESSION[i] <- |
|
77 |
+ entryMAP_dictionary$entryACCESSION[[i]][1] |
|
78 |
+ } |
|
79 |
+ } |
|
80 |
+ if(convert_KEGG_IDs){ |
|
81 |
+ for (i in 1:nrow(entryMAP_dictionary)){ |
|
82 |
+ if (entryMAP_dictionary$entryTYPE[i] == "compound"){ |
|
83 |
+ testval <- |
|
84 |
+ keggGet(entryMAP_dictionary$entryACCESSION[i])[[1]]$NAME |
|
85 |
+ if(!is.null(testval)){ |
|
86 |
+ entryMAP_dictionary$entryNAMES[i] <- list(testval) |
|
87 |
+ entryMAP_dictionary$entryNAMES[i] <- |
|
88 |
+ list(gsub(";", "", |
|
89 |
+ unlist(entryMAP_dictionary$entryNAMES[i]))) |
|
90 |
+ } |
|
91 |
+ } |
|
92 |
+ } |
|
93 |
+ } |
|
94 |
+ for (i in 1:nrow(entryMAP_dictionary)){ |
|
95 |
+ l = length(unlist(entryMAP_dictionary$entryACCESSION[i])) |
|
96 |
+ entryMAP_dictionary$entryID[i] <- |
|
97 |
+ list(rep(entryMAP_dictionary$entryID[i], l)) |
|
98 |
+ entryMAP_dictionary$entryTYPE[i] <- |
|
99 |
+ list(rep(entryMAP_dictionary$entryTYPE[i], l)) |
|
100 |
+ } |
|
101 |
+ emd_temp <- data.frame(cbind(unlist(entryMAP_dictionary$entryID), |
|
102 |
+ unlist(entryMAP_dictionary$entryTYPE), |
|
103 |
+ unlist(entryMAP_dictionary$entryACCESSION)), |
|
104 |
+ stringsAsFactors = FALSE) |
|
105 |
+ names(emd_temp) <- names(entryMAP[c(1,2,4)]) |
|
106 |
+ for (i in 1:nrow(emd_temp)){ |
|
107 |
+ if (emd_temp$entryTYPE[i] == "gene"){ |
|
108 |
+ if (convert_KEGG_IDs){ |
|
109 |
+ emd_temp$entrySYMBOL[i] <- |
|
110 |
+ suppressMessages(as.character(AnnotationDbi::select( |
|
111 |
+ org.Hs.eg.db::org.Hs.eg.db,keys = emd_temp$entryACCESSION[i], |
|
112 |
+ columns=c("SYMBOL"),keytype="ENTREZID")[2])) |
|
113 |
+ } |
|
114 |
+ else { |
|
115 |
+ emd_temp$entrySYMBOL[i] <- emd_temp$entryACCESSION[i] |
|
116 |
+ } |
|
117 |
+ } |
|
118 |
+ else if (emd_temp$entryTYPE[i] == "compound"){ |
|
119 |
+ emd_temp$entrySYMBOL[i] <- unlist( |
|
120 |
+ entryMAP_dictionary$entryNAMES[ |
|
121 |
+ entryMAP_dictionary$entryACCESSION == |
|
122 |
+ emd_temp$entryACCESSION[i]])[1] |
|
123 |
+ } |
|
124 |
+ else { |
|
125 |
+ emd_temp$entrySYMBOL[i] <- NA |
|
126 |
+ } |
|
127 |
+ } |
|
128 |
+ for (i in 1:nrow(emd_temp)){ |
|
129 |
+ if (gsub("[[:digit:]]","",emd_temp$entryACCESSION[i]) == "G" & |
|
130 |
+ is.na(emd_temp$entrySYMBOL[i])){ |
|
131 |
+ emd_temp$entrySYMBOL[i] <- emd_temp$entryACCESSION[i] |
|
132 |
+ } |
|
133 |
+ } |
|
134 |
+ emd_temp$groupMEMBERS <- NA |
|
135 |
+ for(i in 1:nrow(entryMAP_dictionary)){ |
|
136 |
+ entryMAP_dictionary$entryID[i] <- |
|
137 |
+ unlist(entryMAP_dictionary$entryID[i])[1] |
|
138 |
+ entryMAP_dictionary$entryTYPE[i] <- |
|
139 |
+ unlist(entryMAP_dictionary$entryTYPE[i])[1] |
|
140 |
+ if(entryMAP_dictionary$entryTYPE[i] == "compound"){ |
|
141 |
+ entryMAP_dictionary$entryNAMES[i] <- |
|
142 |
+ toString(unlist(entryMAP_dictionary$entryNAMES[[i]])) |
|
143 |
+ } |
|
144 |
+ } |
|
145 |
+ entryMAP_dictionary <- entryMAP_dictionary[,-c(2:4)] |
|
146 |
+ entryMAP_dictionary <- merge(emd_temp, entryMAP_dictionary, by = "entryID") |
|
147 |
+ if (nrow(entryMAP_groupings) > 0){ |
|
148 |
+ for(i in 1:nrow(entryMAP_groupings)){ |
|
149 |
+ entryMAP_groupings$entrySYMBOL[i] <- |
|
150 |
+ list(entryMAP_dictionary$entrySYMBOL[match(unlist( |
|
151 |
+ entryMAP_groupings$entryACCESSION[i]), |
|
152 |
+ entryMAP_dictionary$entryACCESSION)]) |
|
153 |
+ } |
|
154 |
+ entryMAP_dictionary <- |
|
155 |
+ rbind(entryMAP_extra, entryMAP_dictionary, entryMAP_groupings) |
|
156 |
+ no_groups <- FALSE |
|
157 |
+ } |
|
158 |
+ if (nrow(entryMAP_groupings) == 0){ |
|
159 |
+ entryMAP_dictionary <- rbind(entryMAP_extra, entryMAP_dictionary) |
|
160 |
+ entryMAP_dictionary$groupMEMBERS <- NA |
|
161 |
+ entryMAP_dictionary$groupID <- NA |
|
162 |
+ no_groups = TRUE |
|
163 |
+ } |
|
164 |
+ |
|
165 |
+ entryMAP_dictionary <- |
|
166 |
+ entryMAP_dictionary[order(as.double(entryMAP_dictionary$entryID)),] |
|
167 |
+ rownames(entryMAP_dictionary) <- seq(1:nrow(entryMAP_dictionary)) |
|
168 |
+ entryMAP_dictionary$entryNAMES <- gsub("...", "", |
|
169 |
+ entryMAP_dictionary$entryNAMES, |
|
170 |
+ fixed = TRUE) |
|
171 |
+ for (i in 1:nrow(entryMAP_dictionary)){ |
|
172 |
+ if (entryMAP_dictionary$entryTYPE[i] =="gene"){ |
|
173 |
+ entryMAP_dictionary$LABEL[i] <- strsplit(as.character( |
|
174 |
+ entryMAP_dictionary$entryNAMES[[i]]), ",")[[1]][1] |
|
175 |
+ } |
|
176 |
+ else if (entryMAP_dictionary$entryTYPE[i] == "compound"){ |
|
177 |
+ entryMAP_dictionary$LABEL[i] <- entryMAP_dictionary$entrySYMBOL[i] |
|
178 |
+ } |
|
179 |
+ else { |
|
180 |
+ entryMAP_dictionary$LABEL[i] = entryMAP_dictionary$entryNAMES[i] |
|
181 |
+ } |
|
182 |
+ } |
|
183 |
+ for (i in 1:nrow(entryMAP_dictionary)){ |
|
184 |
+ if (entryMAP_dictionary$entryTYPE[i] == "group"){ |
|
185 |
+ entryMAP_dictionary$entryNAMES[i] <- |
|
186 |
+ paste0(paste(unique(entryMAP_dictionary$LABEL[which( |
|
187 |
+ entryMAP_dictionary$entryID %in% |
|
188 |
+ unlist(entryMAP_dictionary$groupMEMBERS[i]))]),collapse = ":"), |
|
189 |
+ " Complex") |
|
190 |
+ entryMAP_dictionary$LABEL[i] <- entryMAP_dictionary$entryNAMES[i] |
|
191 |
+ } |
|
192 |
+ } |
|
193 |
+ if (!no_groups){ |
|
194 |
+ for (i in 1:nrow(entryMAP_groupings)){ |
|
195 |
+ l <- length(unlist(entryMAP_groupings$groupMEMBERS[i])) |
|
196 |
+ entryMAP_groupings$entryID[i] <- |
|
197 |
+ list(rep(entryMAP_groupings$entryID[i], l)) |
|
198 |
+ } |
|
199 |
+ group_mapper <- data.frame("groupID" = |
|
200 |
+ unlist(entryMAP_groupings$entryID), |
|
201 |
+ "entryID" = |
|
202 |
+ unlist(entryMAP_groupings$groupMEMBERS), |
|
203 |
+ stringsAsFactors = FALSE) |
|
204 |
+ for (i in 1:nrow(entryMAP_dictionary)){ |
|
205 |
+ if (entryMAP_dictionary$entryID[i] %in% group_mapper$entryID){ |
|
206 |
+ entryMAP_dictionary$groupID[i] <- |
|
207 |
+ group_mapper$groupID[match(entryMAP_dictionary$entryID[i], |
|
208 |
+ group_mapper$entryID)] |
|
209 |
+ } |
|
210 |
+ else { |
|
211 |
+ entryMAP_dictionary$groupID[i] <- NA |
|
212 |
+ } |
|
213 |
+ } |
|
214 |
+ } |
|
215 |
+ |
|
216 |
+ num_edges <- length(KGML_file@edges) |
|
217 |
+ if (num_edges > 0){ |
|
218 |
+ map_edge_data<- KEGGgraph::edges(KGML_file) |
|
219 |
+ edges<- data.frame(edgeID = seq(1:length(map_edge_data ))) |
|
220 |
+ for (i in 1:nrow(edges)){ |
|
221 |
+ edges$entry1[i] <- as.character(map_edge_data[[i]]@entry1ID) |
|
222 |
+ edges$entry2[i] <- as.character(map_edge_data[[i]]@entry2ID) |
|
223 |
+ } |
|
224 |
+ for (i in 1:nrow(entryMAP_dictionary)){ |
|
225 |
+ if (entryMAP_dictionary$entryID[i] %in% edges$entry1 | |
|
226 |
+ entryMAP_dictionary$entryID[i] %in% edges$entry2){ |
|
227 |
+ entryMAP_dictionary$in_relationship[i] = 1 |
|
228 |
+ } |
|
229 |
+ else if (entryMAP_dictionary$groupID[i] %in% edges$entry1 | |
|
230 |
+ entryMAP_dictionary$groupID[i] %in% edges$entry2){ |
|
231 |
+ entryMAP_dictionary$in_relationship[i] = 1 |
|
232 |
+ } |
|
233 |
+ else { |
|
234 |
+ entryMAP_dictionary$in_relationship[i] = 0 |
|
235 |
+ } |
|
236 |
+ } |
|
237 |
+ } |
|
238 |
+ if (num_edges == 0){ |
|
239 |
+ entryMAP_dictionary$in_relationship = 0 |
|
240 |
+ } |
|
241 |
+ for (i in 1:nrow(entryMAP_dictionary)){ |
|
242 |
+ if (is.na(entryMAP_dictionary$entrySYMBOL[i])){ |
|
243 |
+ entryMAP_dictionary$entrySYMBOL[i] <- entryMAP_dictionary$LABEL[i] |
|
244 |
+ } |
|
245 |
+ } |
|
246 |
+ map_nodes <- entryMAP_dictionary$LABEL[entryMAP_dictionary$entryTYPE=="map"] |
|
247 |
+ j = 0 |
|
248 |
+ for (i in 1:length(map_nodes)){ |
|
249 |
+ if (substring(map_nodes[i],1,5) == "TITLE"){ |
|
250 |
+ j = j+1 |
|
251 |
+ } |
|
252 |
+ } |
|
253 |
+ if (j == 0){ |
|
254 |
+ title <- KGML_file@pathwayInfo@title |
|
255 |
+ min_Y <- min(entryMAP_dictionary$Ycoord) |
|
256 |
+ min_X <- min(entryMAP_dictionary$Xcoord)+100 |
|
257 |
+ title_node <- data.frame("entryID" = 0, "entryTYPE" = "map", |
|
258 |
+ "groupMEMBERS" = NA, "entryACCESSION" = NA, |
|
259 |
+ "entryNAMES" = title, |
|
260 |
+ "FGcolor" = "#000000", "BGcolor" = "#FFFFFF", |
|
261 |
+ "shape" = "rectangle", "Xcoord" = min_X, |
|
262 |
+ "Ycoord"= min_Y, "height" = 25, "width" = 220, |
|
263 |
+ "entrySYMBOL"= title, "groupID"=NA, |
|
264 |
+ "LABEL"=paste0("TITLE:",title), |
|
265 |
+ "in_relationship" = 0) |
|
266 |
+ entryMAP_dictionary <- rbind(entryMAP_dictionary, title_node) |
|
267 |
+ } |
|
268 |
+ return(entryMAP_dictionary) |
|
269 |
+} |
|
0 | 270 |
\ No newline at end of file |
1 | 271 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,95 @@ |
1 |
+#' cyREST utility functions |
|
2 |
+#' @description A subset of the R utility functions available from/defined |
|
3 |
+#' by cyREST. The function mapAttributes is called from within toCytoscape |
|
4 |
+#' which, in turn, is called from within cyto_vis. |
|
5 |
+#' @export |
|
6 |
+#' @import methods |
|
7 |
+#' @importFrom igraph E edge_attr graph.attributes V vertex.attributes |
|
8 |
+#' list.vertex.attributes get.edgelist ecount edge.attributes |
|
9 |
+#' list.edge.attributes |
|
10 |
+#' @param attr.names Attribute names of an igraph object |
|
11 |
+#' @param all.attr The attribute value if an igraph object |
|
12 |
+#' @param i The index for a given igraph object |
|
13 |
+#' @param igraphobj A graph object compatible for use with the package igraph |
|