Browse code

Adding KEGGlincs, flowPloidy, MAST, PathoStat, matter, MoonlightR, psichomics, anamiR, MutationalPatterns, HelloRanges, crisprseekplus, annotatr, meshes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/KEGGlincs@122278 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 12/10/2016 17:44:52
Showing 57 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,4 @@
1
+^.*\.Rproj$
2
+^\.Rproj\.user$
3
+^README\.md$
4
+^image_files$
0 5
new file mode 100644
... ...
@@ -0,0 +1,5 @@
1
+.Rproj.user
2
+.Rhistory
3
+.RData
4
+.Ruserdata
5
+inst/doc
0 6
new file mode 100644
... ...
@@ -0,0 +1,23 @@
1
+Package: KEGGlincs
2
+Type: Package
3
+Title: Visualize all edges within a KEGG pathway and overlay LINCS data
4
+        [option]
5
+Version: 0.99.4
6
+Date: 2016-06-02
7
+Author: Shana White
8
+Maintainer: Shana White <vandersm@mail.uc.edu>
9
+Description: See what is going on 'under the hood' of KEGG pathways by
10
+        explicitly re-creating the pathway maps from information
11
+        obtained from KGML files.
12
+License: GPL-3
13
+LazyData: true
14
+RoxygenNote: 5.0.1
15
+Depends: R (>= 3.3), KOdata, hgu133a.db, org.Hs.eg.db (>= 3.3.0)
16
+SystemRequirements: Cytoscape (>= 3.3.0), Java (>= 8)
17
+Suggests: BiocInstaller (>= 1.20.3), knitr, rmarkdown, graph
18
+biocViews: NetworkInference, GeneExpression, DataRepresentation,
19
+        ThirdPartyClient,CellBiology,GraphAndNetwork,Pathways,KEGG,Network
20
+Imports:
21
+        AnnotationDbi,KEGGgraph,igraph,plyr,gtools,httr,RJSONIO,KEGGREST,
22
+        methods,graphics,stats,utils
23
+VignetteBuilder: knitr
0 24
new file mode 100644
... ...
@@ -0,0 +1,18 @@
1
+Version: 1.0
2
+
3
+RestoreWorkspace: Default
4
+SaveWorkspace: Default
5
+AlwaysSaveHistory: Default
6
+
7
+EnableCodeIndexing: Yes
8
+UseSpacesForTab: Yes
9
+NumSpacesForTab: 4
10
+Encoding: UTF-8
11
+
12
+RnwWeave: Sweave
13
+LaTeX: pdfLaTeX
14
+
15
+BuildType: Package
16
+PackageUseDevtools: Yes
17
+PackageInstallArgs: --no-multiarch --with-keep.source
18
+PackageBuildArgs: --resave-data
0 19
new file mode 100644
... ...
@@ -0,0 +1,48 @@
1
+# Generated by roxygen2: do not edit by hand
2
+
3
+export(KEGG_lincs)
4
+export(add_edge_data)
5
+export(cyto_vis)
6
+export(edge_mapping_info)
7
+export(expand_KEGG_edges)
8
+export(expand_KEGG_mappings)
9
+export(generate_mappings)
10
+export(get_KGML)
11
+export(get_fisher_info)
12
+export(get_graph_object)
13
+export(node_mapping_info)
14
+export(overlap_info)
15
+export(path_genes_by_cell_type)
16
+export(refine_mappings)
17
+export(toCytoscape)
18
+import(AnnotationDbi)
19
+import(KEGGREST)
20
+import(KOdata)
21
+import(RJSONIO)
22
+import(hgu133a.db)
23
+import(httr)
24
+import(methods)
25
+import(org.Hs.eg.db)
26
+importFrom(graphics,barplot)
27
+importFrom(graphics,legend)
28
+importFrom(graphics,par)
29
+importFrom(gtools,smartbind)
30
+importFrom(igraph,"V<-")
31
+importFrom(igraph,E)
32
+importFrom(igraph,V)
33
+importFrom(igraph,ecount)
34
+importFrom(igraph,edge.attributes)
35
+importFrom(igraph,edge_attr)
36
+importFrom(igraph,get.edgelist)
37
+importFrom(igraph,graph.attributes)
38
+importFrom(igraph,list.edge.attributes)
39
+importFrom(igraph,list.vertex.attributes)
40
+importFrom(igraph,vertex.attributes)
41
+importFrom(plyr,rename)
42
+importFrom(stats,fisher.test)
43
+importFrom(stats,p.adjust)
44
+importFrom(stats,p.adjust.methods)
45
+importFrom(utils,combn)
46
+importFrom(utils,data)
47
+importMethodsFrom(KEGGgraph,edges)
48
+importMethodsFrom(KEGGgraph,nodes)
0 49
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+News for KEGGlincs
0 2
\ No newline at end of file
1 3
new file mode 100644
... ...
@@ -0,0 +1,149 @@
1
+#' Combines all other package functions for one-step pathway visualization
2
+#' @export
3
+#' @param pathwayid A KEGG pathway ID of the form "hsa12345" 
4
+#' (only human pathways currently)
5
+#' @param cell_line If left as NA will generate a pathway map without 
6
+#' data-dependent attributes (such as edge width).  To use in combination 
7
+#' with LINCS data, choose from the set of cell lines: 
8
+#' (A375,A549,ASC,HA1E,HCC515,HEK293T,HEKTE,HEPG2,HT29,MCF7,NCIH716,NPC,PC3,
9
+#' SHSY5Y,SKL,SW480,VCAP)
10
+#' @param refine_by_cell_line A logical indicator
11
+#' @param add_L1000_edge_data A logical indicator 
12
+#' @param data_type Choose from data types: (100_full, 100_bing, 50_lm)
13
+#' @param pert_time Choose from (6,24,48,96,120,144,168)
14
+#' @param only_mapped A logical indicator; if set to FALSE will return 'de-novo'
15
+#' edges that 'exist' in data but are not documented in KEGG
16
+#' @param significance_markup A logical indicator; if set to TRUE will color
17
+#'  edges based on direction and significance of correlation (as determined by 
18
+#'  user-data-analysis)
19
+#' @param layered_nodes A logical indicator; if set to TRUE will create a graph 
20
+#' with 'stacked' nodes that the user can manipulate when multiple nodes are 
21
+#' mapped to one location
22
+#' @param graph_title An optional user-specified graph title
23
+#' @param get_data A logical indicator; if set to true, will return the 
24
+#' 'expanded' edge information for the specified pathway
25
+#' @param convert_KEGG_IDs A logical indicator; if set to TRUE KEGG 
26
+#' compounds will remain labeled via KEGG codes (do not need KEGGREST)
27
+#' @return  A dynamic map in Cytoscape automatically formatted for convenient 
28
+#' viewing and, if idicated by user, a data.frame object with detailed 
29
+#' information for 'expanded' KEGG edges
30
+#' @examples \dontrun{ 
31
+#' 
32
+#' #Default KEGG pathway with colored edges representing type of relationship:
33
+#' KEGG_lincs("hsa04115", convert_KEGG_IDs = FALSE)
34
+#' 
35
+#' #KEGG pathway with edge width and color based on observed experimental data:
36
+#' KEGG_lincs("hsa04115", "HA1E")
37
+#' 
38
+#' #Have edge information data.frame to be output to the global environment:
39
+#' p53_edge_info <- KEGG_lincs("hsa04115", graph_title = "p53"
40
+#'                              convert_KEGG_IDs = FALSE, get_data = TRUE)
41
+#'                              }
42
+
43
+
44
+KEGG_lincs <-
45
+function(pathwayid, cell_line = NA,
46
+                            refine_by_cell_line = NA,
47
+                            add_L1000_edge_data = TRUE,  
48
+                            significance_markup = TRUE,
49
+                            data_type = "100_full",
50
+                            pert_time = 96,
51
+                            only_mapped = TRUE,
52
+                            layered_nodes = FALSE, graph_title = "default",
53
+                            get_data = FALSE,
54
+                            convert_KEGG_IDs = TRUE){
55
+    if (is.na(refine_by_cell_line)){
56
+        if (is.na(cell_line)){
57
+            refine_by_cell_line <- FALSE
58
+        }
59
+        else {
60
+            refine_by_cell_line <- TRUE
61
+        }
62
+    }
63
+
64
+    KGML <- get_KGML(pathwayid, get_if_no_edges = TRUE)
65
+    if(!isS4(KGML)){
66
+        return()
67
+    }
68
+    KEGG_mappings <- 
69
+        expand_KEGG_mappings(KGML,convert_KEGG_IDs = convert_KEGG_IDs)
70
+    if (refine_by_cell_line) {
71
+        full_mappings <- KEGG_mappings
72
+        KEGG_mappings <- refine_mappings(KEGG_mappings, cell_line)
73
+        for (i in 1:nrow(full_mappings)){
74
+            if(!full_mappings$entryID[i] %in% KEGG_mappings$entryID){
75
+                full_mappings$BGcolor[i] <- "#d3d3d3"
76
+                full_mappings$in_relationship <- 0
77
+            }
78
+        }
79
+    }
80
+    expanded_edges <- expand_KEGG_edges(KGML, KEGG_mappings)
81
+    if(expanded_edges$type[1] == "dummy"){
82
+        graph_title <- paste0("Pathway = ", pathwayid, ":", 
83
+                        KGML@pathwayInfo@title, "Cell-Line: ", cell_line, 
84
+                        "  *No Edges in Pathway")
85
+    }
86
+
87
+    if (is.na(cell_line)){
88
+        edge_map <- edge_mapping_info(expanded_edges)
89
+    if (graph_title == "default"){
90
+        graph_title <- paste0("Pathway = ", pathwayid, ":", 
91
+                            KGML@pathwayInfo@title)
92
+        }
93
+    }
94
+    if (!is.na(cell_line) & !add_L1000_edge_data){
95
+        edge_map <- edge_mapping_info(expanded_edges)
96
+        if (graph_title == "default"){
97
+            graph_title <- paste0("Pathway = ", pathwayid, ":", 
98
+                            KGML@pathwayInfo@title, "Refined by Cell-Line:  ",
99
+                            cell_line)
100
+        }
101
+    }
102
+    if (nrow(expanded_edges[expanded_edges$type == "maplink",]) == 
103
+        nrow(expanded_edges) & only_mapped) {
104
+            edge_map <- edge_mapping_info(expanded_edges)
105
+            if (graph_title == "default"){
106
+                graph_title <- paste0("Pathway = ", pathwayid, ":", 
107
+                            KGML@pathwayInfo@title, "Cell-Line: ", 
108
+                            cell_line,  "  *No Edges in Data")
109
+            }
110
+        warning("All documented edges are of type 'maplink'; 
111
+            Overlap data cannot be mapped to selected pathway")
112
+    }
113
+    if(!is.na(cell_line) & add_L1000_edge_data) {
114
+        user_data <- overlap_info(KGML, KEGG_mappings, cell_line, 
115
+                                data_type = data_type, pert_time = pert_time)
116
+        if (!is.na(user_data)[1,1]) {
117
+            edges_plus_data <- add_edge_data(expanded_edges, KEGG_mappings, 
118
+                                            user_data, c(10,12), 
119
+                                            only_mapped = only_mapped)
120
+            edge_map <- edge_mapping_info(edges_plus_data, data_added = TRUE, 
121
+                                    significance_markup = significance_markup)
122
+            if (graph_title == "default"){
123
+                graph_title <- paste0("Pathway = ", pathwayid, ":", 
124
+                                KGML@pathwayInfo@title, ",  Cell-Line: ", 
125
+                                cell_line, ",  Data type: ", data_type)
126
+            }
127
+        }
128
+        else {
129
+            edge_map <- edge_mapping_info(expanded_edges)
130
+            if (graph_title == "default"){
131
+                graph_title <- paste0("Pathway = ", pathwayid, ":", 
132
+                                KGML@pathwayInfo@title, ",  Cell-Line: ", 
133
+                                cell_line, "   *No Edges in Data")
134
+            }
135
+        }
136
+    }
137
+    if(refine_by_cell_line){
138
+        node_map <- node_mapping_info(full_mappings)
139
+    }
140
+    else{
141
+        node_map <- node_mapping_info(KEGG_mappings)
142
+    }
143
+    graph_object <- get_graph_object(node_map, edge_map, 
144
+                                    layered_nodes = layered_nodes)
145
+    cyto_vis(graph_object, title = graph_title)
146
+    if(get_data){
147
+        return(edge_map)
148
+    }
149
+}
0 150
new file mode 100644
... ...
@@ -0,0 +1,5 @@
1
+#' KEGGlincs: an R package designed to explore the edges in KEGG pathways
2
+#' @docType package
3
+#' @name KEGGlincs
4
+
5
+NULL
0 6
\ No newline at end of file
1 7
new file mode 100644
... ...
@@ -0,0 +1,266 @@
1
+#' Annotate KEGG edge mappings with user data
2
+#' @description Add data column[s] to object created from function 
3
+#' expand_KEGG_edges 
4
+#' @export
5
+#' @importFrom gtools smartbind
6
+#' @importFrom plyr rename
7
+#' @param expanded_edges The data frame object generated via the function 
8
+#' expand_KEGG_edges  
9
+#' @param KEGG_mappings KEGG_mappings The data.frame object generated by the 
10
+#' function expand_KEGG_mappings
11
+#' @param user_data A data frame where in which the first two columns contain 
12
+#' gene symbols representing an edge and any/all other column[s] contain 
13
+#' corresponding edge data.
14
+#' @param data_column_no The column index for desired user data to be added
15
+#' @param only_mapped A logical indicator; if set to FALSE will return 'de-novo'
16
+#' edges that 'exist' in data but are not documented in KEGG 
17
+#' @return A data frame object with detailed KEGG edge mappings annotated with 
18
+#' user data
19
+#' @examples 
20
+#' p53_KGML <- get_KGML('hsa04115')
21
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML)
22
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings)
23
+#' p53_HA1E_data <- overlap_info(p53_KGML, p53_KEGG_mappings, 'HA1E', 
24
+#'                                data_type = '100_bing', only_mapped = FALSE)
25
+#'                                
26
+#' p53_edges_HA1E_ALL <- add_edge_data(p53_edges, p53_KEGG_mappings, 
27
+#'                                      p53_HA1E_data, c(3, 10,12))
28
+#' p53_edges_HA1E_MAPPED <- add_edge_data(p53_edges, p53_KEGG_mappings, 
29
+#'                                         p53_HA1E_data, c(3, 10,12), 
30
+#'                                         only_mapped = TRUE)
31
+
32
+
33
+add_edge_data <- function(expanded_edges, KEGG_mappings, 
34
+                        user_data, data_column_no = 3, only_mapped = FALSE) {
35
+    
36
+    expanded_edges <- expanded_edges[expanded_edges$type != "maplink", ]
37
+    
38
+    if ("pre_mapped" %in% names(user_data)) {
39
+        user_data <- user_data[, -c(which(colnames(user_data) == "pre_mapped"))]
40
+    }
41
+    
42
+    if (nrow(expanded_edges) > 0) {
43
+        
44
+        expanded_edges$unique_ID = paste0(expanded_edges$entry1symbol, ",", 
45
+            expanded_edges$entry2symbol)
46
+        expanded_edges$unique_IDR = paste0(expanded_edges$entry2symbol, ",", 
47
+            expanded_edges$entry1symbol)
48
+        user_data$unique_ID = paste0(user_data$knockout1, ",", 
49
+                                        user_data$knockout2)
50
+        pre_mapped1 <- subset(user_data, user_data$unique_ID %in% 
51
+                                    expanded_edges$unique_ID)
52
+        pre_mapped2 <- subset(user_data, user_data$unique_ID %in% 
53
+                                    expanded_edges$unique_IDR)
54
+        pre_mapped2 <- pre_mapped2[, c(2, 1, 3:ncol(pre_mapped2))]
55
+        names(pre_mapped2) = names(pre_mapped1)
56
+        if (nrow(pre_mapped2) >= 1 & nrow(pre_mapped1) >= 1) {
57
+            pre_mapped2$unique_ID <- paste0(pre_mapped2$knockout1, ",", 
58
+                                            pre_mapped2$knockout2)
59
+            pre_mapped <- rbind(pre_mapped1, pre_mapped2)
60
+        }
61
+        if (nrow(pre_mapped2) == 0 & (nrow(pre_mapped1) == 0)) {
62
+            pre_mapped <- data.frame(unique_ID = NA)
63
+        }
64
+        if (nrow(pre_mapped1) == 0 & nrow(pre_mapped2) >= 1) {
65
+            pre_mapped2$unique_ID <- paste0(pre_mapped2$knockout1, ",", 
66
+                                            pre_mapped2$knockout2)
67
+            pre_mapped <- pre_mapped2
68
+        }
69
+        if (nrow(pre_mapped2) == 0 & nrow(pre_mapped1) >= 1) {
70
+            pre_mapped <- pre_mapped1
71
+        }
72
+        if (!is.na(pre_mapped[1, 1])) {
73
+            expanded_edges_1 <- subset(expanded_edges, expanded_edges$unique_ID
74
+                                        %in% pre_mapped$unique_ID)
75
+            expanded_edges_1 <- expanded_edges[expanded_edges$unique_ID %in% 
76
+                pre_mapped$unique_ID, ]
77
+            expanded_edges_2 <- expanded_edges[!expanded_edges$unique_ID %in% 
78
+                pre_mapped$unique_ID, ]
79
+            expanded_edges_1$has_data = 1
80
+            testval <- nrow(expanded_edges_2)
81
+            if (testval > 0) {
82
+                expanded_edges_2$has_data = 0
83
+                edge_set <- rbind(expanded_edges_1, expanded_edges_2)
84
+            } else {
85
+                edge_set <- expanded_edges_1
86
+            }
87
+            
88
+            edge_set <- edge_set[order(edge_set$unique_ID), ]
89
+            
90
+            data_to_add <- cbind(as.character(pre_mapped[, "unique_ID"]), 
91
+                                pre_mapped[, data_column_no])
92
+            data_to_add <- data.frame(data_to_add, stringsAsFactors = FALSE)
93
+            names(data_to_add)[1] <- "unique_ID"
94
+            if (ncol(data_to_add) == 2) {
95
+                names(data_to_add)[2] <- names(pre_mapped)[data_column_no]
96
+            }
97
+            
98
+            data_to_add <- data_to_add[order(data_to_add$unique_ID), ]
99
+            
100
+            annotated_edges <- merge(edge_set, data_to_add, "unique_ID", 
101
+                                     all.x = TRUE)
102
+            drops <- c("unique_ID", "unique_IDR")
103
+            annotated_edges <- annotated_edges[, !(names(annotated_edges) %in% 
104
+                drops)]
105
+            cat(paste0("Number of edges documented in selected pathway = ", 
106
+                nrow(annotated_edges)), "\n")
107
+            cat(paste0("Number of edges with corresponding user data = ", 
108
+                        sum(annotated_edges$has_data), "\n"))
109
+            cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/
110
+                        nrow(annotated_edges) * 100, 2), "%", "\n"))
111
+            annotated_edges$premapped <- 1
112
+            un_mapped <- subset(user_data, !user_data$unique_ID %in% 
113
+                                expanded_edges$unique_ID & 
114
+                            !user_data$unique_ID %in% expanded_edges$unique_IDR)
115
+            un_mapped_edges <- un_mapped[, c(1:2, data_column_no,
116
+                                            ncol(un_mapped))]
117
+            
118
+            if (only_mapped) {
119
+                return(annotated_edges)
120
+            }
121
+        } else {
122
+            annotated_edges <- expanded_edges
123
+            annotated_edges$premapped <- 1
124
+            annotated_edges$has_data <- 0
125
+            un_mapped <- user_data
126
+            un_mapped$unique_ID <- paste0(un_mapped$knockout1, 
127
+                                        un_mapped$knockout2)
128
+            un_mapped_edges <- un_mapped[, c(1:2, data_column_no, 
129
+                                            ncol(un_mapped))]
130
+            un_mapped_edges$premapped <- 0
131
+            un_mapped_edges$has_data <- 0
132
+            cat(paste0("Number of edges documented in selected pathway = ", 
133
+                nrow(annotated_edges)), "\n")
134
+            cat(paste0("Number of edges with corresponding user data = ", 
135
+                        sum(annotated_edges$has_data), "\n"))
136
+            cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/
137
+                        nrow(annotated_edges) * 100, 2), "%", "\n"))
138
+            if (only_mapped) {
139
+                cat(paste0("No documented edges are found in data; 
140
+                    only data for de-novo edges can be mapped \n"))
141
+                return(annotated_edges)
142
+            }
143
+        }
144
+    } else if (nrow(expanded_edges) == 0) {
145
+        annotated_edges <- expanded_edges
146
+        annotated_edges$premapped <- 1
147
+        annotated_edges$has_data <- 0
148
+        un_mapped <- user_data
149
+        un_mapped$unique_ID <- paste0(un_mapped$knockout1, un_mapped$knockout2)
150
+        un_mapped_edges <- un_mapped[, c(1:2, data_column_no, ncol(un_mapped))]
151
+        un_mapped_edges$premapped <- 0
152
+        un_mapped_edges$has_data <- 0
153
+        cat(paste0("Number of edges documented in selected pathway = ", 
154
+                    nrow(annotated_edges)), "\n")
155
+        cat(paste0("Number of edges with corresponding user data = ", 
156
+                    sum(annotated_edges$has_data), "\n"))
157
+        cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/
158
+                    nrow(annotated_edges) * 100, 2), "%", "\n"))
159
+        if (only_mapped) {
160
+            return(annotated_edges)
161
+        }
162
+    }
163
+    names(un_mapped_edges)[1:2] <- c("entryNAME_1", "entryNAME_2")
164
+    for (i in 1:nrow(un_mapped_edges)) {
165
+        un_mapped_edges$Source_eid[i] <- 
166
+            list(KEGG_mappings$entryID[which(KEGG_mappings$entrySYMBOL == 
167
+            un_mapped_edges$entryNAME_1[i])])
168
+        un_mapped_edges$Target_eid[i] <- 
169
+            list(KEGG_mappings$entryID[which(KEGG_mappings$entrySYMBOL == 
170
+            un_mapped_edges$entryNAME_2[i])])
171
+        x <- length(unlist(un_mapped_edges$Source_eid[i]))
172
+        y <- length(unlist(un_mapped_edges$Target_eid[i]))
173
+        if (x > 1 | y > 1) {
174
+            un_mapped_edges$simple[i] <- FALSE
175
+        } else {
176
+            un_mapped_edges$simple[i] <- TRUE
177
+        }
178
+    }
179
+    simple_un_mapped_edges <- un_mapped_edges[un_mapped_edges$simple == TRUE, 
180
+        -ncol(un_mapped_edges)]
181
+    complex_un_mapped_edges <- un_mapped_edges[un_mapped_edges$simple == FALSE, 
182
+        -ncol(un_mapped_edges)]
183
+    test_val <- nrow(complex_un_mapped_edges)
184
+    if (test_val == 0) {
185
+        un_mapped_edges <- simple_un_mapped_edges
186
+    } else {
187
+        keeps <- c("Source_eid", "Target_eid", "unique_ID")
188
+        c_temp <- complex_un_mapped_edges[, (names(complex_un_mapped_edges) %in% 
189
+            keeps)]
190
+        
191
+        for (i in 1:nrow(c_temp)) {
192
+            x <- length(unlist(c_temp$Source_eid[i]))
193
+            y <- length(unlist(c_temp$Target_eid[i]))
194
+            l <- x * y
195
+            
196
+            c_temp$Source_eid[i] <- list(sort(unlist(rep(c_temp$Source_eid[i], 
197
+                y))))
198
+            c_temp$Target_eid[i] <- list(unlist(rep(c_temp$Target_eid[i], x)))
199
+            c_temp$unique_ID[i] <- list(rep(c_temp$unique_ID[i], l))
200
+        }
201
+        
202
+        c_temp <- data.frame(unique_ID = unlist(c_temp$unique_ID), Source_eid = 
203
+                            unlist(c_temp$Source_eid),
204
+                            Target_eid = unlist(c_temp$Target_eid))
205
+        drops <- c("Source_eid", "Target_eid")
206
+        complex_un_mapped_edges <- 
207
+            complex_un_mapped_edges[, !(names(complex_un_mapped_edges) %in% 
208
+            drops)]
209
+        complex_un_mapped_edges <- merge(complex_un_mapped_edges, c_temp)
210
+        un_mapped_edges <- rbind(simple_un_mapped_edges, 
211
+                                 complex_un_mapped_edges)
212
+    }
213
+    un_mapped_edges$Source_eid <- unlist(un_mapped_edges$Source_eid)
214
+    un_mapped_edges$Target_eid <- unlist(un_mapped_edges$Target_eid)
215
+    un_mapped_edges <- plyr::rename(un_mapped_edges, c(Source_eid = "entry1", 
216
+        Target_eid = "entry2", entryNAME_1 = "entry1symbol", 
217
+        entryNAME_2 = "entry2symbol"))
218
+    un_mapped_edges$subtype1 <- "de_novo"
219
+    
220
+    drops <- c("unique_ID")
221
+    un_mapped_edges <- un_mapped_edges[, !(names(un_mapped_edges) %in% drops)]
222
+    un_mapped_edges$has_data <- 1
223
+    un_mapped_edges$premapped <- 0
224
+    for (i in 1:nrow(un_mapped_edges)) {
225
+        un_mapped_edges$entry1accession[i] <- 
226
+            KEGG_mappings$entryACCESSION[which(KEGG_mappings$entrySYMBOL == 
227
+            un_mapped_edges$entry1symbol[i])][1]
228
+        un_mapped_edges$entry2accession[i] <- 
229
+            KEGG_mappings$entryACCESSION[which(KEGG_mappings$entrySYMBOL == 
230
+            un_mapped_edges$entry2symbol[i])][1]
231
+        un_mapped_edges$entry1type[i] <- 
232
+            KEGG_mappings$entryTYPE[which(KEGG_mappings$entrySYMBOL == 
233
+            un_mapped_edges$entry1symbol[i])][1]
234
+        un_mapped_edges$entry2type[i] <- 
235
+            KEGG_mappings$entryTYPE[which(KEGG_mappings$entrySYMBOL == 
236
+            un_mapped_edges$entry2symbol[i])][1]
237
+    }
238
+    
239
+    if (nrow(annotated_edges) > 0 & nrow(un_mapped_edges) > 0) {
240
+        all_edges <- gtools::smartbind(annotated_edges, un_mapped_edges)
241
+    } else if (nrow(annotated_edges) > 0) 
242
+        all_edges <- annotated_edges else {
243
+        all_edges <- un_mapped_edges
244
+        all_edges$value <- NA
245
+        all_edges$subtype2 <- NA
246
+        all_edges$value2 <- NA
247
+        all_edges$specific_subtype <- NA
248
+        all_edges$type <- NA
249
+        all_edges$is_direct <- 1
250
+        all_edges$edgeID <- seq(1:nrow(un_mapped_edges))
251
+        refcols <- c("edgeID", "entry1accession", "entry2accession", "entry1", 
252
+            "entry2")
253
+        all_edges <- all_edges[, c(refcols, setdiff(names(all_edges), refcols))]
254
+        cat(paste0("All documented edges are of type maplink; only data for 
255
+                    de-novo edges can be mapped \n"))
256
+    }
257
+    
258
+    for (i in 1:nrow(all_edges)) {
259
+        if (all_edges$entry1[i] == all_edges$entry2[i]) {
260
+            all_edges$paralogs[i] <- 1
261
+        } else {
262
+            all_edges$paralogs[i] <- 0
263
+        }
264
+    }
265
+    return(all_edges)
266
+}
0 267
new file mode 100644
... ...
@@ -0,0 +1,95 @@
1
+#' Send graph to Cytoscape via CyREST
2
+#' @description View the KEGG pathway in Cytoscape. With either the 
3
+#' 'expanded edges' or 'stacked nodes' layout, users can visualize and interact 
4
+#' with the graphs [strictly] as they are documented in the most recent KGML
5
+#' available from KEGG.  
6
+#' This function is a modified version of the function send2cy(),
7
+#'  which is part of the cyREST utility functions.
8
+#' @export
9
+#' @import methods
10
+#' @import httr
11
+#' @import RJSONIO
12
+#' @param graph_object An igraph object such as the one generated by the 
13
+#' function \code{\link{get_graph_object}}
14
+#' @param title An optional title for the graph when it is in Cytoscape
15
+#' @param edge_width_attribute The attribute that will be used for edge width; 
16
+#' if data is not added or the attribute is not part of the graphing 
17
+#' information, the edge width will default to 1. 
18
+#' @param port.number The port address for Cytoscape 
19
+#' @return A dynamic map in Cytoscape automatically formatted for convenient 
20
+#' viewing.  
21
+#' @examples 
22
+#' p53_KGML <- get_KGML("hsa04115")
23
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML, FALSE)
24
+#' nodes <- node_mapping_info(p53_KEGG_mappings)
25
+#' 
26
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings)
27
+#' edges <- edge_mapping_info(p53_edges) 
28
+#' 
29
+#' p53_graph_object <- get_graph_object(nodes, edges)
30
+#'                                       
31
+#' @examples \dontrun{
32
+#' cyto_vis(p53_graph_object, "Default p53 Graph [no data added]")
33
+#' 
34
+#' #Workflow to visualize graph with data-dependent attributes:
35
+#' 
36
+#' p53_KGML <- get_KGML("hsa04115")
37
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML)
38
+#' nodes <- node_mapping_info(p53_KEGG_mappings)
39
+#' 
40
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings)
41
+#' 
42
+#' p53_HA1E_data <- overlap_info(p53_KGML, p53_KEGG_mappings, "HA1E", 
43
+#'                                data_type = "100_bing")
44
+#' p53_edges_plus_data <- add_edge_data(p53_edges, p53_KEGG_mappings, 
45
+#'                                       p53_HA1E_data, c(3, 10,12),
46
+#'                                       only_mapped = TRUE)
47
+#' 
48
+#' edges <- edge_mapping_info(p53_edges_plus_data, data_added = TRUE)
49
+#'                                             
50
+#' p53_plus_data_graph_object <- get_graph_object(nodes, edges)
51
+#'                                                 
52
+#' cyto_vis(p53_plus_data_graph_object, "p53 Graph: Mapped Edges + HA1E Data",
53
+#'          edge_width_attribute = "UP")
54
+#'          }
55
+
56
+cyto_vis <-
57
+    function(graph_object, title = "Cytoscape Graph Window",
58
+            edge_width_attribute = "summary_score",
59
+            port.number = 1234) {
60
+    base.url = paste("http://localhost:", toString(port.number), "/v1", sep="")
61
+
62
+    if (edge_width_attribute %in% names(igraph::edge_attr(graph_object))){
63
+        min.summary_score <- min(abs(igraph::E(graph_object)$summary_score),
64
+                                na.rm = TRUE)
65
+        max.summary_score <- max(abs(igraph::E(graph_object)$summary_score), 
66
+                                na.rm = TRUE)
67
+        map_edge_width <- TRUE
68
+    }
69
+    else {
70
+        min.summary_score <- NA
71
+        max.summary_score <- NA
72
+        map_edge_width <- FALSE
73
+    }
74
+    
75
+    graph_object$name <- title
76
+    cygraph <- toCytoscape(graph_object)
77
+        
78
+    network.url = paste(base.url, "networks", sep="/")
79
+    res <- httr::POST(url=network.url, body=cygraph, encode="json")
80
+    network.suid = unname(RJSONIO::fromJSON(rawToChar(res$content)))
81
+    
82
+    style.name = "myKEGGstyle"
83
+    mappings <- generate_mappings(style.name, map_edge_width, 
84
+                                    edge_width_attribute, 
85
+                                    min_score = min.summary_score, 
86
+                                    max_score = max.summary_score)
87
+    style.url = paste(base.url, "styles", sep="/")
88
+    style <- list(title=style.name, defaults = mappings[[1]],
89
+                  mappings = mappings[[2]])
90
+    style.JSON <- RJSONIO::toJSON(style)
91
+    httr::POST(url=style.url, body=style.JSON, encode = "json")
92
+    apply.style.url = paste(base.url, "apply/styles", style.name ,
93
+                            toString(network.suid), sep="/")
94
+    httr::GET(apply.style.url)
95
+    }
0 96
new file mode 100644
... ...
@@ -0,0 +1,150 @@
1
+#' Prepare edges for mapping
2
+#' @description Modify the mapping information for desired look when graphed in
3
+#'  Cytoscape
4
+#' @param expanded_edges The data frame object generated via the function 
5
+#' expand_KEGG_edges() OR has been modified by the function add_edge_data()
6
+#' @param data_added A logical indicator; must be set to TRUE if user data has
7
+#'  been added (i.e. edges modified by function add_edge_data())
8
+#' @param significance_markup A logical indicator; if set to TRUE will color 
9
+#' edges based on direction and significance of correlation (as determined by 
10
+#' user-data-analysis)
11
+#' @return A data.frame object for edges that will be passed on to the function 
12
+#' get_graph_object
13
+#' @export
14
+#' @examples
15
+#' p53_KGML <- get_KGML("hsa04115")
16
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML)
17
+#' 
18
+#' #Default; no data added to edges:
19
+#' 
20
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings)
21
+#' p53_edge_mapping_info <- edge_mapping_info(p53_edges) 
22
+#' 
23
+#' #If data is added to edges as additional attribute[s]:
24
+#' 
25
+#' p53_HA1E_data <- overlap_info(p53_KGML, p53_KEGG_mappings, 
26
+#'                                "HA1E", data_type = "100_bing")
27
+#' 
28
+#' p53_edges_HA1E_data_MAPPED <- add_edge_data(p53_edges, p53_KEGG_mappings, 
29
+#'                                                p53_HA1E_data, c(3, 10,12),
30
+#'                                                only_mapped = TRUE)
31
+#'                                                
32
+#' p53_edge_mapping_HA1E <- edge_mapping_info(p53_edges_HA1E_data_MAPPED, 
33
+#'                                                       data_added = TRUE)
34
+#'                                                       
35
+
36
+edge_mapping_info <-
37
+function(expanded_edges, data_added = FALSE, significance_markup = FALSE){
38
+    expanded_edges <- expanded_edges[,-c(1)]
39
+    for(i in 1:nrow(expanded_edges)){
40
+        if (expanded_edges$subtype1[i] == "activation"){
41
+            expanded_edges$color[i] = "#b20000"
42
+        }
43
+        else if (expanded_edges$subtype1[i] =="expression"){
44
+            expanded_edges$color[i] = "#b20000"
45
+        }
46
+        else if (expanded_edges$subtype1[i] =="inhibition"){
47
+            expanded_edges$color[i] = "#0015b2"
48
+        } 
49
+        else if (expanded_edges$subtype1[i] =="irreversible"){
50
+          expanded_edges$color[i] = "#b20000"
51
+        } 
52
+        else if (expanded_edges$subtype1[i] =="repression"){
53
+             expanded_edges$color[i] = "#0015b2"
54
+        }
55
+        else if (expanded_edges$subtype1[i] == "compound" | 
56
+                    expanded_edges$subtype1[i] == "indirect_effect" | 
57
+                    expanded_edges$subtype1[i] == "binding_association"){
58
+             expanded_edges$color[i] = "#000000"
59
+        }
60
+        else if (expanded_edges$subtype1[i] == "de_novo") {
61
+          expanded_edges$color[i] = "#808080"
62
+        }
63
+        else if (expanded_edges$subtype1[i] == "Not defined in KEGG" | 
64
+                    expanded_edges$type[i] == "dummy") {
65
+             expanded_edges$color[i] = "#808080"
66
+        }
67
+        else {
68
+             expanded_edges$color[i] = "#1c9900"
69
+        }
70
+        
71
+        if (expanded_edges$color[i] == "#1c9900"){
72
+             expanded_edges$edge_label[i] = expanded_edges$value[i]
73
+        }
74
+        else {
75
+             expanded_edges$edge_label[i] = NA
76
+        }
77
+    
78
+        if (is.na(expanded_edges$subtype2[i]) == FALSE &  
79
+            expanded_edges$subtype2[i] != "indirect"){
80
+                if (expanded_edges$subtype1[i] == "inhibition"){
81
+                    expanded_edges$color[i] = "#690099"
82
+                }
83
+                else if (expanded_edges$subtype1[i] == "activation"){
84
+                    expanded_edges$color[i] = "#FF6600"
85
+                }
86
+            expanded_edges$edge_label[i] = expanded_edges$value2[i]
87
+        }
88
+        if(data_added){
89
+            if (expanded_edges$premapped[i] == 1){
90
+                if (expanded_edges$value[i] == "--|"){
91
+                    expanded_edges$tooltip[i] <- 
92
+                        paste0(expanded_edges$entry1symbol[i]," --| ", 
93
+                                expanded_edges$entry2symbol[i])
94
+                    }
95
+                else {
96
+                    expanded_edges$tooltip[i] <- 
97
+                        paste0(expanded_edges$entry1symbol[i], " --> ", 
98
+                                expanded_edges$entry2symbol[i])
99
+                }
100
+            }
101
+          else {
102
+            expanded_edges$tooltip[i] <- paste0(expanded_edges$entry1symbol[i],
103
+                                                " - ",
104
+                                                expanded_edges$entry2symbol[i])
105
+            }
106
+        }
107
+        else {
108
+            if (expanded_edges$value[i] == "--|"){
109
+                expanded_edges$tooltip[i] <- 
110
+                    paste0(expanded_edges$entry1symbol[i], " --| ",
111
+                                                expanded_edges$entry2symbol[i])
112
+                }
113
+            else {
114
+                expanded_edges$tooltip <- 
115
+                    paste0(expanded_edges$entry1symbol, " --> ", 
116
+                                                expanded_edges$entry2symbol)
117
+            }
118
+        }
119
+    }
120
+    
121
+    expanded_edges$name <- paste0(expanded_edges$entry1, 
122
+                                " (", expanded_edges$type, ") ", 
123
+                                expanded_edges$entry2)
124
+    if (significance_markup){
125
+        for (i in 1:nrow(expanded_edges)){
126
+            if (expanded_edges$has_data[i] == 1){
127
+                if (expanded_edges$summary_score[i] > 0 & 
128
+                    expanded_edges$significant[i] == 1){
129
+                        expanded_edges$color[i] <- "#b20000"
130
+                        }
131
+                else if (expanded_edges$summary_score[i] > 0 & 
132
+                        expanded_edges$significant[i] == 0){
133
+                        expanded_edges$color[i] <- "#FF6600"
134
+                        }
135
+                else if (expanded_edges$summary_score[i] <= 0 & 
136
+                        expanded_edges$significant[i] == 1){
137
+                        expanded_edges$color[i] <- "#0015b2"
138
+                        }
139
+                else {
140
+                        expanded_edges$color[i] <- "#690099"
141
+                }
142
+            }
143
+            else {
144
+                expanded_edges$color[i] = "#808080"
145
+            }
146
+        }
147
+    }
148
+    expanded_edges <- expanded_edges[,c(3:4, 1:2, 5:ncol(expanded_edges))]
149
+    return(expanded_edges)
150
+}
0 151
new file mode 100644
... ...
@@ -0,0 +1,285 @@
1
+#' Get detailed KEGG mapping information for each relation [edge] documented 
2
+#' in KEGG
3
+#' @description Extract relationship information from KGML object and re-map
4
+#'  based on normalized node information
5
+#'
6
+#' @param KGML_file An object of formal class KEGGPathway
7
+#' @param KEGG_mappings The data.frame object generated by the function 
8
+#' expand_KEGG_mappings
9
+#'
10
+#' @return A dataframe object with unique entry information for all edges 
11
+#' documented in the KEGG pathway. 
12
+#'         Note that each row has a unique combination of values for
13
+#'          (entry1, entry2, entry1symbol, entry2symbol).
14
+#' @export
15
+#' @importMethodsFrom KEGGgraph edges
16
+#' @examples
17
+#' p53_KGML <- get_KGML("hsa04115")
18
+#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML, FALSE)
19
+#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings)
20
+
21
+
22
+expand_KEGG_edges <-
23
+function(KGML_file,KEGG_mappings){
24
+    num_edges <- length(KGML_file@edges)
25
+    if (num_edges == 0){
26
+        print("No Documented Edges in Pathway")
27
+        expanded_edges <- data.frame("edgeID"=1, "entry1"= "1", "entry2" = "1",
28
+                                    "entry1accession"="dummy",  "
29
+                                    entry2accession"="dummy",
30
+                                    "type"= "dummy", "subtype1"="dummy",       
31
+                                    "value"="dummy", "subtype2" = "dummy", 
32
+                                    "value2" = "dummy", 
33
+                                    "specific_subtype"="dummy", 
34
+                                    "is_direct"= 0, "entry1type"="dummy",     
35
+                                    "entry2type"= "dummy", "entry1symbol"= "1",
36
+                                    "entry2symbol"="1", 
37
+                                    stringsAsFactors = FALSE) 
38
+    return(expanded_edges)
39
+    }
40
+    map_edge_data<- KGML_file@edges
41
+    get_edges<- data.frame(edgeID = seq(1:length(map_edge_data )))
42
+    for (i in 1:nrow(get_edges)){
43
+        get_edges$entry1[i] <- as.numeric(map_edge_data[[i]]@entry1ID)
44
+        get_edges$entry2[i] <- as.numeric(map_edge_data[[i]]@entry2ID)
45
+        get_edges$type[i] <- map_edge_data[[i]]@type
46
+        if (length(map_edge_data[[i]]@subtype) == 1){
47
+            get_edges$subtype1[i] <- map_edge_data[[i]]@subtype[[1]]@name
48
+            if (get_edges$subtype1[i] == "binding/association"){
49
+                get_edges$subtype1[i] <- "binding_association"
50
+                }
51
+            get_edges$value[i] <- map_edge_data[[i]]@subtype[[1]]@value
52
+            get_edges$subtype2[i] <- NA
53
+            get_edges$value2[i] <- NA
54
+            get_edges$specific_subtype[i] <- get_edges$subtype1[i]
55
+            get_edges$is_direct[i] <- 1
56
+            if (get_edges$subtype1[i] == "indirect effect"){
57
+                get_edges$is_direct[i] <- 0
58
+                get_edges$subtype1[i] <- "indirect_effect"
59
+            }
60
+        }
61
+        else if (length(map_edge_data[[i]]@subtype) == 2){
62
+            get_edges$subtype1[i] <- map_edge_data[[i]]@subtype[[1]]@name
63
+                if (get_edges$subtype1[i] == "binding/association"){
64
+                    get_edges$subtype1[i] <- "binding_association"
65
+                    }
66
+            get_edges$value[i] <- map_edge_data[[i]]@subtype[[1]]@value
67
+            get_edges$subtype2[i] <- map_edge_data[[i]]@subtype[[2]]@name
68
+            get_edges$specific_subtype[i] <- paste0(get_edges$subtype1[i], "_",
69
+                                                    get_edges$subtype2[i])
70
+            get_edges$value2[i] <- map_edge_data[[i]]@subtype[[2]]@value
71
+            get_edges$is_direct[i] <- 1
72
+            if (get_edges$subtype2[i] == "indirect effect"){
73
+                get_edges$subtype2[i] <- "indirect"
74
+                get_edges$is_direct[i] <- 0
75
+            }
76
+        }
77
+        else {
78
+            get_edges$subtype1[i] <- "Not defined in KEGG"
79
+            get_edges$value[i] <- "Not defined in KEGG"
80
+            get_edges$subtype2[i] <- NA
81
+            get_edges$value2[i] <- NA
82
+            get_edges$specific_subtype[i] <- NA
83
+            get_edges$is_direct[i] <- 1
84
+        }
85
+        get_edges$entry1type[i] <- 
86
+            KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == 
87
+                                         get_edges$entry1[i])][1]
88
+        get_edges$entry2type[i] <- 
89
+            KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == 
90
+                                         get_edges$entry2[i])][1]
91
+    }
92
+    get_edges <- get_edges[!is.na(get_edges$entry1type) & 
93
+                            !is.na(get_edges$entry2type),]
94
+    if (nrow(get_edges) ==0){
95
+        print("No Documented Edges in Pathway for selected cell type; 
96
+            all edges are between non-expressed genes")
97
+        expanded_edges <- data.frame("edgeID"=1, "entry1"= "1", "entry2" = "1",
98
+                                     "entry1accession"="dummy",  
99
+                                    "entry2accession"="dummy","type"= "dummy", 
100
+                                    "subtype1"="dummy", "value"="dummy", 
101
+                                    "subtype2" = "dummy", "value2" = "dummy",
102
+                                    "specific_subtype"="dummy", "is_direct"= 0,
103
+                                    "entry1type"="dummy", 
104
+                                    "entry2type"= "dummy","entry1symbol"= "1",
105
+                                    "entry2symbol"="1", 
106
+                                    stringsAsFactors = FALSE)
107
+        return(expanded_edges)
108
+    }
109
+
110
+  ##At some point include option not to ungroup edges
111
+    edges_no_groups <- subset(get_edges, get_edges$entry1type != "group" 
112
+                            & get_edges$entry2type != "group", select = -c(1))
113
+    edges_with_groups <- subset(get_edges, get_edges$entry1type == "group" | 
114
+                                get_edges$entry2type == "group")
115
+    edges_one_group <- subset(get_edges, get_edges$entry1type == "group" & 
116
+                                !get_edges$entry2type == "group" | 
117
+                                get_edges$entry2type == "group" & 
118
+                                !get_edges$entry1type == "group")
119
+    edges_two_groups <- subset(get_edges, get_edges$entry1type == "group" & 
120
+                                get_edges$entry2type == "group")
121
+    if (nrow(edges_with_groups) > 0){
122
+        if (nrow(edges_one_group) > 0){
123
+            edges_one_group$entry1all <- NA
124
+            edges_one_group$entry2all <- NA
125
+            for (i in 1:nrow(edges_one_group)){
126
+                if (edges_one_group$entry1type[i] == "group"){
127
+                    edges_one_group$entry1all[i] <-  
128
+                    KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == 
129
+                                                edges_one_group$entry1[i])]
130
+                    l <- length(unlist(edges_one_group$entry1all[i]))
131
+                    edges_one_group$entry2all[i] <- 
132
+                        list(as.character(rep(edges_one_group$entry2[i], l)))
133
+                    }
134
+                else if (edges_one_group$entry2type[i] == "group"){
135
+                    edges_one_group$entry2all[i] <-  
136
+                    KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == 
137
+                                                edges_one_group$entry2[i])]
138
+                    l = length(unlist(edges_one_group$entry2all[i]))
139
+                    edges_one_group$entry1all[i] <- 
140
+                        list(as.character(rep(edges_one_group$entry1[i], l)))
141
+                    }
142
+            l = length(unlist(edges_one_group$entry1all[i]))
143
+            edges_one_group$edgeID_list[i] <- 
144
+                list(rep(edges_one_group$edgeID[i],l))
145
+            }
146
+            ewg_temp <- data.frame("edgeID" = 
147
+                                        unlist(edges_one_group$edgeID_list), 
148
+                            "entry1" = unlist(edges_one_group$entry1all), 
149
+                            "entry2" = unlist(edges_one_group$entry2all),
150
+                            stringsAsFactors = FALSE)
151
+            edges_one_group <- edges_one_group[,-c(2:3, 13:15)]
152
+            edges_one_group <- merge(ewg_temp,edges_one_group, by = "edgeID")
153
+            edges_one_group <- edges_one_group[,-c(1)]
154
+        }
155
+        if (nrow(edges_two_groups) > 0){
156
+            edges_two_groups$entry1all <- NA
157
+            edges_two_groups$entry2all <- NA
158
+                for (i in 1:nrow(edges_two_groups)){
159
+                    edges_two_groups$entry1all[i] <-  
160
+                    KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == 
161
+                                                edges_two_groups$entry1[i])]
162
+                    edges_two_groups$entry2all[i] <-  
163
+                    KEGG_mappings$groupMEMBERS[which(KEGG_mappings$entryID == 
164
+                                                edges_two_groups$entry2[i])]
165
+                    x <- length(unlist(edges_two_groups$entry1all[i]))
166
+                    y <- length(unlist(edges_two_groups$entry2all[i]))
167
+                    if (x == 1 & y >1){
168
+                        edges_two_groups$entry1all[i] <- 
169
+                            list(rep(edges_two_groups$entry1all[i], y))
170
+                        }
171
+                    else if (x > 1 & y == 1){
172
+                        edges_two_groups$entry2all[i] <- 
173
+                            list(rep(edges_two_groups$entry2all[i], x))
174
+                        }
175
+                    else if (x > 1 & y > 1){
176
+                        edges_two_groups$entry1all[i] <- 
177
+                            list(rep(edges_two_groups$entry1all[i],y))
178
+                        edges_two_groups$entry1all[i] <- 
179
+                            list(unlist(edges_two_groups$entry1all[i])[
180
+                            sort.list(unlist(edges_two_groups$entry1all[i]))])
181
+                        edges_two_groups$entry2all[i] <- 
182
+                            list(rep(edges_two_groups$entry2all[i],x))
183
+                    }
184
+                    l <- length(unlist(edges_two_groups$entry1all[i]))
185
+                    edges_two_groups$edgeID_list[i] <- 
186
+                        list(rep(edges_two_groups$edgeID[i],l))
187
+                    }
188
+            ewg_temp <- data.frame("edgeID" = 
189
+                                    unlist(edges_two_groups$edgeID_list),
190
+                                "entry1" = unlist(edges_two_groups$entry1all), 
191
+                                "entry2" = unlist(edges_two_groups$entry2all), 
192
+                                stringsAsFactors = FALSE)
193
+            edges_two_groups <- edges_two_groups[,-c(2:3, 13:15)]
194
+            edges_two_groups <- merge(ewg_temp,edges_two_groups, by = "edgeID")
195
+            edges_two_groups <- edges_two_groups[,-c(1)]
196
+        }
197
+        if (nrow(edges_one_group) > 0 & nrow(edges_two_groups) > 0){
198
+            all_edges <- rbind(edges_no_groups, edges_one_group, 
199
+                                edges_two_groups)
200
+        }
201
+        else if (nrow(edges_one_group) > 0){
202
+            all_edges <- rbind(edges_no_groups, edges_one_group)
203
+        }
204
+        else if (nrow(edges_two_groups) > 0){
205
+             all_edges <- rbind(edges_no_groups, edges_two_groups)
206
+        }
207
+    }
208
+    else {
209
+        all_edges <- edges_no_groups
210
+    }
211
+    for (i in 1:nrow(all_edges)){
212
+        all_edges$entry1all_accession[i] <- 
213
+            list(KEGG_mappings$entryACCESSION[which(KEGG_mappings$entryID == 
214
+                                                all_edges$entry1[i])])
215
+        all_edges$entry2all_accession[i] <- 
216
+            list(KEGG_mappings$entryACCESSION[which(KEGG_mappings$entryID ==
217
+                                                all_edges$entry2[i])])
218
+        x <- length(unlist(all_edges$entry1all_accession[i]))
219
+        y <- length(unlist(all_edges$entry2all_accession[i]))
220
+        if (x == 1 & y >1){
221
+            all_edges$entry1all_accession[i] <- 
222
+                list(rep(all_edges$entry1all_accession[i], y))
223
+            }
224
+        else if (x > 1 & y == 1) {
225
+            all_edges$entry2all_accession[i] <- 
226
+                list(rep(all_edges$entry2all_accession[i], x))
227
+        }
228
+        else if (x > 1 & y > 1) {
229
+            all_edges$entry1all_accession[i] <-
230
+                list(rep(all_edges$entry1all_accession[i],y))
231
+            all_edges$entry1all_accession[i] <- 
232
+                list(unlist(all_edges$entry1all_accession[i])[sort.list(unlist(
233
+                    all_edges$entry1all_accession[i]))])
234
+            all_edges$entry2all_accession[i] <- 
235
+                list(rep(all_edges$entry2all_accession[i],x))
236
+            }
237
+        l <- length(unlist(all_edges$entry1all_accession[i]))
238
+        all_edges$edgeID[i] = list(rep(i,l))
239
+    }
240
+    for (i in 1:nrow(all_edges)){
241
+        all_edges$l1[i] <- length(all_edges$entry1all_accession[[i]])
242
+        all_edges$l2[i] <- length(all_edges$entry2all_accession[[i]])
243
+    }
244
+    all_edges <- all_edges[all_edges$l1 >0 & all_edges$l2 > 0, -c(15,16)]
245
+    expanded_edges <- data.frame("edgeID" = unlist(all_edges$edgeID), 
246
+                            "entry1accession" = 
247
+                                unlist(all_edges$entry1all_accession), 
248
+                            "entry2accession" =
249
+                                unlist(all_edges$entry2all_accession), 
250
+                            stringsAsFactors = FALSE)
251
+  
252
+    all_edges <- all_edges[,-c(12:14)]
253
+    all_edges$edgeID <- seq(1:nrow(all_edges))
254
+    expanded_edges <- merge(expanded_edges, all_edges, by = "edgeID")
255
+    for (i in 1:nrow(expanded_edges)){
256
+        expanded_edges$entry1type[i] <- 
257
+            KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID == 
258
+                                      expanded_edges$entry1[i])][1]
259
+        expanded_edges$entry2type[i] <- 
260
+            KEGG_mappings$entryTYPE[which(KEGG_mappings$entryID ==
261
+                                      expanded_edges$entry2[i])][1]
262
+        if (expanded_edges$entry1type[i] == "gene"|
263
+            expanded_edges$entry1type[i] == "compound") {
264
+            expanded_edges$entry1symbol[i] <- 
265
+            KEGG_mappings$entrySYMBOL[which(KEGG_mappings$entryACCESSION == 
266
+                                        expanded_edges$entry1accession[i])][1]
267
+            }
268
+        else {
269
+            expanded_edges$entry1symbol[i] <- NA
270
+        }
271
+        if (expanded_edges$entry2type[i] == "gene" |
272
+            expanded_edges$entry2type[i] == "compound") {
273
+            expanded_edges$entry2symbol[i] <- 
274
+            KEGG_mappings$entrySYMBOL[which(KEGG_mappings$entryACCESSION == 
275
+                                        expanded_edges$entry2accession[i])][1]
276
+        }
277
+        else {
278
+            expanded_edges$entry2symbol[i] <- NA
279
+        }
280
+    }
281
+    expanded_edges$entry1symbol <- unlist(expanded_edges$entry1symbol)
282
+    expanded_edges$entry2symbol <- unlist(expanded_edges$entry2symbol)
283
+    expanded_edges$is_direct <- as.numeric(expanded_edges$is_direct)
284
+    return(expanded_edges)
285
+}
0 286
new file mode 100644
... ...
@@ -0,0 +1,269 @@
1
+#' Get detailed KEGG mapping information for each map entity
2
+#' @description Extract mapping information from KGML object and normalize 
3
+#' mappings based on multi-valued name attribute
4
+#'
5
+#' @param KGML_file An object of formal class KEGGPathway
6
+#' @param convert_KEGG_IDs A logical indicator; if set to FALSE will run faster
7
+#'  however genes and compounds will remain labeled via KEGG codes (compounds) 
8
+#'  or accession numbers (genes).  This option must be taken into account if 
9
+#'  data is being added.  For example, the genes in 'KO_data' are identified by
10
+#'  symbols, thus it is neccessary to retain the default option to convert IDs
11
+#'  to symbols when planning to add edge data of this type.
12
+#'
13
+#' @return A dataframe object with unique entry information for all [node]
14
+#'  objects documented in the KEGG pathway. 
15
+#'         Note that if mutiple objects (i.e. genes or compounds) have the same 
16
+#'         entryID, this indicates that they share the same node [location] 
17
+#'         in the pathway.  
18
+#' @export
19
+#' @import org.Hs.eg.db 
20
+#' @import KEGGREST
21
+#' @import AnnotationDbi
22
+#' @importMethodsFrom KEGGgraph edges
23
+#' @importMethodsFrom KEGGgraph nodes
24
+#' @examples
25
+#' p53_KGML <- get_KGML("hsa04115")
26
+#' p53_KEGG_mappings <-  expand_KEGG_mappings(p53_KGML, FALSE)
27
+
28
+expand_KEGG_mappings <-
29
+function (KGML_file, convert_KEGG_IDs = TRUE){
30
+    nodesN <- KEGGgraph::nodes(KGML_file)
31
+    entryMAP<- data.frame(entry = seq(1:length(nodesN)))
32
+    for (i in 1:nrow(entryMAP)){
33
+        entryMAP$entryID[i] <- nodesN[[i]]@entryID
34
+        entryMAP$entryTYPE[i] <- nodesN[[i]]@type
35
+        entryMAP$entryIDENTIFIER[i] <- list(nodesN[[i]]@name)
36
+        if (entryMAP$entryTYPE[i] == "group"){
37
+            entryMAP$groupMEMBERS[i] <-
38
+                list(nodesN[[entryMAP$entryID[i]]]@component)
39
+            entryMAP$entryIDENTIFIER[i] <- 
40
+                list(entryMAP$entryIDENTIFIER[which(entryMAP$entryID %in% 
41
+                                            unlist(entryMAP$groupMEMBERS[i]))])
42
+        }
43
+        else {
44
+            entryMAP$groupMEMBERS[i] <- NA
45
+        }
46
+        if (entryMAP$entryTYPE[i] == "gene" | 
47
+            entryMAP$entryTYPE[i] == "compound"|
48
+            entryMAP$entryTYPE[i] == "group"){
49
+                entryMAP$entryACCESSION[i]<- list(gsub(".*:", "", 
50
+                                        unlist(entryMAP$entryIDENTIFIER[i])))
51
+        }
52
+        else {
53
+            entryMAP$entryACCESSION[i] <- NA
54
+        }
55
+        entryMAP$entryTYPE[i] <- nodesN[[i]]@type
56
+        entryMAP$entryNAMES[i] <- nodesN[[i]]@graphics@name
57
+        entryMAP$FGcolor[i] <- nodesN[[i]]@graphics@fgcolor
58
+        entryMAP$BGcolor[i] <- nodesN[[i]]@graphics@bgcolor
59
+        entryMAP$shape[i] <- nodesN[[i]]@graphics@type
60
+        entryMAP$Xcoord[i] <- nodesN[[i]]@graphics@x
61
+        entryMAP$Ycoord[i] <- nodesN[[i]]@graphics@y
62
+        entryMAP$width[i] <- nodesN[[i]]@graphics@width
63
+        entryMAP$height[i] <- nodesN[[i]]@graphics@height
64
+        if(is.na(entryMAP$entryNAMES[i])){
65
+            entryMAP$entryNAMES[i] <- "NoNameInKGML"
66
+        }
67
+    }
68
+    entryMAP <- entryMAP[,-c(1,4)]
69
+    entryMAP_groupings <- entryMAP[entryMAP$entryTYPE == "group",]
70
+    entryMAP_extra <- entryMAP[entryMAP$entryTYPE == "map",]
71
+    entryMAP_extra$entrySYMBOL <- NA
72
+    entryMAP_dictionary <- entryMAP[entryMAP$entryTYPE == "gene" | 
73
+                                    entryMAP$entryTYPE == "compound",]
74
+    for (i in 1:nrow(entryMAP_dictionary)){  
75
+        if (entryMAP_dictionary$entryTYPE[i] == "compound"){
76
+            entryMAP_dictionary$entryACCESSION[i] <- 
77
+            entryMAP_dictionary$entryACCESSION[[i]][1]
78
+        }
79
+    }
80
+    if(convert_KEGG_IDs){
81
+        for (i in 1:nrow(entryMAP_dictionary)){  
82
+            if (entryMAP_dictionary$entryTYPE[i] == "compound"){
83
+                testval <- 
84
+                    keggGet(entryMAP_dictionary$entryACCESSION[i])[[1]]$NAME
85
+                if(!is.null(testval)){
86
+                    entryMAP_dictionary$entryNAMES[i] <- list(testval)
87
+                    entryMAP_dictionary$entryNAMES[i] <- 
88
+                        list(gsub(";", "",
89
+                                unlist(entryMAP_dictionary$entryNAMES[i])))
90
+                }
91
+            }
92
+        }
93
+    }
94
+    for (i in 1:nrow(entryMAP_dictionary)){  
95
+        l = length(unlist(entryMAP_dictionary$entryACCESSION[i]))
96
+        entryMAP_dictionary$entryID[i] <- 
97
+            list(rep(entryMAP_dictionary$entryID[i], l))
98
+        entryMAP_dictionary$entryTYPE[i] <- 
99
+            list(rep(entryMAP_dictionary$entryTYPE[i], l))
100
+    }
101
+    emd_temp <- data.frame(cbind(unlist(entryMAP_dictionary$entryID), 
102
+                                unlist(entryMAP_dictionary$entryTYPE), 
103
+                                unlist(entryMAP_dictionary$entryACCESSION)), 
104
+                                stringsAsFactors = FALSE)
105
+    names(emd_temp) <- names(entryMAP[c(1,2,4)])
106
+    for (i in 1:nrow(emd_temp)){
107
+        if (emd_temp$entryTYPE[i] == "gene"){
108
+            if (convert_KEGG_IDs){
109
+                emd_temp$entrySYMBOL[i] <- 
110
+                suppressMessages(as.character(AnnotationDbi::select(
111
+                org.Hs.eg.db::org.Hs.eg.db,keys = emd_temp$entryACCESSION[i], 
112
+                columns=c("SYMBOL"),keytype="ENTREZID")[2]))
113
+            }
114
+            else {
115
+                emd_temp$entrySYMBOL[i] <- emd_temp$entryACCESSION[i]
116
+            }
117
+        }
118
+        else if (emd_temp$entryTYPE[i] == "compound"){
119
+            emd_temp$entrySYMBOL[i] <- unlist(
120
+                entryMAP_dictionary$entryNAMES[
121
+                    entryMAP_dictionary$entryACCESSION == 
122
+                    emd_temp$entryACCESSION[i]])[1]
123
+        }
124
+        else {
125
+            emd_temp$entrySYMBOL[i] <- NA
126
+        }
127
+    }
128
+    for (i in 1:nrow(emd_temp)){
129
+        if (gsub("[[:digit:]]","",emd_temp$entryACCESSION[i]) == "G" & 
130
+            is.na(emd_temp$entrySYMBOL[i])){
131
+            emd_temp$entrySYMBOL[i] <- emd_temp$entryACCESSION[i]
132
+        }
133
+    }
134
+    emd_temp$groupMEMBERS <- NA
135
+    for(i in 1:nrow(entryMAP_dictionary)){
136
+        entryMAP_dictionary$entryID[i] <- 
137
+            unlist(entryMAP_dictionary$entryID[i])[1] 
138
+        entryMAP_dictionary$entryTYPE[i] <- 
139
+            unlist(entryMAP_dictionary$entryTYPE[i])[1]
140
+        if(entryMAP_dictionary$entryTYPE[i] == "compound"){
141
+            entryMAP_dictionary$entryNAMES[i] <- 
142
+                toString(unlist(entryMAP_dictionary$entryNAMES[[i]]))
143
+        }
144
+    }
145
+    entryMAP_dictionary <- entryMAP_dictionary[,-c(2:4)]
146
+    entryMAP_dictionary <- merge(emd_temp, entryMAP_dictionary, by = "entryID")
147
+    if (nrow(entryMAP_groupings) > 0){
148
+        for(i in 1:nrow(entryMAP_groupings)){
149
+            entryMAP_groupings$entrySYMBOL[i] <- 
150
+                list(entryMAP_dictionary$entrySYMBOL[match(unlist(
151
+                    entryMAP_groupings$entryACCESSION[i]),
152
+                    entryMAP_dictionary$entryACCESSION)])
153
+        }
154
+        entryMAP_dictionary <- 
155
+            rbind(entryMAP_extra, entryMAP_dictionary, entryMAP_groupings)
156
+        no_groups <- FALSE
157
+    }
158
+    if (nrow(entryMAP_groupings) == 0){
159
+        entryMAP_dictionary <- rbind(entryMAP_extra, entryMAP_dictionary)
160
+        entryMAP_dictionary$groupMEMBERS <- NA
161
+        entryMAP_dictionary$groupID <- NA
162
+        no_groups = TRUE
163
+    }
164
+
165
+    entryMAP_dictionary <- 
166
+        entryMAP_dictionary[order(as.double(entryMAP_dictionary$entryID)),]
167
+    rownames(entryMAP_dictionary) <- seq(1:nrow(entryMAP_dictionary))
168
+    entryMAP_dictionary$entryNAMES <- gsub("...", "", 
169
+                                            entryMAP_dictionary$entryNAMES,
170
+                                            fixed = TRUE)
171
+    for (i in 1:nrow(entryMAP_dictionary)){
172
+        if (entryMAP_dictionary$entryTYPE[i] =="gene"){
173
+            entryMAP_dictionary$LABEL[i] <- strsplit(as.character(
174
+                entryMAP_dictionary$entryNAMES[[i]]), ",")[[1]][1]
175
+        }
176
+        else if (entryMAP_dictionary$entryTYPE[i] == "compound"){
177
+            entryMAP_dictionary$LABEL[i] <- entryMAP_dictionary$entrySYMBOL[i]
178
+        }
179
+        else {
180
+            entryMAP_dictionary$LABEL[i] = entryMAP_dictionary$entryNAMES[i]
181
+        }
182
+    }
183
+    for (i in 1:nrow(entryMAP_dictionary)){
184
+        if (entryMAP_dictionary$entryTYPE[i] == "group"){
185
+            entryMAP_dictionary$entryNAMES[i] <- 
186
+                paste0(paste(unique(entryMAP_dictionary$LABEL[which(
187
+                entryMAP_dictionary$entryID %in% 
188
+                unlist(entryMAP_dictionary$groupMEMBERS[i]))]),collapse = ":"),
189
+                " Complex")
190
+            entryMAP_dictionary$LABEL[i] <- entryMAP_dictionary$entryNAMES[i]
191
+        }
192
+    }
193
+    if (!no_groups){
194
+        for (i in 1:nrow(entryMAP_groupings)){
195
+            l <- length(unlist(entryMAP_groupings$groupMEMBERS[i]))
196
+            entryMAP_groupings$entryID[i] <-  
197
+                list(rep(entryMAP_groupings$entryID[i], l))
198
+        }
199
+        group_mapper <- data.frame("groupID" = 
200
+                                    unlist(entryMAP_groupings$entryID), 
201
+                            "entryID" = 
202
+                                unlist(entryMAP_groupings$groupMEMBERS),
203
+                            stringsAsFactors = FALSE)
204
+        for (i in 1:nrow(entryMAP_dictionary)){
205
+            if (entryMAP_dictionary$entryID[i] %in% group_mapper$entryID){
206
+                entryMAP_dictionary$groupID[i] <-
207
+                    group_mapper$groupID[match(entryMAP_dictionary$entryID[i],
208
+                                        group_mapper$entryID)]
209
+            }
210
+            else {
211
+                entryMAP_dictionary$groupID[i] <- NA
212
+            }
213
+        }
214
+    }
215
+
216
+    num_edges <- length(KGML_file@edges)
217
+    if (num_edges > 0){
218
+        map_edge_data<- KEGGgraph::edges(KGML_file)
219
+        edges<- data.frame(edgeID = seq(1:length(map_edge_data )))
220
+        for (i in 1:nrow(edges)){
221
+            edges$entry1[i] <- as.character(map_edge_data[[i]]@entry1ID)
222
+            edges$entry2[i] <- as.character(map_edge_data[[i]]@entry2ID)
223
+        }
224
+        for (i in 1:nrow(entryMAP_dictionary)){
225
+            if (entryMAP_dictionary$entryID[i] %in% edges$entry1 | 
226
+                entryMAP_dictionary$entryID[i] %in% edges$entry2){
227
+                    entryMAP_dictionary$in_relationship[i] = 1
228
+            }
229
+            else if (entryMAP_dictionary$groupID[i] %in% edges$entry1 | 
230
+                    entryMAP_dictionary$groupID[i] %in% edges$entry2){
231
+                    entryMAP_dictionary$in_relationship[i] = 1
232
+            }
233
+            else {
234
+                entryMAP_dictionary$in_relationship[i] = 0
235
+            }
236
+        }
237
+    }
238
+    if (num_edges == 0){
239
+      entryMAP_dictionary$in_relationship = 0
240
+    }
241
+    for (i in 1:nrow(entryMAP_dictionary)){
242
+        if (is.na(entryMAP_dictionary$entrySYMBOL[i])){
243
+            entryMAP_dictionary$entrySYMBOL[i] <- entryMAP_dictionary$LABEL[i]
244
+        }
245
+    }
246
+    map_nodes <- entryMAP_dictionary$LABEL[entryMAP_dictionary$entryTYPE=="map"]
247
+    j = 0
248
+    for (i in 1:length(map_nodes)){
249
+        if (substring(map_nodes[i],1,5) == "TITLE"){
250
+            j = j+1
251
+        }
252
+    }
253
+    if (j == 0){
254
+        title <- KGML_file@pathwayInfo@title
255
+        min_Y <- min(entryMAP_dictionary$Ycoord)
256
+        min_X <- min(entryMAP_dictionary$Xcoord)+100
257
+        title_node <- data.frame("entryID" = 0, "entryTYPE" = "map", 
258
+                                "groupMEMBERS" = NA, "entryACCESSION" = NA, 
259
+                                "entryNAMES" = title,
260
+                                "FGcolor" = "#000000", "BGcolor" = "#FFFFFF",
261
+                                "shape" = "rectangle", "Xcoord" = min_X, 
262
+                                "Ycoord"= min_Y, "height" = 25, "width" = 220, 
263
+                                "entrySYMBOL"= title, "groupID"=NA,
264
+                                "LABEL"=paste0("TITLE:",title), 
265
+                                "in_relationship" = 0)
266
+        entryMAP_dictionary <- rbind(entryMAP_dictionary, title_node)
267
+    }
268
+    return(entryMAP_dictionary)
269
+}
0 270
\ No newline at end of file
1 271
new file mode 100644
... ...
@@ -0,0 +1,95 @@
1
+#' cyREST utility functions
2
+#' @description A subset of the R utility functions available from/defined
3
+#'  by cyREST.  The function mapAttributes is called from within toCytoscape 
4
+#'  which, in turn, is called from within cyto_vis. 
5
+#' @export
6
+#' @import methods
7
+#' @importFrom igraph E edge_attr graph.attributes V vertex.attributes
8
+#'  list.vertex.attributes get.edgelist ecount edge.attributes 
9
+#'  list.edge.attributes
10
+#' @param attr.names Attribute names of an igraph object
11
+#' @param all.attr The attribute value if an igraph object
12
+#' @param i The index for a given igraph object
13
+#' @param igraphobj A graph object compatible for use with the package igraph