Browse code

Add function keggerize_edges

Shana White authored on 27/09/2017 17:23:17
Showing 3 changed files

... ...
@@ -13,6 +13,7 @@ export(get_KGML)
13 13
 export(get_drug_table)
14 14
 export(get_fisher_info)
15 15
 export(get_graph_object)
16
+export(keggerize_edges)
16 17
 export(node_mapping_info)
17 18
 export(overlap_info)
18 19
 export(path_genes_by_cell_type)
19 20
new file mode 100644
... ...
@@ -0,0 +1,290 @@
1
+#' Add in edges to map documented in other pathways
2
+#' @description For a specific pathway entity(gene), search KEGG databases
3
+#' to see if it has any other documented relationships in KEGG.
4
+#' expand_KEGG_edges 
5
+#' @export
6
+#' @param entry_accession The Accession # of the pathway entity to 'keggerize'
7
+#' @param KGML The KGML file of the current pathway
8
+#' @param KEGG_mappings KEGG mappings for the current pathway
9
+#' @param edges The expanded edges for the current pathway
10
+#' @return A modified expanded edges data frame with additional rows for new 
11
+#' entries
12
+#' @examples \dontrun{
13
+#' KGML <- get_KGML("hsa04150")
14
+#' KEGG_mappings <- expand_KEGG_mappings(KGML)
15
+#' edges <- expand_KEGG_edges(KGML, KEGG_mappings)
16
+#' entry_accession <- "2475"
17
+#' mtor_plus_mtor <- keggerize_edges(entry_accession = entry_accession, 
18
+#'                                   KGML = KGML,KEGG_mappings = KEGG_mappings,
19
+#'                                   edges = edges) }
20
+#' 
21
+
22
+keggerize_edges <- function(entry_accession, KGML, KEGG_mappings, edges){
23
+    if(!"primary_pathway" %in% names(edges)){
24
+        edges$primary_pathwayID <- strsplit(KGML@pathwayInfo@name, ":")[[1]][2]
25
+        edges$primary_pathway <- KGML@pathwayInfo@title
26
+    }
27
+    entry_code <- paste0("hsa:",entry_accession)
28
+    entry_info <- keggGet(entry_code)
29
+    entry_pathways <- entry_info[[1]]["PATHWAY"]
30
+    entry_pathways <- names(entry_pathways$PATHWAY[1:length(unlist(entry_pathways))])
31
+    current_pathway <- strsplit(KGML@pathwayInfo@name, ":")[[1]][2]
32
+    entry_pathways <- entry_pathways[!entry_pathways %in% current_pathway]
33
+    
34
+    for(p in 1:length(entry_pathways)){
35
+        pathway_KGML <- get_KGML(entry_pathways[p])
36
+        pathway_KEGG_mappings <- expand_KEGG_mappings(pathway_KGML)
37
+        pathway_edges <- expand_KEGG_edges(pathway_KGML, pathway_KEGG_mappings)
38
+        entry_edges <- pathway_edges[pathway_edges$entry1accession == entry_accession |
39
+                                         pathway_edges$entry2accession == entry_accession, ]
40
+        entry_sources <- entry_edges[entry_edges$entry1accession != entry_accession &
41
+                                         entry_edges$entry1accession %in% KEGG_mappings$entryACCESSION,]
42
+        entry_sources <- entry_sources[!duplicated(entry_sources[,6:16]),]
43
+        if(nrow(entry_sources) > 0){
44
+            for(i in 1:nrow(entry_sources)){
45
+                entry1_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry1symbol[i]]
46
+                entry2_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry2symbol[i]]
47
+                if(length(entry1_locations) > 1 | length(entry2_locations > 1)){
48
+                    
49
+                    distance_matrix <- matrix(nrow = length(entry2_locations),
50
+                                              ncol = length(entry1_locations))
51
+                    colnames(distance_matrix) <- entry1_locations
52
+                    rownames(distance_matrix) <- entry2_locations
53
+                    for(j in 1:nrow(distance_matrix)){
54
+                        for (k in 1:ncol(distance_matrix)){
55
+                            value <-
56
+                                sqrt((KEGG_mappings$Xcoord[KEGG_mappings$entryID ==
57
+                                                               colnames(distance_matrix)[k]][1]
58
+                                      - KEGG_mappings$Xcoord[KEGG_mappings$entryID ==
59
+                                                                 rownames(distance_matrix)[j]][1])^2
60
+                                     + (KEGG_mappings$Ycoord[KEGG_mappings$entryID ==
61
+                                                                 colnames(distance_matrix)[k]][1]
62
+                                        - KEGG_mappings$Ycoord[KEGG_mappings$entryID ==
63
+                                                                   rownames(distance_matrix)[j]][1])^2)
64
+                            # print(value)
65
+                            distance_matrix[j,k] <- value
66
+                        }
67
+                    }
68
+                    location <- which(distance_matrix == min(distance_matrix), arr.ind = TRUE)
69
+                    entry_sources$entry1[i] <- colnames(distance_matrix)[location[1,2]]
70
+                    entry_sources$entry2[i] <- rownames(distance_matrix)[location[1,1]]
71
+                }
72
+                if(length(entry1_locations) == 1 & length(entry2_locations) == 1){
73
+                    entry_sources$entry1[i] <- entry1_locations
74
+                    entry_sources$entry2[i] <- entry2_locations
75
+                }
76
+            }
77
+        }
78
+        entry_targets <- entry_edges[entry_edges$entry1accession == entry_accession &
79
+                                         entry_edges$entry2accession %in% KEGG_mappings$entryACCESSION,]
80
+        entry_targets <- entry_targets[!duplicated(entry_targets[,6:16]),]
81
+        if(nrow(entry_targets) > 0){
82
+            for(i in 1:nrow(entry_targets)){
83
+                entry1_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_targets$entry1symbol[i]]
84
+                entry2_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_targets$entry2symbol[i]]
85
+                if(length(entry1_locations) > 1 | length(entry2_locations > 1)){
86
+                    
87
+                    distance_matrix <- matrix(nrow = length(entry2_locations),
88
+                                              ncol = length(entry1_locations))
89
+                    colnames(distance_matrix) <- entry1_locations
90
+                    rownames(distance_matrix) <- entry2_locations
91
+                    for(j in 1:nrow(distance_matrix)){
92
+                        for (k in 1:ncol(distance_matrix)){
93
+                            value <-
94
+                                sqrt((KEGG_mappings$Xcoord[KEGG_mappings$entryID ==
95
+                                                               colnames(distance_matrix)[k]][1]
96
+                                      - KEGG_mappings$Xcoord[KEGG_mappings$entryID ==
97
+                                                                 rownames(distance_matrix)[j]][1])^2
98
+                                     + (KEGG_mappings$Ycoord[KEGG_mappings$entryID ==
99
+                                                                 colnames(distance_matrix)[k]][1]
100
+                                        - KEGG_mappings$Ycoord[KEGG_mappings$entryID ==
101
+                                                                   rownames(distance_matrix)[j]][1])^2)
102
+                            # print(value)
103
+                            distance_matrix[j,k] <- value
104
+                        }
105
+                    }
106
+                    location <- which(distance_matrix == min(distance_matrix), arr.ind = TRUE)
107
+                    entry_targets$entry1[i] <- colnames(distance_matrix)[location[1,2]]
108
+                    entry_targets$entry2[i] <- rownames(distance_matrix)[location[1,1]]
109
+                }
110
+                if(length(entry1_locations) == 1 & length(entry2_locations) == 1){
111
+                    entry_targets$entry1[i] <- entry1_locations
112
+                    entry_targets$entry2[i] <- entry2_locations
113
+                }
114
+            }
115
+        }
116
+        new_edges <- rbind(entry_sources, entry_targets)
117
+        if(nrow(new_edges) > 0) {
118
+            new_edges$primary_pathwayID <- strsplit(pathway_KGML@pathwayInfo@name, ":")[[1]][2]
119
+            new_edges$primary_pathway <- pathway_KGML@pathwayInfo@title
120
+            edges <- rbind(edges, new_edges)
121
+        }
122
+        print(p)
123
+    }
124
+    
125
+    edges$edge_entry_ID <- paste0(edges$entry1, ":", edges$entry2)
126
+    edge_ID_sets <- edges$edge_entry_ID[!duplicated(edges$edge_entry_ID)]
127
+    edge_ID_mapper <- data.frame("edges" = edge_ID_sets, 
128
+                                 edgeID = 1:length(edge_ID_sets),
129
+                                 stringsAsFactors = FALSE)
130
+    for(i in 1:nrow(edges)){
131
+        edges$edgeID[i] <- edge_ID_mapper$edgeID[match(edges$edge_entry_ID[i], 
132
+                                                       edge_ID_mapper$edges)]
133
+    }
134
+    edges <- edges[,names(edges) != "edge_entry_ID"]
135
+    return(edges)
136
+}
137
+
138
+
139
+
140
+# KGML <- get_KGML("hsa04150")
141
+# KEGG_mappings <- expand_KEGG_mappings(KGML)
142
+# edges <- expand_KEGG_edges(KGML, KEGG_mappings)
143
+# 
144
+
145
+# 
146
+# keggerize_edges <- function(entry_accession, KGML, KEGG_mappings, edges){
147
+#     if(!"primary_pathway" %in% names(edges)){
148
+#         edges$primary_pathwayID <- strsplit(KGML@pathwayInfo@name, ":")[[1]][2]
149
+#         edges$primary_pathway <- KGML@pathwayInfo@title
150
+#     }
151
+#     entry_code <- paste0("hsa:",entry_accession)
152
+#     entry_info <- keggGet(entry_code)
153
+#     entry_pathways <- entry_info[[1]]["PATHWAY"]
154
+#     entry_pathways <- names(entry_pathways$PATHWAY[1:length(unlist(entry_pathways))])
155
+#     current_pathway <- strsplit(KGML@pathwayInfo@name, ":")[[1]][2]
156
+#     entry_pathways <- entry_pathways[!entry_pathways %in% current_pathway]
157
+#     for(i in 1:length(entry_pathways)){
158
+#         pathway_KGML <- get_KGML(entry_pathways[i])
159
+#         pathway_KEGG_mappings <- expand_KEGG_mappings(pathway_KGML)
160
+#         pathway_edges <- expand_KEGG_edges(pathway_KGML, pathway_KEGG_mappings)
161
+#         entry_edges <- pathway_edges[pathway_edges$entry1accession == entry_accession |
162
+#                                          pathway_edges$entry2accession == entry_accession, ]
163
+#         entry_sources <- entry_edges[entry_edges$entry1accession != entry_accession &
164
+#                                          entry_edges$entry1accession %in% KEGG_mappings$entryACCESSION,]
165
+#         # for(i in 1:nrow(entry_sources)){
166
+#         #     entry1_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry1symbol[i]]
167
+#         #     entry2_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry2symbol[i]]
168
+#         #     distance_matrix <- matrix(nrow = length(entry2_locations), 
169
+#         #                               ncol = length(entry1_locations))
170
+#         #     colnames(distance_matrix) <- entry1_locations
171
+#         #     rownames(distance_matrix) <- entry2_locations
172
+#         #     for(j in 1:nrow(distance_matrix)){
173
+#         #         for (k in 1:ncol(distance_matrix)){
174
+#         #             value <- 
175
+#         #                 sqrt((KEGG_mappings$Xcoord[KEGG_mappings$entryID ==
176
+#         #                                               colnames(distance_matrix)[k]][1]
177
+#         #                      - KEGG_mappings$Xcoord[KEGG_mappings$entryID ==
178
+#         #                                                 rownames(distance_matrix)[j]][1])^2
179
+#         #                      + (KEGG_mappings$Ycoord[KEGG_mappings$entryID ==
180
+#         #                                                  colnames(distance_matrix)[k]][1]
181
+#         #                         - KEGG_mappings$Ycoord[KEGG_mappings$entryID ==
182
+#         #                                                    rownames(distance_matrix)[j]][1])^2)
183
+#         #             print(value)
184
+#         #             distance_matrix[j,k] <- value
185
+#         #         }
186
+#         #     }
187
+#         #     
188
+#         # 
189
+#         # }
190
+#         # for(i in 1:nrow(entry_sources)){
191
+#         #     entry_sources$entry2[i] <- list(KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry2symbol[i]])
192
+#         # }
193
+#         # }
194
+#         entry_targets <- entry_edges[entry_edges$entry1accession == entry_accession &
195
+#                                          entry_edges$entry2accession %in% KEGG_mappings$entryACCESSION,]
196
+#         new_edges <- rbind(entry_sources, entry_targets)
197
+#         if(nrow(new_edges) > 0) {
198
+#             new_edges$primary_pathwayID <- strsplit(pathway_KGML@pathwayInfo@name, ":")[[1]][2]
199
+#             new_edges$primary_pathway <- pathway_KGML@pathwayInfo@title
200
+#             
201
+#             max_edge_ID <- max(edges$edgeID)
202
+#             for(j in 1:nrow(new_edges)){
203
+#                 new_edges$edgeID[j] <- max_edge_ID + j
204
+#             }
205
+#             edges <- rbind(edges, new_edges)
206
+#         }
207
+#         print(i)
208
+#     }
209
+#     edges$edge_entry_ID <- paste0(edges$entry1, ":", edges$entry2)
210
+#     edge_ID_sets <- edges$edge_entry_ID[!duplicated(edges$edge_entry_ID)]
211
+#     edge_ID_mapper <- data.frame("edges" = edge_ID_sets, 
212
+#                                  edgeID = 1:length(edge_ID_sets),
213
+#                                  stringsAsFactors = FALSE)
214
+#     for(i in 1:nrow(edges)){
215
+#         edges$edgeID[i] <- edge_ID_mapper$edgeID[match(edges$edge_entry_ID[i], 
216
+#                                                        edge_ID_mapper$edges)]
217
+#     }
218
+#     edges <- edges[,names(edges) != "edge_entry_ID"]
219
+#     return(edges)
220
+# }
221
+# 
222
+# mtor_plus_mtor <- keggerize_edges(entry_accession = entry_accession, KGML = KGML,
223
+#                                   KEGG_mappings = KEGG_mappings,
224
+#                                   edges = edges)
225
+# #dim(edges)
226
+# #[1] 581  16
227
+# # dim(mtor_plus_mtor)
228
+# # [1] 757  18
229
+# mtor_plus_mtor_plus_deptor <- keggerize_edges(entry_accession = "64798", KGML = KGML,
230
+#                 KEGG_mappings = KEGG_mappings,
231
+#                 edges = mtor_plus_mtor)
232
+# 
233
+# 
234
+# 
235
+# 
236
+# 
237
+# na_sources <- KEGG_gene_edges[is.na(KEGG_gene_edges$source),]
238
+# na_targets<- KEGG_gene_edges[is.na(KEGG_gene_edges$target),]
239
+# na_sources_and_targets <-  KEGG_gene_edges[is.na(KEGG_gene_edges$target)& is.na(KEGG_gene_edges$source),]
240
+# 
241
+# edges_complete_targets <- KEGG_gene_edges[!is.na(KEGG_gene_edges$target),] 
242
+# complete_edges <- edges_complete_targets[!is.na(edges_complete_targets$source),]
243
+# 
244
+# # > dim(na_sources) + dim(na_targets) - dim(na_sources_and_targets)
245
+# # [1] 2176   10
246
+# # 
247
+# # > dim(KEGG_gene_edges) - dim(complete_edges)
248
+# # [1] 2176    0
249
+# 
250
+# 
251
+# for(j in 1:nrow(distance_matrix)){
252
+#     for (k in 1:ncol(distance_matrix)){
253
+#         distance_matrix[j,k] <
254
+#             sqrt((KEGG_mappings$Xcoord[KEGG_mappings$entryID ==
255
+#                                            colnames(distance_matrix)[1]][1]
256
+#                   - KEGG_mappings$Xcoord[KEGG_mappings$entryID ==
257
+#                                              rownames(distance_matrix)[1]][1])^2
258
+#                  + (KEGG_mappings$Ycoord[KEGG_mappings$entryID ==
259
+#                                              colnames(distance_matrix)[1]][1]
260
+#                     - KEGG_mappings$Ycoord[KEGG_mappings$entryID ==
261
+#                                                rownames(distance_matrix)[1]][1])^2)
262
+#     }
263
+# }
264
+# 
265
+# KEGG_gene_edges <- complete_edges
266
+# mtor_edges <- KEGG_gene_edges[KEGG_gene_edges$source == "MTOR" | KEGG_gene_edges$target == "MTOR",]
267
+# 
268
+# unique_mtor_edges <- mtor_edges[!duplicated(mtor_edges),]
269
+# mtor_edges_all_pathways <- unique_mtor_edges
270
+# 
271
+# mtor_sources <- mtor_edges_all_pathways$source
272
+# mtor_sources <- mtor_sources[!duplicated(mtor_sources)]
273
+# # length(mtor_sources)
274
+# # [1] 33
275
+# mtor_targets <- mtor_edges_all_pathways$target
276
+# mtor_targets <- mtor_targets[!duplicated(mtor_targets)]
277
+# # length(mtor_targets)
278
+# # [1] 26
279
+# 
280
+# mtor_KGML <- get_KGML("hsa04150")
281
+# mtor_KEGG_mappings <- expand_KEGG_mappings(mtor_KGML)
282
+# mtor_edges <- expand_KEGG_edges(mtor_KGML, mtor_KEGG_mappings)
283
+# 
284
+# sources_in_mtor_pathway <-  mtor_KEGG_mappings[mtor_KEGG_mappings$entrySYMBOL %in% mtor_sources,]
285
+# # dim(sources_in_mtor_pathway)
286
+# # [1] 28 16
287
+# targets_in_mtor_pathway <-  mtor_KEGG_mappings[mtor_KEGG_mappings$entrySYMBOL %in% mtor_targets,]
288
+# # dim(targets_in_mtor_pathway)
289
+# # [1] 19 16
290
+# internal_mtor_edges <- mtor_edges_all_pathways[mtor_edges_all_pathways$pathway_code == "path:hsa04150",]
0 291
new file mode 100644
... ...
@@ -0,0 +1,37 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/keggerize_edges.R
3
+\name{keggerize_edges}
4
+\alias{keggerize_edges}
5
+\title{Add in edges to map documented in other pathways}
6
+\usage{
7
+keggerize_edges(entry_accession, KGML, KEGG_mappings, edges)
8
+}
9
+\arguments{
10
+\item{entry_accession}{The Accession # of the pathway entity to 'keggerize'}
11
+
12
+\item{KGML}{The KGML file of the current pathway}
13
+
14
+\item{KEGG_mappings}{KEGG mappings for the current pathway}
15
+
16
+\item{edges}{The expanded edges for the current pathway}
17
+}
18
+\value{
19
+A modified expanded edges data frame with additional rows for new 
20
+entries
21
+}
22
+\description{
23
+For a specific pathway entity(gene), search KEGG databases
24
+to see if it has any other documented relationships in KEGG.
25
+expand_KEGG_edges
26
+}
27
+\examples{
28
+\dontrun{
29
+KGML <- get_KGML("hsa04150")
30
+KEGG_mappings <- expand_KEGG_mappings(KGML)
31
+edges <- expand_KEGG_edges(KGML, KEGG_mappings)
32
+entry_accession <- "2475"
33
+mtor_plus_mtor <- keggerize_edges(entry_accession = entry_accession, 
34
+                                  KGML = KGML,KEGG_mappings = KEGG_mappings,
35
+                                  edges = edges) }
36
+
37
+}