19 | 20 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,290 @@ |
1 |
+#' Add in edges to map documented in other pathways |
|
2 |
+#' @description For a specific pathway entity(gene), search KEGG databases |
|
3 |
+#' to see if it has any other documented relationships in KEGG. |
|
4 |
+#' expand_KEGG_edges |
|
5 |
+#' @export |
|
6 |
+#' @param entry_accession The Accession # of the pathway entity to 'keggerize' |
|
7 |
+#' @param KGML The KGML file of the current pathway |
|
8 |
+#' @param KEGG_mappings KEGG mappings for the current pathway |
|
9 |
+#' @param edges The expanded edges for the current pathway |
|
10 |
+#' @return A modified expanded edges data frame with additional rows for new |
|
11 |
+#' entries |
|
12 |
+#' @examples \dontrun{ |
|
13 |
+#' KGML <- get_KGML("hsa04150") |
|
14 |
+#' KEGG_mappings <- expand_KEGG_mappings(KGML) |
|
15 |
+#' edges <- expand_KEGG_edges(KGML, KEGG_mappings) |
|
16 |
+#' entry_accession <- "2475" |
|
17 |
+#' mtor_plus_mtor <- keggerize_edges(entry_accession = entry_accession, |
|
18 |
+#' KGML = KGML,KEGG_mappings = KEGG_mappings, |
|
19 |
+#' edges = edges) } |
|
20 |
+#' |
|
21 |
+ |
|
22 |
+keggerize_edges <- function(entry_accession, KGML, KEGG_mappings, edges){ |
|
23 |
+ if(!"primary_pathway" %in% names(edges)){ |
|
24 |
+ edges$primary_pathwayID <- strsplit(KGML@pathwayInfo@name, ":")[[1]][2] |
|
25 |
+ edges$primary_pathway <- KGML@pathwayInfo@title |
|
26 |
+ } |
|
27 |
+ entry_code <- paste0("hsa:",entry_accession) |
|
28 |
+ entry_info <- keggGet(entry_code) |
|
29 |
+ entry_pathways <- entry_info[[1]]["PATHWAY"] |
|
30 |
+ entry_pathways <- names(entry_pathways$PATHWAY[1:length(unlist(entry_pathways))]) |
|
31 |
+ current_pathway <- strsplit(KGML@pathwayInfo@name, ":")[[1]][2] |
|
32 |
+ entry_pathways <- entry_pathways[!entry_pathways %in% current_pathway] |
|
33 |
+ |
|
34 |
+ for(p in 1:length(entry_pathways)){ |
|
35 |
+ pathway_KGML <- get_KGML(entry_pathways[p]) |
|
36 |
+ pathway_KEGG_mappings <- expand_KEGG_mappings(pathway_KGML) |
|
37 |
+ pathway_edges <- expand_KEGG_edges(pathway_KGML, pathway_KEGG_mappings) |
|
38 |
+ entry_edges <- pathway_edges[pathway_edges$entry1accession == entry_accession | |
|
39 |
+ pathway_edges$entry2accession == entry_accession, ] |
|
40 |
+ entry_sources <- entry_edges[entry_edges$entry1accession != entry_accession & |
|
41 |
+ entry_edges$entry1accession %in% KEGG_mappings$entryACCESSION,] |
|
42 |
+ entry_sources <- entry_sources[!duplicated(entry_sources[,6:16]),] |
|
43 |
+ if(nrow(entry_sources) > 0){ |
|
44 |
+ for(i in 1:nrow(entry_sources)){ |
|
45 |
+ entry1_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry1symbol[i]] |
|
46 |
+ entry2_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry2symbol[i]] |
|
47 |
+ if(length(entry1_locations) > 1 | length(entry2_locations > 1)){ |
|
48 |
+ |
|
49 |
+ distance_matrix <- matrix(nrow = length(entry2_locations), |
|
50 |
+ ncol = length(entry1_locations)) |
|
51 |
+ colnames(distance_matrix) <- entry1_locations |
|
52 |
+ rownames(distance_matrix) <- entry2_locations |
|
53 |
+ for(j in 1:nrow(distance_matrix)){ |
|
54 |
+ for (k in 1:ncol(distance_matrix)){ |
|
55 |
+ value <- |
|
56 |
+ sqrt((KEGG_mappings$Xcoord[KEGG_mappings$entryID == |
|
57 |
+ colnames(distance_matrix)[k]][1] |
|
58 |
+ - KEGG_mappings$Xcoord[KEGG_mappings$entryID == |
|
59 |
+ rownames(distance_matrix)[j]][1])^2 |
|
60 |
+ + (KEGG_mappings$Ycoord[KEGG_mappings$entryID == |
|
61 |
+ colnames(distance_matrix)[k]][1] |
|
62 |
+ - KEGG_mappings$Ycoord[KEGG_mappings$entryID == |
|
63 |
+ rownames(distance_matrix)[j]][1])^2) |
|
64 |
+ # print(value) |
|
65 |
+ distance_matrix[j,k] <- value |
|
66 |
+ } |
|
67 |
+ } |
|
68 |
+ location <- which(distance_matrix == min(distance_matrix), arr.ind = TRUE) |
|
69 |
+ entry_sources$entry1[i] <- colnames(distance_matrix)[location[1,2]] |
|
70 |
+ entry_sources$entry2[i] <- rownames(distance_matrix)[location[1,1]] |
|
71 |
+ } |
|
72 |
+ if(length(entry1_locations) == 1 & length(entry2_locations) == 1){ |
|
73 |
+ entry_sources$entry1[i] <- entry1_locations |
|
74 |
+ entry_sources$entry2[i] <- entry2_locations |
|
75 |
+ } |
|
76 |
+ } |
|
77 |
+ } |
|
78 |
+ entry_targets <- entry_edges[entry_edges$entry1accession == entry_accession & |
|
79 |
+ entry_edges$entry2accession %in% KEGG_mappings$entryACCESSION,] |
|
80 |
+ entry_targets <- entry_targets[!duplicated(entry_targets[,6:16]),] |
|
81 |
+ if(nrow(entry_targets) > 0){ |
|
82 |
+ for(i in 1:nrow(entry_targets)){ |
|
83 |
+ entry1_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_targets$entry1symbol[i]] |
|
84 |
+ entry2_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_targets$entry2symbol[i]] |
|
85 |
+ if(length(entry1_locations) > 1 | length(entry2_locations > 1)){ |
|
86 |
+ |
|
87 |
+ distance_matrix <- matrix(nrow = length(entry2_locations), |
|
88 |
+ ncol = length(entry1_locations)) |
|
89 |
+ colnames(distance_matrix) <- entry1_locations |
|
90 |
+ rownames(distance_matrix) <- entry2_locations |
|
91 |
+ for(j in 1:nrow(distance_matrix)){ |
|
92 |
+ for (k in 1:ncol(distance_matrix)){ |
|
93 |
+ value <- |
|
94 |
+ sqrt((KEGG_mappings$Xcoord[KEGG_mappings$entryID == |
|
95 |
+ colnames(distance_matrix)[k]][1] |
|
96 |
+ - KEGG_mappings$Xcoord[KEGG_mappings$entryID == |
|
97 |
+ rownames(distance_matrix)[j]][1])^2 |
|
98 |
+ + (KEGG_mappings$Ycoord[KEGG_mappings$entryID == |
|
99 |
+ colnames(distance_matrix)[k]][1] |
|
100 |
+ - KEGG_mappings$Ycoord[KEGG_mappings$entryID == |
|
101 |
+ rownames(distance_matrix)[j]][1])^2) |
|
102 |
+ # print(value) |
|
103 |
+ distance_matrix[j,k] <- value |
|
104 |
+ } |
|
105 |
+ } |
|
106 |
+ location <- which(distance_matrix == min(distance_matrix), arr.ind = TRUE) |
|
107 |
+ entry_targets$entry1[i] <- colnames(distance_matrix)[location[1,2]] |
|
108 |
+ entry_targets$entry2[i] <- rownames(distance_matrix)[location[1,1]] |
|
109 |
+ } |
|
110 |
+ if(length(entry1_locations) == 1 & length(entry2_locations) == 1){ |
|
111 |
+ entry_targets$entry1[i] <- entry1_locations |
|
112 |
+ entry_targets$entry2[i] <- entry2_locations |
|
113 |
+ } |
|
114 |
+ } |
|
115 |
+ } |
|
116 |
+ new_edges <- rbind(entry_sources, entry_targets) |
|
117 |
+ if(nrow(new_edges) > 0) { |
|
118 |
+ new_edges$primary_pathwayID <- strsplit(pathway_KGML@pathwayInfo@name, ":")[[1]][2] |
|
119 |
+ new_edges$primary_pathway <- pathway_KGML@pathwayInfo@title |
|
120 |
+ edges <- rbind(edges, new_edges) |
|
121 |
+ } |
|
122 |
+ print(p) |
|
123 |
+ } |
|
124 |
+ |
|
125 |
+ edges$edge_entry_ID <- paste0(edges$entry1, ":", edges$entry2) |
|
126 |
+ edge_ID_sets <- edges$edge_entry_ID[!duplicated(edges$edge_entry_ID)] |
|
127 |
+ edge_ID_mapper <- data.frame("edges" = edge_ID_sets, |
|
128 |
+ edgeID = 1:length(edge_ID_sets), |
|
129 |
+ stringsAsFactors = FALSE) |
|
130 |
+ for(i in 1:nrow(edges)){ |
|
131 |
+ edges$edgeID[i] <- edge_ID_mapper$edgeID[match(edges$edge_entry_ID[i], |
|
132 |
+ edge_ID_mapper$edges)] |
|
133 |
+ } |
|
134 |
+ edges <- edges[,names(edges) != "edge_entry_ID"] |
|
135 |
+ return(edges) |
|
136 |
+} |
|
137 |
+ |
|
138 |
+ |
|
139 |
+ |
|
140 |
+# KGML <- get_KGML("hsa04150") |
|
141 |
+# KEGG_mappings <- expand_KEGG_mappings(KGML) |
|
142 |
+# edges <- expand_KEGG_edges(KGML, KEGG_mappings) |
|
143 |
+# |
|
144 |
+ |
|
145 |
+# |
|
146 |
+# keggerize_edges <- function(entry_accession, KGML, KEGG_mappings, edges){ |
|
147 |
+# if(!"primary_pathway" %in% names(edges)){ |
|
148 |
+# edges$primary_pathwayID <- strsplit(KGML@pathwayInfo@name, ":")[[1]][2] |
|
149 |
+# edges$primary_pathway <- KGML@pathwayInfo@title |
|
150 |
+# } |
|
151 |
+# entry_code <- paste0("hsa:",entry_accession) |
|
152 |
+# entry_info <- keggGet(entry_code) |
|
153 |
+# entry_pathways <- entry_info[[1]]["PATHWAY"] |
|
154 |
+# entry_pathways <- names(entry_pathways$PATHWAY[1:length(unlist(entry_pathways))]) |
|
155 |
+# current_pathway <- strsplit(KGML@pathwayInfo@name, ":")[[1]][2] |
|
156 |
+# entry_pathways <- entry_pathways[!entry_pathways %in% current_pathway] |
|
157 |
+# for(i in 1:length(entry_pathways)){ |
|
158 |
+# pathway_KGML <- get_KGML(entry_pathways[i]) |
|
159 |
+# pathway_KEGG_mappings <- expand_KEGG_mappings(pathway_KGML) |
|
160 |
+# pathway_edges <- expand_KEGG_edges(pathway_KGML, pathway_KEGG_mappings) |
|
161 |
+# entry_edges <- pathway_edges[pathway_edges$entry1accession == entry_accession | |
|
162 |
+# pathway_edges$entry2accession == entry_accession, ] |
|
163 |
+# entry_sources <- entry_edges[entry_edges$entry1accession != entry_accession & |
|
164 |
+# entry_edges$entry1accession %in% KEGG_mappings$entryACCESSION,] |
|
165 |
+# # for(i in 1:nrow(entry_sources)){ |
|
166 |
+# # entry1_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry1symbol[i]] |
|
167 |
+# # entry2_locations <- KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry2symbol[i]] |
|
168 |
+# # distance_matrix <- matrix(nrow = length(entry2_locations), |
|
169 |
+# # ncol = length(entry1_locations)) |
|
170 |
+# # colnames(distance_matrix) <- entry1_locations |
|
171 |
+# # rownames(distance_matrix) <- entry2_locations |
|
172 |
+# # for(j in 1:nrow(distance_matrix)){ |
|
173 |
+# # for (k in 1:ncol(distance_matrix)){ |
|
174 |
+# # value <- |
|
175 |
+# # sqrt((KEGG_mappings$Xcoord[KEGG_mappings$entryID == |
|
176 |
+# # colnames(distance_matrix)[k]][1] |
|
177 |
+# # - KEGG_mappings$Xcoord[KEGG_mappings$entryID == |
|
178 |
+# # rownames(distance_matrix)[j]][1])^2 |
|
179 |
+# # + (KEGG_mappings$Ycoord[KEGG_mappings$entryID == |
|
180 |
+# # colnames(distance_matrix)[k]][1] |
|
181 |
+# # - KEGG_mappings$Ycoord[KEGG_mappings$entryID == |
|
182 |
+# # rownames(distance_matrix)[j]][1])^2) |
|
183 |
+# # print(value) |
|
184 |
+# # distance_matrix[j,k] <- value |
|
185 |
+# # } |
|
186 |
+# # } |
|
187 |
+# # |
|
188 |
+# # |
|
189 |
+# # } |
|
190 |
+# # for(i in 1:nrow(entry_sources)){ |
|
191 |
+# # entry_sources$entry2[i] <- list(KEGG_mappings$entryID[KEGG_mappings$entrySYMBOL == entry_sources$entry2symbol[i]]) |
|
192 |
+# # } |
|
193 |
+# # } |
|
194 |
+# entry_targets <- entry_edges[entry_edges$entry1accession == entry_accession & |
|
195 |
+# entry_edges$entry2accession %in% KEGG_mappings$entryACCESSION,] |
|
196 |
+# new_edges <- rbind(entry_sources, entry_targets) |
|
197 |
+# if(nrow(new_edges) > 0) { |
|
198 |
+# new_edges$primary_pathwayID <- strsplit(pathway_KGML@pathwayInfo@name, ":")[[1]][2] |
|
199 |
+# new_edges$primary_pathway <- pathway_KGML@pathwayInfo@title |
|
200 |
+# |
|
201 |
+# max_edge_ID <- max(edges$edgeID) |
|
202 |
+# for(j in 1:nrow(new_edges)){ |
|
203 |
+# new_edges$edgeID[j] <- max_edge_ID + j |
|
204 |
+# } |
|
205 |
+# edges <- rbind(edges, new_edges) |
|
206 |
+# } |
|
207 |
+# print(i) |
|
208 |
+# } |
|
209 |
+# edges$edge_entry_ID <- paste0(edges$entry1, ":", edges$entry2) |
|
210 |
+# edge_ID_sets <- edges$edge_entry_ID[!duplicated(edges$edge_entry_ID)] |
|
211 |
+# edge_ID_mapper <- data.frame("edges" = edge_ID_sets, |
|
212 |
+# edgeID = 1:length(edge_ID_sets), |
|
213 |
+# stringsAsFactors = FALSE) |
|
214 |
+# for(i in 1:nrow(edges)){ |
|
215 |
+# edges$edgeID[i] <- edge_ID_mapper$edgeID[match(edges$edge_entry_ID[i], |
|
216 |
+# edge_ID_mapper$edges)] |
|
217 |
+# } |
|
218 |
+# edges <- edges[,names(edges) != "edge_entry_ID"] |
|
219 |
+# return(edges) |
|
220 |
+# } |
|
221 |
+# |
|
222 |
+# mtor_plus_mtor <- keggerize_edges(entry_accession = entry_accession, KGML = KGML, |
|
223 |
+# KEGG_mappings = KEGG_mappings, |
|
224 |
+# edges = edges) |
|
225 |
+# #dim(edges) |
|
226 |
+# #[1] 581 16 |
|
227 |
+# # dim(mtor_plus_mtor) |
|
228 |
+# # [1] 757 18 |
|
229 |
+# mtor_plus_mtor_plus_deptor <- keggerize_edges(entry_accession = "64798", KGML = KGML, |
|
230 |
+# KEGG_mappings = KEGG_mappings, |
|
231 |
+# edges = mtor_plus_mtor) |
|
232 |
+# |
|
233 |
+# |
|
234 |
+# |
|
235 |
+# |
|
236 |
+# |
|
237 |
+# na_sources <- KEGG_gene_edges[is.na(KEGG_gene_edges$source),] |
|
238 |
+# na_targets<- KEGG_gene_edges[is.na(KEGG_gene_edges$target),] |
|
239 |
+# na_sources_and_targets <- KEGG_gene_edges[is.na(KEGG_gene_edges$target)& is.na(KEGG_gene_edges$source),] |
|
240 |
+# |
|
241 |
+# edges_complete_targets <- KEGG_gene_edges[!is.na(KEGG_gene_edges$target),] |
|
242 |
+# complete_edges <- edges_complete_targets[!is.na(edges_complete_targets$source),] |
|
243 |
+# |
|
244 |
+# # > dim(na_sources) + dim(na_targets) - dim(na_sources_and_targets) |
|
245 |
+# # [1] 2176 10 |
|
246 |
+# # |
|
247 |
+# # > dim(KEGG_gene_edges) - dim(complete_edges) |
|
248 |
+# # [1] 2176 0 |
|
249 |
+# |
|
250 |
+# |
|
251 |
+# for(j in 1:nrow(distance_matrix)){ |
|
252 |
+# for (k in 1:ncol(distance_matrix)){ |
|
253 |
+# distance_matrix[j,k] < |
|
254 |
+# sqrt((KEGG_mappings$Xcoord[KEGG_mappings$entryID == |
|
255 |
+# colnames(distance_matrix)[1]][1] |
|
256 |
+# - KEGG_mappings$Xcoord[KEGG_mappings$entryID == |
|
257 |
+# rownames(distance_matrix)[1]][1])^2 |
|
258 |
+# + (KEGG_mappings$Ycoord[KEGG_mappings$entryID == |
|
259 |
+# colnames(distance_matrix)[1]][1] |
|
260 |
+# - KEGG_mappings$Ycoord[KEGG_mappings$entryID == |
|
261 |
+# rownames(distance_matrix)[1]][1])^2) |
|
262 |
+# } |
|
263 |
+# } |
|
264 |
+# |
|
265 |
+# KEGG_gene_edges <- complete_edges |
|
266 |
+# mtor_edges <- KEGG_gene_edges[KEGG_gene_edges$source == "MTOR" | KEGG_gene_edges$target == "MTOR",] |
|
267 |
+# |
|
268 |
+# unique_mtor_edges <- mtor_edges[!duplicated(mtor_edges),] |
|
269 |
+# mtor_edges_all_pathways <- unique_mtor_edges |
|
270 |
+# |
|
271 |
+# mtor_sources <- mtor_edges_all_pathways$source |
|
272 |
+# mtor_sources <- mtor_sources[!duplicated(mtor_sources)] |
|
273 |
+# # length(mtor_sources) |
|
274 |
+# # [1] 33 |
|
275 |
+# mtor_targets <- mtor_edges_all_pathways$target |
|
276 |
+# mtor_targets <- mtor_targets[!duplicated(mtor_targets)] |
|
277 |
+# # length(mtor_targets) |
|
278 |
+# # [1] 26 |
|
279 |
+# |
|
280 |
+# mtor_KGML <- get_KGML("hsa04150") |
|
281 |
+# mtor_KEGG_mappings <- expand_KEGG_mappings(mtor_KGML) |
|
282 |
+# mtor_edges <- expand_KEGG_edges(mtor_KGML, mtor_KEGG_mappings) |
|
283 |
+# |
|
284 |
+# sources_in_mtor_pathway <- mtor_KEGG_mappings[mtor_KEGG_mappings$entrySYMBOL %in% mtor_sources,] |
|
285 |
+# # dim(sources_in_mtor_pathway) |
|
286 |
+# # [1] 28 16 |
|
287 |
+# targets_in_mtor_pathway <- mtor_KEGG_mappings[mtor_KEGG_mappings$entrySYMBOL %in% mtor_targets,] |
|
288 |
+# # dim(targets_in_mtor_pathway) |
|
289 |
+# # [1] 19 16 |
|
290 |
+# internal_mtor_edges <- mtor_edges_all_pathways[mtor_edges_all_pathways$pathway_code == "path:hsa04150",] |
0 | 291 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,37 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/keggerize_edges.R |
|
3 |
+\name{keggerize_edges} |
|
4 |
+\alias{keggerize_edges} |
|
5 |
+\title{Add in edges to map documented in other pathways} |
|
6 |
+\usage{ |
|
7 |
+keggerize_edges(entry_accession, KGML, KEGG_mappings, edges) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{entry_accession}{The Accession # of the pathway entity to 'keggerize'} |
|
11 |
+ |
|
12 |
+\item{KGML}{The KGML file of the current pathway} |
|
13 |
+ |
|
14 |
+\item{KEGG_mappings}{KEGG mappings for the current pathway} |
|
15 |
+ |
|
16 |
+\item{edges}{The expanded edges for the current pathway} |
|
17 |
+} |
|
18 |
+\value{ |
|
19 |
+A modified expanded edges data frame with additional rows for new |
|
20 |
+entries |
|
21 |
+} |
|
22 |
+\description{ |
|
23 |
+For a specific pathway entity(gene), search KEGG databases |
|
24 |
+to see if it has any other documented relationships in KEGG. |
|
25 |
+expand_KEGG_edges |
|
26 |
+} |
|
27 |
+\examples{ |
|
28 |
+\dontrun{ |
|
29 |
+KGML <- get_KGML("hsa04150") |
|
30 |
+KEGG_mappings <- expand_KEGG_mappings(KGML) |
|
31 |
+edges <- expand_KEGG_edges(KGML, KEGG_mappings) |
|
32 |
+entry_accession <- "2475" |
|
33 |
+mtor_plus_mtor <- keggerize_edges(entry_accession = entry_accession, |
|
34 |
+ KGML = KGML,KEGG_mappings = KEGG_mappings, |
|
35 |
+ edges = edges) } |
|
36 |
+ |
|
37 |
+} |