7 | 8 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,245 @@ |
1 |
+#' Combines all other package functions for one-step cell line comparison |
|
2 |
+#' @export |
|
3 |
+#' @param pathwayid A KEGG pathway ID of the form "hsa12345" |
|
4 |
+#' (only human pathways currently) |
|
5 |
+#' @param cell_line1 Choose from the set of cell lines: |
|
6 |
+#' (A375,A549,ASC,HA1E,HCC515,HEK293T,HEKTE,HEPG2,HT29,MCF7,NCIH716,NPC,PC3, |
|
7 |
+#' SHSY5Y,SKL,SW480,VCAP) |
|
8 |
+#' @param cell_line2 A cell line such that cell_line1 != cell_line2 |
|
9 |
+#' @param refine_by_cell_line A logical indicator |
|
10 |
+#' @param add_L1000_edge_data A logical indicator |
|
11 |
+#' @param data_type Choose from data types: (100_full, 100_bing, 50_lm) |
|
12 |
+#' @param pert_time Choose from (6,24,48,96,120,144,168) |
|
13 |
+#' @param only_mapped A logical indicator; if set to FALSE will return 'de-novo' |
|
14 |
+#' edges that 'exist' in data but are not documented in KEGG |
|
15 |
+#' @param significance_markup A logical indicator; if set to TRUE will color |
|
16 |
+#' edges based on direction and significance of correlation (as determined by |
|
17 |
+#' user-data-analysis) |
|
18 |
+#' @param layered_nodes A logical indicator; if set to TRUE will create a graph |
|
19 |
+#' with 'stacked' nodes that the user can manipulate when multiple nodes are |
|
20 |
+#' mapped to one location |
|
21 |
+#' @param graph_title An optional user-specified graph title |
|
22 |
+#' @param get_data A logical indicator; if set to true, will return the |
|
23 |
+#' 'expanded' edge information for the specified pathway |
|
24 |
+#' @param convert_KEGG_IDs A logical indicator; if set to TRUE KEGG |
|
25 |
+#' compounds will remain labeled via KEGG codes (do not need KEGGREST) |
|
26 |
+#' @return A dynamic map in Cytoscape automatically formatted for convenient |
|
27 |
+#' viewing and, if idicated by user, a data.frame object with detailed |
|
28 |
+#' information for 'expanded' KEGG edges |
|
29 |
+#' @examples \dontrun{ |
|
30 |
+#' |
|
31 |
+#' # Compare p53 pathway between cell lines A375 and A549: |
|
32 |
+#' KL_compare("hsa04115", "A375", "A549") |
|
33 |
+#'} |
|
34 |
+ |
|
35 |
+KL_compare <- |
|
36 |
+ function(pathwayid, cell_line1 = NA, cell_line2 = NA, |
|
37 |
+ refine_by_cell_line = TRUE, |
|
38 |
+ add_L1000_edge_data = TRUE, |
|
39 |
+ significance_markup = TRUE, |
|
40 |
+ data_type = "100_full", |
|
41 |
+ pert_time = 96, |
|
42 |
+ only_mapped = TRUE, |
|
43 |
+ get_data = FALSE, |
|
44 |
+ convert_KEGG_IDs = TRUE, |
|
45 |
+ graph_title = "default", |
|
46 |
+ tidy_edges = TRUE, |
|
47 |
+ layered_nodes = FALSE){ |
|
48 |
+ cell_lines <- c("A375","A549","ASC","HA1E","HCC515","HEK293T","HEKTE", |
|
49 |
+ "HEPG2","HT29","MCF7","NCIH716","NPC","PC3","SHSY5Y", |
|
50 |
+ "SKL","SW480","VCAP") |
|
51 |
+ if (is.na(cell_line1)){ |
|
52 |
+ warning(paste0( |
|
53 |
+ 'Please choose one of the following for cell_line1: ', |
|
54 |
+ cell_lines)) |
|
55 |
+ return() |
|
56 |
+ } |
|
57 |
+ if (is.na(cell_line2)){ |
|
58 |
+ warning(paste0('Please choose one the following for cell_line2: ', |
|
59 |
+ list(cell_lines[cell_lines != cell_line1]))) |
|
60 |
+ return() |
|
61 |
+ } |
|
62 |
+ |
|
63 |
+ if (cell_line2 == cell_line1){ |
|
64 |
+ warning(paste0('cell_line1 = cell_line2; ', |
|
65 |
+ 'please choose from the following for cell_line2: ', |
|
66 |
+ list(cell_lines[cell_lines != cell_line1]))) |
|
67 |
+ return() |
|
68 |
+ } |
|
69 |
+ |
|
70 |
+ KGML <- get_KGML(pathwayid, get_if_no_edges = TRUE) |
|
71 |
+ if(!isS4(KGML)){ |
|
72 |
+ return() |
|
73 |
+ } |
|
74 |
+ KEGG_mappings <- |
|
75 |
+ expand_KEGG_mappings(KGML,convert_KEGG_IDs = convert_KEGG_IDs) |
|
76 |
+ if (refine_by_cell_line) { |
|
77 |
+ full_mappings1 <- KEGG_mappings |
|
78 |
+ KEGG_mappings1 <- refine_mappings(KEGG_mappings, cell_line1) |
|
79 |
+ for (i in 1:nrow(full_mappings1)){ |
|
80 |
+ if(!full_mappings1$entryID[i] %in% KEGG_mappings$entryID){ |
|
81 |
+ full_mappings1$BGcolor[i] <- "#d3d3d3" |
|
82 |
+ full_mappings1$in_relationship <- 0 |
|
83 |
+ } |
|
84 |
+ } |
|
85 |
+ full_mappings2 <- KEGG_mappings |
|
86 |
+ KEGG_mappings2 <- refine_mappings(KEGG_mappings, cell_line2) |
|
87 |
+ for (i in 1:nrow(full_mappings2)){ |
|
88 |
+ if(!full_mappings2$entryID[i] %in% KEGG_mappings$entryID){ |
|
89 |
+ full_mappings2$BGcolor[i] <- "#d3d3d3" |
|
90 |
+ full_mappings2$in_relationship <- 0 |
|
91 |
+ } |
|
92 |
+ } |
|
93 |
+ } |
|
94 |
+ expanded_edges <- expand_KEGG_edges(KGML, KEGG_mappings) |
|
95 |
+ if(expanded_edges$type[1] == "dummy"){ |
|
96 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
97 |
+ KGML@pathwayInfo@title, |
|
98 |
+ "Cell-Line: ", cell_line, |
|
99 |
+ " *No Edges in Pathway") |
|
100 |
+ } |
|
101 |
+ |
|
102 |
+ if (nrow(expanded_edges[expanded_edges$type == "maplink",]) == |
|
103 |
+ nrow(expanded_edges) & only_mapped) { |
|
104 |
+ edge_map <- edge_mapping_info(expanded_edges) |
|
105 |
+ if (graph_title == "default"){ |
|
106 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
107 |
+ KGML@pathwayInfo@title, "Cell-Line: ", |
|
108 |
+ cell_line, " *No Edges in Data") |
|
109 |
+ } |
|
110 |
+ warning("All documented edges are of type 'maplink'; |
|
111 |
+ Overlap data cannot be mapped to selected pathway") |
|
112 |
+ } |
|
113 |
+ cell_data1 <- overlap_info(KGML, KEGG_mappings, cell_line1, |
|
114 |
+ data_type = data_type, |
|
115 |
+ pert_time = pert_time) |
|
116 |
+ |
|
117 |
+ cell_data2 <- overlap_info(KGML, KEGG_mappings, cell_line2, |
|
118 |
+ data_type = data_type, |
|
119 |
+ pert_time = pert_time) |
|
120 |
+ |
|
121 |
+ for (i in 1:nrow(cell_data1)){ |
|
122 |
+ UP <- cell_data1$UP[i] + 0.5 |
|
123 |
+ DOWN <- cell_data1$DOWN[i] + 0.5 |
|
124 |
+ UK1_DK2 <- cell_data1$UK1_DK2[i] + 0.5 |
|
125 |
+ DK1_UK2 <- cell_data1$DK1_UK2[i] + 0.5 |
|
126 |
+ cell_data1$OR_1[i] <- (UP*DOWN)/(UK1_DK2*DK1_UK2) |
|
127 |
+ cell_data1$SE_1[i] <-sqrt(1/UP + 1/DOWN + 1/UK1_DK2 + 1/DK1_UK2) |
|
128 |
+ cell_data1$log_OR_1[i] <- log(cell_data1$OR_1[i]) |
|
129 |
+ } |
|
130 |
+ |
|
131 |
+ for (i in 1:nrow(cell_data2)){ |
|
132 |
+ UP <- cell_data2$UP[i] + 0.5 |
|
133 |
+ DOWN <- cell_data2$DOWN[i] + 0.5 |
|
134 |
+ UK1_DK2 <- cell_data2$UK1_DK2[i] + 0.5 |
|
135 |
+ DK1_UK2 <- cell_data2$DK1_UK2[i] + 0.5 |
|
136 |
+ cell_data2$OR_2[i] <- (UP*DOWN)/(UK1_DK2*DK1_UK2) |
|
137 |
+ cell_data2$SE_2[i] <-sqrt(1/UP + 1/DOWN + 1/UK1_DK2 + 1/DK1_UK2) |
|
138 |
+ cell_data2$log_OR_2[i] <- log(cell_data2$OR_2[i]) |
|
139 |
+ } |
|
140 |
+ |
|
141 |
+ if (!is.na(cell_data1)[1,1] & !is.na(cell_data2)[1,1]) { |
|
142 |
+ edges_plus_data1 <- add_edge_data(expanded_edges, KEGG_mappings, |
|
143 |
+ cell_data1, c(15,16), |
|
144 |
+ only_mapped = only_mapped) |
|
145 |
+ edges_plus_data2 <- add_edge_data(expanded_edges, KEGG_mappings, |
|
146 |
+ cell_data2, c(15,16), |
|
147 |
+ only_mapped = only_mapped) |
|
148 |
+ edges_plus_data1$unique_ID <- paste0(edges_plus_data1$entry1symbol, ":", edges_plus_data1$entry2symbol, ":", edges_plus_data1$edgeID) |
|
149 |
+ edges_plus_data2$unique_ID <- paste0(edges_plus_data2$entry1symbol, ":", edges_plus_data2$entry2symbol, ":", edges_plus_data2$edgeID) |
|
150 |
+ |
|
151 |
+ edges_compare <- merge(edges_plus_data1, edges_plus_data2[,c(18,19,21)], by = "unique_ID") |
|
152 |
+ |
|
153 |
+ for(i in 1:nrow(edges_compare)){ |
|
154 |
+ if (!is.na(edges_compare$log_OR_1[i]) & !is.na(edges_compare$log_OR_2[i])){ |
|
155 |
+ edges_compare$test[i] <- (edges_compare$log_OR_1[i] - edges_compare$log_OR_2[i])/ |
|
156 |
+ sqrt(edges_compare$SE_1[i]^2 + edges_compare$SE_2[i]^2) |
|
157 |
+ edges_compare$summary_score[i] <- exp(abs(edges_compare$test[i])) |
|
158 |
+ |
|
159 |
+ |
|
160 |
+ if (edges_compare$test[i] <= qnorm(0.1) | edges_compare$test[i] >= qnorm(0.9)){ |
|
161 |
+ edges_compare$significant[i] <- 1 |
|
162 |
+ } |
|
163 |
+ else { |
|
164 |
+ edges_compare$significant[i] <- 0 |
|
165 |
+ } |
|
166 |
+ } |
|
167 |
+ else { |
|
168 |
+ edges_compare$test[i] <- NA |
|
169 |
+ edges_compare$summary_score[i] <- 0 |
|
170 |
+ edges_compare$significant[i] <- NA |
|
171 |
+ } |
|
172 |
+ } |
|
173 |
+ |
|
174 |
+ edge_map <- edge_mapping_info(edges_compare, data_added = TRUE) |
|
175 |
+ for (i in 1:nrow(edge_map)){ |
|
176 |
+ if (is.na(edge_map$test[i])){ |
|
177 |
+ edge_map$has_data[i] <- 0 |
|
178 |
+ edge_map$color[i] <- "#808080" |
|
179 |
+ } |
|
180 |
+ } |
|
181 |
+ if ("premapped" %in% names (edge_map)) { |
|
182 |
+ premapped <- edge_map$premapped |
|
183 |
+ drop <- "premapped" |
|
184 |
+ edge_map <- edge_map[, ! names(edge_map) %in% drop] |
|
185 |
+ edge_map$premapped <- premapped |
|
186 |
+ } |
|
187 |
+ for (i in 1:nrow(edge_map)){ |
|
188 |
+ if (!is.na(edge_map$log_OR_1[i]) & |
|
189 |
+ !is.na(edge_map$log_OR_2[i])){ |
|
190 |
+ if (edge_map$test[i] < 0){ |
|
191 |
+ if (edge_map$significant[i] == 0){ |
|
192 |
+ edge_map$color[i] <- "#82E0AA" |
|
193 |
+ } |
|
194 |
+ if (edge_map$significant[i] == 1){ |
|
195 |
+ edge_map$color[i] <- "#1E8449" |
|
196 |
+ } |
|
197 |
+ } |
|
198 |
+ if (edge_map$test[i] > 0){ |
|
199 |
+ if (edge_map$significant[i] == 0){ |
|
200 |
+ edge_map$color[i] <- "#E67E22" |
|
201 |
+ } |
|
202 |
+ if (edge_map$significant[i] == 1){ |
|
203 |
+ edge_map$color[i] <- "#BA4A00" |
|
204 |
+ } |
|
205 |
+ } |
|
206 |
+ } |
|
207 |
+ } |
|
208 |
+ if (tidy_edges == TRUE) { |
|
209 |
+ edge_IDs <- seq(min(edge_map$edgeID), max(edge_map$edgeID)) |
|
210 |
+ for (i in edge_IDs){ |
|
211 |
+ edge_map <- tidy_edge(edges = edge_map, |
|
212 |
+ edge_id = edge_IDs[i], |
|
213 |
+ by_significance = TRUE) |
|
214 |
+ } |
|
215 |
+ } |
|
216 |
+ |
|
217 |
+ if (graph_title == "default"){ |
|
218 |
+ graph_title <- paste0("Pathway = ", pathwayid, ":", |
|
219 |
+ KGML@pathwayInfo@title, " - ", |
|
220 |
+ cell_line1 ,"vs", cell_line2, ", |
|
221 |
+ Data type: ", data_type) |
|
222 |
+ } |
|
223 |
+ } |
|
224 |
+ |
|
225 |
+ node_map <- node_mapping_info(KEGG_mappings) |
|
226 |
+ |
|
227 |
+ graph_object <- get_graph_object(node_map, edge_map, |
|
228 |
+ layered_nodes = layered_nodes) |
|
229 |
+ |
|
230 |
+ edge_width_attribute = "summary_score" |
|
231 |
+ |
|
232 |
+ if (edge_width_attribute %in% names(igraph::edge_attr(graph_object))){ |
|
233 |
+ min.summary_score <- min(abs(igraph::E(graph_object)$summary_score), |
|
234 |
+ na.rm = TRUE) |
|
235 |
+ max.summary_score <- max(abs(igraph::E(graph_object)$summary_score), |
|
236 |
+ na.rm = TRUE) |
|
237 |
+ map_edge_width <- TRUE |
|
238 |
+ } |
|
239 |
+ |
|
240 |
+ cyto_vis(graph_object, title = graph_title, |
|
241 |
+ edge_width_attribute = "summary_score") |
|
242 |
+ if(get_data){ |
|
243 |
+ return(edge_map) |
|
244 |
+ } |
|
245 |
+ } |
0 | 246 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,64 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/KL_compare.R |
|
3 |
+\name{KL_compare} |
|
4 |
+\alias{KL_compare} |
|
5 |
+\title{Combines all other package functions for one-step cell line comparison} |
|
6 |
+\usage{ |
|
7 |
+KL_compare(pathwayid, cell_line1 = NA, cell_line2 = NA, |
|
8 |
+ refine_by_cell_line = TRUE, add_L1000_edge_data = TRUE, |
|
9 |
+ significance_markup = TRUE, data_type = "100_full", pert_time = 96, |
|
10 |
+ only_mapped = TRUE, get_data = FALSE, convert_KEGG_IDs = TRUE, |
|
11 |
+ graph_title = "default", tidy_edges = TRUE, layered_nodes = FALSE) |
|
12 |
+} |
|
13 |
+\arguments{ |
|
14 |
+\item{pathwayid}{A KEGG pathway ID of the form "hsa12345" |
|
15 |
+(only human pathways currently)} |
|
16 |
+ |
|
17 |
+\item{cell_line1}{Choose from the set of cell lines: |
|
18 |
+(A375,A549,ASC,HA1E,HCC515,HEK293T,HEKTE,HEPG2,HT29,MCF7,NCIH716,NPC,PC3, |
|
19 |
+SHSY5Y,SKL,SW480,VCAP)} |
|
20 |
+ |
|
21 |
+\item{cell_line2}{A cell line such that cell_line1 != cell_line2} |
|
22 |
+ |
|
23 |
+\item{refine_by_cell_line}{A logical indicator} |
|
24 |
+ |
|
25 |
+\item{add_L1000_edge_data}{A logical indicator} |
|
26 |
+ |
|
27 |
+\item{significance_markup}{A logical indicator; if set to TRUE will color |
|
28 |
+edges based on direction and significance of correlation (as determined by |
|
29 |
+user-data-analysis)} |
|
30 |
+ |
|
31 |
+\item{data_type}{Choose from data types: (100_full, 100_bing, 50_lm)} |
|
32 |
+ |
|
33 |
+\item{pert_time}{Choose from (6,24,48,96,120,144,168)} |
|
34 |
+ |
|
35 |
+\item{only_mapped}{A logical indicator; if set to FALSE will return 'de-novo' |
|
36 |
+edges that 'exist' in data but are not documented in KEGG} |
|
37 |
+ |
|
38 |
+\item{get_data}{A logical indicator; if set to true, will return the |
|
39 |
+'expanded' edge information for the specified pathway} |
|
40 |
+ |
|
41 |
+\item{convert_KEGG_IDs}{A logical indicator; if set to TRUE KEGG |
|
42 |
+compounds will remain labeled via KEGG codes (do not need KEGGREST)} |
|
43 |
+ |
|
44 |
+\item{graph_title}{An optional user-specified graph title} |
|
45 |
+ |
|
46 |
+\item{layered_nodes}{A logical indicator; if set to TRUE will create a graph |
|
47 |
+with 'stacked' nodes that the user can manipulate when multiple nodes are |
|
48 |
+mapped to one location} |
|
49 |
+} |
|
50 |
+\value{ |
|
51 |
+A dynamic map in Cytoscape automatically formatted for convenient |
|
52 |
+viewing and, if idicated by user, a data.frame object with detailed |
|
53 |
+information for 'expanded' KEGG edges |
|
54 |
+} |
|
55 |
+\description{ |
|
56 |
+Combines all other package functions for one-step cell line comparison |
|
57 |
+} |
|
58 |
+\examples{ |
|
59 |
+\dontrun{ |
|
60 |
+ |
|
61 |
+# Compare p53 pathway between cell lines A375 and A549: |
|
62 |
+KL_compare("hsa04115", "A375", "A549") |
|
63 |
+} |
|
64 |
+} |