Browse code

Add function KL_compare

Shana White authored on 05/09/2017 17:43:01
Showing 3 changed files

... ...
@@ -1,6 +1,7 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3 3
 export(KEGG_lincs)
4
+export(KL_compare)
4 5
 export(add_KEGG_drugs)
5 6
 export(add_edge_data)
6 7
 export(cyto_vis)
7 8
new file mode 100644
... ...
@@ -0,0 +1,245 @@
1
+#' Combines all other package functions for one-step cell line comparison
2
+#' @export
3
+#' @param pathwayid A KEGG pathway ID of the form "hsa12345" 
4
+#' (only human pathways currently)
5
+#' @param cell_line1 Choose from the set of cell lines: 
6
+#' (A375,A549,ASC,HA1E,HCC515,HEK293T,HEKTE,HEPG2,HT29,MCF7,NCIH716,NPC,PC3,
7
+#' SHSY5Y,SKL,SW480,VCAP)
8
+#' @param cell_line2 A cell line such that cell_line1 != cell_line2
9
+#' @param refine_by_cell_line A logical indicator
10
+#' @param add_L1000_edge_data A logical indicator 
11
+#' @param data_type Choose from data types: (100_full, 100_bing, 50_lm)
12
+#' @param pert_time Choose from (6,24,48,96,120,144,168)
13
+#' @param only_mapped A logical indicator; if set to FALSE will return 'de-novo'
14
+#' edges that 'exist' in data but are not documented in KEGG
15
+#' @param significance_markup A logical indicator; if set to TRUE will color
16
+#'  edges based on direction and significance of correlation (as determined by 
17
+#'  user-data-analysis)
18
+#' @param layered_nodes A logical indicator; if set to TRUE will create a graph 
19
+#' with 'stacked' nodes that the user can manipulate when multiple nodes are 
20
+#' mapped to one location
21
+#' @param graph_title An optional user-specified graph title
22
+#' @param get_data A logical indicator; if set to true, will return the 
23
+#' 'expanded' edge information for the specified pathway
24
+#' @param convert_KEGG_IDs A logical indicator; if set to TRUE KEGG 
25
+#' compounds will remain labeled via KEGG codes (do not need KEGGREST)
26
+#' @return  A dynamic map in Cytoscape automatically formatted for convenient 
27
+#' viewing and, if idicated by user, a data.frame object with detailed 
28
+#' information for 'expanded' KEGG edges
29
+#' @examples \dontrun{ 
30
+#' 
31
+#' # Compare p53 pathway between cell lines A375 and A549:
32
+#' KL_compare("hsa04115", "A375", "A549")
33
+#'}
34
+
35
+KL_compare <-
36
+    function(pathwayid, cell_line1 = NA, cell_line2 = NA,
37
+             refine_by_cell_line = TRUE,
38
+             add_L1000_edge_data = TRUE,  
39
+             significance_markup = TRUE,
40
+             data_type = "100_full",
41
+             pert_time = 96,
42
+             only_mapped = TRUE,
43
+             get_data = FALSE,
44
+             convert_KEGG_IDs = TRUE,
45
+             graph_title = "default",
46
+             tidy_edges = TRUE,
47
+             layered_nodes = FALSE){
48
+        cell_lines <- c("A375","A549","ASC","HA1E","HCC515","HEK293T","HEKTE",
49
+                        "HEPG2","HT29","MCF7","NCIH716","NPC","PC3","SHSY5Y",
50
+                        "SKL","SW480","VCAP")
51
+        if (is.na(cell_line1)){
52
+            warning(paste0(
53
+                'Please choose one of the following for cell_line1: ', 
54
+                cell_lines))
55
+            return()
56
+        }
57
+        if (is.na(cell_line2)){
58
+            warning(paste0('Please choose one the following for cell_line2: ',
59
+                           list(cell_lines[cell_lines != cell_line1])))
60
+            return()
61
+        }
62
+        
63
+        if (cell_line2 == cell_line1){
64
+            warning(paste0('cell_line1 = cell_line2; ', 
65
+                           'please choose from the following for cell_line2: ',
66
+                           list(cell_lines[cell_lines != cell_line1])))
67
+            return()
68
+        }
69
+        
70
+        KGML <- get_KGML(pathwayid, get_if_no_edges = TRUE)
71
+        if(!isS4(KGML)){
72
+            return()
73
+        }
74
+        KEGG_mappings <- 
75
+            expand_KEGG_mappings(KGML,convert_KEGG_IDs = convert_KEGG_IDs)
76
+        if (refine_by_cell_line) {
77
+            full_mappings1 <- KEGG_mappings
78
+            KEGG_mappings1 <- refine_mappings(KEGG_mappings, cell_line1)
79
+            for (i in 1:nrow(full_mappings1)){
80
+                if(!full_mappings1$entryID[i] %in% KEGG_mappings$entryID){
81
+                    full_mappings1$BGcolor[i] <- "#d3d3d3"
82
+                    full_mappings1$in_relationship <- 0
83
+                }
84
+            }
85
+            full_mappings2 <- KEGG_mappings
86
+            KEGG_mappings2 <- refine_mappings(KEGG_mappings, cell_line2)
87
+            for (i in 1:nrow(full_mappings2)){
88
+                if(!full_mappings2$entryID[i] %in% KEGG_mappings$entryID){
89
+                    full_mappings2$BGcolor[i] <- "#d3d3d3"
90
+                    full_mappings2$in_relationship <- 0
91
+                }
92
+            }
93
+        }
94
+        expanded_edges <- expand_KEGG_edges(KGML, KEGG_mappings)
95
+        if(expanded_edges$type[1] == "dummy"){
96
+            graph_title <- paste0("Pathway = ", pathwayid, ":", 
97
+                                  KGML@pathwayInfo@title,
98
+                                  "Cell-Line: ", cell_line, 
99
+                                  "  *No Edges in Pathway")
100
+        }
101
+        
102
+        if (nrow(expanded_edges[expanded_edges$type == "maplink",]) == 
103
+            nrow(expanded_edges) & only_mapped) {
104
+            edge_map <- edge_mapping_info(expanded_edges)
105
+            if (graph_title == "default"){
106
+                graph_title <- paste0("Pathway = ", pathwayid, ":", 
107
+                                      KGML@pathwayInfo@title, "Cell-Line: ", 
108
+                                      cell_line,  "  *No Edges in Data")
109
+            }
110
+            warning("All documented edges are of type 'maplink'; 
111
+                    Overlap data cannot be mapped to selected pathway")
112
+        }
113
+        cell_data1 <- overlap_info(KGML, KEGG_mappings, cell_line1, 
114
+                                   data_type = data_type, 
115
+                                   pert_time = pert_time)
116
+        
117
+        cell_data2 <- overlap_info(KGML, KEGG_mappings, cell_line2, 
118
+                                   data_type = data_type, 
119
+                                   pert_time = pert_time)
120
+        
121
+        for (i in 1:nrow(cell_data1)){
122
+            UP <- cell_data1$UP[i] + 0.5
123
+            DOWN <- cell_data1$DOWN[i] + 0.5
124
+            UK1_DK2 <- cell_data1$UK1_DK2[i] + 0.5
125
+            DK1_UK2 <- cell_data1$DK1_UK2[i] + 0.5
126
+            cell_data1$OR_1[i] <- (UP*DOWN)/(UK1_DK2*DK1_UK2)
127
+            cell_data1$SE_1[i] <-sqrt(1/UP + 1/DOWN + 1/UK1_DK2 + 1/DK1_UK2)
128
+            cell_data1$log_OR_1[i] <- log(cell_data1$OR_1[i])
129
+        }
130
+        
131
+        for (i in 1:nrow(cell_data2)){
132
+            UP <- cell_data2$UP[i] + 0.5
133
+            DOWN <- cell_data2$DOWN[i] + 0.5
134
+            UK1_DK2 <- cell_data2$UK1_DK2[i] + 0.5
135
+            DK1_UK2 <- cell_data2$DK1_UK2[i] + 0.5
136
+            cell_data2$OR_2[i] <- (UP*DOWN)/(UK1_DK2*DK1_UK2)
137
+            cell_data2$SE_2[i] <-sqrt(1/UP + 1/DOWN + 1/UK1_DK2 + 1/DK1_UK2)
138
+            cell_data2$log_OR_2[i] <- log(cell_data2$OR_2[i])
139
+        }
140
+        
141
+        if (!is.na(cell_data1)[1,1] & !is.na(cell_data2)[1,1]) {
142
+            edges_plus_data1 <- add_edge_data(expanded_edges, KEGG_mappings, 
143
+                                              cell_data1, c(15,16), 
144
+                                              only_mapped = only_mapped)
145
+            edges_plus_data2 <- add_edge_data(expanded_edges, KEGG_mappings, 
146
+                                              cell_data2, c(15,16), 
147
+                                              only_mapped = only_mapped)
148
+            edges_plus_data1$unique_ID <- paste0(edges_plus_data1$entry1symbol, ":", edges_plus_data1$entry2symbol, ":", edges_plus_data1$edgeID)
149
+            edges_plus_data2$unique_ID <- paste0(edges_plus_data2$entry1symbol, ":", edges_plus_data2$entry2symbol, ":", edges_plus_data2$edgeID)
150
+            
151
+            edges_compare <- merge(edges_plus_data1, edges_plus_data2[,c(18,19,21)], by = "unique_ID")
152
+            
153
+            for(i in 1:nrow(edges_compare)){
154
+                if (!is.na(edges_compare$log_OR_1[i]) & !is.na(edges_compare$log_OR_2[i])){
155
+                    edges_compare$test[i] <- (edges_compare$log_OR_1[i] - edges_compare$log_OR_2[i])/
156
+                        sqrt(edges_compare$SE_1[i]^2 + edges_compare$SE_2[i]^2)
157
+                    edges_compare$summary_score[i] <- exp(abs(edges_compare$test[i]))
158
+                    
159
+                    
160
+                    if (edges_compare$test[i] <= qnorm(0.1) | edges_compare$test[i] >= qnorm(0.9)){
161
+                        edges_compare$significant[i] <- 1
162
+                    }
163
+                    else {
164
+                        edges_compare$significant[i] <- 0
165
+                    }
166
+                }
167
+                else {
168
+                    edges_compare$test[i] <- NA
169
+                    edges_compare$summary_score[i] <- 0
170
+                    edges_compare$significant[i] <- NA
171
+                }
172
+            }
173
+            
174
+            edge_map <- edge_mapping_info(edges_compare, data_added = TRUE)
175
+            for (i in 1:nrow(edge_map)){
176
+                if (is.na(edge_map$test[i])){
177
+                    edge_map$has_data[i] <- 0
178
+                    edge_map$color[i] <- "#808080"
179
+                }
180
+            }
181
+            if ("premapped" %in% names (edge_map)) {
182
+                premapped <- edge_map$premapped
183
+                drop <- "premapped"
184
+                edge_map <- edge_map[, ! names(edge_map) %in% drop]
185
+                edge_map$premapped <- premapped
186
+            }
187
+            for (i in 1:nrow(edge_map)){
188
+                if (!is.na(edge_map$log_OR_1[i]) & 
189
+                    !is.na(edge_map$log_OR_2[i])){
190
+                    if (edge_map$test[i] < 0){
191
+                        if (edge_map$significant[i] == 0){
192
+                            edge_map$color[i] <- "#82E0AA"
193
+                        }
194
+                        if (edge_map$significant[i] == 1){
195
+                            edge_map$color[i] <- "#1E8449"
196
+                        }
197
+                    }
198
+                    if (edge_map$test[i] > 0){
199
+                        if (edge_map$significant[i] == 0){
200
+                            edge_map$color[i] <- "#E67E22"
201
+                        }
202
+                        if (edge_map$significant[i] == 1){
203
+                            edge_map$color[i] <- "#BA4A00"
204
+                        }
205
+                    }
206
+                }
207
+            }
208
+            if (tidy_edges == TRUE) {
209
+                edge_IDs <- seq(min(edge_map$edgeID), max(edge_map$edgeID))
210
+                for (i in edge_IDs){
211
+                    edge_map <- tidy_edge(edges = edge_map, 
212
+                                          edge_id = edge_IDs[i],
213
+                                          by_significance = TRUE)
214
+                }
215
+            }
216
+            
217
+            if (graph_title == "default"){
218
+                graph_title <- paste0("Pathway = ", pathwayid, ":",
219
+                                      KGML@pathwayInfo@title, " - ",
220
+                                      cell_line1 ,"vs", cell_line2, ", 
221
+                                      Data type: ", data_type)
222
+            }
223
+            }
224
+        
225
+        node_map <- node_mapping_info(KEGG_mappings)
226
+        
227
+        graph_object <- get_graph_object(node_map, edge_map, 
228
+                                         layered_nodes = layered_nodes)
229
+        
230
+        edge_width_attribute = "summary_score"
231
+        
232
+        if (edge_width_attribute %in% names(igraph::edge_attr(graph_object))){
233
+            min.summary_score <- min(abs(igraph::E(graph_object)$summary_score),
234
+                                     na.rm = TRUE)
235
+            max.summary_score <- max(abs(igraph::E(graph_object)$summary_score), 
236
+                                     na.rm = TRUE)
237
+            map_edge_width <- TRUE
238
+        }
239
+        
240
+        cyto_vis(graph_object, title = graph_title, 
241
+                 edge_width_attribute = "summary_score")
242
+        if(get_data){
243
+            return(edge_map)
244
+        }
245
+    }
0 246
new file mode 100644
... ...
@@ -0,0 +1,64 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/KL_compare.R
3
+\name{KL_compare}
4
+\alias{KL_compare}
5
+\title{Combines all other package functions for one-step cell line comparison}
6
+\usage{
7
+KL_compare(pathwayid, cell_line1 = NA, cell_line2 = NA,
8
+  refine_by_cell_line = TRUE, add_L1000_edge_data = TRUE,
9
+  significance_markup = TRUE, data_type = "100_full", pert_time = 96,
10
+  only_mapped = TRUE, get_data = FALSE, convert_KEGG_IDs = TRUE,
11
+  graph_title = "default", tidy_edges = TRUE, layered_nodes = FALSE)
12
+}
13
+\arguments{
14
+\item{pathwayid}{A KEGG pathway ID of the form "hsa12345" 
15
+(only human pathways currently)}
16
+
17
+\item{cell_line1}{Choose from the set of cell lines: 
18
+(A375,A549,ASC,HA1E,HCC515,HEK293T,HEKTE,HEPG2,HT29,MCF7,NCIH716,NPC,PC3,
19
+SHSY5Y,SKL,SW480,VCAP)}
20
+
21
+\item{cell_line2}{A cell line such that cell_line1 != cell_line2}
22
+
23
+\item{refine_by_cell_line}{A logical indicator}
24
+
25
+\item{add_L1000_edge_data}{A logical indicator}
26
+
27
+\item{significance_markup}{A logical indicator; if set to TRUE will color
28
+edges based on direction and significance of correlation (as determined by 
29
+user-data-analysis)}
30
+
31
+\item{data_type}{Choose from data types: (100_full, 100_bing, 50_lm)}
32
+
33
+\item{pert_time}{Choose from (6,24,48,96,120,144,168)}
34
+
35
+\item{only_mapped}{A logical indicator; if set to FALSE will return 'de-novo'
36
+edges that 'exist' in data but are not documented in KEGG}
37
+
38
+\item{get_data}{A logical indicator; if set to true, will return the 
39
+'expanded' edge information for the specified pathway}
40
+
41
+\item{convert_KEGG_IDs}{A logical indicator; if set to TRUE KEGG 
42
+compounds will remain labeled via KEGG codes (do not need KEGGREST)}
43
+
44
+\item{graph_title}{An optional user-specified graph title}
45
+
46
+\item{layered_nodes}{A logical indicator; if set to TRUE will create a graph 
47
+with 'stacked' nodes that the user can manipulate when multiple nodes are 
48
+mapped to one location}
49
+}
50
+\value{
51
+A dynamic map in Cytoscape automatically formatted for convenient 
52
+viewing and, if idicated by user, a data.frame object with detailed 
53
+information for 'expanded' KEGG edges
54
+}
55
+\description{
56
+Combines all other package functions for one-step cell line comparison
57
+}
58
+\examples{
59
+\dontrun{ 
60
+
61
+# Compare p53 pathway between cell lines A375 and A549:
62
+KL_compare("hsa04115", "A375", "A549")
63
+}
64
+}