Browse code

gtl parameter exposed in most other functions, even if requiring only one input

Federico Marini authored on 22/06/2021 14:28:23
Showing1 changed files
... ...
@@ -5,6 +5,9 @@
5 5
 #' @param res_enrich A `data.frame` object, storing the result of the functional
6 6
 #' enrichment analysis. See more in the main function, [GeneTonic()], to check the
7 7
 #' formatting requirements (a minimal set of columns should be present).
8
+#' @param gtl A `GeneTonic`-list object, containing in its slots the arguments
9
+#' specified above: `dds`, `res_de`, `res_enrich`, and `annotation_obj` - the names
10
+#' of the list _must_ be specified following the content they are expecting
8 11
 #' @param n_gs Integer value, corresponding to the maximal number of gene sets to
9 12
 #' be displayed
10 13
 #' @param p_value_column Character string, specifying the column of `res_enrich`
... ...
@@ -59,14 +62,23 @@
59 62
 #' gs_summary_overview(res_enrich)
60 63
 #'
61 64
 #' # if desired, it can also be shown as a barplot
62
-#' gs_summary_overview(res_enrich, 30, return_barchart = TRUE)
65
+#' gs_summary_overview(res_enrich, n_gs = 30, return_barchart = TRUE)
63 66
 gs_summary_overview <- function(res_enrich,
67
+                                gtl = NULL,
64 68
                                 n_gs = 20,
65 69
                                 p_value_column = "gs_pvalue",
66 70
                                 color_by = "z_score",
67 71
                                 return_barchart = FALSE
68 72
                                 # , size_by = "gs_de_count"
69 73
 ) {
74
+  if (!is.null(gtl)) {
75
+    checkup_gtl(gtl)
76
+    dds <- gtl$dds
77
+    res_de <- gtl$res_de
78
+    res_enrich <- gtl$res_enrich
79
+    annotation_obj <- gtl$annotation_obj
80
+  }
81
+  
70 82
   if (!is.null(color_by)) {
71 83
     if (!(color_by %in% colnames(res_enrich))) {
72 84
       stop(
Browse code

one full round of styler on the codebase, also re-rendering manpages

Federico Marini authored on 27/05/2021 11:22:48
Showing1 changed files
... ...
@@ -13,7 +13,7 @@
13 13
 #' p-value - have been specified).
14 14
 #' @param color_by Character, specifying the column of `res_enrich` to be used
15 15
 #' for coloring the plotted gene sets. Defaults sensibly to `z_score`.
16
-#' @param return_barchart Logical, whether to return a barchart (instead of the 
16
+#' @param return_barchart Logical, whether to return a barchart (instead of the
17 17
 #' default dot-segment plot); defaults to FALSE.
18 18
 #'
19 19
 #' @return A `ggplot` object
... ...
@@ -30,7 +30,7 @@
30 30
 #'
31 31
 #' # dds object
32 32
 #' data("gse", package = "macrophage")
33
-#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
33
+#' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
34 34
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
35 35
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
36 36
 #'
... ...
@@ -38,9 +38,10 @@
38 38
 #' anno_df <- data.frame(
39 39
 #'   gene_id = rownames(dds_macrophage),
40 40
 #'   gene_name = mapIds(org.Hs.eg.db,
41
-#'                      keys = rownames(dds_macrophage),
42
-#'                      column = "SYMBOL",
43
-#'                      keytype = "ENSEMBL"),
41
+#'     keys = rownames(dds_macrophage),
42
+#'     column = "SYMBOL",
43
+#'     keytype = "ENSEMBL"
44
+#'   ),
44 45
 #'   stringsAsFactors = FALSE,
45 46
 #'   row.names = rownames(dds_macrophage)
46 47
 #' )
... ...
@@ -56,7 +57,7 @@
56 57
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
57 58
 #'
58 59
 #' gs_summary_overview(res_enrich)
59
-#' 
60
+#'
60 61
 #' # if desired, it can also be shown as a barplot
61 62
 #' gs_summary_overview(res_enrich, 30, return_barchart = TRUE)
62 63
 gs_summary_overview <- function(res_enrich,
... ...
@@ -65,13 +66,15 @@ gs_summary_overview <- function(res_enrich,
65 66
                                 color_by = "z_score",
66 67
                                 return_barchart = FALSE
67 68
                                 # , size_by = "gs_de_count"
68
-                                ) {
69
+) {
69 70
   if (!is.null(color_by)) {
70 71
     if (!(color_by %in% colnames(res_enrich))) {
71
-      stop("Your res_enrich object does not contain the ",
72
-           color_by,
73
-           " column.\n",
74
-           "Compute this first or select another column to use for the color.")
72
+      stop(
73
+        "Your res_enrich object does not contain the ",
74
+        color_by,
75
+        " column.\n",
76
+        "Compute this first or select another column to use for the color."
77
+      )
75 78
     }
76 79
   }
77 80
 
... ...
@@ -82,42 +85,48 @@ gs_summary_overview <- function(res_enrich,
82 85
   re_sorted <- re %>%
83 86
     arrange(.data$logp10) %>%
84 87
     mutate(gs_description = factor(.data$gs_description, .data$gs_description))
85
-  
88
+
86 89
   if (return_barchart) {
87
-    p <- ggplot(re_sorted, (aes_string(x = "gs_description", y = "logp10"))) 
88
-    
90
+    p <- ggplot(re_sorted, (aes_string(x = "gs_description", y = "logp10")))
91
+
89 92
     if (is.null(color_by)) {
90 93
       p <- p + geom_col()
91 94
     } else {
92
-      p <- p + geom_col(aes(fill = .data[[color_by]])) + 
95
+      p <- p + geom_col(aes(fill = .data[[color_by]])) +
93 96
         scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026")
94 97
     }
95
-    
98
+
96 99
     p <- p +
97
-      coord_flip() + 
98
-      labs(x = "Gene set description",
99
-           y = "-log10 p-value",
100
-           col = color_by) +
100
+      coord_flip() +
101
+      labs(
102
+        x = "Gene set description",
103
+        y = "-log10 p-value",
104
+        col = color_by
105
+      ) +
101 106
       theme_minimal()
102 107
   } else {
103 108
     p <- ggplot(re_sorted, (aes_string(x = "gs_description", y = "logp10"))) +
104
-      geom_segment(aes_string(x = "gs_description", 
105
-                              xend = "gs_description", 
106
-                              y = 0, 
107
-                              yend = "logp10"), color = "grey")
108
-    
109
+      geom_segment(aes_string(
110
+        x = "gs_description",
111
+        xend = "gs_description",
112
+        y = 0,
113
+        yend = "logp10"
114
+      ), color = "grey")
115
+
109 116
     if (is.null(color_by)) {
110 117
       p <- p + geom_point(size = 4)
111 118
     } else {
112 119
       p <- p + geom_point(aes(col = .data[[color_by]]), size = 4) +
113 120
         scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026")
114 121
     }
115
-    
122
+
116 123
     p <- p +
117 124
       coord_flip() +
118
-      labs(x = "Gene set description",
119
-           y = "-log10 p-value",
120
-           col = color_by) +
125
+      labs(
126
+        x = "Gene set description",
127
+        y = "-log10 p-value",
128
+        col = color_by
129
+      ) +
121 130
       theme_minimal()
122 131
   }
123 132
   return(p)
... ...
@@ -158,7 +167,7 @@ gs_summary_overview <- function(res_enrich,
158 167
 #'
159 168
 #' # dds object
160 169
 #' data("gse", package = "macrophage")
161
-#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
170
+#' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
162 171
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
163 172
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
164 173
 #'
... ...
@@ -166,9 +175,10 @@ gs_summary_overview <- function(res_enrich,
166 175
 #' anno_df <- data.frame(
167 176
 #'   gene_id = rownames(dds_macrophage),
168 177
 #'   gene_name = mapIds(org.Hs.eg.db,
169
-#'                      keys = rownames(dds_macrophage),
170
-#'                      column = "SYMBOL",
171
-#'                      keytype = "ENSEMBL"),
178
+#'     keys = rownames(dds_macrophage),
179
+#'     column = "SYMBOL",
180
+#'     keytype = "ENSEMBL"
181
+#'   ),
172 182
 #'   stringsAsFactors = FALSE,
173 183
 #'   row.names = rownames(dds_macrophage)
174 184
 #' )
... ...
@@ -189,8 +199,10 @@ gs_summary_overview <- function(res_enrich,
189 199
 #' res_enrich2$z_score <- res_enrich2$z_score[shuffled_ones]
190 200
 #' res_enrich2$aggr_score <- res_enrich2$aggr_score[shuffled_ones]
191 201
 #' # ideally, I would also permute the z scores and aggregated scores
192
-#' gs_summary_overview_pair(res_enrich = res_enrich,
193
-#'                          res_enrich2 = res_enrich2)
202
+#' gs_summary_overview_pair(
203
+#'   res_enrich = res_enrich,
204
+#'   res_enrich2 = res_enrich2
205
+#' )
194 206
 gs_summary_overview_pair <- function(res_enrich,
195 207
                                      res_enrich2,
196 208
                                      n_gs = 20,
... ...
@@ -198,17 +210,21 @@ gs_summary_overview_pair <- function(res_enrich,
198 210
                                      color_by = "z_score",
199 211
                                      alpha_set2 = 1) {
200 212
   if (!(color_by %in% colnames(res_enrich))) {
201
-    stop("Your res_enrich object does not contain the ",
202
-         color_by,
203
-         " column.\n",
204
-         "Compute this first or select another column to use for the color.")
213
+    stop(
214
+      "Your res_enrich object does not contain the ",
215
+      color_by,
216
+      " column.\n",
217
+      "Compute this first or select another column to use for the color."
218
+    )
205 219
   }
206 220
   # same for set2
207 221
   if (!(color_by %in% colnames(res_enrich2))) {
208
-    stop("Your res_enrich object does not contain the ",
209
-         color_by,
210
-         " column.\n",
211
-         "Compute this first or select another column to use for the color.")
222
+    stop(
223
+      "Your res_enrich object does not contain the ",
224
+      color_by,
225
+      " column.\n",
226
+      "Compute this first or select another column to use for the color."
227
+    )
212 228
   }
213 229
 
214 230
   gs_set1 <- res_enrich$gs_id
... ...
@@ -241,13 +257,16 @@ gs_summary_overview_pair <- function(res_enrich,
241 257
     geom_segment(aes_string(x = "gs_description", xend = "gs_description", y = "logp10_2", yend = "logp10"), color = "grey") +
242 258
     geom_point(aes(fill = .data[[color_by]]), size = 4, pch = 21) +
243 259
     geom_point(aes_string(y = "logp10_2", col = paste0(color_by, "_2")),
244
-               size = 4, alpha = alpha_set2) +
260
+      size = 4, alpha = alpha_set2
261
+    ) +
245 262
     scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", name = paste0(color_by, " set 2")) +
246 263
     scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", name = paste0(color_by, " set 1")) +
247 264
     coord_flip() +
248
-    labs(x = "Gene set description",
249
-         y = "-log10 p-value",
250
-         col = color_by) +
265
+    labs(
266
+      x = "Gene set description",
267
+      y = "-log10 p-value",
268
+      col = color_by
269
+    ) +
251 270
     ylim(0, NA) +
252 271
     theme_minimal()
253 272
 
... ...
@@ -305,7 +324,7 @@ gs_summary_overview_pair <- function(res_enrich,
305 324
 #'
306 325
 #' # dds object
307 326
 #' data("gse", package = "macrophage")
308
-#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
327
+#' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
309 328
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
310 329
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
311 330
 #'
... ...
@@ -313,9 +332,10 @@ gs_summary_overview_pair <- function(res_enrich,
313 332
 #' anno_df <- data.frame(
314 333
 #'   gene_id = rownames(dds_macrophage),
315 334
 #'   gene_name = mapIds(org.Hs.eg.db,
316
-#'                      keys = rownames(dds_macrophage),
317
-#'                      column = "SYMBOL",
318
-#'                      keytype = "ENSEMBL"),
335
+#'     keys = rownames(dds_macrophage),
336
+#'     column = "SYMBOL",
337
+#'     keytype = "ENSEMBL"
338
+#'   ),
319 339
 #'   stringsAsFactors = FALSE,
320 340
 #'   row.names = rownames(dds_macrophage)
321 341
 #' )
... ...
@@ -333,19 +353,19 @@ gs_summary_overview_pair <- function(res_enrich,
333 353
 #' res_enrich3 <- res_enrich[1:42, ]
334 354
 #' res_enrich4 <- res_enrich[1:42, ]
335 355
 #'
336
-#' set.seed(2*42)
356
+#' set.seed(2 * 42)
337 357
 #' shuffled_ones_2 <- sample(seq_len(42)) # to generate permuted p-values
338 358
 #' res_enrich2$gs_pvalue <- res_enrich2$gs_pvalue[shuffled_ones_2]
339 359
 #' res_enrich2$z_score <- res_enrich2$z_score[shuffled_ones_2]
340 360
 #' res_enrich2$aggr_score <- res_enrich2$aggr_score[shuffled_ones_2]
341 361
 #'
342
-#' set.seed(3*42)
362
+#' set.seed(3 * 42)
343 363
 #' shuffled_ones_3 <- sample(seq_len(42)) # to generate permuted p-values
344 364
 #' res_enrich3$gs_pvalue <- res_enrich3$gs_pvalue[shuffled_ones_3]
345 365
 #' res_enrich3$z_score <- res_enrich3$z_score[shuffled_ones_3]
346 366
 #' res_enrich3$aggr_score <- res_enrich3$aggr_score[shuffled_ones_3]
347 367
 #'
348
-#' set.seed(4*42)
368
+#' set.seed(4 * 42)
349 369
 #' shuffled_ones_4 <- sample(seq_len(42)) # to generate permuted p-values
350 370
 #' res_enrich4$gs_pvalue <- res_enrich4$gs_pvalue[shuffled_ones_4]
351 371
 #' res_enrich4$z_score <- res_enrich4$z_score[shuffled_ones_4]
... ...
@@ -358,13 +378,15 @@ gs_summary_overview_pair <- function(res_enrich,
358 378
 #' )
359 379
 #'
360 380
 #' gs_horizon(res_enrich,
361
-#'            compared_res_enrich_list = compa_list,
362
-#'            n_gs = 50,
363
-#'            sort_by = "clustered")
381
+#'   compared_res_enrich_list = compa_list,
382
+#'   n_gs = 50,
383
+#'   sort_by = "clustered"
384
+#' )
364 385
 #' gs_horizon(res_enrich,
365
-#'            compared_res_enrich_list = compa_list,
366
-#'            n_gs = 20,
367
-#'            sort_by = "first_set")
386
+#'   compared_res_enrich_list = compa_list,
387
+#'   n_gs = 20,
388
+#'   sort_by = "first_set"
389
+#' )
368 390
 gs_horizon <- function(res_enrich,
369 391
                        compared_res_enrich_list,
370 392
                        n_gs = 20,
... ...
@@ -373,10 +395,12 @@ gs_horizon <- function(res_enrich,
373 395
                        ref_name = "ref_scenario",
374 396
                        sort_by = c("clustered", "first_set")) {
375 397
   if (!(color_by %in% colnames(res_enrich))) {
376
-    stop("Your res_enrich object does not contain the ",
377
-         color_by,
378
-         " column.\n",
379
-         "Compute this first or select another column to use for the color.")
398
+    stop(
399
+      "Your res_enrich object does not contain the ",
400
+      color_by,
401
+      " column.\n",
402
+      "Compute this first or select another column to use for the color."
403
+    )
380 404
   }
381 405
 
382 406
   if (!n_gs > 0) {
... ...
@@ -393,31 +417,42 @@ gs_horizon <- function(res_enrich,
393 417
     stop("You need to provide a list for comparison (even versus one scenario)")
394 418
   }
395 419
 
396
-  colnames_res_enrich <- c("gs_id",
397
-                           "gs_description",
398
-                           "gs_pvalue",
399
-                           "gs_genes",
400
-                           "gs_de_count",
401
-                           "gs_bg_count")
420
+  colnames_res_enrich <- c(
421
+    "gs_id",
422
+    "gs_description",
423
+    "gs_pvalue",
424
+    "gs_genes",
425
+    "gs_de_count",
426
+    "gs_bg_count"
427
+  )
402 428
   for (i in seq_len(length(compared_res_enrich_list))) {
403 429
     this_re <- compared_res_enrich_list[[i]]
404 430
 
405
-    if (!all(colnames_res_enrich %in% colnames(this_re)))
406
-      stop("One of the provided `res_enrich` objects does not respect the format ",
407
-           "required to use in GeneTonic\n",
408
-           "e.g. all required column names have to be present.\n",
409
-           "You might want to use one of the `shake_*` functions to convert it.\n",
410
-           "Required columns: ", paste(colnames_res_enrich, collapse = ", "),
411
-           "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`")
412
-
413
-    if (!p_value_column %in% colnames(this_re))
414
-      stop("Required column (p-value) `", p_value_column, "` not found in a component of ",
415
-           "`compared_res_enrich_list` object.",
416
-           "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`")
417
-    if (!color_by %in% colnames(this_re))
418
-      stop("Required column (for coloring) `", color_by, "` not found in a component of ",
419
-           "`compared_res_enrich_list` object.",
420
-           "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`")
431
+    if (!all(colnames_res_enrich %in% colnames(this_re))) {
432
+      stop(
433
+        "One of the provided `res_enrich` objects does not respect the format ",
434
+        "required to use in GeneTonic\n",
435
+        "e.g. all required column names have to be present.\n",
436
+        "You might want to use one of the `shake_*` functions to convert it.\n",
437
+        "Required columns: ", paste(colnames_res_enrich, collapse = ", "),
438
+        "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`"
439
+      )
440
+    }
441
+
442
+    if (!p_value_column %in% colnames(this_re)) {
443
+      stop(
444
+        "Required column (p-value) `", p_value_column, "` not found in a component of ",
445
+        "`compared_res_enrich_list` object.",
446
+        "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`"
447
+      )
448
+    }
449
+    if (!color_by %in% colnames(this_re)) {
450
+      stop(
451
+        "Required column (for coloring) `", color_by, "` not found in a component of ",
452
+        "`compared_res_enrich_list` object.",
453
+        "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`"
454
+      )
455
+    }
421 456
   }
422 457
 
423 458
   sort_by <- match.arg(sort_by, c("clustered", "first_set"))
... ...
@@ -440,21 +475,25 @@ gs_horizon <- function(res_enrich,
440 475
 
441 476
   # append scenario info
442 477
   res_enrich[["scenario"]] <- ref_name
443
-  compared_res_enrich_list <- lapply(seq_len(length(compared_res_enrich_list)),
444
-                                     function(arg) {
445
-                                       re <- compared_res_enrich_list[[arg]]
446
-                                       re[["scenario"]] <- names(compared_res_enrich_list)[arg]
447
-                                       return(re)
448
-                                     })
478
+  compared_res_enrich_list <- lapply(
479
+    seq_len(length(compared_res_enrich_list)),
480
+    function(arg) {
481
+      re <- compared_res_enrich_list[[arg]]
482
+      re[["scenario"]] <- names(compared_res_enrich_list)[arg]
483
+      return(re)
484
+    }
485
+  )
449 486
 
450 487
   # reduce to common sets
451 488
   re_ref <- res_enrich[gs_common, ]
452
-  re_comp <- lapply(seq_len(length(compared_res_enrich_list)),
453
-                    function(arg) {
454
-                      re <- compared_res_enrich_list[[arg]]
455
-                      re <- re[gs_common, ]
456
-                      return(re)
457
-                    })
489
+  re_comp <- lapply(
490
+    seq_len(length(compared_res_enrich_list)),
491
+    function(arg) {
492
+      re <- compared_res_enrich_list[[arg]]
493
+      re <- re[gs_common, ]
494
+      return(re)
495
+    }
496
+  )
458 497
 
459 498
   merged_res_enh <- rbind(
460 499
     re_ref,
... ...
@@ -479,8 +518,10 @@ gs_horizon <- function(res_enrich,
479 518
     # with a nicer sorting - "grouped" by scenario
480 519
     nicerorder_terms <- merged_res_enh %>%
481 520
       group_by(.data$gs_description) %>%
482
-      mutate(main_category = .data$scenario[which.max(.data$logp10)],
483
-             max_value = max(.data$logp10)) %>%
521
+      mutate(
522
+        main_category = .data$scenario[which.max(.data$logp10)],
523
+        max_value = max(.data$logp10)
524
+      ) %>%
484 525
       arrange(.data$main_category, desc(.data$max_value)) %>%
485 526
       dplyr::pull(.data$gs_description)
486 527
 
... ...
@@ -498,9 +539,11 @@ gs_horizon <- function(res_enrich,
498 539
       theme_minimal()
499 540
   }
500 541
 
501
-  p <- p + labs(x = "Gene set description",
502
-                y = "-log10 p-value",
503
-                col = color_by)
542
+  p <- p + labs(
543
+    x = "Gene set description",
544
+    y = "-log10 p-value",
545
+    col = color_by
546
+  )
504 547
 
505 548
   return(p)
506 549
 }
... ...
@@ -535,7 +578,7 @@ gs_horizon <- function(res_enrich,
535 578
 #'
536 579
 #' # dds object
537 580
 #' data("gse", package = "macrophage")
538
-#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
581
+#' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
539 582
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
540 583
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
541 584
 #'
... ...
@@ -543,9 +586,10 @@ gs_horizon <- function(res_enrich,
543 586
 #' anno_df <- data.frame(
544 587
 #'   gene_id = rownames(dds_macrophage),
545 588
 #'   gene_name = mapIds(org.Hs.eg.db,
546
-#'                      keys = rownames(dds_macrophage),
547
-#'                      column = "SYMBOL",
548
-#'                      keytype = "ENSEMBL"),
589
+#'     keys = rownames(dds_macrophage),
590
+#'     column = "SYMBOL",
591
+#'     keytype = "ENSEMBL"
592
+#'   ),
549 593
 #'   stringsAsFactors = FALSE,
550 594
 #'   row.names = rownames(dds_macrophage)
551 595
 #' )
... ...
@@ -559,10 +603,12 @@ gs_horizon <- function(res_enrich,
559 603
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
560 604
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
561 605
 #'
562
-#' gs_summary_heat(res_enrich = res_enrich,
563
-#'                 res_de = res_de,
564
-#'                 annotation_obj = anno_df,
565
-#'                 n_gs = 20)
606
+#' gs_summary_heat(
607
+#'   res_enrich = res_enrich,
608
+#'   res_de = res_de,
609
+#'   annotation_obj = anno_df,
610
+#'   n_gs = 20
611
+#' )
566 612
 gs_summary_heat <- function(res_enrich,
567 613
                             res_de,
568 614
                             annotation_obj,
... ...
@@ -593,17 +639,26 @@ gs_summary_heat <- function(res_enrich,
593 639
   gs_expanded[["gs_description"]] <- factor(gs_expanded[["gs_description"]], levels = res_enrich2[["gs_description"]])
594 640
   gs_expanded[["gs_genes"]] <- factor(gs_expanded[["gs_genes"]], levels = unique(gs_expanded[["gs_genes"]]))
595 641
 
596
-  p <- ggplot(gs_expanded,
597
-              aes_string(x = "gs_genes", y = "gs_description")) +
642
+  p <- ggplot(
643
+    gs_expanded,
644
+    aes_string(x = "gs_genes", y = "gs_description")
645
+  ) +
598 646
     geom_tile(aes_string(fill = "log2FoldChange"),
599
-              color = "white") +
600
-    scale_fill_gradient2(low = muted("deepskyblue"),
601
-                         mid = "lightyellow",
602
-                         high = muted("firebrick"),
603
-                         name = "log2 Fold Change") +
604
-    xlab(NULL) + ylab(NULL) + theme_minimal() +
605
-    theme(panel.grid.major = element_blank(),
606
-          axis.text.x = element_text(angle = 75, hjust = 1))
647
+      color = "white"
648
+    ) +
649
+    scale_fill_gradient2(
650
+      low = muted("deepskyblue"),
651
+      mid = "lightyellow",
652
+      high = muted("firebrick"),
653
+      name = "log2 Fold Change"
654
+    ) +
655
+    xlab(NULL) +
656
+    ylab(NULL) +
657
+    theme_minimal() +
658
+    theme(
659
+      panel.grid.major = element_blank(),
660
+      axis.text.x = element_text(angle = 75, hjust = 1)
661
+    )
607 662
 
608 663
   return(p)
609 664
 }
Browse code

reverting to previous of using expression - not compatible when using ggplotly

Federico Marini authored on 03/05/2021 19:52:05
Showing1 changed files
... ...
@@ -96,7 +96,7 @@ gs_summary_overview <- function(res_enrich,
96 96
     p <- p +
97 97
       coord_flip() + 
98 98
       labs(x = "Gene set description",
99
-           y = expression(-log[10]~'p-value'),
99
+           y = "-log10 p-value",
100 100
            col = color_by) +
101 101
       theme_minimal()
102 102
   } else {
... ...
@@ -116,7 +116,7 @@ gs_summary_overview <- function(res_enrich,
116 116
     p <- p +
117 117
       coord_flip() +
118 118
       labs(x = "Gene set description",
119
-           y = expression(-log[10]~'p-value'),
119
+           y = "-log10 p-value",
120 120
            col = color_by) +
121 121
       theme_minimal()
122 122
   }
... ...
@@ -246,7 +246,7 @@ gs_summary_overview_pair <- function(res_enrich,
246 246
     scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", name = paste0(color_by, " set 1")) +
247 247
     coord_flip() +
248 248
     labs(x = "Gene set description",
249
-         y = expression(-log[10]~'p-value'),
249
+         y = "-log10 p-value",
250 250
          col = color_by) +
251 251
     ylim(0, NA) +
252 252
     theme_minimal()
... ...
@@ -499,7 +499,7 @@ gs_horizon <- function(res_enrich,
499 499
   }
500 500
 
501 501
   p <- p + labs(x = "Gene set description",
502
-                y = expression(-log[10]~'p-value'),
502
+                y = "-log10 p-value",
503 503
                 col = color_by)
504 504
 
505 505
   return(p)
... ...
@@ -563,7 +563,6 @@ gs_horizon <- function(res_enrich,
563 563
 #'                 res_de = res_de,
564 564
 #'                 annotation_obj = anno_df,
565 565
 #'                 n_gs = 20)
566
-
567 566
 gs_summary_heat <- function(res_enrich,
568 567
                             res_de,
569 568
                             annotation_obj,
... ...
@@ -601,7 +600,7 @@ gs_summary_heat <- function(res_enrich,
601 600
     scale_fill_gradient2(low = muted("deepskyblue"),
602 601
                          mid = "lightyellow",
603 602
                          high = muted("firebrick"),
604
-                         name = expression(log[2]~'Fold Change')) +
603
+                         name = "log2 Fold Change") +
605 604
     xlab(NULL) + ylab(NULL) + theme_minimal() +
606 605
     theme(panel.grid.major = element_blank(),
607 606
           axis.text.x = element_text(angle = 75, hjust = 1))
Browse code

replacing log* with expression calls in the ggplot code

Federico Marini authored on 03/05/2021 19:23:17
Showing1 changed files
... ...
@@ -96,7 +96,7 @@ gs_summary_overview <- function(res_enrich,
96 96
     p <- p +
97 97
       coord_flip() + 
98 98
       labs(x = "Gene set description",
99
-           y = "-log10 p-value",
99
+           y = expression(-log[10]~'p-value'),
100 100
            col = color_by) +
101 101
       theme_minimal()
102 102
   } else {
... ...
@@ -116,7 +116,7 @@ gs_summary_overview <- function(res_enrich,
116 116
     p <- p +
117 117
       coord_flip() +
118 118
       labs(x = "Gene set description",
119
-           y = "-log10 p-value",
119
+           y = expression(-log[10]~'p-value'),
120 120
            col = color_by) +
121 121
       theme_minimal()
122 122
   }
... ...
@@ -246,7 +246,7 @@ gs_summary_overview_pair <- function(res_enrich,
246 246
     scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", name = paste0(color_by, " set 1")) +
247 247
     coord_flip() +
248 248
     labs(x = "Gene set description",
249
-         y = "-log10 p-value",
249
+         y = expression(-log[10]~'p-value'),
250 250
          col = color_by) +
251 251
     ylim(0, NA) +
252 252
     theme_minimal()
... ...
@@ -499,7 +499,7 @@ gs_horizon <- function(res_enrich,
499 499
   }
500 500
 
501 501
   p <- p + labs(x = "Gene set description",
502
-                y = "-log10 p-value",
502
+                y = expression(-log[10]~'p-value'),
503 503
                 col = color_by)
504 504
 
505 505
   return(p)
... ...
@@ -601,7 +601,7 @@ gs_summary_heat <- function(res_enrich,
601 601
     scale_fill_gradient2(low = muted("deepskyblue"),
602 602
                          mid = "lightyellow",
603 603
                          high = muted("firebrick"),
604
-                         name = "log2FoldChange") +
604
+                         name = expression(log[2]~'Fold Change')) +
605 605
     xlab(NULL) + ylab(NULL) + theme_minimal() +
606 606
     theme(panel.grid.major = element_blank(),
607 607
           axis.text.x = element_text(angle = 75, hjust = 1))
Browse code

tiny extra example with the bars for the summary

Federico Marini authored on 19/02/2021 17:35:47
Showing1 changed files
... ...
@@ -56,7 +56,9 @@
56 56
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
57 57
 #'
58 58
 #' gs_summary_overview(res_enrich)
59
-#'
59
+#' 
60
+#' # if desired, it can also be shown as a barplot
61
+#' gs_summary_overview(res_enrich, 30, return_barchart = TRUE)
60 62
 gs_summary_overview <- function(res_enrich,
61 63
                                 n_gs = 20,
62 64
                                 p_value_column = "gs_pvalue",
Browse code

summary overview for one set, also working without color and as a barchart

Federico Marini authored on 19/02/2021 17:04:43
Showing1 changed files
... ...
@@ -13,6 +13,8 @@
13 13
 #' p-value - have been specified).
14 14
 #' @param color_by Character, specifying the column of `res_enrich` to be used
15 15
 #' for coloring the plotted gene sets. Defaults sensibly to `z_score`.
16
+#' @param return_barchart Logical, whether to return a barchart (instead of the 
17
+#' default dot-segment plot); defaults to FALSE.
16 18
 #'
17 19
 #' @return A `ggplot` object
18 20
 #'
... ...
@@ -58,14 +60,17 @@
58 60
 gs_summary_overview <- function(res_enrich,
59 61
                                 n_gs = 20,
60 62
                                 p_value_column = "gs_pvalue",
61
-                                color_by = "z_score"
63
+                                color_by = "z_score",
64
+                                return_barchart = FALSE
62 65
                                 # , size_by = "gs_de_count"
63 66
                                 ) {
64
-  if (!(color_by %in% colnames(res_enrich))) {
65
-    stop("Your res_enrich object does not contain the ",
66
-         color_by,
67
-         " column.\n",
68
-         "Compute this first or select another column to use for the color.")
67
+  if (!is.null(color_by)) {
68
+    if (!(color_by %in% colnames(res_enrich))) {
69
+      stop("Your res_enrich object does not contain the ",
70
+           color_by,
71
+           " column.\n",
72
+           "Compute this first or select another column to use for the color.")
73
+    }
69 74
   }
70 75
 
71 76
   re <- res_enrich
... ...
@@ -75,17 +80,44 @@ gs_summary_overview <- function(res_enrich,
75 80
   re_sorted <- re %>%
76 81
     arrange(.data$logp10) %>%
77 82
     mutate(gs_description = factor(.data$gs_description, .data$gs_description))
78
-  p <- ggplot(re_sorted, (aes_string(x = "gs_description", y = "logp10"))) +
79
-    geom_segment(aes_string(x = "gs_description", xend = "gs_description", y = 0, yend = "logp10"), color = "grey") +
80
-    geom_point(aes(col = .data[[color_by]]), size = 4) +
81
-    # geom_point(aes(col = .data[[color_by]], size = .data[[size_by]])) +
82
-    scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
83
-    coord_flip() +
84
-    labs(x = "Gene set description",
85
-         y = "-log10 p-value",
86
-         col = color_by) +
87
-    theme_minimal()
88
-
83
+  
84
+  if (return_barchart) {
85
+    p <- ggplot(re_sorted, (aes_string(x = "gs_description", y = "logp10"))) 
86
+    
87
+    if (is.null(color_by)) {
88
+      p <- p + geom_col()
89
+    } else {
90
+      p <- p + geom_col(aes(fill = .data[[color_by]])) + 
91
+        scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026")
92
+    }
93
+    
94
+    p <- p +
95
+      coord_flip() + 
96
+      labs(x = "Gene set description",
97
+           y = "-log10 p-value",
98
+           col = color_by) +
99
+      theme_minimal()
100
+  } else {
101
+    p <- ggplot(re_sorted, (aes_string(x = "gs_description", y = "logp10"))) +
102
+      geom_segment(aes_string(x = "gs_description", 
103
+                              xend = "gs_description", 
104
+                              y = 0, 
105
+                              yend = "logp10"), color = "grey")
106
+    
107
+    if (is.null(color_by)) {
108
+      p <- p + geom_point(size = 4)
109
+    } else {
110
+      p <- p + geom_point(aes(col = .data[[color_by]]), size = 4) +
111
+        scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026")
112
+    }
113
+    
114
+    p <- p +
115
+      coord_flip() +
116
+      labs(x = "Gene set description",
117
+           y = "-log10 p-value",
118
+           col = color_by) +
119
+      theme_minimal()
120
+  }
89 121
   return(p)
90 122
 }
91 123
 
Browse code

showing double legend, for set 1 and set 2 of the two res_enrich objects, instead that for the second one - enables both values to be read out if they do not overlap

Federico Marini authored on 19/02/2021 16:16:58
Showing1 changed files
... ...
@@ -208,8 +208,8 @@ gs_summary_overview_pair <- function(res_enrich,
208 208
     geom_point(aes(fill = .data[[color_by]]), size = 4, pch = 21) +
209 209
     geom_point(aes_string(y = "logp10_2", col = paste0(color_by, "_2")),
210 210
                size = 4, alpha = alpha_set2) +
211
-    scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
212
-    scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", guide = FALSE) +
211
+    scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", name = paste0(color_by, " set 2")) +
212
+    scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", name = paste0(color_by, " set 1")) +
213 213
     coord_flip() +
214 214
     labs(x = "Gene set description",
215 215
          y = "-log10 p-value",
Browse code

Added functionality when parameter set can be passed also via gtl

Federico Marini authored on 23/10/2020 09:26:45
Showing1 changed files
... ...
@@ -483,6 +483,9 @@ gs_horizon <- function(res_enrich,
483 483
 #' @param res_de A `DESeqResults` object.
484 484
 #' @param annotation_obj A `data.frame` object with the feature annotation
485 485
 #' information, with at least two columns, `gene_id` and `gene_name`.
486
+#' @param gtl A `GeneTonic`-list object, containing in its slots the arguments
487
+#' specified above: `dds`, `res_de`, `res_enrich`, and `annotation_obj` - the names
488
+#' of the list _must_ be specified following the content they are expecting
486 489
 #' @param n_gs Integer value, corresponding to the maximal number of gene sets to
487 490
 #' be displayed
488 491
 #'
... ...
@@ -530,7 +533,15 @@ gs_horizon <- function(res_enrich,
530 533
 gs_summary_heat <- function(res_enrich,
531 534
                             res_de,
532 535
                             annotation_obj,
536
+                            gtl = NULL,
533 537
                             n_gs = 80) {
538
+  if (!is.null(gtl)) {
539
+    checkup_gtl(gtl)
540
+    dds <- gtl$dds
541
+    res_de <- gtl$res_de
542
+    res_enrich <- gtl$res_enrich
543
+    annotation_obj <- gtl$annotation_obj
544
+  }
534 545
 
535 546
   res_enrich2 <- res_enrich[seq_len(n_gs), ]
536 547
 
Browse code

Fixing the labels for the plots, where the -log10 is displayed - added the sign

Federico Marini authored on 29/09/2020 15:04:10
Showing1 changed files
... ...
@@ -82,7 +82,7 @@ gs_summary_overview <- function(res_enrich,
82 82
     scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
83 83
     coord_flip() +
84 84
     labs(x = "Gene set description",
85
-         y = "log10 p-value",
85
+         y = "-log10 p-value",
86 86
          col = color_by) +
87 87
     theme_minimal()
88 88
 
... ...
@@ -212,7 +212,7 @@ gs_summary_overview_pair <- function(res_enrich,
212 212
     scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", guide = FALSE) +
213 213
     coord_flip() +
214 214
     labs(x = "Gene set description",
215
-         y = "log10 p-value",
215
+         y = "-log10 p-value",
216 216
          col = color_by) +
217 217
     ylim(0, NA) +
218 218
     theme_minimal()
... ...
@@ -465,7 +465,7 @@ gs_horizon <- function(res_enrich,
465 465
   }
466 466
 
467 467
   p <- p + labs(x = "Gene set description",
468
-                y = "log10 p-value",
468
+                y = "-log10 p-value",
469 469
                 col = color_by)
470 470
 
471 471
   return(p)
Browse code

Different palette for gs_horizon, plus better axis labels

Federico Marini authored on 27/01/2020 10:06:37
Showing1 changed files
... ...
@@ -436,6 +436,7 @@ gs_horizon <- function(res_enrich,
436 436
       ggplot(aes_string(x = "gs_description", y = "logp10")) +
437 437
       geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
438 438
       geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
439
+      scale_color_brewer(palette = "Set2") +
439 440
       scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
440 441
       ylim(c(0, NA)) +
441 442
       coord_flip() +
... ...
@@ -455,6 +456,7 @@ gs_horizon <- function(res_enrich,
455 456
       arrange(desc(.data$logp10)) %>%
456 457
       ggplot(aes_string(x = "gs_description", y = "logp10")) +
457 458
       geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
459
+      scale_color_brewer(palette = "Set2") +
458 460
       geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
459 461
       scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
460 462
       ylim(c(0, NA)) +
... ...
@@ -462,6 +464,10 @@ gs_horizon <- function(res_enrich,
462 464
       theme_minimal()
463 465
   }
464 466
 
467
+  p <- p + labs(x = "Gene set description",
468
+                y = "log10 p-value",
469
+                col = color_by)
470
+
465 471
   return(p)
466 472
 }
467 473
 
Browse code

Fix text for error message suggesting the correct shake_* function

Federico Marini authored on 24/01/2020 09:52:18
Showing1 changed files
... ...
@@ -372,7 +372,7 @@ gs_horizon <- function(res_enrich,
372 372
       stop("One of the provided `res_enrich` objects does not respect the format ",
373 373
            "required to use in GeneTonic\n",
374 374
            "e.g. all required column names have to be present.\n",
375
-           "You might want to use one of the `shaker_*` functions to convert it.\n",
375
+           "You might want to use one of the `shake_*` functions to convert it.\n",
376 376
            "Required columns: ", paste(colnames_res_enrich, collapse = ", "),
377 377
            "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`")
378 378
 
Browse code

Additional checks on the input to gs_horizon

Federico Marini authored on 20/01/2020 21:11:11
Showing1 changed files
... ...
@@ -344,16 +344,47 @@ gs_horizon <- function(res_enrich,
344 344
          " column.\n",
345 345
          "Compute this first or select another column to use for the color.")
346 346
   }
347
-  # checks on the list with other res_enrich to compare against
348
-  # TODO TODO
349
-  # need to be res_enrichs
350
-  # need to have color_by
351
-  # n_gs must be >0
352
-  # list must be named, otherwise message and assign
353 347
 
348
+  if (!n_gs > 0) {
349
+    stop("Please select a value for `n_gs` greater than 0")
350
+  }
354 351
 
352
+  if (is.null(names(compared_res_enrich_list))) {
353
+    message("You provided a list for comparison without specifying names, adding some defaults")
354
+    names(compared_res_enrich_list) <-
355
+      paste0("other_", seq_len(length(compared_res_enrich_list)))
356
+  }
355 357
 
358
+  if (!is(compared_res_enrich_list, "list")) {
359
+    stop("You need to provide a list for comparison (even versus one scenario)")
360
+  }
356 361
 
362
+  colnames_res_enrich <- c("gs_id",
363
+                           "gs_description",
364
+                           "gs_pvalue",
365
+                           "gs_genes",
366
+                           "gs_de_count",
367
+                           "gs_bg_count")
368
+  for (i in seq_len(length(compared_res_enrich_list))) {
369
+    this_re <- compared_res_enrich_list[[i]]
370
+
371
+    if (!all(colnames_res_enrich %in% colnames(this_re)))
372
+      stop("One of the provided `res_enrich` objects does not respect the format ",
373
+           "required to use in GeneTonic\n",
374
+           "e.g. all required column names have to be present.\n",
375
+           "You might want to use one of the `shaker_*` functions to convert it.\n",
376
+           "Required columns: ", paste(colnames_res_enrich, collapse = ", "),
377
+           "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`")
378
+
379
+    if (!p_value_column %in% colnames(this_re))
380
+      stop("Required column (p-value) `", p_value_column, "` not found in a component of ",
381
+           "`compared_res_enrich_list` object.",
382
+           "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`")
383
+    if (!color_by %in% colnames(this_re))
384
+      stop("Required column (for coloring) `", color_by, "` not found in a component of ",
385
+           "`compared_res_enrich_list` object.",
386
+           "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`")
387
+  }
357 388
 
358 389
   sort_by <- match.arg(sort_by, c("clustered", "first_set"))
359 390
 
Browse code

cleanup older commented code

Federico Marini authored on 17/01/2020 20:43:14
Showing1 changed files
... ...
@@ -432,75 +432,6 @@ gs_horizon <- function(res_enrich,
432 432
   }
433 433
 
434 434
   return(p)
435
-
436
-
437
- ####### #################
438
-#######
439
- ####### common_re1 <- res_enrich[gs_common, ]
440
- ####### common_re2 <- res_enrich2[gs_common, ]
441
-#######
442
- ####### common_re1$logp10 <- -log10(common_re1[[p_value_column]])
443
- ####### common_re2$logp10 <- -log10(common_re2[[p_value_column]])
444
-#######
445
-#######
446
-#######
447
- ####### re_ref$logp10 <- -log10(re_ref$gs_pvalue)
448
-#######
449
- ####### # res_enrich <- get_aggrscores(topgoDE_macrophage_IFNg_vs_naive,res_macrophage_IFNg_vs_naive, a#######nnotation_obj = anno_df)
450
- ####### res_enriched_1 <- res_enrich
451
-#######
452
- ####### res_enriched_1 <- res_enriched_1[seq_len(n_gs), ]
453
- ####### res_enriched_1$logp10 <-  -log10(res_enriched_1[[p_value_column]])
454
-#######
455
- ####### res_enriched_2 <-
456
- #######   res_enriched_3 <-
457
- #######   res_enriched_4 <- res_enriched_1
458
-#######
459
- ####### set.seed(42)
460
- ####### shuffled_r2 <- sample(seq_len(nrow(res_enriched_1)))
461
- ####### shuffled_r3 <- sample(seq_len(nrow(res_enriched_1)))
462
- ####### shuffled_r4 <- sample(seq_len(nrow(res_enriched_1)))
463
-#######
464
- ####### res_enriched_2$z_score <- res_enriched_2$z_score[shuffled_r2]
465
- ####### res_enriched_2$aggr_score <- res_enriched_2$aggr_score[shuffled_r2]
466
- ####### res_enriched_2$logp10 <- res_enriched_2$logp10[shuffled_r2]
467
- ####### res_enriched_3$z_score <- res_enriched_3$z_score[shuffled_r3]
468
- ####### res_enriched_3$aggr_score <- res_enriched_3$aggr_score[shuffled_r3]
469
- ####### res_enriched_3$logp10 <- res_enriched_3$logp10[shuffled_r3]
470
- ####### res_enriched_4$z_score <- res_enriched_4$z_score[shuffled_r4]
471
- ####### res_enriched_4$aggr_score <- res_enriched_4$aggr_score[shuffled_r4]
472
- ####### res_enriched_4$logp10 <- res_enriched_4$logp10[shuffled_r4]
473
-#######
474
- ####### res_enriched_1$scenario <- "original"
475
- ####### res_enriched_2$scenario <- "shuffled_2"
476
- ####### res_enriched_3$scenario <- "shuffled_3"
477
- ####### res_enriched_4$scenario <- "shuffled_4"
478
-#######
479
- ####### # to preserve the order of the terms
480
- ####### re_ref <- re_ref %>%
481
- #######   arrange(.data$logp10) %>%
482
- #######   mutate(gs_description = factor(.data$gs_description, unique(.data$gs_description)))
483
-#######
484
- ####### # to preserve the sorting of scenarios
485
- ####### merged_res_enh <- rbind(res_enriched_1,
486
- #######                         res_enriched_2,
487
- #######                         res_enriched_3,
488
- #######                         res_enriched_4)
489
- ####### merged_res_enh$scenario <- factor(merged_res_enh$scenario, unique(merged_res_enh$scenario))
490
-#######
491
-#######
492
- ####### # if only with one...
493
- ####### re_ref %>%
494
- #######   # arrange(logp10) %>%
495
- #######   # mutate(gs_description=factor(gs_description, unique(gs_description))) %>%
496
- #######   ggplot(aes_string(x = "gs_description", y = "logp10")) +
497
- #######   geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
498
- #######   geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
499
- #######   scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
500
- #######   ylim(c(0, NA)) +
501
- #######   coord_flip() +
502
- #######   theme_minimal()
503
-
504 435
 }
505 436
 
506 437
 
Browse code

Restyled gs_horizon function, with new params as well. Docs and tests added as well

Federico Marini authored on 17/01/2020 20:42:07
Showing1 changed files
... ...
@@ -226,14 +226,20 @@ gs_summary_overview_pair <- function(res_enrich,
226 226
 #' Plots a summary of enrichment results - horizon plot to compare one or more
227 227
 #' sets of results
228 228
 #'
229
-#' It makes sense to have the results in `res_enrich` sorted by increasing `gs_pvalue`,
230
-#' to make sure the top results are first sorted by the significance (when selecting
231
-#' the common gene sets across the `res_enrich` elements provided in
232
-#' `compared_res_enrich_list`)
229
+#' @details It makes sense to have the results in `res_enrich` sorted by
230
+#' increasing `gs_pvalue`, to make sure the top results are first sorted by the
231
+#' significance (when selecting the common gene sets across the `res_enrich`
232
+#' elements provided in `compared_res_enrich_list`)
233
+#'
234
+#' The gene sets included are a subset of the ones in common to all different
235
+#' scenarios included in `res_enrich` and the elements of `compared_res_enrich_list`.
233 236
 #'
234 237
 #' @param res_enrich A `data.frame` object, storing the result of the functional
235 238
 #' enrichment analysis. See more in the main function, [GeneTonic()], to check the
236 239
 #' formatting requirements (a minimal set of columns should be present).
240
+#' @param compared_res_enrich_list A named list, where each element is a `data.frame`
241
+#' formatted like the standard `res_enrich` objects used by `GeneTonic`. The
242
+#' names of the list are the names of the scenarios.
237 243
 #' @param n_gs Integer value, corresponding to the maximal number of gene sets to
238 244
 #' be displayed
239 245
 #' @param p_value_column Character string, specifying the column of `res_enrich`
... ...
@@ -242,6 +248,13 @@ gs_summary_overview_pair <- function(res_enrich,
242 248
 #' p-value - have been specified).
243 249
 #' @param color_by Character, specifying the column of `res_enrich` to be used
244 250
 #' for coloring the plotted gene sets. Defaults sensibly to `z_score`.
251
+#' @param ref_name Character, defining the name of the scenario to compare
252
+#' against (the one in `res_enrich`) - defaults to "ref_scenario".
253
+#' @param sort_by Character string, either "clustered", or "first_set". This
254
+#' controls the sorting order of the included terms in the final plot.
255
+#' "clustered" presents the terms grouped by the scenario where they assume the
256
+#' highest values. "first_set" sorts the terms by the significance value in the
257
+#' reference scenario.
245 258
 #'
246 259
 #' @return A `ggplot` object
247 260
 #'
... ...
@@ -282,9 +295,6 @@ gs_summary_overview_pair <- function(res_enrich,
282 295
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
283 296
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
284 297
 #'
285
-#' #gs_horizon(res_enrich,
286
-#' #            n_gs = 15)
287
-#'
288 298
 #' res_enrich2 <- res_enrich[1:42, ]
289 299
 #' res_enrich3 <- res_enrich[1:42, ]
290 300
 #' res_enrich4 <- res_enrich[1:42, ]
... ...
@@ -312,6 +322,15 @@ gs_summary_overview_pair <- function(res_enrich,
312 322
 #'   scenario3 = res_enrich3,
313 323
 #'   scenario4 = res_enrich4
314 324
 #' )
325
+#'
326
+#' gs_horizon(res_enrich,
327
+#'            compared_res_enrich_list = compa_list,
328
+#'            n_gs = 50,
329
+#'            sort_by = "clustered")
330
+#' gs_horizon(res_enrich,
331
+#'            compared_res_enrich_list = compa_list,
332
+#'            n_gs = 20,
333
+#'            sort_by = "first_set")
315 334
 gs_horizon <- function(res_enrich,
316 335
                        compared_res_enrich_list,
317 336
                        n_gs = 20,
... ...
@@ -330,7 +349,7 @@ gs_horizon <- function(res_enrich,
330 349
   # need to be res_enrichs
331 350
   # need to have color_by
332 351
   # n_gs must be >0
333
-
352
+  # list must be named, otherwise message and assign
334 353
 
335 354
 
336 355
 
... ...
@@ -377,117 +396,110 @@ gs_horizon <- function(res_enrich,
377 396
     do.call(rbind, re_comp)
378 397
   )
379 398
   merged_res_enh$logp10 <- -log10(merged_res_enh$gs_pvalue)
380
-  # TODO; here!
381
-
382
-
383
-
384
-
385
-
386
-
387
-
388
-
389
-
390
-
391
-  common_re1 <- res_enrich[gs_common, ]
392
-  common_re2 <- res_enrich2[gs_common, ]
393
-
394
-  common_re1$logp10 <- -log10(common_re1[[p_value_column]])
395
-  common_re2$logp10 <- -log10(common_re2[[p_value_column]])
396
-
397
-
398
-
399
-
400
-
401
-  # res_enrich <- get_aggrscores(topgoDE_macrophage_IFNg_vs_naive,res_macrophage_IFNg_vs_naive, annotation_obj = anno_df)
402
-  res_enriched_1 <- res_enrich
403
-
404
-  res_enriched_1 <- res_enriched_1[seq_len(n_gs), ]
405
-  res_enriched_1$logp10 <-  -log10(res_enriched_1[[p_value_column]])
406
-
407
-  res_enriched_2 <-
408
-    res_enriched_3 <-
409
-    res_enriched_4 <- res_enriched_1
410
-
411
-  set.seed(42)
412
-  shuffled_r2 <- sample(seq_len(nrow(res_enriched_1)))
413
-  shuffled_r3 <- sample(seq_len(nrow(res_enriched_1)))
414
-  shuffled_r4 <- sample(seq_len(nrow(res_enriched_1)))
415
-
416
-  res_enriched_2$z_score <- res_enriched_2$z_score[shuffled_r2]
417
-  res_enriched_2$aggr_score <- res_enriched_2$aggr_score[shuffled_r2]
418
-  res_enriched_2$logp10 <- res_enriched_2$logp10[shuffled_r2]
419
-  res_enriched_3$z_score <- res_enriched_3$z_score[shuffled_r3]
420
-  res_enriched_3$aggr_score <- res_enriched_3$aggr_score[shuffled_r3]
421
-  res_enriched_3$logp10 <- res_enriched_3$logp10[shuffled_r3]
422
-  res_enriched_4$z_score <- res_enriched_4$z_score[shuffled_r4]
423
-  res_enriched_4$aggr_score <- res_enriched_4$aggr_score[shuffled_r4]
424
-  res_enriched_4$logp10 <- res_enriched_4$logp10[shuffled_r4]
425
-
426
-  res_enriched_1$scenario <- "original"
427
-  res_enriched_2$scenario <- "shuffled_2"
428
-  res_enriched_3$scenario <- "shuffled_3"
429
-  res_enriched_4$scenario <- "shuffled_4"
430
-
431
-  # to preserve the order of the terms
432
-  res_enriched_1 <- res_enriched_1 %>%
433
-    arrange(.data$logp10) %>%
434
-    mutate(gs_description = factor(.data$gs_description, unique(.data$gs_description)))
435
-
436
-  # to preserve the sorting of scenarios
437
-  merged_res_enh <- rbind(res_enriched_1,
438
-                          res_enriched_2,
439
-                          res_enriched_3,
440
-                          res_enriched_4)
441
-  merged_res_enh$scenario <- factor(merged_res_enh$scenario, unique(merged_res_enh$scenario))
442
-
443
-
444
-  # if only with one...
445
-  res_enriched_1 %>%
446
-    # arrange(logp10) %>%
447
-    # mutate(gs_description=factor(gs_description, unique(gs_description))) %>%
448
-    ggplot(aes_string(x = "gs_description", y = "logp10")) +
449
-    geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
450
-    geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
451
-    scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
452
-    ylim(c(0, NA)) +
453
-    coord_flip() +
454
-    theme_minimal()
455
-
456
-  # sorted by category in scenario1
457
-  merged_res_enh %>%
458
-    mutate(gs_description = factor(.data$gs_description, unique(.data$gs_description))) %>%
459
-    arrange(desc(.data$logp10)) %>%
460
-    ggplot(aes_string(x = "gs_description", y = "logp10")) +
461
-    geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
462
-    geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
463
-    scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
464
-    ylim(c(0, NA)) +
465
-    coord_flip() +
466
-    theme_minimal()
467
-
468
-  # with a nicer sorting - "grouped" by scenario
469
-
470
-  nicerorder_terms <- merged_res_enh %>%
471
-    group_by(.data$gs_description) %>%
472
-    mutate(main_category = .data$scenario[which.max(.data$logp10)],
473
-           max_value = max(.data$logp10)) %>%
474
-    arrange(.data$main_category, desc(.data$max_value)) %>%
475
-    dplyr::pull(.data$gs_description)
476 399
 
400
+  if (sort_by == "first_set") {
401
+    # sorted by category in scenario1
402
+    p <- merged_res_enh %>%
403
+      mutate(gs_description = factor(.data$gs_description, rev(unique(.data$gs_description)))) %>%
404
+      arrange((.data$logp10)) %>%
405
+      ggplot(aes_string(x = "gs_description", y = "logp10")) +
406
+      geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
407
+      geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
408
+      scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
409
+      ylim(c(0, NA)) +
410
+      coord_flip() +
411
+      theme_minimal()
412
+  } else if (sort_by == "clustered") {
413
+    # with a nicer sorting - "grouped" by scenario
414
+    nicerorder_terms <- merged_res_enh %>%
415
+      group_by(.data$gs_description) %>%
416
+      mutate(main_category = .data$scenario[which.max(.data$logp10)],
417
+             max_value = max(.data$logp10)) %>%
418
+      arrange(.data$main_category, desc(.data$max_value)) %>%
419
+      dplyr::pull(.data$gs_description)
420
+
421
+    p <- merged_res_enh %>%
422
+      # mutate(gs_description=factor(gs_description, unique(gs_description))) %>%
423
+      mutate(gs_description = factor(.data$gs_description, rev(unique(nicerorder_terms)))) %>%
424
+      arrange(desc(.data$logp10)) %>%
425
+      ggplot(aes_string(x = "gs_description", y = "logp10")) +
426
+      geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
427
+      geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
428
+      scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
429
+      ylim(c(0, NA)) +
430
+      coord_flip() +
431
+      theme_minimal()
432
+  }
477 433
 
434
+  return(p)
478 435
 
479
-  merged_res_enh %>%
480
-    # mutate(gs_description=factor(gs_description, unique(gs_description))) %>%
481
-    mutate(gs_description = factor(.data$gs_description, rev(unique(nicerorder_terms)))) %>%
482
-    arrange(desc(.data$logp10)) %>%
483
-    ggplot(aes_string(x = "gs_description", y = "logp10")) +
484
-    geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
485
-    geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
486
-    scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
487
-    ylim(c(0, NA)) +
488
-    coord_flip() +
489
-    theme_minimal()
490 436
 
437
+ ####### #################
438
+#######
439
+ ####### common_re1 <- res_enrich[gs_common, ]
440
+ ####### common_re2 <- res_enrich2[gs_common, ]
441
+#######
442
+ ####### common_re1$logp10 <- -log10(common_re1[[p_value_column]])
443
+ ####### common_re2$logp10 <- -log10(common_re2[[p_value_column]])
444
+#######
445
+#######
446
+#######
447
+ ####### re_ref$logp10 <- -log10(re_ref$gs_pvalue)
448
+#######
449
+ ####### # res_enrich <- get_aggrscores(topgoDE_macrophage_IFNg_vs_naive,res_macrophage_IFNg_vs_naive, a#######nnotation_obj = anno_df)
450
+ ####### res_enriched_1 <- res_enrich
451
+#######
452
+ ####### res_enriched_1 <- res_enriched_1[seq_len(n_gs), ]
453
+ ####### res_enriched_1$logp10 <-  -log10(res_enriched_1[[p_value_column]])
454
+#######
455
+ ####### res_enriched_2 <-
456
+ #######   res_enriched_3 <-
457
+ #######   res_enriched_4 <- res_enriched_1
458
+#######
459
+ ####### set.seed(42)
460
+ ####### shuffled_r2 <- sample(seq_len(nrow(res_enriched_1)))
461
+ ####### shuffled_r3 <- sample(seq_len(nrow(res_enriched_1)))
462
+ ####### shuffled_r4 <- sample(seq_len(nrow(res_enriched_1)))
463
+#######
464
+ ####### res_enriched_2$z_score <- res_enriched_2$z_score[shuffled_r2]
465
+ ####### res_enriched_2$aggr_score <- res_enriched_2$aggr_score[shuffled_r2]
466
+ ####### res_enriched_2$logp10 <- res_enriched_2$logp10[shuffled_r2]
467
+ ####### res_enriched_3$z_score <- res_enriched_3$z_score[shuffled_r3]
468
+ ####### res_enriched_3$aggr_score <- res_enriched_3$aggr_score[shuffled_r3]
469
+ ####### res_enriched_3$logp10 <- res_enriched_3$logp10[shuffled_r3]
470
+ ####### res_enriched_4$z_score <- res_enriched_4$z_score[shuffled_r4]
471
+ ####### res_enriched_4$aggr_score <- res_enriched_4$aggr_score[shuffled_r4]
472
+ ####### res_enriched_4$logp10 <- res_enriched_4$logp10[shuffled_r4]
473
+#######
474
+ ####### res_enriched_1$scenario <- "original"
475
+ ####### res_enriched_2$scenario <- "shuffled_2"
476
+ ####### res_enriched_3$scenario <- "shuffled_3"
477
+ ####### res_enriched_4$scenario <- "shuffled_4"
478
+#######
479
+ ####### # to preserve the order of the terms
480
+ ####### re_ref <- re_ref %>%
481
+ #######   arrange(.data$logp10) %>%
482
+ #######   mutate(gs_description = factor(.data$gs_description, unique(.data$gs_description)))
483
+#######
484
+ ####### # to preserve the sorting of scenarios
485
+ ####### merged_res_enh <- rbind(res_enriched_1,
486
+ #######                         res_enriched_2,
487
+ #######                         res_enriched_3,
488
+ #######                         res_enriched_4)
489
+ ####### merged_res_enh$scenario <- factor(merged_res_enh$scenario, unique(merged_res_enh$scenario))
490
+#######
491
+#######
492
+ ####### # if only with one...
493
+ ####### re_ref %>%
494
+ #######   # arrange(logp10) %>%
495
+ #######   # mutate(gs_description=factor(gs_description, unique(gs_description))) %>%
496
+ #######   ggplot(aes_string(x = "gs_description", y = "logp10")) +
497
+ #######   geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
498
+ #######   geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
499
+ #######   scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
500
+ #######   ylim(c(0, NA)) +
501
+ #######   coord_flip() +
502
+ #######   theme_minimal()
491 503
 
492 504
 }
493 505
 
Browse code

WIP for gs_horizon

Federico Marini authored on 17/01/2020 16:26:08
Showing1 changed files
... ...
@@ -226,6 +226,11 @@ gs_summary_overview_pair <- function(res_enrich,
226 226
 #' Plots a summary of enrichment results - horizon plot to compare one or more
227 227
 #' sets of results
228 228
 #'
229
+#' It makes sense to have the results in `res_enrich` sorted by increasing `gs_pvalue`,
230
+#' to make sure the top results are first sorted by the significance (when selecting
231
+#' the common gene sets across the `res_enrich` elements provided in
232
+#' `compared_res_enrich_list`)
233
+#'
229 234
 #' @param res_enrich A `data.frame` object, storing the result of the functional
230 235
 #' enrichment analysis. See more in the main function, [GeneTonic()], to check the
231 236
 #' formatting requirements (a minimal set of columns should be present).
... ...
@@ -277,19 +282,121 @@ gs_summary_overview_pair <- function(res_enrich,
277 282
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
278 283
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
279 284
 #'
280
-#' gs_horizon(res_enrich,
281
-#'            n_gs = 15)
285
+#' #gs_horizon(res_enrich,
286
+#' #            n_gs = 15)
282 287
 #'
283
-gs_horizon <- function(res_enrich, # TODO: should be a list of res_enrich objects!
288
+#' res_enrich2 <- res_enrich[1:42, ]
289
+#' res_enrich3 <- res_enrich[1:42, ]
290
+#' res_enrich4 <- res_enrich[1:42, ]
291
+#'
292
+#' set.seed(2*42)
293
+#' shuffled_ones_2 <- sample(seq_len(42)) # to generate permuted p-values
294
+#' res_enrich2$gs_pvalue <- res_enrich2$gs_pvalue[shuffled_ones_2]
295
+#' res_enrich2$z_score <- res_enrich2$z_score[shuffled_ones_2]
296
+#' res_enrich2$aggr_score <- res_enrich2$aggr_score[shuffled_ones_2]
297
+#'
298
+#' set.seed(3*42)
299
+#' shuffled_ones_3 <- sample(seq_len(42)) # to generate permuted p-values
300
+#' res_enrich3$gs_pvalue <- res_enrich3$gs_pvalue[shuffled_ones_3]
301
+#' res_enrich3$z_score <- res_enrich3$z_score[shuffled_ones_3]
302
+#' res_enrich3$aggr_score <- res_enrich3$aggr_score[shuffled_ones_3]
303
+#'
304
+#' set.seed(4*42)
305
+#' shuffled_ones_4 <- sample(seq_len(42)) # to generate permuted p-values
306
+#' res_enrich4$gs_pvalue <- res_enrich4$gs_pvalue[shuffled_ones_4]
307
+#' res_enrich4$z_score <- res_enrich4$z_score[shuffled_ones_4]
308
+#' res_enrich4$aggr_score <- res_enrich4$aggr_score[shuffled_ones_4]
309
+#'
310
+#' compa_list <- list(
311
+#'   scenario2 = res_enrich2,
312
+#'   scenario3 = res_enrich3,
313
+#'   scenario4 = res_enrich4
314
+#' )
315
+gs_horizon <- function(res_enrich,
316
+                       compared_res_enrich_list,
284 317
                        n_gs = 20,
285 318
                        p_value_column = "gs_pvalue",
286
-                       color_by = "z_score") {
319
+                       color_by = "z_score",
320
+                       ref_name = "ref_scenario",
321
+                       sort_by = c("clustered", "first_set")) {
287 322
   if (!(color_by %in% colnames(res_enrich))) {
288 323
     stop("Your res_enrich object does not contain the ",
289 324
          color_by,
290 325
          " column.\n",
291 326
          "Compute this first or select another column to use for the color.")
292 327
   }
328
+  # checks on the list with other res_enrich to compare against
329
+  # TODO TODO
330
+  # need to be res_enrichs
331
+  # need to have color_by
332
+  # n_gs must be >0
333
+
334
+
335
+
336
+
337
+
338
+
339
+  sort_by <- match.arg(sort_by, c("clustered", "first_set"))
340
+
341
+  # compared_res_enrich_list
342
+  # append original ref
343
+  all_res_enrichs <- compared_res_enrich_list
344
+  all_res_enrichs[[ref_name]] <- res_enrich
345
+
346
+  all_gsids <- lapply(all_res_enrichs, function(arg) arg[["gs_id"]])
347
+
348
+  gs_common <- Reduce(intersect, all_gsids)
349
+
350
+  if (length(gs_common) == 0) {
351
+    stop("No gene sets have been found in common to the two enrichment results")
352
+  }
353
+
354
+  # restrict to the top common n_gs
355
+  gs_common <- gs_common[seq_len(min(n_gs, length(gs_common)))]
356
+
357
+  # append scenario info
358
+  res_enrich[["scenario"]] <- ref_name
359
+  compared_res_enrich_list <- lapply(seq_len(length(compared_res_enrich_list)),
360
+                                     function(arg) {
361
+                                       re <- compared_res_enrich_list[[arg]]
362
+                                       re[["scenario"]] <- names(compared_res_enrich_list)[arg]
363
+                                       return(re)
364
+                                     })
365
+
366
+  # reduce to common sets
367
+  re_ref <- res_enrich[gs_common, ]
368
+  re_comp <- lapply(seq_len(length(compared_res_enrich_list)),
369
+                    function(arg) {
370
+                      re <- compared_res_enrich_list[[arg]]
371
+                      re <- re[gs_common, ]
372
+                      return(re)
373
+                    })
374
+
375
+  merged_res_enh <- rbind(
376
+    re_ref,
377
+    do.call(rbind, re_comp)
378
+  )
379
+  merged_res_enh$logp10 <- -log10(merged_res_enh$gs_pvalue)
380
+  # TODO; here!
381
+
382
+
383
+
384
+
385
+
386
+
387