Browse code

Merge pull request #47 from federicomarini/ridge_genesets

Ridge genesets

Federico Marini authored on 10/03/2023 15:06:32 • GitHub committed on 10/03/2023 15:06:32
Showing 9 changed files

... ...
@@ -41,6 +41,7 @@ Imports:
41 41
     ggforce,
42 42
     ggplot2,
43 43
     ggrepel,
44
+    ggridges,
44 45
     GO.db,
45 46
     graphics,
46 47
     grDevices,
... ...
@@ -81,7 +82,7 @@ Encoding: UTF-8
81 82
 VignetteBuilder: knitr
82 83
 URL: https://github.com/federicomarini/GeneTonic
83 84
 BugReports: https://github.com/federicomarini/GeneTonic/issues
84
-RoxygenNote: 7.2.1
85
+RoxygenNote: 7.2.3
85 86
 Roxygen: list(markdown = TRUE)
86 87
 biocViews:
87 88
     GUI, GeneExpression, Software, Transcription, Transcriptomics, Visualization,
... ...
@@ -124,6 +124,8 @@ importFrom(expm,"%^%")
124 124
 importFrom(ggforce,geom_sina)
125 125
 importFrom(ggrepel,geom_label_repel)
126 126
 importFrom(ggrepel,geom_text_repel)
127
+importFrom(ggridges,geom_density_ridges)
128
+importFrom(ggridges,position_points_jitter)
127 129
 importFrom(grDevices,col2rgb)
128 130
 importFrom(grDevices,colorRampPalette)
129 131
 importFrom(grDevices,rgb)
... ...
@@ -37,6 +37,7 @@
37 37
 #' @importFrom ggforce geom_sina
38 38
 #' @import ggplot2
39 39
 #' @importFrom ggrepel geom_label_repel geom_text_repel
40
+#' @importFrom ggridges geom_density_ridges position_points_jitter
40 41
 #' @import GO.db
41 42
 #' @importFrom graphics par plot
42 43
 #' @importFrom grDevices colorRampPalette rgb col2rgb
... ...
@@ -19,9 +19,14 @@
19 19
 #' available in `res_enrich`. Lists the gene sets to be displayed.
20 20
 #' @param chars_limit Integer, number of characters to be displayed for each
21 21
 #' geneset name.
22
+#' @param plot_style Character value, one of "point" or "ridgeline". Defines the 
23
+#' style of the plot to summarize visually the table.
24
+#' @param ridge_color Character value, one of "gs_id" or "gs_score", controls the
25
+#' fill color of the ridge lines. If selecting "gs_score", the `z_score` column
26
+#' must be present in the enrichment results table - see `get_aggrscores()` to do
27
+#' that.
22 28
 #' @param plot_title Character string, used as title for the plot. If left `NULL`,
23
-#' it defaults to a general description of the plot and of the DE contrast
24
-#'
29
+#' it defaults to a general description of the plot and of the DE contrast.
25 30
 #'
26 31
 #' @return A `ggplot` object
27 32
 #' @export
... ...
@@ -64,6 +69,20 @@
64 69
 #'   anno_df,
65 70
 #'   n_gs = 10
66 71
 #' )
72
+#' 
73
+#' # using the ridge line as a style, also coloring by the Z score
74
+#' res_enrich_withscores <- get_aggrscores(
75
+#'   res_enrich,
76
+#'   res_de,
77
+#'   anno_df
78
+#' )
79
+#' enhance_table(res_enrich_withscores,
80
+#'   res_de,
81
+#'   anno_df,
82
+#'   n_gs = 10, 
83
+#'   plot_style = "ridgeline",
84
+#'   ridge_color = "gs_score"
85
+#' )
67 86
 enhance_table <- function(res_enrich,
68 87
                           res_de,
69 88
                           annotation_obj,
... ...
@@ -71,6 +90,8 @@ enhance_table <- function(res_enrich,
71 90
                           n_gs = 50,
72 91
                           gs_ids = NULL,
73 92
                           chars_limit = 70,
93
+                          plot_style = c("point", "ridgeline"),
94
+                          ridge_color = c("gs_id", "gs_score"),
74 95
                           plot_title = NULL) {
75 96
   if (!is.null(gtl)) {
76 97
     checkup_gtl(gtl)
... ...
@@ -79,7 +100,10 @@ enhance_table <- function(res_enrich,
79 100
     res_enrich <- gtl$res_enrich
80 101
     annotation_obj <- gtl$annotation_obj
81 102
   }
82
-
103
+  
104
+  plot_style <- match.arg(plot_style, c("point", "ridgeline"))
105
+  ridge_color <- match.arg(ridge_color, c("gs_id", "gs_score"))
106
+  
83 107
   n_gs <- min(n_gs, nrow(res_enrich))
84 108
 
85 109
   gs_to_use <- unique(
... ...
@@ -128,27 +152,89 @@ enhance_table <- function(res_enrich,
128 152
   gs_fulllist$gs_desc <- factor(gs_fulllist$gs_desc, levels = rev(levels(gs_fulllist$gs_desc)))
129 153
   max_lfc <- max(abs(range(gs_fulllist$log2FoldChange)))
130 154
 
131
-  p <- ggplot(
132
-    gs_fulllist, aes_string(
133
-      x = "log2FoldChange",
134
-      y = "gs_desc",
135
-      fill = "gs_id",
136
-      text = "gene_name"
137
-    )
138
-  ) +
139
-    scale_x_continuous(limits = c(-max_lfc, max_lfc)) +
140
-    geom_point(alpha = 0.7, shape = 21, size = 2) +
141
-    theme_minimal() +
142
-    geom_vline(aes(xintercept = 0), col = "steelblue", alpha = 0.4) +
143
-    theme(legend.position = "none") +
144
-    scale_y_discrete(
145
-      name = "",
146
-      labels = paste0(
147
-        substr(as.character(unique(gs_fulllist$gs_desc)), 1, chars_limit),
148
-        " | ", unique(gs_fulllist$gs_id)
155
+  # common elements here
156
+  gs_labels <- paste0(
157
+    substr(as.character(unique(gs_fulllist$gs_desc)), 1, chars_limit),
158
+    " | ", unique(gs_fulllist$gs_id)
159
+  )
160
+  
161
+  if (plot_style == "point") {
162
+    p <- ggplot(
163
+      gs_fulllist, aes_string(
164
+        x = "log2FoldChange",
165
+        y = "gs_desc",
166
+        fill = "gs_id",
167
+        text = "gene_name"
149 168
       )
150 169
     ) +
151
-    labs(x = "log2 Fold Change")
170
+      scale_x_continuous(limits = c(-max_lfc, max_lfc)) +
171
+      geom_point(alpha = 0.7, shape = 21, size = 2) +
172
+      theme_minimal() +
173
+      geom_vline(aes(xintercept = 0), col = "steelblue", alpha = 0.4) +
174
+      theme(legend.position = "none") +
175
+      scale_y_discrete(
176
+        name = "",
177
+        labels = gs_labels
178
+      ) +
179
+      labs(x = "log2 Fold Change")
180
+    
181
+  } else if (plot_style == "ridgeline") {
182
+    
183
+    if (ridge_color == "gs_score" & is.null(res_enrich$z_score)) {
184
+      message("Fallback to plotting the ridgelines according to geneset id (Z score required)")
185
+      ridge_color <- "gs_id"
186
+    }  
187
+    
188
+    if (ridge_color == "gs_score") {
189
+      gs_fulllist$gs_zscore <- res_enrich$z_score[match(gs_fulllist$gs_id, res_enrich$gs_id)]
190
+      p <- ggplot(
191
+        gs_fulllist, aes_string(
192
+          x = "log2FoldChange",
193
+          y = "gs_desc",
194
+          fill = "gs_zscore"
195
+        )
196
+      ) +
197
+        scale_x_continuous(limits = c(-max_lfc, max_lfc)) + 
198
+        scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") + 
199
+        ggridges::geom_density_ridges(
200
+          aes_string(group = "gs_id"),
201
+          point_color = "#00000066",
202
+          jittered_points = TRUE, scale = .95, rel_min_height = .01,
203
+          point_shape = "|", point_size = 3, size = 0.25,
204
+          position = ggridges::position_points_jitter(height = 0)) +
205
+        theme_minimal() +
206
+        geom_vline(aes(xintercept = 0), col = "steelblue", alpha = 0.4) +
207
+        scale_y_discrete(
208
+          name = "",
209
+          labels = gs_labels
210
+        ) +
211
+        labs(x = "log2 Fold Change")
212
+    }
213
+    else if (ridge_color == "gs_id") {
214
+      p <- ggplot(
215
+        gs_fulllist, aes_string(
216
+          x = "log2FoldChange",
217
+          y = "gs_desc",
218
+          fill = "gs_id"
219
+        )
220
+      ) +
221
+        scale_x_continuous(limits = c(-max_lfc, max_lfc)) + 
222
+        ggridges::geom_density_ridges(
223
+          aes_string(group = "gs_id"),
224
+          point_color = "#00000066",
225
+          jittered_points = TRUE, scale = .95, rel_min_height = .01,
226
+          point_shape = "|", point_size = 3, size = 0.25,
227
+          position = ggridges::position_points_jitter(height = 0)) +
228
+        theme_minimal() +
229
+        geom_vline(aes(xintercept = 0), col = "steelblue", alpha = 0.4) +
230
+        theme(legend.position = "none") +
231
+        scale_y_discrete(
232
+          name = "",
233
+          labels = gs_labels
234
+        ) +
235
+        labs(x = "log2 Fold Change")
236
+    }
237
+  }
152 238
 
153 239
   if (is.null(plot_title)) {
154 240
     p <- p + ggtitle(paste0("Enrichment overview - ", this_contrast))
... ...
@@ -223,7 +223,7 @@ gs_fuzzyclustering <- function(res_enrich,
223 223
     group_by(.data$gs_fuzzycluster) %>%
224 224
     arrange(.data$gs_pvalue) %>%
225 225
     slice(1) %>%
226
-    select(.data$gs_id, .data$gs_fuzzycluster)
226
+    select("gs_id", "gs_fuzzycluster")
227 227
   # handling this with tuple to account where the gene set is most representative
228 228
   gs_mostsig_tuple <- paste0(gs_mostsig$gs_id, "|", gs_mostsig$gs_fuzzycluster)
229 229
 
... ...
@@ -519,7 +519,7 @@ gs_horizon <- function(res_enrich,
519 519
       mutate(gs_description = factor(.data$gs_description, rev(unique(.data$gs_description)))) %>%
520 520
       arrange((.data$logp10)) %>%
521 521
       ggplot(aes_string(x = "gs_description", y = "logp10")) +
522
-      geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
522
+      geom_line(aes_string(group = "scenario", col = "scenario"), linewidth = 3, alpha = 0.7) +
523 523
       geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
524 524
       scale_color_brewer(palette = "Set2") +
525 525
       scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
... ...
@@ -542,7 +542,7 @@ gs_horizon <- function(res_enrich,
542 542
       mutate(gs_description = factor(.data$gs_description, rev(unique(nicerorder_terms)))) %>%
543 543
       arrange(desc(.data$logp10)) %>%
544 544
       ggplot(aes_string(x = "gs_description", y = "logp10")) +
545
-      geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
545
+      geom_line(aes_string(group = "scenario", col = "scenario"), linewidth = 3, alpha = 0.7) +
546 546
       scale_color_brewer(palette = "Set2") +
547 547
       geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
548 548
       scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
... ...
@@ -12,6 +12,8 @@ enhance_table(
12 12
   n_gs = 50,
13 13
   gs_ids = NULL,
14 14
   chars_limit = 70,
15
+  plot_style = c("point", "ridgeline"),
16
+  ridge_color = c("gs_id", "gs_score"),
15 17
   plot_title = NULL
16 18
 )
17 19
 }
... ...
@@ -38,8 +40,16 @@ available in \code{res_enrich}. Lists the gene sets to be displayed.}
38 40
 \item{chars_limit}{Integer, number of characters to be displayed for each
39 41
 geneset name.}
40 42
 
43
+\item{plot_style}{Character value, one of "point" or "ridgeline". Defines the
44
+style of the plot to summarize visually the table.}
45
+
46
+\item{ridge_color}{Character value, one of "gs_id" or "gs_score", controls the
47
+fill color of the ridge lines. If selecting "gs_score", the \code{z_score} column
48
+must be present in the enrichment results table - see \code{get_aggrscores()} to do
49
+that.}
50
+
41 51
 \item{plot_title}{Character string, used as title for the plot. If left \code{NULL},
42
-it defaults to a general description of the plot and of the DE contrast}
52
+it defaults to a general description of the plot and of the DE contrast.}
43 53
 }
44 54
 \value{
45 55
 A \code{ggplot} object
... ...
@@ -87,4 +97,18 @@ enhance_table(res_enrich,
87 97
   anno_df,
88 98
   n_gs = 10
89 99
 )
100
+
101
+# using the ridge line as a style, also coloring by the Z score
102
+res_enrich_withscores <- get_aggrscores(
103
+  res_enrich,
104
+  res_de,
105
+  anno_df
106
+)
107
+enhance_table(res_enrich_withscores,
108
+  res_de,
109
+  anno_df,
110
+  n_gs = 10, 
111
+  plot_style = "ridgeline",
112
+  ridge_color = "gs_score"
113
+)
90 114
 }
... ...
@@ -37,6 +37,32 @@ test_that("Enhanced table is created", {
37 37
   )
38 38
   expect_is(p2, "gg")
39 39
   
40
+  p_ridge <- enhance_table(with_scores,
41
+    res_macrophage_IFNg_vs_naive,
42
+    annotation_obj = anno_df,
43
+    n_gs = 5,
44
+    plot_style = "ridgeline"
45
+  )
46
+  expect_is(p_ridge, "gg")
47
+  
48
+  p_ridge_zscorecol <- enhance_table(with_scores,
49
+    res_macrophage_IFNg_vs_naive,
50
+    annotation_obj = anno_df,
51
+    n_gs = 5,
52
+    plot_style = "ridgeline",
53
+    ridge_color = "gs_score"
54
+  )
55
+  expect_is(p_ridge_zscorecol, "gg")
56
+  
57
+  expect_message({
58
+    p_ridge_fallback <- enhance_table(
59
+      gtl = gtl_macrophage,
60
+      n_gs = 5,
61
+      plot_style = "ridgeline",
62
+      ridge_color = "gs_score")
63
+  })
64
+  
65
+  
40 66
   re_modified <- res_enrich_IFNg_vs_naive
41 67
   # patching up some letters to mess up the name of a gene
42 68
   re_modified$gs_genes[1] <- 
... ...
@@ -24,6 +24,14 @@ test_that("Gene set dendrogram is created", {
24 24
   expect_is(my_dend2, "dendrogram")
25 25
   expect_is(my_dend3, "dendrogram")
26 26
   expect_is(my_dend4, "dendrogram")
27
+  
28
+  my_dend_pval <- gs_dendro(res_enrich_withscores, n_gs = 20,
29
+                            color_leaves_by = "gs_pvalue")
30
+  expect_is(my_dend_pval, "dendrogram")
31
+  
32
+  re_subset <- res_enrich_withscores[res_enrich_withscores[1:12, ]$z_score >= 0, ]
33
+  my_dend_subset <- gs_dendro(re_subset, n_gs = 12, color_leaves_by = "z_score")
34
+  expect_is(my_dend_subset, "dendrogram")
27 35
 
28 36
   expect_warning(gs_dendro(res_enrich_withscores,
29 37
     n_gs = 5,