R/gs_summaries.R
98f0fc9b
 #' Plots a summary of enrichment results
a08fdd82
 #'
98f0fc9b
 #' Plots a summary of enrichment results for one set
a08fdd82
 #'
98f0fc9b
 #' @param res_enrich A `data.frame` object, storing the result of the functional
a1130645
 #' enrichment analysis. See more in the main function, [GeneTonic()], to check the
 #' formatting requirements (a minimal set of columns should be present).
dea71931
 #' @param gtl A `GeneTonic`-list object, containing in its slots the arguments
 #' specified above: `dds`, `res_de`, `res_enrich`, and `annotation_obj` - the names
 #' of the list _must_ be specified following the content they are expecting
98f0fc9b
 #' @param n_gs Integer value, corresponding to the maximal number of gene sets to
 #' be displayed
77f8287b
 #' @param p_value_column Character string, specifying the column of `res_enrich`
 #' where the p-value to be represented is specified. Defaults to `gs_pvalue`
 #' (it could have other values, in case more than one p-value - or an adjusted
 #' p-value - have been specified).
98f0fc9b
 #' @param color_by Character, specifying the column of `res_enrich` to be used
 #' for coloring the plotted gene sets. Defaults sensibly to `z_score`.
fc8e87b6
 #' @param return_barchart Logical, whether to return a barchart (instead of the
da58aa79
 #' default dot-segment plot); defaults to FALSE.
a08fdd82
 #'
98f0fc9b
 #' @return A `ggplot` object
ffe7b48d
 #'
 #' @seealso [gs_summary_overview_pair()], [gs_horizon()]
a08fdd82
 #' @export
 #'
 #' @examples
dff9a788
 #'
 #' library("macrophage")
 #' library("DESeq2")
 #' library("org.Hs.eg.db")
 #' library("AnnotationDbi")
 #'
 #' # dds object
 #' data("gse", package = "macrophage")
fc8e87b6
 #' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
dff9a788
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
 #'
 #' # annotation object
 #' anno_df <- data.frame(
 #'   gene_id = rownames(dds_macrophage),
 #'   gene_name = mapIds(org.Hs.eg.db,
fc8e87b6
 #'     keys = rownames(dds_macrophage),
 #'     column = "SYMBOL",
 #'     keytype = "ENSEMBL"
 #'   ),
dff9a788
 #'   stringsAsFactors = FALSE,
 #'   row.names = rownames(dds_macrophage)
 #' )
 #'
 #' # res object
 #' data(res_de_macrophage, package = "GeneTonic")
 #' res_de <- res_macrophage_IFNg_vs_naive
 #'
 #'
 #' # res_enrich object
 #' data(res_enrich_macrophage, package = "GeneTonic")
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
 #'
 #' gs_summary_overview(res_enrich)
fc8e87b6
 #'
45eb4367
 #' # if desired, it can also be shown as a barplot
dea71931
 #' gs_summary_overview(res_enrich, n_gs = 30, return_barchart = TRUE)
9cb1dff0
 gs_summary_overview <- function(res_enrich,
dea71931
                                 gtl = NULL,
9cb1dff0
                                 n_gs = 20,
0d759a3f
                                 p_value_column = "gs_pvalue",
da58aa79
                                 color_by = "z_score",
                                 return_barchart = FALSE
93cb7845
                                 # , size_by = "gs_de_count"
fc8e87b6
 ) {
dea71931
   if (!is.null(gtl)) {
     checkup_gtl(gtl)
     dds <- gtl$dds
     res_de <- gtl$res_de
     res_enrich <- gtl$res_enrich
     annotation_obj <- gtl$annotation_obj
   }
   
da58aa79
   if (!is.null(color_by)) {
     if (!(color_by %in% colnames(res_enrich))) {
fc8e87b6
       stop(
         "Your res_enrich object does not contain the ",
         color_by,
         " column.\n",
         "Compute this first or select another column to use for the color."
       )
da58aa79
     }
93cb7845
   }
9cb1dff0
 
   re <- res_enrich
   re$logp10 <- -log10(res_enrich[[p_value_column]])
   re <- re[seq_len(n_gs), ]
 
   re_sorted <- re %>%
95afafc2
     arrange(.data$logp10) %>%
54e66b6b
     mutate(gs_description = factor(.data$gs_description, .data$gs_description))
fc8e87b6
 
da58aa79
   if (return_barchart) {
fc8e87b6
     p <- ggplot(re_sorted, (aes_string(x = "gs_description", y = "logp10")))
 
da58aa79
     if (is.null(color_by)) {
       p <- p + geom_col()
     } else {
fc8e87b6
       p <- p + geom_col(aes(fill = .data[[color_by]])) +
da58aa79
         scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026")
     }
fc8e87b6
 
da58aa79
     p <- p +
fc8e87b6
       coord_flip() +
       labs(
         x = "Gene set description",
         y = "-log10 p-value",
         col = color_by
       ) +
da58aa79
       theme_minimal()
   } else {
     p <- ggplot(re_sorted, (aes_string(x = "gs_description", y = "logp10"))) +
fc8e87b6
       geom_segment(aes_string(
         x = "gs_description",
         xend = "gs_description",
         y = 0,
         yend = "logp10"
       ), color = "grey")
 
da58aa79
     if (is.null(color_by)) {
       p <- p + geom_point(size = 4)
     } else {
       p <- p + geom_point(aes(col = .data[[color_by]]), size = 4) +
         scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026")
     }
fc8e87b6
 
da58aa79
     p <- p +
       coord_flip() +
fc8e87b6
       labs(
         x = "Gene set description",
         y = "-log10 p-value",
         col = color_by
       ) +
da58aa79
       theme_minimal()
   }
9cb1dff0
   return(p)
 }
 
98f0fc9b
 #' Plots a summary of enrichment results
a08fdd82
 #'
98f0fc9b
 #' Plots a summary of enrichment results - for two sets of results
a08fdd82
 #'
98f0fc9b
 #' @param res_enrich A `data.frame` object, storing the result of the functional
a1130645
 #' enrichment analysis. See more in the main function, [GeneTonic()], to check the
 #' formatting requirements (a minimal set of columns should be present).
98f0fc9b
 #' @param res_enrich2 As `res_enrich`, the result of functional enrichment analysis,
 #' in a scenario/contrast different than the first set.
 #' @param n_gs Integer value, corresponding to the maximal number of gene sets to
 #' be displayed
77f8287b
 #' @param p_value_column Character string, specifying the column of `res_enrich`
 #' where the p-value to be represented is specified. Defaults to `gs_pvalue`
 #' (it could have other values, in case more than one p-value - or an adjusted
 #' p-value - have been specified).
98f0fc9b
 #' @param color_by Character, specifying the column of `res_enrich` to be used
 #' for coloring the plotted gene sets. Defaults sensibly to `z_score`.
 #' @param alpha_set2 Numeric value, between 0 and 1, which specified the alpha
 #' transparency used for plotting the points for gene set 2.
a08fdd82
 #'
98f0fc9b
 #' @return A `ggplot` object
ffe7b48d
 #'
 #' @seealso [gs_summary_overview()], [gs_horizon()]
 #'
a08fdd82
 #' @export
 #'
 #' @examples
dff9a788
 #'
 #' library("macrophage")
 #' library("DESeq2")
 #' library("org.Hs.eg.db")
 #' library("AnnotationDbi")
 #'
 #' # dds object
 #' data("gse", package = "macrophage")
fc8e87b6
 #' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
dff9a788
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
 #'
 #' # annotation object
 #' anno_df <- data.frame(
 #'   gene_id = rownames(dds_macrophage),
 #'   gene_name = mapIds(org.Hs.eg.db,
fc8e87b6
 #'     keys = rownames(dds_macrophage),
 #'     column = "SYMBOL",
 #'     keytype = "ENSEMBL"
 #'   ),
dff9a788
 #'   stringsAsFactors = FALSE,
 #'   row.names = rownames(dds_macrophage)
 #' )
 #'
 #' # res object
 #' data(res_de_macrophage, package = "GeneTonic")
 #' res_de <- res_macrophage_IFNg_vs_naive
 #'
 #' # res_enrich object
 #' data(res_enrich_macrophage, package = "GeneTonic")
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
 #'
d42b760b
 #' res_enrich2 <- res_enrich[1:42, ]
 #' set.seed(42)
 #' shuffled_ones <- sample(seq_len(42)) # to generate permuted p-values
 #' res_enrich2$gs_pvalue <- res_enrich2$gs_pvalue[shuffled_ones]
 #' res_enrich2$z_score <- res_enrich2$z_score[shuffled_ones]
 #' res_enrich2$aggr_score <- res_enrich2$aggr_score[shuffled_ones]
 #' # ideally, I would also permute the z scores and aggregated scores
fc8e87b6
 #' gs_summary_overview_pair(
 #'   res_enrich = res_enrich,
 #'   res_enrich2 = res_enrich2
 #' )
4e30f31f
 gs_summary_overview_pair <- function(res_enrich,
                                      res_enrich2,
98f0fc9b
                                      n_gs = 20,
0d759a3f
                                      p_value_column = "gs_pvalue",
98f0fc9b
                                      color_by = "z_score",
d42b760b
                                      alpha_set2 = 1) {
af907a83
   if (!(color_by %in% colnames(res_enrich))) {
fc8e87b6
     stop(
       "Your res_enrich object does not contain the ",
       color_by,
       " column.\n",
       "Compute this first or select another column to use for the color."
     )
af907a83
   }
d42b760b
   # same for set2
   if (!(color_by %in% colnames(res_enrich2))) {
fc8e87b6
     stop(
       "Your res_enrich object does not contain the ",
       color_by,
       " column.\n",
       "Compute this first or select another column to use for the color."
     )
d42b760b
   }
4e30f31f
 
d42b760b
   gs_set1 <- res_enrich$gs_id
   gs_set2 <- res_enrich2$gs_id
   gs_common <- intersect(gs_set1, gs_set2)
595e7b1e
 
12cea184
   if (length(gs_common) == 0) {
595e7b1e
     stop("No gene sets have been found in common to the two enrichment results")
   }
 
d42b760b
   # restrict to the top common n_gs
   gs_common <- gs_common[seq_len(min(n_gs, length(gs_common)))]
4e30f31f
 
d42b760b
   common_re1 <- res_enrich[gs_common, ]
   common_re2 <- res_enrich2[gs_common, ]
95afafc2
 
d42b760b
   common_re1$logp10 <- -log10(common_re1[[p_value_column]])
   common_re2$logp10 <- -log10(common_re2[[p_value_column]])
4e30f31f
 
d42b760b
   re_both <- common_re1
   re_both[["logp10_2"]] <- common_re2$logp10
   re_both[[color_by]] <- common_re1[[color_by]]
   re_both[[paste0(color_by, "_2")]] <- common_re2[[color_by]]
4e30f31f
 
   re_both_sorted <- re_both %>%
95afafc2
     arrange(.data$logp10) %>%
54e66b6b
     mutate(gs_description = factor(.data$gs_description, .data$gs_description))
4e30f31f
 
54e66b6b
   p <- ggplot(re_both_sorted, aes_string(x = "gs_description", y = "logp10")) +
     geom_segment(aes_string(x = "gs_description", xend = "gs_description", y = "logp10_2", yend = "logp10"), color = "grey") +
d42b760b
     geom_point(aes(fill = .data[[color_by]]), size = 4, pch = 21) +
0ca5ed74
     geom_point(aes_string(y = "logp10_2", col = paste0(color_by, "_2")),
fc8e87b6
       size = 4, alpha = alpha_set2
     ) +
c1023dfb
     scale_color_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", name = paste0(color_by, " set 2")) +
     scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026", name = paste0(color_by, " set 1")) +
4e30f31f
     coord_flip() +
fc8e87b6
     labs(
       x = "Gene set description",
       y = "-log10 p-value",
       col = color_by
     ) +
d42b760b
     ylim(0, NA) +
4e30f31f
     theme_minimal()
 
   return(p)
 }
 
af69d9a3
 
98f0fc9b
 #' Plots a summary of enrichment results
af69d9a3
 #'
98f0fc9b
 #' Plots a summary of enrichment results - horizon plot to compare one or more
 #' sets of results
af69d9a3
 #'
c0d104ae
 #' @details It makes sense to have the results in `res_enrich` sorted by
 #' increasing `gs_pvalue`, to make sure the top results are first sorted by the
 #' significance (when selecting the common gene sets across the `res_enrich`
 #' elements provided in `compared_res_enrich_list`)
 #'
 #' The gene sets included are a subset of the ones in common to all different
 #' scenarios included in `res_enrich` and the elements of `compared_res_enrich_list`.
45b7ee06
 #'
98f0fc9b
 #' @param res_enrich A `data.frame` object, storing the result of the functional
a1130645
 #' enrichment analysis. See more in the main function, [GeneTonic()], to check the
 #' formatting requirements (a minimal set of columns should be present).
c0d104ae
 #' @param compared_res_enrich_list A named list, where each element is a `data.frame`
 #' formatted like the standard `res_enrich` objects used by `GeneTonic`. The
 #' names of the list are the names of the scenarios.
98f0fc9b
 #' @param n_gs Integer value, corresponding to the maximal number of gene sets to
 #' be displayed
77f8287b
 #' @param p_value_column Character string, specifying the column of `res_enrich`
 #' where the p-value to be represented is specified. Defaults to `gs_pvalue`
 #' (it could have other values, in case more than one p-value - or an adjusted
 #' p-value - have been specified).
98f0fc9b
 #' @param color_by Character, specifying the column of `res_enrich` to be used
 #' for coloring the plotted gene sets. Defaults sensibly to `z_score`.
c0d104ae
 #' @param ref_name Character, defining the name of the scenario to compare
 #' against (the one in `res_enrich`) - defaults to "ref_scenario".
 #' @param sort_by Character string, either "clustered", or "first_set". This
 #' controls the sorting order of the included terms in the final plot.
 #' "clustered" presents the terms grouped by the scenario where they assume the
 #' highest values. "first_set" sorts the terms by the significance value in the
 #' reference scenario.
af69d9a3
 #'
ffe7b48d
 #' @return A `ggplot` object
 #'
 #' @seealso [gs_summary_overview()], [gs_summary_overview_pair()]
 #'
af69d9a3
 #' @export
 #'
 #' @examples
dff9a788
 #'
 #' library("macrophage")
 #' library("DESeq2")
 #' library("org.Hs.eg.db")
 #' library("AnnotationDbi")
 #'
 #' # dds object
 #' data("gse", package = "macrophage")
fc8e87b6
 #' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
dff9a788
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
 #'
 #' # annotation object
 #' anno_df <- data.frame(
 #'   gene_id = rownames(dds_macrophage),
 #'   gene_name = mapIds(org.Hs.eg.db,
fc8e87b6
 #'     keys = rownames(dds_macrophage),
 #'     column = "SYMBOL",
 #'     keytype = "ENSEMBL"
 #'   ),
dff9a788
 #'   stringsAsFactors = FALSE,
 #'   row.names = rownames(dds_macrophage)
 #' )
 #'
 #' # res object
 #' data(res_de_macrophage, package = "GeneTonic")
 #' res_de <- res_macrophage_IFNg_vs_naive
 #'
 #' # res_enrich object
 #' data(res_enrich_macrophage, package = "GeneTonic")
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
 #'
45b7ee06
 #' res_enrich2 <- res_enrich[1:42, ]
 #' res_enrich3 <- res_enrich[1:42, ]
 #' res_enrich4 <- res_enrich[1:42, ]
 #'
fc8e87b6
 #' set.seed(2 * 42)
45b7ee06
 #' shuffled_ones_2 <- sample(seq_len(42)) # to generate permuted p-values
 #' res_enrich2$gs_pvalue <- res_enrich2$gs_pvalue[shuffled_ones_2]
 #' res_enrich2$z_score <- res_enrich2$z_score[shuffled_ones_2]
 #' res_enrich2$aggr_score <- res_enrich2$aggr_score[shuffled_ones_2]
 #'
fc8e87b6
 #' set.seed(3 * 42)
45b7ee06
 #' shuffled_ones_3 <- sample(seq_len(42)) # to generate permuted p-values
 #' res_enrich3$gs_pvalue <- res_enrich3$gs_pvalue[shuffled_ones_3]
 #' res_enrich3$z_score <- res_enrich3$z_score[shuffled_ones_3]
 #' res_enrich3$aggr_score <- res_enrich3$aggr_score[shuffled_ones_3]
 #'
fc8e87b6
 #' set.seed(4 * 42)
45b7ee06
 #' shuffled_ones_4 <- sample(seq_len(42)) # to generate permuted p-values
 #' res_enrich4$gs_pvalue <- res_enrich4$gs_pvalue[shuffled_ones_4]
 #' res_enrich4$z_score <- res_enrich4$z_score[shuffled_ones_4]
 #' res_enrich4$aggr_score <- res_enrich4$aggr_score[shuffled_ones_4]
 #'
 #' compa_list <- list(
 #'   scenario2 = res_enrich2,
 #'   scenario3 = res_enrich3,
 #'   scenario4 = res_enrich4
 #' )
c0d104ae
 #'
 #' gs_horizon(res_enrich,
fc8e87b6
 #'   compared_res_enrich_list = compa_list,
 #'   n_gs = 50,
 #'   sort_by = "clustered"
 #' )
c0d104ae
 #' gs_horizon(res_enrich,
fc8e87b6
 #'   compared_res_enrich_list = compa_list,
 #'   n_gs = 20,
 #'   sort_by = "first_set"
 #' )
45b7ee06
 gs_horizon <- function(res_enrich,
                        compared_res_enrich_list,
af69d9a3
                        n_gs = 20,
0d759a3f
                        p_value_column = "gs_pvalue",
45b7ee06
                        color_by = "z_score",
                        ref_name = "ref_scenario",
                        sort_by = c("clustered", "first_set")) {
ef85e69d
   if (!(color_by %in% colnames(res_enrich))) {
fc8e87b6
     stop(
       "Your res_enrich object does not contain the ",
       color_by,
       " column.\n",
       "Compute this first or select another column to use for the color."
     )
ef85e69d
   }
45b7ee06
 
4a3daef1
   if (!n_gs > 0) {
     stop("Please select a value for `n_gs` greater than 0")
   }
 
   if (is.null(names(compared_res_enrich_list))) {
     message("You provided a list for comparison without specifying names, adding some defaults")
     names(compared_res_enrich_list) <-
       paste0("other_", seq_len(length(compared_res_enrich_list)))
   }
45b7ee06
 
4a3daef1
   if (!is(compared_res_enrich_list, "list")) {
     stop("You need to provide a list for comparison (even versus one scenario)")
   }
 
fc8e87b6
   colnames_res_enrich <- c(
     "gs_id",
     "gs_description",
     "gs_pvalue",
     "gs_genes",
     "gs_de_count",
     "gs_bg_count"
   )
4a3daef1
   for (i in seq_len(length(compared_res_enrich_list))) {
     this_re <- compared_res_enrich_list[[i]]
45b7ee06
 
fc8e87b6
     if (!all(colnames_res_enrich %in% colnames(this_re))) {
       stop(
         "One of the provided `res_enrich` objects does not respect the format ",
         "required to use in GeneTonic\n",
         "e.g. all required column names have to be present.\n",
         "You might want to use one of the `shake_*` functions to convert it.\n",
         "Required columns: ", paste(colnames_res_enrich, collapse = ", "),
         "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`"
       )
     }
 
     if (!p_value_column %in% colnames(this_re)) {
       stop(
         "Required column (p-value) `", p_value_column, "` not found in a component of ",
         "`compared_res_enrich_list` object.",
         "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`"
       )
     }
     if (!color_by %in% colnames(this_re)) {
       stop(
         "Required column (for coloring) `", color_by, "` not found in a component of ",
         "`compared_res_enrich_list` object.",
         "\nThis occurred at the element ", i, " in your `compared_res_enrich_list`"
       )
     }
4a3daef1
   }
45b7ee06
 
   sort_by <- match.arg(sort_by, c("clustered", "first_set"))
 
   # compared_res_enrich_list
   # append original ref
   all_res_enrichs <- compared_res_enrich_list
   all_res_enrichs[[ref_name]] <- res_enrich
 
   all_gsids <- lapply(all_res_enrichs, function(arg) arg[["gs_id"]])
 
   gs_common <- Reduce(intersect, all_gsids)
 
   if (length(gs_common) == 0) {
     stop("No gene sets have been found in common to the two enrichment results")
   }
 
   # restrict to the top common n_gs
   gs_common <- gs_common[seq_len(min(n_gs, length(gs_common)))]
 
   # append scenario info
   res_enrich[["scenario"]] <- ref_name
fc8e87b6
   compared_res_enrich_list <- lapply(
     seq_len(length(compared_res_enrich_list)),
     function(arg) {
       re <- compared_res_enrich_list[[arg]]
       re[["scenario"]] <- names(compared_res_enrich_list)[arg]
       return(re)
     }
   )
45b7ee06
 
   # reduce to common sets
   re_ref <- res_enrich[gs_common, ]
fc8e87b6
   re_comp <- lapply(
     seq_len(length(compared_res_enrich_list)),
     function(arg) {
       re <- compared_res_enrich_list[[arg]]
       re <- re[gs_common, ]
       return(re)
     }
   )
45b7ee06
 
   merged_res_enh <- rbind(
     re_ref,
     do.call(rbind, re_comp)
   )
   merged_res_enh$logp10 <- -log10(merged_res_enh$gs_pvalue)
af69d9a3
 
c0d104ae
   if (sort_by == "first_set") {
     # sorted by category in scenario1
     p <- merged_res_enh %>%
       mutate(gs_description = factor(.data$gs_description, rev(unique(.data$gs_description)))) %>%
       arrange((.data$logp10)) %>%
       ggplot(aes_string(x = "gs_description", y = "logp10")) +
       geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
       geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
e2b5e71e
       scale_color_brewer(palette = "Set2") +
c0d104ae
       scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
       ylim(c(0, NA)) +
       coord_flip() +
       theme_minimal()
   } else if (sort_by == "clustered") {
     # with a nicer sorting - "grouped" by scenario
     nicerorder_terms <- merged_res_enh %>%
       group_by(.data$gs_description) %>%
fc8e87b6
       mutate(
         main_category = .data$scenario[which.max(.data$logp10)],
         max_value = max(.data$logp10)
       ) %>%
c0d104ae
       arrange(.data$main_category, desc(.data$max_value)) %>%
       dplyr::pull(.data$gs_description)
 
     p <- merged_res_enh %>%
       # mutate(gs_description=factor(gs_description, unique(gs_description))) %>%
       mutate(gs_description = factor(.data$gs_description, rev(unique(nicerorder_terms)))) %>%
       arrange(desc(.data$logp10)) %>%
       ggplot(aes_string(x = "gs_description", y = "logp10")) +
       geom_line(aes_string(group = "scenario", col = "scenario"), size = 3, alpha = 0.7) +
e2b5e71e
       scale_color_brewer(palette = "Set2") +
c0d104ae
       geom_point(aes_string(fill = "z_score"), size = 4, pch = 21) +
       scale_fill_gradient2(low = "#313695", mid = "#FFFFE5", high = "#A50026") +
       ylim(c(0, NA)) +
       coord_flip() +
       theme_minimal()
   }
af69d9a3
 
fc8e87b6
   p <- p + labs(
     x = "Gene set description",
     y = "-log10 p-value",
     col = color_by
   )
e2b5e71e
 
c0d104ae
   return(p)
af69d9a3
 }
 
 
98f0fc9b
 #' Plots a heatmap for genes and genesets
e9d263c7
 #'
98f0fc9b
 #' Plots a heatmap for genes and genesets, useful to spot out intersections across
 #' genesets and an overview of them
e9d263c7
 #'
98f0fc9b
 #' @param res_enrich A `data.frame` object, storing the result of the functional
a1130645
 #' enrichment analysis. See more in the main function, [GeneTonic()], to check the
 #' formatting requirements (a minimal set of columns should be present).
98f0fc9b
 #' @param res_de A `DESeqResults` object.
 #' @param annotation_obj A `data.frame` object with the feature annotation
 #' information, with at least two columns, `gene_id` and `gene_name`.
2e537e86
 #' @param gtl A `GeneTonic`-list object, containing in its slots the arguments
 #' specified above: `dds`, `res_de`, `res_enrich`, and `annotation_obj` - the names
 #' of the list _must_ be specified following the content they are expecting
98f0fc9b
 #' @param n_gs Integer value, corresponding to the maximal number of gene sets to
 #' be displayed
a19a2562
 #'
98f0fc9b
 #' @return A `ggplot` object
e9d263c7
 #' @export
 #'
 #' @examples
dff9a788
 #'
 #' library("macrophage")
 #' library("DESeq2")
 #' library("org.Hs.eg.db")
 #' library("AnnotationDbi")
 #'
 #' # dds object
 #' data("gse", package = "macrophage")
fc8e87b6
 #' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
dff9a788
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
 #'
 #' # annotation object
 #' anno_df <- data.frame(
 #'   gene_id = rownames(dds_macrophage),
 #'   gene_name = mapIds(org.Hs.eg.db,
fc8e87b6
 #'     keys = rownames(dds_macrophage),
 #'     column = "SYMBOL",
 #'     keytype = "ENSEMBL"
 #'   ),
dff9a788
 #'   stringsAsFactors = FALSE,
 #'   row.names = rownames(dds_macrophage)
 #' )
 #'
 #' # res object
 #' data(res_de_macrophage, package = "GeneTonic")
 #' res_de <- res_macrophage_IFNg_vs_naive
 #'
 #' # res_enrich object
 #' data(res_enrich_macrophage, package = "GeneTonic")
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
 #'
fc8e87b6
 #' gs_summary_heat(
 #'   res_enrich = res_enrich,
 #'   res_de = res_de,
 #'   annotation_obj = anno_df,
 #'   n_gs = 20
 #' )
ba9ef266
 gs_summary_heat <- function(res_enrich,
                             res_de,
                             annotation_obj,
2e537e86
                             gtl = NULL,
0d759a3f
                             n_gs = 80) {
2e537e86
   if (!is.null(gtl)) {
     checkup_gtl(gtl)
     dds <- gtl$dds
     res_de <- gtl$res_de
     res_enrich <- gtl$res_enrich
     annotation_obj <- gtl$annotation_obj
   }
e9d263c7
 
ba9ef266
   res_enrich2 <- res_enrich[seq_len(n_gs), ]
e9d263c7
 
e2852708
   # enriched_gsids <- res_enrich2[["gs_id"]]
54e66b6b
   # enriched_gsnames <- res_enrich2[["gs_description"]]
   # enriched_gsdescs <- vapply(enriched_gsids, function(arg) Definition(GOTERM[[arg]]), character(1))
e9d263c7
 
   # rownames(res_enrich) <- enriched_gsids
 
0d759a3f
   gs_expanded <- tidyr::separate_rows(res_enrich2, "gs_genes", sep = ",")
e9d263c7
   gs_expanded$log2FoldChange <-
0d759a3f
     res_de[annotation_obj$gene_id[match(gs_expanded$gs_genes, annotation_obj$gene_name)], ]$log2FoldChange
e9d263c7
 
   # keep them as factor to prevent rearrangement!
0d759a3f
   gs_expanded[["gs_id"]] <- factor(gs_expanded[["gs_id"]], levels = res_enrich2[["gs_id"]])
   gs_expanded[["gs_description"]] <- factor(gs_expanded[["gs_description"]], levels = res_enrich2[["gs_description"]])
   gs_expanded[["gs_genes"]] <- factor(gs_expanded[["gs_genes"]], levels = unique(gs_expanded[["gs_genes"]]))
e9d263c7
 
fc8e87b6
   p <- ggplot(
     gs_expanded,
     aes_string(x = "gs_genes", y = "gs_description")
   ) +
e9d263c7
     geom_tile(aes_string(fill = "log2FoldChange"),
fc8e87b6
       color = "white"
     ) +
     scale_fill_gradient2(
       low = muted("deepskyblue"),
       mid = "lightyellow",
       high = muted("firebrick"),
       name = "log2 Fold Change"
     ) +
     xlab(NULL) +
     ylab(NULL) +
     theme_minimal() +
     theme(
       panel.grid.major = element_blank(),
       axis.text.x = element_text(angle = 75, hjust = 1)
     )
e9d263c7
 
   return(p)
 }