R/gs_upset.R
e085b85a
 #' Create a geneset upset dataset
9d5079eb
 #'
e085b85a
 #' Create a data frame that can be fed to the upset function
 #'
 #' @param res_enrich A `data.frame` object, storing the result of the functional
 #' enrichment analysis. See more in the main function, [GeneTonic()], to see the
 #' formatting requirements.
9d5079eb
 #' @param use_ids Logical - whether to use the `gs_id`entifiers as names, or the
e085b85a
 #' values provided as `gs_description`. Defaults to FALSE, using the full
 #' descriptions
 #'
 #' @return A data.frame to be used in `ComplexUpset::upset()`
 #' @export
 #'
 #' @examples
9d5079eb
 #' # res_enrich object
 #' data(res_enrich_macrophage, package = "GeneTonic")
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
 #'
 #' create_upsetdata(res_enrich[1:20, ])
 #' dim(create_upsetdata(res_enrich[1:20, ]))
 #'
 #' create_upsetdata(res_enrich[1:5, ], use_ids = TRUE)
e085b85a
 create_upsetdata <- function(res_enrich, use_ids = FALSE) {
   my_gs_ids <- res_enrich$gs_id
   my_gs_desc <- res_enrich$gs_description
   my_gs_genes <- res_enrich$gs_genes
9d5079eb
 
e085b85a
   gsg_list <- strsplit(my_gs_genes, ",")
   if (use_ids) {
     names(gsg_list) <- my_gs_ids
   } else {
     names(gsg_list) <- my_gs_desc
   }
9d5079eb
 
e085b85a
   all_genes <- unique(unlist(gsg_list))
   upgsg <- unlist(lapply(gsg_list, function(x) {
     x <- as.vector(match(all_genes, x))
   }))
9d5079eb
 
e085b85a
   upgsg[is.na(upgsg)] <- as.integer(0)
   upgsg[upgsg != 0] <- as.integer(1)
9d5079eb
 
e085b85a
   upgsg <- data.frame(
     matrix(upgsg, ncol = length(gsg_list), byrow = FALSE)
   )
9d5079eb
 
e085b85a
   upgsg <- upgsg[which(rowSums(upgsg) != 0), ]
   names(upgsg) <- names(gsg_list)
   rownames(upgsg) <- all_genes
9d5079eb
 
e085b85a
   return(upgsg)
 }
 
 
 
 
 #' Upset plot for genesets
9d5079eb
 #'
e085b85a
 #' Create an upset plot for genesets
 #'
 #' @param res_enrich A `data.frame` object, storing the result of the functional
 #' enrichment analysis. See more in the main function, [GeneTonic()], to check the
 #' formatting requirements (a minimal set of columns should be present).
 #' @param res_de A `DESeqResults` object.
 #' @param annotation_obj A `data.frame` object with the feature annotation
 #' information, with at least two columns, `gene_id` and `gene_name`.
 #' @param n_gs Integer value, corresponding to the maximal number of gene sets to
 #' be included
 #' @param gtl A `GeneTonic`-list object, containing in its slots the arguments
 #' specified above: `dds`, `res_de`, `res_enrich`, and `annotation_obj` - the names
 #' of the list _must_ be specified following the content they are expecting
 #' @param gs_ids Character vector, containing a subset of `gs_id` as they are
 #' available in `res_enrich`. Lists the gene sets to be included in addition to
 #' the top ones (via `n_gs`)
9d5079eb
 #' @param add_de_direction Logical, whether to add an annotation with info on the
e085b85a
 #' DE direction of single genes
 #' @param add_de_gsgenes Logical, if set to TRUE adds an annotation with detail
 #' on the single components of each defined subset
 #' @param col_upDE Character, specifying the color value to be used to mark
 #' upregulated genes
 #' @param col_downDE Character, specifying the color value to be used to mark
 #' downregulated genes
 #' @param upset_geom A geom specification to be used in the upset chart. Defaults
 #' sensibly to `geom_point(size = 2)`
 #' @param return_upsetgsg Logical, controlling the returned value. If set to TRUE,
9d5079eb
 #' this function will not generate the plot but only create the corresponding
e085b85a
 #' data.frame, in case the user wants to proceed with a custom call to create an
 #' upset plot.
 #'
 #' @return A `ggplot` object (if plotting), or alternatively a data.frame
 #' @export
 #'
 #' @examples
9d5079eb
 #' library("macrophage")
 #' library("DESeq2")
 #' library("org.Hs.eg.db")
 #' library("AnnotationDbi")
 #'
 #' # dds object
 #' data("gse", package = "macrophage")
 #' dds_macrophage <- DESeqDataSet(gse, design = ~ line + condition)
 #' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
 #' dds_macrophage <- estimateSizeFactors(dds_macrophage)
 #'
 #' # annotation object
 #' anno_df <- data.frame(
 #'   gene_id = rownames(dds_macrophage),
 #'   gene_name = mapIds(org.Hs.eg.db,
 #'     keys = rownames(dds_macrophage),
 #'     column = "SYMBOL",
 #'     keytype = "ENSEMBL"
 #'   ),
 #'   stringsAsFactors = FALSE,
 #'   row.names = rownames(dds_macrophage)
 #' )
 #'
 #' # res object
 #' data(res_de_macrophage, package = "GeneTonic")
 #' res_de <- res_macrophage_IFNg_vs_naive
 #'
 #' # res_enrich object
 #' data(res_enrich_macrophage, package = "GeneTonic")
 #' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
 #' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
 #' gs_upset(res_enrich,
3f885645
 #'   n_gs = 10
 #' )
9d5079eb
 #'
3f885645
 #' gs_upset(res_enrich,
 #'   res_de = res_de, annotation_obj = anno_df,
 #'   n_gs = 8,
 #'   add_de_direction = TRUE, add_de_gsgenes = TRUE
 #' )
9d5079eb
 #'
 #' # or using the practical gtl (GeneTonicList)
 #' gtl_macrophage <- GeneTonic_list(
 #'   dds = dds_macrophage,
 #'   res_de = res_de,
 #'   res_enrich = res_enrich,
 #'   annotation_obj = anno_df
 #' )
 #'
3f885645
 #' gs_upset(
 #'   gtl = gtl_macrophage,
 #'   n_gs = 15,
 #'   add_de_direction = TRUE, add_de_gsgenes = TRUE
 #' )
e085b85a
 gs_upset <- function(res_enrich,
9d5079eb
                      res_de = NULL,
e085b85a
                      annotation_obj = NULL,
                      n_gs = 10,
                      gtl = NULL,
                      gs_ids = NULL,
                      add_de_direction = FALSE,
                      add_de_gsgenes = FALSE,
                      col_upDE = "#E41A1C",
                      col_downDE = "#377EB8",
                      upset_geom = geom_point(size = 2),
                      return_upsetgsg = FALSE) {
   if (!is.null(gtl)) {
     checkup_gtl(gtl)
     # dds <- gtl$dds
     res_de <- gtl$res_de
     res_enrich <- gtl$res_enrich
     annotation_obj <- gtl$annotation_obj
   }
9d5079eb
 
e085b85a
   stopifnot(is.numeric(n_gs))
9d5079eb
 
e085b85a
   n_gs <- min(n_gs, nrow(res_enrich))
9d5079eb
 
e085b85a
   gs_to_use <- unique(
     c(
       res_enrich$gs_id[seq_len(n_gs)], # the ones from the top
       gs_ids[gs_ids %in% res_enrich$gs_id] # the ones specified from the custom list
     )
   )
9d5079eb
 
e085b85a
   re <- res_enrich[gs_to_use, ]
9d5079eb
 
e085b85a
   upgsg <- create_upsetdata(re)
9d5079eb
 
   if (add_de_direction) {
3f885645
     if (is.null(res_de) | is.null(annotation_obj)) {
9d5079eb
       stop("DE results and annotation required if `add_de_direction` is TRUE, please provide them as `res_de` and `annotation_obj`")
3f885645
     }
e085b85a
     upgsg$logFC <- res_de[annotation_obj$gene_id[
3f885645
       match(rownames(upgsg), annotation_obj$gene_name)
     ], "log2FoldChange"]
e085b85a
     upgsg$logFCsign <- upgsg$logFC >= 0
9d5079eb
 
e085b85a
     param_upset_baseanno <- list(
3f885645
       "Intersection size" = intersection_size(
e085b85a
         counts = FALSE,
         mapping = aes_string(fill = "logFCsign")
9d5079eb
       ) +
e085b85a
         scale_fill_manual(
3f885645
           values = c("FALSE" = col_downDE, "TRUE" = col_upDE),
e085b85a
           labels = c("logFC<0", "logFC>0"),
3f885645
           name = ""
         ) +
         theme(legend.position = "bottom")
e085b85a
     )
   } else {
     param_upset_baseanno <- "auto"
   }
9d5079eb
 
3f885645
   if (add_de_gsgenes) {
     if (is.null(res_de) | is.null(annotation_obj)) {
9d5079eb
       stop("DE results and annotation required if `add_de_gsgenes` is TRUE, please provide them as `res_de` and `annotation_obj`")
3f885645
     }
9d5079eb
     upgsg$logFC <- res_de[annotation_obj$gene_id[
3f885645
       match(rownames(upgsg), annotation_obj$gene_name)
     ], "log2FoldChange"]
9d5079eb
     upgsg$logFCsign <- upgsg$logFC >= 0
 
e085b85a
     param_upset_anno <- list(
3f885645
       "logFC" = (
9d5079eb
         ggplot(mapping = aes_string(x = "intersection", y = "logFC")) +
           geom_jitter(aes_string(color = "logFC"), na.rm = TRUE) +
           # geom_violin(alpha = 0.5, na.rm = TRUE) +
           theme(legend.position = "none") +
e085b85a
           scale_colour_gradient2(low = col_downDE, high = col_upDE)
       )
     )
   } else {
     param_upset_anno <- NULL
   }
9d5079eb
 
e085b85a
   # if desired to work on this separately
   if (return_upsetgsg) {
     return(upgsg)
   }
 
   # ready to plot
   ComplexUpset::upset(
     data = upgsg,
     intersect = names(upgsg)[seq_len(length(gs_to_use))],
     name = "gsg upset",
     matrix = intersection_matrix(
       geom = upset_geom
3f885645
     ) + scale_y_discrete(position = "right"),
e085b85a
     base_annotations = param_upset_baseanno,
     annotations = param_upset_anno,
3f885645
     width_ratio = 0.1
e085b85a
   )
 }