R/geneSetEnrich.R
ee3b0333
 #' @title Gene set enrichment
 #' @description Identify and return significantly-enriched terms for each gene
 #'  module in a Celda object. Performs gene set enrichment analysis for Celda
 #'  identified modules using the enrichR package.
 #' @author Ahmed Youssef
 #' @param counts Integer count matrix. Rows represent genes and columns
 #'  represent cells. Row names of the matrix should be gene names.
 #' @param celdaModel Celda object of class `celda_G` or `celda_CG`.
 #' @param databases Character vector. Name of reference database. Available
 #'  databases can be viewed by running \code{enrichR::listEnrichrDbs()}.
 #' @param fdr False discovery rate (FDR). Numeric. Cutoff value for adjusted
 #'  p-value, terms with FDR below this value are considered significantly
 #'  enriched.
 #' @return List of length 'L' where each member contains the significantly
 #'  enriched terms for the corresponding module.
55fbbe69
 #' @examples
ee3b0333
 #' library(M3DExampleData)
 #' counts <- M3DExampleData::Mmus_example_list$data
 #' #subset 100 genes for fast clustering
aeb951bd
 #' counts <- counts[1500:2000, ]
ee3b0333
 #' #cluster genes into 10 modules for quick demo
6c7a34c0
 #' cm <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE)
ee3b0333
 #' gse <- geneSetEnrich(counts,
 #'     cm,
 #'     databases = c('GO_Biological_Process_2018','GO_Molecular_Function_2018'))
6e297e50
 #' @importFrom enrichR enrichr
d7196f24
 #' @importFrom enrichR listEnrichrDbs
ee3b0333
 #' @export
 geneSetEnrich <- function(counts, celdaModel, databases, fdr = 0.05) {
     #check for correct celda object
     if (!(class(celdaModel) %in% c("celda_G", "celda_CG"))) {
         stop("No gene modules in celda object. ",
             "Please provide object of class celda_G or celda_CG.")
     }
 
     #initialize list with one entry for each gene module
06b0c870
     modules <- vector("list", length = params(celdaModel)$L)
ee3b0333
 
     #create dataframe with gene-module associations
06b0c870
     genes <- data.frame(gene = rownames(counts),
         module = clusters(celdaModel)$y)
ee3b0333
 
     #iterate over each module, get genes in that module, add to list
06b0c870
     for (i in seq_len(params(celdaModel)$L)) {
b42f118e
         modules[[i]] <- as.character(genes[genes$module == i, "gene"])
ee3b0333
     }
 
     #enrichment analysis
     enrichment <- lapply(modules, function(module) {
b42f118e
         invisible(utils::capture.output(table <- enrichR::enrichr(
             genes = module,
ee3b0333
             databases = databases)))
         table <- Reduce(f = rbind, x = table)
b42f118e
         table[table$Adjusted.P.value < fdr, "Term"]
ee3b0333
     })
 
     #return results as a list
     return(enrichment)
 }