... | ... |
@@ -20,14 +20,20 @@ |
20 | 20 |
#' @param fdr False discovery rate (FDR). Numeric. Cutoff value for adjusted |
21 | 21 |
#' p-value, terms with FDR below this value are considered significantly |
22 | 22 |
#' enriched. |
23 |
-#' @param ... Ignored. Placeholder to prevent check warning. |
|
24 | 23 |
#' @return List of length 'L' where each member contains the significantly |
25 | 24 |
#' enriched terms for the corresponding module. |
26 | 25 |
#' @importFrom enrichR enrichr |
27 | 26 |
#' @importFrom enrichR listEnrichrDbs |
28 | 27 |
#' @export |
29 |
-setGeneric("geneSetEnrich", function(x, ...) { |
|
30 |
- standardGeneric("geneSetEnrich")}) |
|
28 |
+setGeneric("geneSetEnrich", |
|
29 |
+ function(x, |
|
30 |
+ celdaModel, |
|
31 |
+ useAssay = "counts", |
|
32 |
+ altExpName = "featureSubset", |
|
33 |
+ databases, |
|
34 |
+ fdr = 0.05) { |
|
35 |
+ |
|
36 |
+ standardGeneric("geneSetEnrich")}) |
|
31 | 37 |
|
32 | 38 |
|
33 | 39 |
#' @rdname geneSetEnrich |
... | ... |
@@ -9,10 +9,10 @@ |
9 | 9 |
#' with the matrix located in the assay slot under \code{useAssay}. |
10 | 10 |
#' Rows represent features and columns represent cells. Rownames of the |
11 | 11 |
#' matrix or \linkS4class{SingleCellExperiment} object should be gene names. |
12 |
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
|
12 |
+#' @param useAssay A string specifying which \link{assay} |
|
13 | 13 |
#' slot to use if \code{x} is a |
14 | 14 |
#' \linkS4class{SingleCellExperiment} object. Default "counts". |
15 |
-#' @param altExpName The name for the \link[SingleCellExperiment]{altExp} slot |
|
15 |
+#' @param altExpName The name for the \link{altExp} slot |
|
16 | 16 |
#' to use. Default "featureSubset". |
17 | 17 |
#' @param celdaModel Celda object of class \code{celda_G} or \code{celda_CG}. |
18 | 18 |
#' @param databases Character vector. Name of reference database. Available |
... | ... |
@@ -20,6 +20,7 @@ |
20 | 20 |
#' @param fdr False discovery rate (FDR). Numeric. Cutoff value for adjusted |
21 | 21 |
#' p-value, terms with FDR below this value are considered significantly |
22 | 22 |
#' enriched. |
23 |
+#' @param ... Ignored. Placeholder to prevent check warning. |
|
23 | 24 |
#' @return List of length 'L' where each member contains the significantly |
24 | 25 |
#' enriched terms for the corresponding module. |
25 | 26 |
#' @importFrom enrichR enrichr |
... | ... |
@@ -55,7 +55,8 @@ setMethod("geneSetEnrich", |
55 | 55 |
length = S4Vectors::metadata(altExp)$celda_parameters$L) |
56 | 56 |
|
57 | 57 |
# create dataframe with gene-module associations |
58 |
- genes <- data.frame(gene = rownames(altExp), module = celdaModules(x)) |
|
58 |
+ genes <- data.frame(gene = rownames(altExp), |
|
59 |
+ module = celdaModules(x, altExpName = altExpName)) |
|
59 | 60 |
|
60 | 61 |
# iterate over each module, get genes in that module, add to list |
61 | 62 |
for (i in seq_len(S4Vectors::metadata(altExp)$celda_parameters$L)) { |
... | ... |
@@ -12,6 +12,8 @@ |
12 | 12 |
#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
13 | 13 |
#' slot to use if \code{x} is a |
14 | 14 |
#' \linkS4class{SingleCellExperiment} object. Default "counts". |
15 |
+#' @param altExpName The name for the \link[SingleCellExperiment]{altExp} slot |
|
16 |
+#' to use. Default "featureSubset". |
|
15 | 17 |
#' @param celdaModel Celda object of class \code{celda_G} or \code{celda_CG}. |
16 | 18 |
#' @param databases Character vector. Name of reference database. Available |
17 | 19 |
#' databases can be viewed by \link[enrichR]{listEnrichrDbs}. |
... | ... |
@@ -40,17 +42,23 @@ setGeneric("geneSetEnrich", function(x, ...) { |
40 | 42 |
#' @export |
41 | 43 |
setMethod("geneSetEnrich", |
42 | 44 |
signature(x = "SingleCellExperiment"), |
43 |
- function(x, useAssay = "counts", databases, fdr = 0.05) { |
|
45 |
+ function(x, |
|
46 |
+ useAssay = "counts", |
|
47 |
+ altExpName = "featureSubset", |
|
48 |
+ databases, |
|
49 |
+ fdr = 0.05) { |
|
50 |
+ |
|
51 |
+ altExp <- SingleCellExperiment::altExp(x, e = altExpName) |
|
44 | 52 |
|
45 | 53 |
# initialize list with one entry for each gene module |
46 | 54 |
modules <- vector("list", |
47 |
- length = S4Vectors::metadata(x)$celda_parameters$L) |
|
55 |
+ length = S4Vectors::metadata(altExp)$celda_parameters$L) |
|
48 | 56 |
|
49 | 57 |
# create dataframe with gene-module associations |
50 |
- genes <- data.frame(gene = rownames(x), module = celdaModules(x)) |
|
58 |
+ genes <- data.frame(gene = rownames(altExp), module = celdaModules(x)) |
|
51 | 59 |
|
52 | 60 |
# iterate over each module, get genes in that module, add to list |
53 |
- for (i in seq_len(S4Vectors::metadata(x)$celda_parameters$L)) { |
|
61 |
+ for (i in seq_len(S4Vectors::metadata(altExp)$celda_parameters$L)) { |
|
54 | 62 |
modules[[i]] <- as.character(genes[genes$module == i, "gene"]) |
55 | 63 |
} |
56 | 64 |
|
... | ... |
@@ -23,7 +23,8 @@ |
23 | 23 |
#' @importFrom enrichR enrichr |
24 | 24 |
#' @importFrom enrichR listEnrichrDbs |
25 | 25 |
#' @export |
26 |
-setGeneric("geneSetEnrich", function(x, ...) {standardGeneric("geneSetEnrich")}) |
|
26 |
+setGeneric("geneSetEnrich", function(x, ...) { |
|
27 |
+ standardGeneric("geneSetEnrich")}) |
|
27 | 28 |
|
28 | 29 |
|
29 | 30 |
#' @rdname geneSetEnrich |
... | ... |
@@ -33,7 +33,7 @@ setGeneric("geneSetEnrich", function(x, ...) {standardGeneric("geneSetEnrich")}) |
33 | 33 |
#' # subset 500 genes for fast clustering |
34 | 34 |
#' counts <- counts[seq(1501, 2000), ] |
35 | 35 |
#' # cluster genes into 10 modules for quick demo |
36 |
-#' sce <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE) |
|
36 |
+#' sce <- celda_G(x = as.matrix(counts), L = 10, verbose = FALSE) |
|
37 | 37 |
#' gse <- geneSetEnrich(sce, |
38 | 38 |
#' databases = c("GO_Biological_Process_2018", "GO_Molecular_Function_2018")) |
39 | 39 |
#' @export |
... | ... |
@@ -1,65 +1,109 @@ |
1 | 1 |
#' @title Gene set enrichment |
2 | 2 |
#' @description Identify and return significantly-enriched terms for each gene |
3 |
-#' module in a Celda object. Performs gene set enrichment analysis for Celda |
|
4 |
-#' identified modules using the enrichR package. |
|
5 |
-#' @author Ahmed Youssef |
|
6 |
-#' @param counts Integer count matrix. Rows represent genes and columns |
|
7 |
-#' represent cells. Row names of the matrix should be gene names. |
|
8 |
-#' @param celdaModel Celda object of class `celda_G` or `celda_CG`. |
|
3 |
+#' module in a Celda object or a \linkS4class{SingleCellExperiment} object. |
|
4 |
+#' Performs gene set enrichment analysis for Celda |
|
5 |
+#' identified modules using the \link[enrichR]{enrichr}. |
|
6 |
+#' @author Ahmed Youssef, Zhe Wang |
|
7 |
+#' @param x A numeric \link{matrix} of counts or a |
|
8 |
+#' \linkS4class{SingleCellExperiment} |
|
9 |
+#' with the matrix located in the assay slot under \code{useAssay}. |
|
10 |
+#' Rows represent features and columns represent cells. Rownames of the |
|
11 |
+#' matrix or \linkS4class{SingleCellExperiment} object should be gene names. |
|
12 |
+#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
|
13 |
+#' slot to use if \code{x} is a |
|
14 |
+#' \linkS4class{SingleCellExperiment} object. Default "counts". |
|
15 |
+#' @param celdaModel Celda object of class \code{celda_G} or \code{celda_CG}. |
|
9 | 16 |
#' @param databases Character vector. Name of reference database. Available |
10 |
-#' databases can be viewed by running \code{enrichR::listEnrichrDbs()}. |
|
17 |
+#' databases can be viewed by \link[enrichR]{listEnrichrDbs}. |
|
11 | 18 |
#' @param fdr False discovery rate (FDR). Numeric. Cutoff value for adjusted |
12 | 19 |
#' p-value, terms with FDR below this value are considered significantly |
13 | 20 |
#' enriched. |
14 | 21 |
#' @return List of length 'L' where each member contains the significantly |
15 | 22 |
#' enriched terms for the corresponding module. |
23 |
+#' @importFrom enrichR enrichr |
|
24 |
+#' @importFrom enrichR listEnrichrDbs |
|
25 |
+#' @export |
|
26 |
+setGeneric("geneSetEnrich", function(x, ...) {standardGeneric("geneSetEnrich")}) |
|
27 |
+ |
|
28 |
+ |
|
29 |
+#' @rdname geneSetEnrich |
|
16 | 30 |
#' @examples |
17 | 31 |
#' library(M3DExampleData) |
18 | 32 |
#' counts <- M3DExampleData::Mmus_example_list$data |
19 |
-#' # subset 100 genes for fast clustering |
|
20 |
-#' counts <- counts[1500:2000, ] |
|
33 |
+#' # subset 500 genes for fast clustering |
|
34 |
+#' counts <- counts[seq(1501, 2000), ] |
|
21 | 35 |
#' # cluster genes into 10 modules for quick demo |
22 |
-#' cm <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE) |
|
23 |
-#' gse <- geneSetEnrich(counts, |
|
24 |
-#' cm, |
|
25 |
-#' databases = c("GO_Biological_Process_2018", "GO_Molecular_Function_2018") |
|
26 |
-#' ) |
|
27 |
-#' @importFrom enrichR enrichr |
|
28 |
-#' @importFrom enrichR listEnrichrDbs |
|
36 |
+#' sce <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE) |
|
37 |
+#' gse <- geneSetEnrich(sce, |
|
38 |
+#' databases = c("GO_Biological_Process_2018", "GO_Molecular_Function_2018")) |
|
39 |
+#' @export |
|
40 |
+setMethod("geneSetEnrich", |
|
41 |
+ signature(x = "SingleCellExperiment"), |
|
42 |
+ function(x, useAssay = "counts", databases, fdr = 0.05) { |
|
43 |
+ |
|
44 |
+ # initialize list with one entry for each gene module |
|
45 |
+ modules <- vector("list", |
|
46 |
+ length = S4Vectors::metadata(x)$celda_parameters$L) |
|
47 |
+ |
|
48 |
+ # create dataframe with gene-module associations |
|
49 |
+ genes <- data.frame(gene = rownames(x), module = celdaModules(x)) |
|
50 |
+ |
|
51 |
+ # iterate over each module, get genes in that module, add to list |
|
52 |
+ for (i in seq_len(S4Vectors::metadata(x)$celda_parameters$L)) { |
|
53 |
+ modules[[i]] <- as.character(genes[genes$module == i, "gene"]) |
|
54 |
+ } |
|
55 |
+ |
|
56 |
+ # enrichment analysis |
|
57 |
+ enrichment <- lapply(modules, function(module) { |
|
58 |
+ invisible(utils::capture.output(table <- enrichR::enrichr( |
|
59 |
+ genes = module, |
|
60 |
+ databases = databases))) |
|
61 |
+ table <- Reduce(f = rbind, x = table) |
|
62 |
+ table[table$Adjusted.P.value < fdr, "Term"] |
|
63 |
+ }) |
|
64 |
+ |
|
65 |
+ # return results as a list |
|
66 |
+ return(enrichment) |
|
67 |
+ } |
|
68 |
+) |
|
69 |
+ |
|
70 |
+ |
|
71 |
+#' @rdname geneSetEnrich |
|
29 | 72 |
#' @export |
30 |
-geneSetEnrich <- function(counts, celdaModel, databases, fdr = 0.05) { |
|
31 |
- # check for correct celda object |
|
32 |
- if (!(class(celdaModel) %in% c("celda_G", "celda_CG"))) { |
|
33 |
- stop( |
|
34 |
- "No gene modules in celda object. ", |
|
35 |
- "Please provide object of class celda_G or celda_CG." |
|
36 |
- ) |
|
37 |
- } |
|
73 |
+setMethod("geneSetEnrich", |
|
74 |
+ signature(x = "matrix"), |
|
75 |
+ function(x, celdaModel, databases, fdr = 0.05) { |
|
76 |
+ # check for correct celda object |
|
77 |
+ if (!(class(celdaModel) %in% c("celda_G", "celda_CG"))) { |
|
78 |
+ stop( |
|
79 |
+ "No gene modules in celda object. ", |
|
80 |
+ "Please provide object of class celda_G or celda_CG." |
|
81 |
+ ) |
|
82 |
+ } |
|
38 | 83 |
|
39 |
- # initialize list with one entry for each gene module |
|
40 |
- modules <- vector("list", length = params(celdaModel)$L) |
|
84 |
+ # initialize list with one entry for each gene module |
|
85 |
+ modules <- vector("list", length = params(celdaModel)$L) |
|
41 | 86 |
|
42 |
- # create dataframe with gene-module associations |
|
43 |
- genes <- data.frame( |
|
44 |
- gene = rownames(counts), |
|
45 |
- module = clusters(celdaModel)$y |
|
46 |
- ) |
|
87 |
+ # create dataframe with gene-module associations |
|
88 |
+ genes <- data.frame(gene = rownames(x), |
|
89 |
+ module = celdaClusters(celdaModel)$y) |
|
47 | 90 |
|
48 |
- # iterate over each module, get genes in that module, add to list |
|
49 |
- for (i in seq_len(params(celdaModel)$L)) { |
|
50 |
- modules[[i]] <- as.character(genes[genes$module == i, "gene"]) |
|
51 |
- } |
|
91 |
+ # iterate over each module, get genes in that module, add to list |
|
92 |
+ for (i in seq_len(params(celdaModel)$L)) { |
|
93 |
+ modules[[i]] <- as.character(genes[genes$module == i, "gene"]) |
|
94 |
+ } |
|
52 | 95 |
|
53 |
- # enrichment analysis |
|
54 |
- enrichment <- lapply(modules, function(module) { |
|
55 |
- invisible(utils::capture.output(table <- enrichR::enrichr( |
|
56 |
- genes = module, |
|
57 |
- databases = databases |
|
58 |
- ))) |
|
59 |
- table <- Reduce(f = rbind, x = table) |
|
60 |
- table[table$Adjusted.P.value < fdr, "Term"] |
|
61 |
- }) |
|
96 |
+ # enrichment analysis |
|
97 |
+ enrichment <- lapply(modules, function(module) { |
|
98 |
+ invisible(utils::capture.output(table <- enrichR::enrichr( |
|
99 |
+ genes = module, |
|
100 |
+ databases = databases |
|
101 |
+ ))) |
|
102 |
+ table <- Reduce(f = rbind, x = table) |
|
103 |
+ table[table$Adjusted.P.value < fdr, "Term"] |
|
104 |
+ }) |
|
62 | 105 |
|
63 |
- # return results as a list |
|
64 |
- return(enrichment) |
|
65 |
-} |
|
106 |
+ # return results as a list |
|
107 |
+ return(enrichment) |
|
108 |
+ } |
|
109 |
+) |
... | ... |
@@ -16,44 +16,50 @@ |
16 | 16 |
#' @examples |
17 | 17 |
#' library(M3DExampleData) |
18 | 18 |
#' counts <- M3DExampleData::Mmus_example_list$data |
19 |
-#' #subset 100 genes for fast clustering |
|
19 |
+#' # subset 100 genes for fast clustering |
|
20 | 20 |
#' counts <- counts[1500:2000, ] |
21 |
-#' #cluster genes into 10 modules for quick demo |
|
21 |
+#' # cluster genes into 10 modules for quick demo |
|
22 | 22 |
#' cm <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE) |
23 | 23 |
#' gse <- geneSetEnrich(counts, |
24 |
-#' cm, |
|
25 |
-#' databases = c('GO_Biological_Process_2018','GO_Molecular_Function_2018')) |
|
24 |
+#' cm, |
|
25 |
+#' databases = c("GO_Biological_Process_2018", "GO_Molecular_Function_2018") |
|
26 |
+#' ) |
|
26 | 27 |
#' @importFrom enrichR enrichr |
27 | 28 |
#' @importFrom enrichR listEnrichrDbs |
28 | 29 |
#' @export |
29 | 30 |
geneSetEnrich <- function(counts, celdaModel, databases, fdr = 0.05) { |
30 |
- #check for correct celda object |
|
31 |
- if (!(class(celdaModel) %in% c("celda_G", "celda_CG"))) { |
|
32 |
- stop("No gene modules in celda object. ", |
|
33 |
- "Please provide object of class celda_G or celda_CG.") |
|
34 |
- } |
|
31 |
+ # check for correct celda object |
|
32 |
+ if (!(class(celdaModel) %in% c("celda_G", "celda_CG"))) { |
|
33 |
+ stop( |
|
34 |
+ "No gene modules in celda object. ", |
|
35 |
+ "Please provide object of class celda_G or celda_CG." |
|
36 |
+ ) |
|
37 |
+ } |
|
35 | 38 |
|
36 |
- #initialize list with one entry for each gene module |
|
37 |
- modules <- vector("list", length = params(celdaModel)$L) |
|
39 |
+ # initialize list with one entry for each gene module |
|
40 |
+ modules <- vector("list", length = params(celdaModel)$L) |
|
38 | 41 |
|
39 |
- #create dataframe with gene-module associations |
|
40 |
- genes <- data.frame(gene = rownames(counts), |
|
41 |
- module = clusters(celdaModel)$y) |
|
42 |
+ # create dataframe with gene-module associations |
|
43 |
+ genes <- data.frame( |
|
44 |
+ gene = rownames(counts), |
|
45 |
+ module = clusters(celdaModel)$y |
|
46 |
+ ) |
|
42 | 47 |
|
43 |
- #iterate over each module, get genes in that module, add to list |
|
44 |
- for (i in seq_len(params(celdaModel)$L)) { |
|
45 |
- modules[[i]] <- as.character(genes[genes$module == i, "gene"]) |
|
46 |
- } |
|
48 |
+ # iterate over each module, get genes in that module, add to list |
|
49 |
+ for (i in seq_len(params(celdaModel)$L)) { |
|
50 |
+ modules[[i]] <- as.character(genes[genes$module == i, "gene"]) |
|
51 |
+ } |
|
47 | 52 |
|
48 |
- #enrichment analysis |
|
49 |
- enrichment <- lapply(modules, function(module) { |
|
50 |
- invisible(utils::capture.output(table <- enrichR::enrichr( |
|
51 |
- genes = module, |
|
52 |
- databases = databases))) |
|
53 |
- table <- Reduce(f = rbind, x = table) |
|
54 |
- table[table$Adjusted.P.value < fdr, "Term"] |
|
55 |
- }) |
|
53 |
+ # enrichment analysis |
|
54 |
+ enrichment <- lapply(modules, function(module) { |
|
55 |
+ invisible(utils::capture.output(table <- enrichR::enrichr( |
|
56 |
+ genes = module, |
|
57 |
+ databases = databases |
|
58 |
+ ))) |
|
59 |
+ table <- Reduce(f = rbind, x = table) |
|
60 |
+ table[table$Adjusted.P.value < fdr, "Term"] |
|
61 |
+ }) |
|
56 | 62 |
|
57 |
- #return results as a list |
|
58 |
- return(enrichment) |
|
63 |
+ # return results as a list |
|
64 |
+ return(enrichment) |
|
59 | 65 |
} |
... | ... |
@@ -34,13 +34,14 @@ geneSetEnrich <- function(counts, celdaModel, databases, fdr = 0.05) { |
34 | 34 |
} |
35 | 35 |
|
36 | 36 |
#initialize list with one entry for each gene module |
37 |
- modules <- vector("list", length = celdaModel@params$L) |
|
37 |
+ modules <- vector("list", length = params(celdaModel)$L) |
|
38 | 38 |
|
39 | 39 |
#create dataframe with gene-module associations |
40 |
- genes <- data.frame(gene = rownames(counts), module = celdaModel@clusters$y) |
|
40 |
+ genes <- data.frame(gene = rownames(counts), |
|
41 |
+ module = clusters(celdaModel)$y) |
|
41 | 42 |
|
42 | 43 |
#iterate over each module, get genes in that module, add to list |
43 |
- for (i in seq_len(celdaModel@params$L)) { |
|
44 |
+ for (i in seq_len(params(celdaModel)$L)) { |
|
44 | 45 |
modules[[i]] <- as.character(genes[genes$module == i, "gene"]) |
45 | 46 |
} |
46 | 47 |
|
... | ... |
@@ -24,6 +24,7 @@ |
24 | 24 |
#' cm, |
25 | 25 |
#' databases = c('GO_Biological_Process_2018','GO_Molecular_Function_2018')) |
26 | 26 |
#' @importFrom enrichR enrichr |
27 |
+#' @importFrom enrichR listEnrichrDbs |
|
27 | 28 |
#' @export |
28 | 29 |
geneSetEnrich <- function(counts, celdaModel, databases, fdr = 0.05) { |
29 | 30 |
#check for correct celda object |
... | ... |
@@ -23,6 +23,7 @@ |
23 | 23 |
#' gse <- geneSetEnrich(counts, |
24 | 24 |
#' cm, |
25 | 25 |
#' databases = c('GO_Biological_Process_2018','GO_Molecular_Function_2018')) |
26 |
+#' @importFrom enrichR enrichr |
|
26 | 27 |
#' @export |
27 | 28 |
geneSetEnrich <- function(counts, celdaModel, databases, fdr = 0.05) { |
28 | 29 |
#check for correct celda object |
... | ... |
@@ -17,7 +17,7 @@ |
17 | 17 |
#' library(M3DExampleData) |
18 | 18 |
#' counts <- M3DExampleData::Mmus_example_list$data |
19 | 19 |
#' #subset 100 genes for fast clustering |
20 |
-#' counts <- counts[sample(seq_len(nrow(counts)), size = 100), ] |
|
20 |
+#' counts <- counts[1500:2000, ] |
|
21 | 21 |
#' #cluster genes into 10 modules for quick demo |
22 | 22 |
#' cm <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE) |
23 | 23 |
#' gse <- geneSetEnrich(counts, |
... | ... |
@@ -17,7 +17,7 @@ |
17 | 17 |
#' library(M3DExampleData) |
18 | 18 |
#' counts <- M3DExampleData::Mmus_example_list$data |
19 | 19 |
#' #subset 100 genes for fast clustering |
20 |
-#' counts <- counts[seq(1200, 2000), ] |
|
20 |
+#' counts <- counts[sample(seq_len(nrow(counts)), size = 100), ] |
|
21 | 21 |
#' #cluster genes into 10 modules for quick demo |
22 | 22 |
#' cm <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE) |
23 | 23 |
#' gse <- geneSetEnrich(counts, |
... | ... |
@@ -39,15 +39,16 @@ geneSetEnrich <- function(counts, celdaModel, databases, fdr = 0.05) { |
39 | 39 |
|
40 | 40 |
#iterate over each module, get genes in that module, add to list |
41 | 41 |
for (i in seq_len(celdaModel@params$L)) { |
42 |
- modules[[i]] <- as.character(genes[genes$module == i, 'gene']) |
|
42 |
+ modules[[i]] <- as.character(genes[genes$module == i, "gene"]) |
|
43 | 43 |
} |
44 | 44 |
|
45 | 45 |
#enrichment analysis |
46 | 46 |
enrichment <- lapply(modules, function(module) { |
47 |
- invisible(capture.output(table <- enrichR::enrichr(genes = module, |
|
47 |
+ invisible(utils::capture.output(table <- enrichR::enrichr( |
|
48 |
+ genes = module, |
|
48 | 49 |
databases = databases))) |
49 | 50 |
table <- Reduce(f = rbind, x = table) |
50 |
- table[table$Adjusted.P.value < fdr, 'Term'] |
|
51 |
+ table[table$Adjusted.P.value < fdr, "Term"] |
|
51 | 52 |
}) |
52 | 53 |
|
53 | 54 |
#return results as a list |
... | ... |
@@ -15,7 +15,6 @@ |
15 | 15 |
#' enriched terms for the corresponding module. |
16 | 16 |
#' @examples |
17 | 17 |
#' library(M3DExampleData) |
18 |
-#' library(celda) |
|
19 | 18 |
#' counts <- M3DExampleData::Mmus_example_list$data |
20 | 19 |
#' #subset 100 genes for fast clustering |
21 | 20 |
#' counts <- counts[seq(1200, 2000), ] |
... | ... |
@@ -18,8 +18,7 @@ |
18 | 18 |
#' library(celda) |
19 | 19 |
#' counts <- M3DExampleData::Mmus_example_list$data |
20 | 20 |
#' #subset 100 genes for fast clustering |
21 |
-#' counts <- counts[sample(seq_len(nrow(counts)), |
|
22 |
-#' size = 100),] |
|
21 |
+#' counts <- counts[seq(1200, 2000), ] |
|
23 | 22 |
#' #cluster genes into 10 modules for quick demo |
24 | 23 |
#' cm <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE) |
25 | 24 |
#' gse <- geneSetEnrich(counts, |
... | ... |
@@ -15,6 +15,7 @@ |
15 | 15 |
#' enriched terms for the corresponding module. |
16 | 16 |
#' @examples |
17 | 17 |
#' library(M3DExampleData) |
18 |
+#' library(celda) |
|
18 | 19 |
#' counts <- M3DExampleData::Mmus_example_list$data |
19 | 20 |
#' #subset 100 genes for fast clustering |
20 | 21 |
#' counts <- counts[sample(seq_len(nrow(counts)), |
... | ... |
@@ -14,8 +14,6 @@ |
14 | 14 |
#' @return List of length 'L' where each member contains the significantly |
15 | 15 |
#' enriched terms for the corresponding module. |
16 | 16 |
#' @examples |
17 |
-#' if (!requireNamespace("BiocManager", quietly = TRUE)) |
|
18 |
-#' install.packages("M3DExampleData") |
|
19 | 17 |
#' library(M3DExampleData) |
20 | 18 |
#' counts <- M3DExampleData::Mmus_example_list$data |
21 | 19 |
#' #subset 100 genes for fast clustering |
... | ... |
@@ -19,10 +19,10 @@ |
19 | 19 |
#' library(M3DExampleData) |
20 | 20 |
#' counts <- M3DExampleData::Mmus_example_list$data |
21 | 21 |
#' #subset 100 genes for fast clustering |
22 |
-#' counts <- counts[sample(seq_len(nrow(countsMatrix)), |
|
22 |
+#' counts <- counts[sample(seq_len(nrow(counts)), |
|
23 | 23 |
#' size = 100),] |
24 | 24 |
#' #cluster genes into 10 modules for quick demo |
25 |
-#' cm <- celda_G(counts = as.matrix(countsMatrix), L = 10, verbose = FALSE) |
|
25 |
+#' cm <- celda_G(counts = as.matrix(counts), L = 10, verbose = FALSE) |
|
26 | 26 |
#' gse <- geneSetEnrich(counts, |
27 | 27 |
#' cm, |
28 | 28 |
#' databases = c('GO_Biological_Process_2018','GO_Molecular_Function_2018')) |
... | ... |
@@ -19,7 +19,7 @@ |
19 | 19 |
#' library(M3DExampleData) |
20 | 20 |
#' counts <- M3DExampleData::Mmus_example_list$data |
21 | 21 |
#' #subset 100 genes for fast clustering |
22 |
-#' counts <- countsMatrix[sample(seq_len(nrow(countsMatrix)), |
|
22 |
+#' counts <- counts[sample(seq_len(nrow(countsMatrix)), |
|
23 | 23 |
#' size = 100),] |
24 | 24 |
#' #cluster genes into 10 modules for quick demo |
25 | 25 |
#' cm <- celda_G(counts = as.matrix(countsMatrix), L = 10, verbose = FALSE) |
... | ... |
@@ -13,7 +13,7 @@ |
13 | 13 |
#' enriched. |
14 | 14 |
#' @return List of length 'L' where each member contains the significantly |
15 | 15 |
#' enriched terms for the corresponding module. |
16 |
-#' @example |
|
16 |
+#' @examples |
|
17 | 17 |
#' if (!requireNamespace("BiocManager", quietly = TRUE)) |
18 | 18 |
#' install.packages("M3DExampleData") |
19 | 19 |
#' library(M3DExampleData) |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,58 @@ |
1 |
+#' @title Gene set enrichment |
|
2 |
+#' @description Identify and return significantly-enriched terms for each gene |
|
3 |
+#' module in a Celda object. Performs gene set enrichment analysis for Celda |
|
4 |
+#' identified modules using the enrichR package. |
|
5 |
+#' @author Ahmed Youssef |
|
6 |
+#' @param counts Integer count matrix. Rows represent genes and columns |
|
7 |
+#' represent cells. Row names of the matrix should be gene names. |
|
8 |
+#' @param celdaModel Celda object of class `celda_G` or `celda_CG`. |
|
9 |
+#' @param databases Character vector. Name of reference database. Available |
|
10 |
+#' databases can be viewed by running \code{enrichR::listEnrichrDbs()}. |
|
11 |
+#' @param fdr False discovery rate (FDR). Numeric. Cutoff value for adjusted |
|
12 |
+#' p-value, terms with FDR below this value are considered significantly |
|
13 |
+#' enriched. |
|
14 |
+#' @return List of length 'L' where each member contains the significantly |
|
15 |
+#' enriched terms for the corresponding module. |
|
16 |
+#' @example |
|
17 |
+#' if (!requireNamespace("BiocManager", quietly = TRUE)) |
|
18 |
+#' install.packages("M3DExampleData") |
|
19 |
+#' library(M3DExampleData) |
|
20 |
+#' counts <- M3DExampleData::Mmus_example_list$data |
|
21 |
+#' #subset 100 genes for fast clustering |
|
22 |
+#' counts <- countsMatrix[sample(seq_len(nrow(countsMatrix)), |
|
23 |
+#' size = 100),] |
|
24 |
+#' #cluster genes into 10 modules for quick demo |
|
25 |
+#' cm <- celda_G(counts = as.matrix(countsMatrix), L = 10, verbose = FALSE) |
|
26 |
+#' gse <- geneSetEnrich(counts, |
|
27 |
+#' cm, |
|
28 |
+#' databases = c('GO_Biological_Process_2018','GO_Molecular_Function_2018')) |
|
29 |
+#' @export |
|
30 |
+geneSetEnrich <- function(counts, celdaModel, databases, fdr = 0.05) { |
|
31 |
+ #check for correct celda object |
|
32 |
+ if (!(class(celdaModel) %in% c("celda_G", "celda_CG"))) { |
|
33 |
+ stop("No gene modules in celda object. ", |
|
34 |
+ "Please provide object of class celda_G or celda_CG.") |
|
35 |
+ } |
|
36 |
+ |
|
37 |
+ #initialize list with one entry for each gene module |
|
38 |
+ modules <- vector("list", length = celdaModel@params$L) |
|
39 |
+ |
|
40 |
+ #create dataframe with gene-module associations |
|
41 |
+ genes <- data.frame(gene = rownames(counts), module = celdaModel@clusters$y) |
|
42 |
+ |
|
43 |
+ #iterate over each module, get genes in that module, add to list |
|
44 |
+ for (i in seq_len(celdaModel@params$L)) { |
|
45 |
+ modules[[i]] <- as.character(genes[genes$module == i, 'gene']) |
|
46 |
+ } |
|
47 |
+ |
|
48 |
+ #enrichment analysis |
|
49 |
+ enrichment <- lapply(modules, function(module) { |
|
50 |
+ invisible(capture.output(table <- enrichR::enrichr(genes = module, |
|
51 |
+ databases = databases))) |
|
52 |
+ table <- Reduce(f = rbind, x = table) |
|
53 |
+ table[table$Adjusted.P.value < fdr, 'Term'] |
|
54 |
+ }) |
|
55 |
+ |
|
56 |
+ #return results as a list |
|
57 |
+ return(enrichment) |
|
58 |
+} |