Browse code

add phenotype enrichment plot #22

zhewa authored on 17/02/2019 21:25:14
Showing 4 changed files

... ...
@@ -33,13 +33,14 @@ Imports:
33 33
     Rcpp,
34 34
     RcppEigen,
35 35
     umap,
36
-    enrichR
36
+    enrichR,
37
+    vcd,
38
+    ggpubr
37 39
 Suggests:
38 40
     testthat,
39 41
     knitr,
40 42
     roxygen2,
41 43
     rmarkdown,
42
-    vcd,
43 44
     corrplot,
44 45
     Matrix,
45 46
     biomaRt,
... ...
@@ -42,6 +42,7 @@ export(plotGridSearchPerplexity.celda_C)
42 42
 export(plotGridSearchPerplexity.celda_CG)
43 43
 export(plotGridSearchPerplexity.celda_G)
44 44
 export(plotHeatmap)
45
+export(plotPhenoEnrich)
45 46
 export(recodeClusterY)
46 47
 export(recodeClusterZ)
47 48
 export(resList)
48 49
new file mode 100644
... ...
@@ -0,0 +1,133 @@
1
+#' @title Phenotype enrichment plot
2
+#' @description Plot the enrichment of phenotypes within cell clusters or
3
+#'  \emph{vice versa} in a given celda model. Generates a mosaic plot or
4
+#'  a balloon plot.
5
+#' @param counts Integer matrix. Rows represent features and columns represent
6
+#'  cells.
7
+#' @param celda.mod Celda model returned by the \link{celda_C},
8
+#'  \code{\link{celda_CG}} or \code{\link{subsetCeldaList}} functions. Must be
9
+#'  a \linkS4class{celda_C} or \linkS4class{celda_CG} object.
10
+#' @param phenoLabels Character vector of phenotype labels for the cells. Must
11
+#'  have the same length as the column number of count matrix \code{counts}.
12
+#' @param phenotype The phenotypes to inspect. If specified, cells will be
13
+#'  categorized into two groups based on whether their phenotype is in this
14
+#'  phenotype group or not. By default, no phenotype grouping is performed.
15
+#' @param cellCluster The cell clusters to inspect. If specified, cells will be
16
+#'  categorized into two groups based on whether their assigned cell cluster is
17
+#'  in this cell cluster group or not. By default, no cell cluster grouping is
18
+#'  performed.
19
+#' @param perspective The comparing perspective. One of "phenotype" or
20
+#'  "cellcluster". Indicates whether to look at the enrichment of phenotypes in
21
+#'  cell clusters or the enrichment of cell clusters in phenotypes. Default is
22
+#'  "phenotype", which looks at the enrichment of phenotype in cell clusters.
23
+#' @param plot The plot of visualizing the result. One of "mosaic" or "balloon".
24
+#'  Uses the \code{\link[vcd]{mosaic}} function for plotting mosaic plot and the
25
+#'  \code{\link[ggpubr]{ggballoonplot}} function for plotting the balloon plot.
26
+#' @param ... Additional parameters passed to the \code{\link[vcd]{mosaic}} or
27
+#'  \code{\link[ggpubr]{ggballoonplot}} function.
28
+#' @return NULL if \code{plot = "mosaic"} and a ggplot object if
29
+#'  \code{plot = "balloon"}
30
+#' @examples
31
+#' # Use sample labels as a surrogate for phenotype labels
32
+#' plotPhenoEnrich(celda.CG.sim$counts,
33
+#'     celda.CG.mod,
34
+#'     celda.CG.sim$sample.label)
35
+#'
36
+#' # Look at the enrichment of "Sample_1" in cell cluster 1 in balloon plot
37
+#' plotPhenoEnrich(counts = celda.CG.sim$counts,
38
+#'     celda.mod = celda.CG.mod,
39
+#'     phenoLabels = celda.CG.sim$sample.label,
40
+#'     phenotype = "Sample_1",
41
+#'     cellCluster = 1,
42
+#'     plot = "balloon")
43
+#'
44
+#' # Look at the enrichment of cell cluster 1 & 2 in
45
+#' # phenotype "Sample_3" & "Sample_4" in mosaic plot
46
+#' plotPhenoEnrich(counts = celda.CG.sim$counts,
47
+#'     celda.mod = celda.CG.mod,
48
+#'     phenoLabels = celda.CG.sim$sample.label,
49
+#'     phenotype = c("Sample_3", "Sample_4"),
50
+#'     cellCluster = c(1, 2),
51
+#'     perspective = "cellcluster")
52
+#' @export
53
+plotPhenoEnrich <- function(counts,
54
+    celda.mod,
55
+    phenoLabels,
56
+    phenotype = NULL,
57
+    cellCluster = NULL,
58
+    perspective = c("phenotype", "cellcluster"),
59
+    plot = c("mosaic", "balloon"),
60
+    ...) {
61
+
62
+    if (!(is(celdaModel, "celda_CG") || is(celdaModel, "celda_C"))) {
63
+        stop("celda.mod must be of class 'celda_C' or 'celda_CG'")
64
+    }
65
+
66
+    if (length(phenoLabels) != ncol(counts)) {
67
+        stop("phenoLabels must be the same length as the column number of",
68
+            " counts")
69
+    }
70
+
71
+    if (!is.null(phenotype)) {
72
+        if (!(all(phenotype %in% phenoLabels))) {
73
+            stop("phenotype ",
74
+                paste(phenotype, collapse = " "),
75
+                " must exist in phenoLabels")
76
+        }
77
+    }
78
+
79
+    if (!is.null(cellCluster)) {
80
+        if (!(all(cellCluster %in% unique(celda::clusters(celda.mod)$z)))) {
81
+            stop("Cell cluster ",
82
+                paste(cellCluster, collapse = " "),
83
+                " must exist in celda.mod@clusters$z")
84
+        }
85
+    }
86
+
87
+    plot <- match.arg(plot)
88
+    perspective <- match.arg(perspective)
89
+
90
+    phenoLabels <- factor(phenoLabels, levels = unique(phenoLabels))
91
+
92
+    if (is.null(phenotype) && is.null(cellCluster)) {
93
+        conti <- table(phenoLabels,
94
+            celda::clusters(celda.mod)$z,
95
+            dnn = c("Phenotype", "Cell cluster"))
96
+    } else if (!is.null(phenotype) && is.null(cellCluster)) {
97
+        conti <- table(phenoLabels %in% phenotype,
98
+            celda::clusters(celda.mod)$z,
99
+            dnn = c(paste0("In phenotype ", paste(phenotype, collapse = " ")),
100
+                "Cell cluster"))
101
+    } else if (is.null(phenotype) && !is.null(cellCluster)) {
102
+        conti <- table(phenoLabels,
103
+            celda::clusters(celda.mod)$z %in% cellCluster,
104
+            dnn = c("Phenotype",
105
+                paste0("In cell cluster ",
106
+                    paste(cellCluster,
107
+                        collapse = " "))))
108
+    } else {
109
+        conti <- table(phenoLabels %in% phenotype,
110
+            celda::clusters(celda.mod)$z %in% cellCluster,
111
+            dnn = c(paste0("In phenotype ", paste(phenotype, collapse = " ")),
112
+                paste0("In cell cluster ", paste(cellCluster,
113
+                    collapse = " "))))
114
+    }
115
+
116
+    if (perspective == "cellcluster") {
117
+        conti <- t(conti)
118
+    }
119
+
120
+    if (plot == "mosaic") {
121
+        vcd::mosaic(t(conti),
122
+            shade = TRUE,
123
+            legend = TRUE,
124
+            direction = "v",
125
+            ...)
126
+    } else if (plot == "balloon") {
127
+        g <- ggpubr::ggballoonplot(as.data.frame(t(conti)),
128
+            fill = "value",
129
+            ...) +
130
+            ggplot2::scale_fill_viridis_c(option = "C")
131
+        return(g)
132
+    }
133
+}
0 134
new file mode 100644
... ...
@@ -0,0 +1,74 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/phenoEnrich.R
3
+\name{plotPhenoEnrich}
4
+\alias{plotPhenoEnrich}
5
+\title{Phenotype enrichment plot}
6
+\usage{
7
+plotPhenoEnrich(counts, celda.mod, phenoLabels, phenotype = NULL,
8
+  cellCluster = NULL, perspective = c("phenotype", "cellcluster"),
9
+  plot = c("mosaic", "balloon"), ...)
10
+}
11
+\arguments{
12
+\item{counts}{Integer matrix. Rows represent features and columns represent
13
+cells.}
14
+
15
+\item{celda.mod}{Celda model returned by the \link{celda_C},
16
+\code{\link{celda_CG}} or \code{\link{subsetCeldaList}} functions. Must be
17
+a \linkS4class{celda_C} or \linkS4class{celda_CG} object.}
18
+
19
+\item{phenoLabels}{Character vector of phenotype labels for the cells. Must
20
+have the same length as the column number of count matrix \code{counts}.}
21
+
22
+\item{phenotype}{The phenotypes to inspect. If specified, cells will be
23
+categorized into two groups based on whether their phenotype is in this
24
+phenotype group or not. By default, no phenotype grouping is performed.}
25
+
26
+\item{cellCluster}{The cell clusters to inspect. If specified, cells will be
27
+categorized into two groups based on whether their assigned cell cluster is
28
+in this cell cluster group or not. By default, no cell cluster grouping is
29
+performed.}
30
+
31
+\item{perspective}{The comparing perspective. One of "phenotype" or
32
+"cellcluster". Indicates whether to look at the enrichment of phenotypes in
33
+cell clusters or the enrichment of cell clusters in phenotypes. Default is
34
+"phenotype", which looks at the enrichment of phenotype in cell clusters.}
35
+
36
+\item{plot}{The plot of visualizing the result. One of "mosaic" or "balloon".
37
+Uses the \code{\link[vcd]{mosaic}} function for plotting mosaic plot and the
38
+\code{\link[ggpubr]{ggballoonplot}} function for plotting the balloon plot.}
39
+
40
+\item{...}{Additional parameters passed to the \code{\link[vcd]{mosaic}} or
41
+\code{\link[ggpubr]{ggballoonplot}} function.}
42
+}
43
+\value{
44
+NULL if \code{plot = "mosaic"} and a ggplot object if
45
+ \code{plot = "balloon"}
46
+}
47
+\description{
48
+Plot the enrichment of phenotypes within cell clusters or
49
+ \emph{vice versa} in a given celda model. Generates a mosaic plot or
50
+ a balloon plot.
51
+}
52
+\examples{
53
+# Use sample labels as a surrogate for phenotype labels
54
+plotPhenoEnrich(celda.CG.sim$counts,
55
+    celda.CG.mod,
56
+    celda.CG.sim$sample.label)
57
+
58
+# Look at the enrichment of "Sample_1" in cell cluster 1 in balloon plot
59
+plotPhenoEnrich(counts = celda.CG.sim$counts,
60
+    celda.mod = celda.CG.mod,
61
+    phenoLabels = celda.CG.sim$sample.label,
62
+    phenotype = "Sample_1",
63
+    cellCluster = 1,
64
+    plot = "balloon")
65
+
66
+# Look at the enrichment of cell cluster 1 & 2 in
67
+# phenotype "Sample_3" & "Sample_4" in mosaic plot
68
+plotPhenoEnrich(counts = celda.CG.sim$counts,
69
+    celda.mod = celda.CG.mod,
70
+    phenoLabels = celda.CG.sim$sample.label,
71
+    phenotype = c("Sample_3", "Sample_4"),
72
+    cellCluster = c(1, 2),
73
+    perspective = "cellcluster")
74
+}