Browse code

add splitModule, a function to manually split feature module

zhewa authored on 17/06/2020 13:02:02
Showing 5 changed files

... ...
@@ -66,6 +66,7 @@ export(sampleLabel)
66 66
 export(selectBestModel)
67 67
 export(simulateCells)
68 68
 export(simulateContamination)
69
+export(splitModule)
69 70
 export(subsetCeldaList)
70 71
 export(topRank)
71 72
 exportMethods("celdaClusters<-")
... ...
@@ -115,6 +116,7 @@ exportMethods(resamplePerplexity)
115 116
 exportMethods(runParams)
116 117
 exportMethods(sampleLabel)
117 118
 exportMethods(selectBestModel)
119
+exportMethods(splitModule)
118 120
 exportMethods(subsetCeldaList)
119 121
 import(Rcpp)
120 122
 import(RcppEigen)
... ...
@@ -7,7 +7,7 @@
7 7
 #'  Rows represent features and columns represent cells.
8 8
 #' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
9 9
 #'  slot to use if \code{x} is a
10
-#'  \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts".
10
+#'  \linkS4class{SingleCellExperiment} object. Default "counts".
11 11
 #' @param L Integer. Number of feature modules.
12 12
 #' @param beta Numeric. Concentration parameter for Phi. Adds a pseudocount to
13 13
 #'  each feature module in each cell. Default 1.
... ...
@@ -45,8 +45,10 @@
45 45
 #' @param logfile Character. Messages will be redirected to a file named
46 46
 #'  `logfile`. If NULL, messages will be printed to stdout.  Default NULL.
47 47
 #' @param verbose Logical. Whether to print log messages. Default TRUE.
48
-#' @return An object of class `celda_G` with the feature module clusters stored
49
-#'  in `y`.
48
+#' @return A \linkS4class{SingleCellExperiment} object. Function
49
+#'  parameter settings are stored in the \link[S4Vectors]{metadata}
50
+#'  \code{"celda_parameters"} slot. Column \code{celda_feature_module} in
51
+#'  \link[SummarizedExperiment]{rowData} contains feature modules.
50 52
 #' @seealso \link{celda_C} for cell clustering and \link{celda_CG} for
51 53
 #'  simultaneous clustering of features and cells. \link{celdaGridSearch} can
52 54
 #'  be used to run multiple values of L and multiple chains in parallel.
53 55
new file mode 100644
... ...
@@ -0,0 +1,130 @@
1
+#' @title Split celda feature module
2
+#' @description Manually select a celda feature module to split into 2 or
3
+#'  more modules. Useful for splitting up modules that show divergent
4
+#'  expression of features in multiple cell clusters.
5
+#' @param x A \linkS4class{SingleCellExperiment} object
6
+#'  with the matrix located in the assay slot under \code{useAssay}.
7
+#'  Rows represent features and columns represent cells.
8
+#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
9
+#'  slot to use for \code{x}. Default "counts".
10
+#' @param module Integer. The module to be split.
11
+#' @param n Integer. How many modules should \code{module} be split into.
12
+#'  Default 2.
13
+#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
14
+#'  a default value of 12345 is used. If NULL, no calls to
15
+#'  \link[withr]{with_seed} are made.
16
+#' @return A updated \linkS4class{SingleCellExperiment} object with new
17
+#'  feature modules stored in column \code{celda_feature_module} in
18
+#'  \code{\link[SummarizedExperiment]{rowData}(x)}.
19
+#' @export
20
+setGeneric("splitModule",
21
+    function(x, ...) {
22
+        standardGeneric("splitModule")
23
+    })
24
+
25
+
26
+#' @rdname splitModule
27
+#' @examples
28
+#' data(sceCeldaCG)
29
+#' # Split module 5 into 2 new modules.
30
+#' sce <- splitModule(sceCeldaCG, module = 5)
31
+#' @export
32
+setMethod("splitModule", signature(x = "SingleCellExperiment"),
33
+    function(x,
34
+        useAssay = "counts",
35
+        module,
36
+        n = 2,
37
+        seed = 12345) {
38
+
39
+        if (!module %in% celdaClusters(x)) {
40
+            stop("Module ", module, " is not found in celdaClusters(x).",
41
+                " Please specify a valid module.")
42
+        }
43
+
44
+        celdaGMod <- .splitModuleWithSeed(x = x,
45
+            useAssay = useAssay,
46
+            module = module,
47
+            n = n,
48
+            seed = seed)
49
+
50
+        S4Vectors::metadata(x)[["celda_parameters"]]$L <- params(model)$L
51
+        S4Vectors::metadata(x)[["celda_parameters"]]$finalLogLik <-
52
+            model@finalLogLik
53
+        S4Vectors::metadata(x)[["celda_parameters"]]$featureModuleLevels <-
54
+            sort(unique(celdaClusters(celdaGMod)$y))
55
+        SummarizedExperiment::rowData(x)["celda_feature_module"] <-
56
+            celdaClusters(celdaGMod)$y
57
+        return(x)
58
+    }
59
+)
60
+
61
+
62
+.splitModuleWithSeed <- function(x,
63
+    useAssay,
64
+    module,
65
+    n,
66
+    seed) {
67
+
68
+    if (is.null(seed)) {
69
+        celdaGMod <- .splitModule(x, useAssay, module, n)
70
+    } else {
71
+        with_seed(seed, celdaGMod <- .splitModule(x, useAssay, module, n))
72
+    }
73
+    return(celdaGMod)
74
+}
75
+
76
+
77
+.splitModule <- function(x, useAssay, module, n) {
78
+    counts <- SummarizedExperiment::assay(x, i = useAssay)
79
+    .validateCounts(counts)
80
+    counts <- as.matrix(counts)
81
+    ix <- celdaModules(x) == module
82
+
83
+    if (sum(ix) > 1) {
84
+        tempModel <- .celda_G(
85
+            counts = counts[ix, , drop = FALSE],
86
+            L = n,
87
+            yInitialize = "random",
88
+            splitOnIter = -1,
89
+            splitOnLast = FALSE,
90
+            nchains = 1,
91
+            verbose = FALSE)
92
+
93
+        splitY <- celdaClusters(tempModel)$y
94
+        splitIx <- celdaClusters(tempModel)$y > 1
95
+        splitY[splitIx] <- S4Vectors::metadata(x)$celda_parameters$L +
96
+            splitY[splitIx] - 1
97
+        splitY[!splitIx] <- module
98
+
99
+        newY <- celdaModules(x)
100
+        newY[ix] <- splitY
101
+        newL <- max(newY)
102
+
103
+        newLl <- .logLikelihoodcelda_G(
104
+            counts = counts,
105
+            y = newY,
106
+            L = newL,
107
+            beta = S4Vectors::metadata(x)$celda_parameters$beta,
108
+            delta = S4Vectors::metadata(x)$celda_parameters$delta,
109
+            gamma = S4Vectors::metadata(x)$celda_parameters$gamma)
110
+        model <- methods::new(
111
+            "celda_G",
112
+            clusters = list(y = newY),
113
+            params = list(
114
+                L = newL,
115
+                beta = S4Vectors::metadata(x)$celda_parameters$beta,
116
+                delta = S4Vectors::metadata(x)$celda_parameters$delta,
117
+                gamma = S4Vectors::metadata(x)$celda_parameters$gamma,
118
+                countChecksum = .createCountChecksum(counts)
119
+            ),
120
+            names = list(row = rownames(x),
121
+                column = colnames(x),
122
+                sample = x@metadata$celda_parameters$sampleLevels),
123
+            finalLogLik = newLl
124
+        )
125
+    } else {
126
+        stop("Module ", module, "contains <= 1 feature. No additional",
127
+            " splitting was able to be performed.")
128
+    }
129
+    return(model)
130
+}
... ...
@@ -55,7 +55,7 @@ Rows represent features and columns represent cells.}
55 55
 
56 56
 \item{useAssay}{A string specifying which \link[SummarizedExperiment]{assay}
57 57
 slot to use if \code{x} is a
58
-\link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts".}
58
+\linkS4class{SingleCellExperiment} object. Default "counts".}
59 59
 
60 60
 \item{L}{Integer. Number of feature modules.}
61 61
 
... ...
@@ -110,8 +110,10 @@ starting values for each feature will be randomly sampled from `1:L`.
110 110
 \item{verbose}{Logical. Whether to print log messages. Default TRUE.}
111 111
 }
112 112
 \value{
113
-An object of class `celda_G` with the feature module clusters stored
114
- in `y`.
113
+A \linkS4class{SingleCellExperiment} object. Function
114
+ parameter settings are stored in the \link[S4Vectors]{metadata}
115
+ \code{"celda_parameters"} slot. Column \code{celda_feature_module} in
116
+ \link[SummarizedExperiment]{rowData} contains feature modules.
115 117
 }
116 118
 \description{
117 119
 Clusters the rows of a count matrix containing single-cell data
118 120
new file mode 100644
... ...
@@ -0,0 +1,43 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/splitModule.R
3
+\name{splitModule}
4
+\alias{splitModule}
5
+\alias{splitModule,SingleCellExperiment-method}
6
+\title{Split celda feature module}
7
+\usage{
8
+splitModule(x, ...)
9
+
10
+\S4method{splitModule}{SingleCellExperiment}(x, useAssay = "counts", module, n = 2, seed = 12345)
11
+}
12
+\arguments{
13
+\item{x}{A \linkS4class{SingleCellExperiment} object
14
+with the matrix located in the assay slot under \code{useAssay}.
15
+Rows represent features and columns represent cells.}
16
+
17
+\item{useAssay}{A string specifying which \link[SummarizedExperiment]{assay}
18
+slot to use for \code{x}. Default "counts".}
19
+
20
+\item{module}{Integer. The module to be split.}
21
+
22
+\item{n}{Integer. How many modules should \code{module} be split into.
23
+Default 2.}
24
+
25
+\item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility,
26
+a default value of 12345 is used. If NULL, no calls to
27
+\link[withr]{with_seed} are made.}
28
+}
29
+\value{
30
+A updated \linkS4class{SingleCellExperiment} object with new
31
+ feature modules stored in column \code{celda_feature_module} in
32
+ \code{\link[SummarizedExperiment]{rowData}(x)}.
33
+}
34
+\description{
35
+Manually select a celda feature module to split into 2 or
36
+ more modules. Useful for splitting up modules that show divergent
37
+ expression of features in multiple cell clusters.
38
+}
39
+\examples{
40
+data(sceCeldaCG)
41
+# Split module 5 into 2 new modules.
42
+sce <- splitModule(sceCeldaCG, module = 5)
43
+}