Browse code

Removed differentialExpression related functions as these are now in the singleCellTK

Joshua D. Campbell authored on 22/03/2021 17:27:20
Showing 1 changed files
1 1
deleted file mode 100755
... ...
@@ -1,281 +0,0 @@
1
-#' @title Differential expression for cell subpopulations using MAST
2
-#' @description Uses MAST to find differentially expressed features for
3
-#'  specified cell subpopulations.
4
-#' @param x A numeric \link{matrix} of counts or a
5
-#'  \linkS4class{SingleCellExperiment}
6
-#'  with the matrix located in the assay slot under \code{useAssay}.
7
-#'  Rows represent features and columns represent cells. Must contain cluster
8
-#'  labels in \code{celdaClusters(x, altExpName = altExpName)} if \code{x} is a
9
-#'  \linkS4class{SingleCellExperiment} object.
10
-#' @param useAssay A string specifying which \link{assay}
11
-#'  slot to use if \code{x} is a
12
-#'  \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts".
13
-#' @param altExpName The name for the \link{altExp} slot
14
-#'  to use. Default "featureSubset".
15
-#' @param celdaMod Celda object of class `celda_C` or `celda_CG`.
16
-#' @param c1 Integer vector. Cell populations to include in group 1 for the
17
-#'  differential expression analysis.
18
-#' @param c2 Integer vector. Cell populations to include in group 2 for the
19
-#'  differential expression analysis. If NULL, the clusters in the c1 group are
20
-#'  compared to all other clusters. Default NULL.
21
-#' @param onlyPos Logical. Whether to only return markers with positive log2
22
-#'  fold change. Default FALSE.
23
-#' @param log2fcThreshold Numeric. A number greater than 0 that specifies the
24
-#'  absolute log2 fold change threshold. Only features with absolute value
25
-#'  above this threshold will be returned. If NULL, this filter will not be
26
-#'  applied. Default NULL.
27
-#' @param fdrThreshold Numeric. A number between 0 and 1 that specifies the
28
-#'  false discovery rate (FDR) threshold. Only features below this threshold
29
-#'  will be returned. Default 1.
30
-#' @param ... Ignored. Placeholder to prevent check warning.
31
-#' @return Data frame containing MAST results including statistics such as
32
-#'  p-value, log2 fold change, and FDR.
33
-#' @export
34
-#' @rawNamespace import(data.table, except = c(melt, shift))
35
-#' @importFrom MAST FromMatrix
36
-#' @importFrom MAST zlm
37
-#' @importFrom MAST summary
38
-#' @importFrom S4Vectors mcols
39
-#' @importFrom plyr .
40
-#' @import SummarizedExperiment
41
-setGeneric("differentialExpression", function(x, ...) {
42
-    standardGeneric("differentialExpression")})
43
-
44
-
45
-#' @rdname differentialExpression
46
-#' @examples
47
-#' data(sceCeldaCG)
48
-#' clusterDiffexpRes <- differentialExpression(sceCeldaCG, c1 = c(1, 2))
49
-#' @export
50
-setMethod("differentialExpression",
51
-    signature(x = "SingleCellExperiment"),
52
-    function(x,
53
-        useAssay = "counts",
54
-        altExpName = "featureSubset",
55
-        c1,
56
-        c2 = NULL,
57
-        onlyPos = FALSE,
58
-        log2fcThreshold = NULL,
59
-        fdrThreshold = 1) {
60
-
61
-        if (is.null(c1)) {
62
-            stop("'c1' should be a numeric vector of cell cluster(s)")
63
-        }
64
-
65
-        cdiff <- setdiff(c1, celdaClusters(x, altExpName = altExpName))
66
-
67
-        if (length(cdiff) > 0) {
68
-            warning("cluster ", cdiff, "in 'c1' does not exist in",
69
-                " 'celdaClusters(x, altExpName = altExpName)'!")
70
-            if (length(cdiff) == length(c1)) {
71
-                stop("All clusters in 'c1' does not exist in",
72
-                    " 'celdaClusters(x, altExpName = altExpName)'!")
73
-            }
74
-        }
75
-
76
-        altExp <- SingleCellExperiment::altExp(x, altExpName)
77
-        counts <- SummarizedExperiment::assay(x, i = useAssay)
78
-
79
-        if (is.null(c2)) {
80
-            c2 <- sort(setdiff(unique(celdaClusters(x,
81
-                altExpName = altExpName)), c1))
82
-        }
83
-        if (length(c1) > 1) {
84
-            cells1 <- SummarizedExperiment::colData(altExp)$colnames[
85
-                which(celdaClusters(x, altExpName = altExpName) %in% c1)]
86
-        } else {
87
-            cells1 <- SummarizedExperiment::colData(altExp)$colnames[
88
-                which(celdaClusters(x, altExpName = altExpName) == c1)]
89
-        }
90
-
91
-        if (length(c2) > 1) {
92
-            cells2 <- SummarizedExperiment::colData(altExp)$colnames[
93
-                which(celdaClusters(x, altExpName = altExpName) %in% c2)]
94
-        } else {
95
-            cells2 <- SummarizedExperiment::colData(altExp)$colnames[
96
-                which(celdaClusters(x, altExpName = altExpName) == c2)]
97
-        }
98
-
99
-        mat <- counts[, c(cells1, cells2)]
100
-        log_normalized_mat <- normalizeCounts(mat,
101
-            normalize = "cpm",
102
-            transformationFun = log1p)
103
-        cdat <- data.frame(wellKey = c(cells1, cells2),
104
-            condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))),
105
-            ngeneson = rep("", (length(cells1) + length(cells2))),
106
-            stringsAsFactors = FALSE)
107
-
108
-        sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat))
109
-        cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0)
110
-        SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2)
111
-        cond <- factor(SummarizedExperiment::colData(sca)$condition)
112
-        cond <- stats::relevel(cond, "c2")
113
-        SummarizedExperiment::colData(sca)$condition <- cond
114
-        zlmCond <- MAST::zlm(~ condition + cngeneson, sca)
115
-        summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1")
116
-        summaryDt <- summaryCond$datatable
117
-        contrast <-
118
-            component <-
119
-            primerid <-
120
-            coef <-
121
-            ci.hi <-
122
-            ci.lo <-
123
-            `Pr(>Chisq)` <-
124
-            fdr <- NULL # Avoid NSE notes in check
125
-
126
-        fcHurdle <- merge(summaryDt[contrast == "conditionc1" &
127
-                component == "H", .(primerid, `Pr(>Chisq)`)],
128
-            summaryDt[contrast == "conditionc1" & component == "logFC",
129
-                .(primerid, coef, ci.hi, ci.lo)], by = "primerid")
130
-
131
-        fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")]
132
-        ### Some genes aren't outputted because log2FC gets NaN if one or both
133
-        ### clusters have 0 counts for a gene
134
-        ### and then they're discarded because NaN !> 0
135
-        if (is.null(log2fcThreshold)) {
136
-            fcHurdleSig <- fcHurdle
137
-        } else {
138
-            fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold &
139
-                    abs(coef) > log2fcThreshold],
140
-                data.table::as.data.table(S4Vectors::mcols(sca)),
141
-                by = "primerid")
142
-            if (onlyPos) {
143
-                fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ]
144
-            }
145
-        }
146
-        fcHurdleSig <- fcHurdleSig[, -c(4, 5)]
147
-        names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC",
148
-            "FDR")
149
-        fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue,
150
-            decreasing = FALSE), ]
151
-
152
-        return(fcHurdleSig)
153
-    }
154
-)
155
-
156
-
157
-#' @rdname differentialExpression
158
-#' @examples
159
-#' data(celdaCGSim, celdaCGMod)
160
-#' clusterDiffexpRes <- differentialExpression(celdaCGSim$counts,
161
-#'   celdaCGMod,
162
-#'   c1 = c(1, 2))
163
-#' @export
164
-setMethod("differentialExpression",
165
-    signature(x = "matrix"),
166
-    function(x,
167
-        celdaMod,
168
-        c1,
169
-        c2 = NULL,
170
-        onlyPos = FALSE,
171
-        log2fcThreshold = NULL,
172
-        fdrThreshold = 1) {
173
-
174
-        if (!(methods::is(celdaMod, "celda_C") ||
175
-                methods::is(celdaMod, "celda_CG"))) {
176
-            stop("'celdaMod' should be an object of class celda_C or celda_CG")
177
-        }
178
-        if (is.null(c1)) {
179
-            stop("'c1' should be a numeric vector of cell cluster(s)")
180
-        }
181
-
182
-        cdiff <- setdiff(c1, celdaClusters(celdaMod)$z)
183
-
184
-        if (length(cdiff) > 0) {
185
-            warning("cluster ", cdiff, "in 'c1' does not exist in",
186
-                " 'celdaClusters(celdaMod)$z'!")
187
-            if (length(cdiff) == length(c1)) {
188
-                stop("All clusters in 'c1' does not exist in",
189
-                    " 'celdaClusters(celdaMod)$z'!")
190
-            }
191
-        }
192
-
193
-        compareCountMatrix(x, celdaMod)
194
-
195
-        if (is.null(c2)) {
196
-            c2 <- sort(setdiff(unique(celdaClusters(celdaMod)$z), c1))
197
-        }
198
-        if (length(c1) > 1) {
199
-            cells1 <- matrixNames(celdaMod)$column[which(
200
-                celdaClusters(celdaMod)$z %in% c1
201
-            )]
202
-        } else {
203
-            cells1 <- matrixNames(celdaMod)$column[which(
204
-                celdaClusters(celdaMod)$z == c1
205
-            )]
206
-        }
207
-
208
-        if (length(c2) > 1) {
209
-            cells2 <- matrixNames(celdaMod)$column[which(
210
-                celdaClusters(celdaMod)$z %in% c2
211
-            )]
212
-        } else {
213
-            cells2 <- matrixNames(celdaMod)$column[which(
214
-                celdaClusters(celdaMod)$z == c2
215
-            )]
216
-        }
217
-
218
-        mat <- x[, c(cells1, cells2)]
219
-        log_normalized_mat <- normalizeCounts(mat,
220
-            normalize = "cpm",
221
-            transformationFun = log1p
222
-        )
223
-        cdat <- data.frame(
224
-            wellKey = c(cells1, cells2),
225
-            condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))),
226
-            ngeneson = rep("", (length(cells1) + length(cells2))),
227
-            stringsAsFactors = FALSE
228
-        )
229
-
230
-        sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat))
231
-        cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0)
232
-        SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2)
233
-        cond <- factor(SummarizedExperiment::colData(sca)$condition)
234
-        cond <- stats::relevel(cond, "c2")
235
-        SummarizedExperiment::colData(sca)$condition <- cond
236
-        zlmCond <- MAST::zlm(~ condition + cngeneson, sca)
237
-        summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1")
238
-        summaryDt <- summaryCond$datatable
239
-        contrast <-
240
-            component <-
241
-            primerid <-
242
-            coef <-
243
-            ci.hi <-
244
-            ci.lo <-
245
-            `Pr(>Chisq)` <-
246
-            fdr <- NULL # Avoid NSE notes in check
247
-
248
-        fcHurdle <- merge(summaryDt[contrast == "conditionc1" &
249
-                component == "H", .(primerid, `Pr(>Chisq)`)],
250
-            summaryDt[
251
-                contrast == "conditionc1" & component == "logFC",
252
-                .(primerid, coef, ci.hi, ci.lo)
253
-                ],
254
-            by = "primerid"
255
-        )
256
-
257
-        fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")]
258
-        ### Some genes aren't outputted because log2FC gets NaN if one or both
259
-        ### clusters have 0 counts for a gene
260
-        ### and then they're discarded because NaN !> 0
261
-        if (is.null(log2fcThreshold)) {
262
-            fcHurdleSig <- fcHurdle
263
-        } else {
264
-            fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold &
265
-                    abs(coef) > log2fcThreshold],
266
-                data.table::as.data.table(S4Vectors::mcols(sca)),
267
-                by = "primerid"
268
-            )
269
-            if (onlyPos) {
270
-                fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ]
271
-            }
272
-        }
273
-        fcHurdleSig <- fcHurdleSig[, -c(4, 5)]
274
-        names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC",
275
-            "FDR")
276
-        fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue,
277
-            decreasing = FALSE), ]
278
-
279
-        return(fcHurdleSig)
280
-    }
281
-)
Browse code

fix doc warning file link in package does not exist and so has been treated as a topic

zhewa authored on 16/10/2020 21:36:32
Showing 1 changed files
... ...
@@ -7,10 +7,10 @@
7 7
 #'  Rows represent features and columns represent cells. Must contain cluster
8 8
 #'  labels in \code{celdaClusters(x, altExpName = altExpName)} if \code{x} is a
9 9
 #'  \linkS4class{SingleCellExperiment} object.
10
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
10
+#' @param useAssay A string specifying which \link{assay}
11 11
 #'  slot to use if \code{x} is a
12 12
 #'  \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts".
13
-#' @param altExpName The name for the \link[SingleCellExperiment]{altExp} slot
13
+#' @param altExpName The name for the \link{altExp} slot
14 14
 #'  to use. Default "featureSubset".
15 15
 #' @param celdaMod Celda object of class `celda_C` or `celda_CG`.
16 16
 #' @param c1 Integer vector. Cell populations to include in group 1 for the
Browse code

fix bioc check doc warning. Fix vignette

zhewa authored on 13/10/2020 18:47:29
Showing 1 changed files
... ...
@@ -27,6 +27,7 @@
27 27
 #' @param fdrThreshold Numeric. A number between 0 and 1 that specifies the
28 28
 #'  false discovery rate (FDR) threshold. Only features below this threshold
29 29
 #'  will be returned. Default 1.
30
+#' @param ... Ignored. Placeholder to prevent check warning.
30 31
 #' @return Data frame containing MAST results including statistics such as
31 32
 #'  p-value, log2 fold change, and FDR.
32 33
 #' @export
Browse code

fix travis errors

zhewa authored on 14/07/2020 05:00:58
Showing 1 changed files
... ...
@@ -72,7 +72,7 @@ setMethod("differentialExpression",
72 72
             }
73 73
         }
74 74
 
75
-        altExp <- SingleCellExperiment::altExp(sce, altExpName)
75
+        altExp <- SingleCellExperiment::altExp(x, altExpName)
76 76
         counts <- SummarizedExperiment::assay(x, i = useAssay)
77 77
 
78 78
         if (is.null(c2)) {
Browse code

add altExpName = "featureSubset". Store results in altExp(sce)

zhewa authored on 13/07/2020 06:58:29
Showing 1 changed files
... ...
@@ -5,11 +5,13 @@
5 5
 #'  \linkS4class{SingleCellExperiment}
6 6
 #'  with the matrix located in the assay slot under \code{useAssay}.
7 7
 #'  Rows represent features and columns represent cells. Must contain cluster
8
-#'  labels in \code{celdaClusters(x)} if \code{x} is a
8
+#'  labels in \code{celdaClusters(x, altExpName = altExpName)} if \code{x} is a
9 9
 #'  \linkS4class{SingleCellExperiment} object.
10 10
 #' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
11 11
 #'  slot to use if \code{x} is a
12 12
 #'  \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts".
13
+#' @param altExpName The name for the \link[SingleCellExperiment]{altExp} slot
14
+#'  to use. Default "featureSubset".
13 15
 #' @param celdaMod Celda object of class `celda_C` or `celda_CG`.
14 16
 #' @param c1 Integer vector. Cell populations to include in group 1 for the
15 17
 #'  differential expression analysis.
... ...
@@ -48,6 +50,7 @@ setMethod("differentialExpression",
48 50
     signature(x = "SingleCellExperiment"),
49 51
     function(x,
50 52
         useAssay = "counts",
53
+        altExpName = "featureSubset",
51 54
         c1,
52 55
         c2 = NULL,
53 56
         onlyPos = FALSE,
... ...
@@ -58,36 +61,38 @@ setMethod("differentialExpression",
58 61
             stop("'c1' should be a numeric vector of cell cluster(s)")
59 62
         }
60 63
 
61
-        cdiff <- setdiff(c1, celdaClusters(x))
64
+        cdiff <- setdiff(c1, celdaClusters(x, altExpName = altExpName))
62 65
 
63 66
         if (length(cdiff) > 0) {
64 67
             warning("cluster ", cdiff, "in 'c1' does not exist in",
65
-                " 'celdaClusters(x)'!")
68
+                " 'celdaClusters(x, altExpName = altExpName)'!")
66 69
             if (length(cdiff) == length(c1)) {
67 70
                 stop("All clusters in 'c1' does not exist in",
68
-                    " 'celdaClusters(x)'!")
71
+                    " 'celdaClusters(x, altExpName = altExpName)'!")
69 72
             }
70 73
         }
71 74
 
75
+        altExp <- SingleCellExperiment::altExp(sce, altExpName)
72 76
         counts <- SummarizedExperiment::assay(x, i = useAssay)
73 77
 
74 78
         if (is.null(c2)) {
75
-            c2 <- sort(setdiff(unique(celdaClusters(x)), c1))
79
+            c2 <- sort(setdiff(unique(celdaClusters(x,
80
+                altExpName = altExpName)), c1))
76 81
         }
77 82
         if (length(c1) > 1) {
78
-            cells1 <- SummarizedExperiment::colData(x)$colnames[
79
-                which(celdaClusters(x) %in% c1)]
83
+            cells1 <- SummarizedExperiment::colData(altExp)$colnames[
84
+                which(celdaClusters(x, altExpName = altExpName) %in% c1)]
80 85
         } else {
81
-            cells1 <- SummarizedExperiment::colData(x)$colnames[
82
-                which(celdaClusters(x) == c1)]
86
+            cells1 <- SummarizedExperiment::colData(altExp)$colnames[
87
+                which(celdaClusters(x, altExpName = altExpName) == c1)]
83 88
         }
84 89
 
85 90
         if (length(c2) > 1) {
86
-            cells2 <- SummarizedExperiment::colData(x)$colnames[
87
-                which(celdaClusters(x) %in% c2)]
91
+            cells2 <- SummarizedExperiment::colData(altExp)$colnames[
92
+                which(celdaClusters(x, altExpName = altExpName) %in% c2)]
88 93
         } else {
89
-            cells2 <- SummarizedExperiment::colData(x)$colnames[
90
-                which(celdaClusters(x) == c2)]
94
+            cells2 <- SummarizedExperiment::colData(altExp)$colnames[
95
+                which(celdaClusters(x, altExpName = altExpName) == c2)]
91 96
         }
92 97
 
93 98
         mat <- counts[, c(cells1, cells2)]
Browse code

sce findMarkersTree

zhewa authored on 16/05/2020 09:37:06
Showing 1 changed files
... ...
@@ -5,7 +5,7 @@
5 5
 #'  \linkS4class{SingleCellExperiment}
6 6
 #'  with the matrix located in the assay slot under \code{useAssay}.
7 7
 #'  Rows represent features and columns represent cells. Must contain cluster
8
-#'  labels in \code{clusters(x)} if \code{x} is a
8
+#'  labels in \code{celdaClusters(x)} if \code{x} is a
9 9
 #'  \linkS4class{SingleCellExperiment} object.
10 10
 #' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
11 11
 #'  slot to use if \code{x} is a
... ...
@@ -58,35 +58,36 @@ setMethod("differentialExpression",
58 58
             stop("'c1' should be a numeric vector of cell cluster(s)")
59 59
         }
60 60
 
61
-        cdiff <- setdiff(c1, clusters(x))
61
+        cdiff <- setdiff(c1, celdaClusters(x))
62 62
 
63 63
         if (length(cdiff) > 0) {
64 64
             warning("cluster ", cdiff, "in 'c1' does not exist in",
65
-                " 'clusters(x)'!")
65
+                " 'celdaClusters(x)'!")
66 66
             if (length(cdiff) == length(c1)) {
67
-                stop("All clusters in 'c1' does not exist in 'clusters(x)'!")
67
+                stop("All clusters in 'c1' does not exist in",
68
+                    " 'celdaClusters(x)'!")
68 69
             }
69 70
         }
70 71
 
71 72
         counts <- SummarizedExperiment::assay(x, i = useAssay)
72 73
 
73 74
         if (is.null(c2)) {
74
-            c2 <- sort(setdiff(unique(clusters(x)), c1))
75
+            c2 <- sort(setdiff(unique(celdaClusters(x)), c1))
75 76
         }
76 77
         if (length(c1) > 1) {
77 78
             cells1 <- SummarizedExperiment::colData(x)$colnames[
78
-                which(clusters(x) %in% c1)]
79
+                which(celdaClusters(x) %in% c1)]
79 80
         } else {
80 81
             cells1 <- SummarizedExperiment::colData(x)$colnames[
81
-                which(clusters(x) == c1)]
82
+                which(celdaClusters(x) == c1)]
82 83
         }
83 84
 
84 85
         if (length(c2) > 1) {
85 86
             cells2 <- SummarizedExperiment::colData(x)$colnames[
86
-                which(clusters(x) %in% c2)]
87
+                which(celdaClusters(x) %in% c2)]
87 88
         } else {
88 89
             cells2 <- SummarizedExperiment::colData(x)$colnames[
89
-                which(clusters(x) == c2)]
90
+                which(celdaClusters(x) == c2)]
90 91
         }
91 92
 
92 93
         mat <- counts[, c(cells1, cells2)]
... ...
@@ -172,39 +173,39 @@ setMethod("differentialExpression",
172 173
             stop("'c1' should be a numeric vector of cell cluster(s)")
173 174
         }
174 175
 
175
-        cdiff <- setdiff(c1, clusters(celdaMod)$z)
176
+        cdiff <- setdiff(c1, celdaClusters(celdaMod)$z)
176 177
 
177 178
         if (length(cdiff) > 0) {
178 179
             warning("cluster ", cdiff, "in 'c1' does not exist in",
179
-                " 'clusters(celdaMod)$z'!")
180
+                " 'celdaClusters(celdaMod)$z'!")
180 181
             if (length(cdiff) == length(c1)) {
181 182
                 stop("All clusters in 'c1' does not exist in",
182
-                    " 'clusters(celdaMod)$z'!")
183
+                    " 'celdaClusters(celdaMod)$z'!")
183 184
             }
184 185
         }
185 186
 
186 187
         compareCountMatrix(x, celdaMod)
187 188
 
188 189
         if (is.null(c2)) {
189
-            c2 <- sort(setdiff(unique(clusters(celdaMod)$z), c1))
190
+            c2 <- sort(setdiff(unique(celdaClusters(celdaMod)$z), c1))
190 191
         }
191 192
         if (length(c1) > 1) {
192 193
             cells1 <- matrixNames(celdaMod)$column[which(
193
-                clusters(celdaMod)$z %in% c1
194
+                celdaClusters(celdaMod)$z %in% c1
194 195
             )]
195 196
         } else {
196 197
             cells1 <- matrixNames(celdaMod)$column[which(
197
-                clusters(celdaMod)$z == c1
198
+                celdaClusters(celdaMod)$z == c1
198 199
             )]
199 200
         }
200 201
 
201 202
         if (length(c2) > 1) {
202 203
             cells2 <- matrixNames(celdaMod)$column[which(
203
-                clusters(celdaMod)$z %in% c2
204
+                celdaClusters(celdaMod)$z %in% c2
204 205
             )]
205 206
         } else {
206 207
             cells2 <- matrixNames(celdaMod)$column[which(
207
-                clusters(celdaMod)$z == c2
208
+                celdaClusters(celdaMod)$z == c2
208 209
             )]
209 210
         }
210 211
 
Browse code

sce diffexp

zhewa authored on 15/05/2020 10:51:52
Showing 1 changed files
... ...
@@ -1,9 +1,15 @@
1 1
 #' @title Differential expression for cell subpopulations using MAST
2 2
 #' @description Uses MAST to find differentially expressed features for
3 3
 #'  specified cell subpopulations.
4
-#' @param counts Integer matrix. Rows represent features and columns represent
5
-#'  cells. This matrix should be the same as the one used to generate
6
-#'  `celdaMod`.
4
+#' @param x A numeric \link{matrix} of counts or a
5
+#'  \linkS4class{SingleCellExperiment}
6
+#'  with the matrix located in the assay slot under \code{useAssay}.
7
+#'  Rows represent features and columns represent cells. Must contain cluster
8
+#'  labels in \code{clusters(x)} if \code{x} is a
9
+#'  \linkS4class{SingleCellExperiment} object.
10
+#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
11
+#'  slot to use if \code{x} is a
12
+#'  \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts".
7 13
 #' @param celdaMod Celda object of class `celda_C` or `celda_CG`.
8 14
 #' @param c1 Integer vector. Cell populations to include in group 1 for the
9 15
 #'  differential expression analysis.
... ...
@@ -21,127 +27,248 @@
21 27
 #'  will be returned. Default 1.
22 28
 #' @return Data frame containing MAST results including statistics such as
23 29
 #'  p-value, log2 fold change, and FDR.
24
-#' @examples
25
-#' data(celdaCGSim, celdaCGMod)
26
-#' clusterDiffexpRes <- differentialExpression(celdaCGSim$counts,
27
-#'   celdaCGMod,
28
-#'   c1 = c(1, 2)
29
-#' )
30 30
 #' @export
31 31
 #' @rawNamespace import(data.table, except = c(melt, shift))
32 32
 #' @importFrom MAST FromMatrix
33 33
 #' @importFrom MAST zlm
34 34
 #' @importFrom MAST summary
35 35
 #' @importFrom S4Vectors mcols
36
-#' @importFrom SummarizedExperiment assay
37
-#' @importFrom SummarizedExperiment colData
38
-#' @importFrom SummarizedExperiment assayNames
39 36
 #' @importFrom plyr .
40 37
 #' @import SummarizedExperiment
41
-differentialExpression <- function(counts,
42
-                                   celdaMod,
43
-                                   c1,
44
-                                   c2 = NULL,
45
-                                   onlyPos = FALSE,
46
-                                   log2fcThreshold = NULL,
47
-                                   fdrThreshold = 1) {
48
-  if (!is.matrix(counts)) {
49
-    stop("'counts' should be a numeric count matrix")
50
-  }
51
-  if (!(methods::is(celdaMod, "celda_C") ||
52
-    methods::is(celdaMod, "celda_CG"))) {
53
-    stop("'celdaMod' should be an object of class celda_C or celda_CG")
54
-  }
55
-  if (is.null(c1)) {
56
-    stop("'c1' should be a numeric vector of cell cluster(s)")
57
-  }
58
-  compareCountMatrix(counts, celdaMod)
59
-
60
-  if (is.null(c2)) {
61
-    c2 <- sort(setdiff(unique(clusters(celdaMod)$z), c1))
62
-  }
63
-  if (length(c1) > 1) {
64
-    cells1 <- matrixNames(celdaMod)$column[which(
65
-      clusters(celdaMod)$z %in% c1
66
-    )]
67
-  } else {
68
-    cells1 <- matrixNames(celdaMod)$column[which(
69
-      clusters(celdaMod)$z == c1
70
-    )]
71
-  }
72
-
73
-  if (length(c2) > 1) {
74
-    cells2 <- matrixNames(celdaMod)$column[which(
75
-      clusters(celdaMod)$z %in% c2
76
-    )]
77
-  } else {
78
-    cells2 <- matrixNames(celdaMod)$column[which(
79
-      clusters(celdaMod)$z == c2
80
-    )]
81
-  }
82
-
83
-  mat <- counts[, c(cells1, cells2)]
84
-  log_normalized_mat <- normalizeCounts(mat,
85
-    normalize = "cpm",
86
-    transformationFun = log1p
87
-  )
88
-  cdat <- data.frame(
89
-    wellKey = c(cells1, cells2),
90
-    condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))),
91
-    ngeneson = rep("", (length(cells1) + length(cells2))),
92
-    stringsAsFactors = FALSE
93
-  )
94
-
95
-  # explicitly load library SummarizedExperiment due to MAST package
96
-  # dependency error
97
-  # requireNamespace(SummarizedExperiment)
98
-
99
-  sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat))
100
-  cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0)
101
-  SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2)
102
-  cond <- factor(SummarizedExperiment::colData(sca)$condition)
103
-  cond <- stats::relevel(cond, "c2")
104
-  SummarizedExperiment::colData(sca)$condition <- cond
105
-  zlmCond <- MAST::zlm(~ condition + cngeneson, sca)
106
-  summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1")
107
-  summaryDt <- summaryCond$datatable
108
-  contrast <-
109
-    component <-
110
-    primerid <-
111
-    coef <-
112
-    ci.hi <-
113
-    ci.lo <-
114
-    `Pr(>Chisq)` <-
115
-    fdr <- NULL # Avoid NSE notes in check
116
-
117
-  fcHurdle <- merge(summaryDt[contrast == "conditionc1" &
118
-    component == "H", .(primerid, `Pr(>Chisq)`)],
119
-  summaryDt[
120
-    contrast == "conditionc1" & component == "logFC",
121
-    .(primerid, coef, ci.hi, ci.lo)
122
-  ],
123
-  by = "primerid"
124
-  )
125
-
126
-  fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")]
127
-  ### Some genes aren't outputted because log2FC gets NaN if one or both
128
-  ### clusters have 0 counts for a gene
129
-  ### and then they're discarded because NaN !> 0
130
-  if (is.null(log2fcThreshold)) {
131
-    fcHurdleSig <- fcHurdle
132
-  } else {
133
-    fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold &
134
-      abs(coef) > log2fcThreshold],
135
-    data.table::as.data.table(S4Vectors::mcols(sca)),
136
-    by = "primerid"
137
-    )
138
-    if (onlyPos) {
139
-      fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ]
38
+setGeneric("differentialExpression", function(x, ...) {
39
+    standardGeneric("differentialExpression")})
40
+
41
+
42
+#' @rdname differentialExpression
43
+#' @examples
44
+#' data(sceCeldaCG)
45
+#' clusterDiffexpRes <- differentialExpression(sceCeldaCG, c1 = c(1, 2))
46
+#' @export
47
+setMethod("differentialExpression",
48
+    signature(x = "SingleCellExperiment"),
49
+    function(x,
50
+        useAssay = "counts",
51
+        c1,
52
+        c2 = NULL,
53
+        onlyPos = FALSE,
54
+        log2fcThreshold = NULL,
55
+        fdrThreshold = 1) {
56
+
57
+        if (is.null(c1)) {
58
+            stop("'c1' should be a numeric vector of cell cluster(s)")
59
+        }
60
+
61
+        cdiff <- setdiff(c1, clusters(x))
62
+
63
+        if (length(cdiff) > 0) {
64
+            warning("cluster ", cdiff, "in 'c1' does not exist in",
65
+                " 'clusters(x)'!")
66
+            if (length(cdiff) == length(c1)) {
67
+                stop("All clusters in 'c1' does not exist in 'clusters(x)'!")
68
+            }
69
+        }
70
+
71
+        counts <- SummarizedExperiment::assay(x, i = useAssay)
72
+
73
+        if (is.null(c2)) {
74
+            c2 <- sort(setdiff(unique(clusters(x)), c1))
75
+        }
76
+        if (length(c1) > 1) {
77
+            cells1 <- SummarizedExperiment::colData(x)$colnames[
78
+                which(clusters(x) %in% c1)]
79
+        } else {
80
+            cells1 <- SummarizedExperiment::colData(x)$colnames[
81
+                which(clusters(x) == c1)]
82
+        }
83
+
84
+        if (length(c2) > 1) {
85
+            cells2 <- SummarizedExperiment::colData(x)$colnames[
86
+                which(clusters(x) %in% c2)]
87
+        } else {
88
+            cells2 <- SummarizedExperiment::colData(x)$colnames[
89
+                which(clusters(x) == c2)]
90
+        }
91
+
92
+        mat <- counts[, c(cells1, cells2)]
93
+        log_normalized_mat <- normalizeCounts(mat,
94
+            normalize = "cpm",
95
+            transformationFun = log1p)
96
+        cdat <- data.frame(wellKey = c(cells1, cells2),
97
+            condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))),
98
+            ngeneson = rep("", (length(cells1) + length(cells2))),
99
+            stringsAsFactors = FALSE)
100
+
101
+        sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat))
102
+        cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0)
103
+        SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2)
104
+        cond <- factor(SummarizedExperiment::colData(sca)$condition)
105
+        cond <- stats::relevel(cond, "c2")
106
+        SummarizedExperiment::colData(sca)$condition <- cond
107
+        zlmCond <- MAST::zlm(~ condition + cngeneson, sca)
108
+        summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1")
109
+        summaryDt <- summaryCond$datatable
110
+        contrast <-
111
+            component <-
112
+            primerid <-
113
+            coef <-
114
+            ci.hi <-
115
+            ci.lo <-
116
+            `Pr(>Chisq)` <-
117
+            fdr <- NULL # Avoid NSE notes in check
118
+
119
+        fcHurdle <- merge(summaryDt[contrast == "conditionc1" &
120
+                component == "H", .(primerid, `Pr(>Chisq)`)],
121
+            summaryDt[contrast == "conditionc1" & component == "logFC",
122
+                .(primerid, coef, ci.hi, ci.lo)], by = "primerid")
123
+
124
+        fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")]
125
+        ### Some genes aren't outputted because log2FC gets NaN if one or both
126
+        ### clusters have 0 counts for a gene
127
+        ### and then they're discarded because NaN !> 0
128
+        if (is.null(log2fcThreshold)) {
129
+            fcHurdleSig <- fcHurdle
130
+        } else {
131
+            fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold &
132
+                    abs(coef) > log2fcThreshold],
133
+                data.table::as.data.table(S4Vectors::mcols(sca)),
134
+                by = "primerid")
135
+            if (onlyPos) {
136
+                fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ]
137
+            }
138
+        }
139
+        fcHurdleSig <- fcHurdleSig[, -c(4, 5)]
140
+        names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC",
141
+            "FDR")
142
+        fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue,
143
+            decreasing = FALSE), ]
144
+
145
+        return(fcHurdleSig)
140 146
     }
141
-  }
142
-  fcHurdleSig <- fcHurdleSig[, -c(4, 5)]
143
-  names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC", "FDR")
144
-  fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue, decreasing = FALSE), ]
147
+)
145 148
 
146
-  return(fcHurdleSig)
147
-}
149
+
150
+#' @rdname differentialExpression
151
+#' @examples
152
+#' data(celdaCGSim, celdaCGMod)
153
+#' clusterDiffexpRes <- differentialExpression(celdaCGSim$counts,
154
+#'   celdaCGMod,
155
+#'   c1 = c(1, 2))
156
+#' @export
157
+setMethod("differentialExpression",
158
+    signature(x = "matrix"),
159
+    function(x,
160
+        celdaMod,
161
+        c1,
162
+        c2 = NULL,
163
+        onlyPos = FALSE,
164
+        log2fcThreshold = NULL,
165
+        fdrThreshold = 1) {
166
+
167
+        if (!(methods::is(celdaMod, "celda_C") ||
168
+                methods::is(celdaMod, "celda_CG"))) {
169
+            stop("'celdaMod' should be an object of class celda_C or celda_CG")
170
+        }
171
+        if (is.null(c1)) {
172
+            stop("'c1' should be a numeric vector of cell cluster(s)")
173
+        }
174
+
175
+        cdiff <- setdiff(c1, clusters(celdaMod)$z)
176
+
177
+        if (length(cdiff) > 0) {
178
+            warning("cluster ", cdiff, "in 'c1' does not exist in",
179
+                " 'clusters(celdaMod)$z'!")
180
+            if (length(cdiff) == length(c1)) {
181
+                stop("All clusters in 'c1' does not exist in",
182
+                    " 'clusters(celdaMod)$z'!")
183
+            }
184
+        }
185
+
186
+        compareCountMatrix(x, celdaMod)
187
+
188
+        if (is.null(c2)) {
189
+            c2 <- sort(setdiff(unique(clusters(celdaMod)$z), c1))
190
+        }
191
+        if (length(c1) > 1) {
192
+            cells1 <- matrixNames(celdaMod)$column[which(
193
+                clusters(celdaMod)$z %in% c1
194
+            )]
195
+        } else {
196
+            cells1 <- matrixNames(celdaMod)$column[which(
197
+                clusters(celdaMod)$z == c1
198
+            )]
199
+        }
200
+
201
+        if (length(c2) > 1) {
202
+            cells2 <- matrixNames(celdaMod)$column[which(
203
+                clusters(celdaMod)$z %in% c2
204
+            )]
205
+        } else {
206
+            cells2 <- matrixNames(celdaMod)$column[which(
207
+                clusters(celdaMod)$z == c2
208
+            )]
209
+        }
210
+
211
+        mat <- x[, c(cells1, cells2)]
212
+        log_normalized_mat <- normalizeCounts(mat,
213
+            normalize = "cpm",
214
+            transformationFun = log1p
215
+        )
216
+        cdat <- data.frame(
217
+            wellKey = c(cells1, cells2),
218
+            condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))),
219
+            ngeneson = rep("", (length(cells1) + length(cells2))),
220
+            stringsAsFactors = FALSE
221
+        )
222
+
223
+        sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat))
224
+        cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0)
225
+        SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2)
226
+        cond <- factor(SummarizedExperiment::colData(sca)$condition)
227
+        cond <- stats::relevel(cond, "c2")
228
+        SummarizedExperiment::colData(sca)$condition <- cond
229
+        zlmCond <- MAST::zlm(~ condition + cngeneson, sca)
230
+        summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1")
231
+        summaryDt <- summaryCond$datatable
232
+        contrast <-
233
+            component <-
234
+            primerid <-
235
+            coef <-
236
+            ci.hi <-
237
+            ci.lo <-
238
+            `Pr(>Chisq)` <-
239
+            fdr <- NULL # Avoid NSE notes in check
240
+
241
+        fcHurdle <- merge(summaryDt[contrast == "conditionc1" &
242
+                component == "H", .(primerid, `Pr(>Chisq)`)],
243
+            summaryDt[
244
+                contrast == "conditionc1" & component == "logFC",
245
+                .(primerid, coef, ci.hi, ci.lo)
246
+                ],
247
+            by = "primerid"
248
+        )
249
+
250
+        fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")]
251
+        ### Some genes aren't outputted because log2FC gets NaN if one or both
252
+        ### clusters have 0 counts for a gene
253
+        ### and then they're discarded because NaN !> 0
254
+        if (is.null(log2fcThreshold)) {
255
+            fcHurdleSig <- fcHurdle
256
+        } else {
257
+            fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold &
258
+                    abs(coef) > log2fcThreshold],
259
+                data.table::as.data.table(S4Vectors::mcols(sca)),
260
+                by = "primerid"
261
+            )
262
+            if (onlyPos) {
263
+                fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ]
264
+            }
265
+        }
266
+        fcHurdleSig <- fcHurdleSig[, -c(4, 5)]
267
+        names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC",
268
+            "FDR")
269
+        fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue,
270
+            decreasing = FALSE), ]
271
+
272
+        return(fcHurdleSig)
273
+    }
274
+)
Browse code

Ran styler and fixed lints

Joshua D. Campbell authored on 06/04/2020 23:58:56
Showing 1 changed files
... ...
@@ -23,8 +23,10 @@
23 23
 #'  p-value, log2 fold change, and FDR.
24 24
 #' @examples
25 25
 #' data(celdaCGSim, celdaCGMod)
26
-#' clusterDiffexpRes = differentialExpression(celdaCGSim$counts,
27
-#'     celdaCGMod, c1 = c(1, 2))
26
+#' clusterDiffexpRes <- differentialExpression(celdaCGSim$counts,
27
+#'   celdaCGMod,
28
+#'   c1 = c(1, 2)
29
+#' )
28 30
 #' @export
29 31
 #' @rawNamespace import(data.table, except = c(melt, shift))
30 32
 #' @importFrom MAST FromMatrix
... ...
@@ -37,98 +39,109 @@
37 39
 #' @importFrom plyr .
38 40
 #' @import SummarizedExperiment
39 41
 differentialExpression <- function(counts,
40
-    celdaMod,
41
-    c1,
42
-    c2 = NULL,
43