1 | 1 |
deleted file mode 100755 |
... | ... |
@@ -1,281 +0,0 @@ |
1 |
-#' @title Differential expression for cell subpopulations using MAST |
|
2 |
-#' @description Uses MAST to find differentially expressed features for |
|
3 |
-#' specified cell subpopulations. |
|
4 |
-#' @param x A numeric \link{matrix} of counts or a |
|
5 |
-#' \linkS4class{SingleCellExperiment} |
|
6 |
-#' with the matrix located in the assay slot under \code{useAssay}. |
|
7 |
-#' Rows represent features and columns represent cells. Must contain cluster |
|
8 |
-#' labels in \code{celdaClusters(x, altExpName = altExpName)} if \code{x} is a |
|
9 |
-#' \linkS4class{SingleCellExperiment} object. |
|
10 |
-#' @param useAssay A string specifying which \link{assay} |
|
11 |
-#' slot to use if \code{x} is a |
|
12 |
-#' \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts". |
|
13 |
-#' @param altExpName The name for the \link{altExp} slot |
|
14 |
-#' to use. Default "featureSubset". |
|
15 |
-#' @param celdaMod Celda object of class `celda_C` or `celda_CG`. |
|
16 |
-#' @param c1 Integer vector. Cell populations to include in group 1 for the |
|
17 |
-#' differential expression analysis. |
|
18 |
-#' @param c2 Integer vector. Cell populations to include in group 2 for the |
|
19 |
-#' differential expression analysis. If NULL, the clusters in the c1 group are |
|
20 |
-#' compared to all other clusters. Default NULL. |
|
21 |
-#' @param onlyPos Logical. Whether to only return markers with positive log2 |
|
22 |
-#' fold change. Default FALSE. |
|
23 |
-#' @param log2fcThreshold Numeric. A number greater than 0 that specifies the |
|
24 |
-#' absolute log2 fold change threshold. Only features with absolute value |
|
25 |
-#' above this threshold will be returned. If NULL, this filter will not be |
|
26 |
-#' applied. Default NULL. |
|
27 |
-#' @param fdrThreshold Numeric. A number between 0 and 1 that specifies the |
|
28 |
-#' false discovery rate (FDR) threshold. Only features below this threshold |
|
29 |
-#' will be returned. Default 1. |
|
30 |
-#' @param ... Ignored. Placeholder to prevent check warning. |
|
31 |
-#' @return Data frame containing MAST results including statistics such as |
|
32 |
-#' p-value, log2 fold change, and FDR. |
|
33 |
-#' @export |
|
34 |
-#' @rawNamespace import(data.table, except = c(melt, shift)) |
|
35 |
-#' @importFrom MAST FromMatrix |
|
36 |
-#' @importFrom MAST zlm |
|
37 |
-#' @importFrom MAST summary |
|
38 |
-#' @importFrom S4Vectors mcols |
|
39 |
-#' @importFrom plyr . |
|
40 |
-#' @import SummarizedExperiment |
|
41 |
-setGeneric("differentialExpression", function(x, ...) { |
|
42 |
- standardGeneric("differentialExpression")}) |
|
43 |
- |
|
44 |
- |
|
45 |
-#' @rdname differentialExpression |
|
46 |
-#' @examples |
|
47 |
-#' data(sceCeldaCG) |
|
48 |
-#' clusterDiffexpRes <- differentialExpression(sceCeldaCG, c1 = c(1, 2)) |
|
49 |
-#' @export |
|
50 |
-setMethod("differentialExpression", |
|
51 |
- signature(x = "SingleCellExperiment"), |
|
52 |
- function(x, |
|
53 |
- useAssay = "counts", |
|
54 |
- altExpName = "featureSubset", |
|
55 |
- c1, |
|
56 |
- c2 = NULL, |
|
57 |
- onlyPos = FALSE, |
|
58 |
- log2fcThreshold = NULL, |
|
59 |
- fdrThreshold = 1) { |
|
60 |
- |
|
61 |
- if (is.null(c1)) { |
|
62 |
- stop("'c1' should be a numeric vector of cell cluster(s)") |
|
63 |
- } |
|
64 |
- |
|
65 |
- cdiff <- setdiff(c1, celdaClusters(x, altExpName = altExpName)) |
|
66 |
- |
|
67 |
- if (length(cdiff) > 0) { |
|
68 |
- warning("cluster ", cdiff, "in 'c1' does not exist in", |
|
69 |
- " 'celdaClusters(x, altExpName = altExpName)'!") |
|
70 |
- if (length(cdiff) == length(c1)) { |
|
71 |
- stop("All clusters in 'c1' does not exist in", |
|
72 |
- " 'celdaClusters(x, altExpName = altExpName)'!") |
|
73 |
- } |
|
74 |
- } |
|
75 |
- |
|
76 |
- altExp <- SingleCellExperiment::altExp(x, altExpName) |
|
77 |
- counts <- SummarizedExperiment::assay(x, i = useAssay) |
|
78 |
- |
|
79 |
- if (is.null(c2)) { |
|
80 |
- c2 <- sort(setdiff(unique(celdaClusters(x, |
|
81 |
- altExpName = altExpName)), c1)) |
|
82 |
- } |
|
83 |
- if (length(c1) > 1) { |
|
84 |
- cells1 <- SummarizedExperiment::colData(altExp)$colnames[ |
|
85 |
- which(celdaClusters(x, altExpName = altExpName) %in% c1)] |
|
86 |
- } else { |
|
87 |
- cells1 <- SummarizedExperiment::colData(altExp)$colnames[ |
|
88 |
- which(celdaClusters(x, altExpName = altExpName) == c1)] |
|
89 |
- } |
|
90 |
- |
|
91 |
- if (length(c2) > 1) { |
|
92 |
- cells2 <- SummarizedExperiment::colData(altExp)$colnames[ |
|
93 |
- which(celdaClusters(x, altExpName = altExpName) %in% c2)] |
|
94 |
- } else { |
|
95 |
- cells2 <- SummarizedExperiment::colData(altExp)$colnames[ |
|
96 |
- which(celdaClusters(x, altExpName = altExpName) == c2)] |
|
97 |
- } |
|
98 |
- |
|
99 |
- mat <- counts[, c(cells1, cells2)] |
|
100 |
- log_normalized_mat <- normalizeCounts(mat, |
|
101 |
- normalize = "cpm", |
|
102 |
- transformationFun = log1p) |
|
103 |
- cdat <- data.frame(wellKey = c(cells1, cells2), |
|
104 |
- condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))), |
|
105 |
- ngeneson = rep("", (length(cells1) + length(cells2))), |
|
106 |
- stringsAsFactors = FALSE) |
|
107 |
- |
|
108 |
- sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat)) |
|
109 |
- cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0) |
|
110 |
- SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2) |
|
111 |
- cond <- factor(SummarizedExperiment::colData(sca)$condition) |
|
112 |
- cond <- stats::relevel(cond, "c2") |
|
113 |
- SummarizedExperiment::colData(sca)$condition <- cond |
|
114 |
- zlmCond <- MAST::zlm(~ condition + cngeneson, sca) |
|
115 |
- summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1") |
|
116 |
- summaryDt <- summaryCond$datatable |
|
117 |
- contrast <- |
|
118 |
- component <- |
|
119 |
- primerid <- |
|
120 |
- coef <- |
|
121 |
- ci.hi <- |
|
122 |
- ci.lo <- |
|
123 |
- `Pr(>Chisq)` <- |
|
124 |
- fdr <- NULL # Avoid NSE notes in check |
|
125 |
- |
|
126 |
- fcHurdle <- merge(summaryDt[contrast == "conditionc1" & |
|
127 |
- component == "H", .(primerid, `Pr(>Chisq)`)], |
|
128 |
- summaryDt[contrast == "conditionc1" & component == "logFC", |
|
129 |
- .(primerid, coef, ci.hi, ci.lo)], by = "primerid") |
|
130 |
- |
|
131 |
- fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")] |
|
132 |
- ### Some genes aren't outputted because log2FC gets NaN if one or both |
|
133 |
- ### clusters have 0 counts for a gene |
|
134 |
- ### and then they're discarded because NaN !> 0 |
|
135 |
- if (is.null(log2fcThreshold)) { |
|
136 |
- fcHurdleSig <- fcHurdle |
|
137 |
- } else { |
|
138 |
- fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold & |
|
139 |
- abs(coef) > log2fcThreshold], |
|
140 |
- data.table::as.data.table(S4Vectors::mcols(sca)), |
|
141 |
- by = "primerid") |
|
142 |
- if (onlyPos) { |
|
143 |
- fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ] |
|
144 |
- } |
|
145 |
- } |
|
146 |
- fcHurdleSig <- fcHurdleSig[, -c(4, 5)] |
|
147 |
- names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC", |
|
148 |
- "FDR") |
|
149 |
- fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue, |
|
150 |
- decreasing = FALSE), ] |
|
151 |
- |
|
152 |
- return(fcHurdleSig) |
|
153 |
- } |
|
154 |
-) |
|
155 |
- |
|
156 |
- |
|
157 |
-#' @rdname differentialExpression |
|
158 |
-#' @examples |
|
159 |
-#' data(celdaCGSim, celdaCGMod) |
|
160 |
-#' clusterDiffexpRes <- differentialExpression(celdaCGSim$counts, |
|
161 |
-#' celdaCGMod, |
|
162 |
-#' c1 = c(1, 2)) |
|
163 |
-#' @export |
|
164 |
-setMethod("differentialExpression", |
|
165 |
- signature(x = "matrix"), |
|
166 |
- function(x, |
|
167 |
- celdaMod, |
|
168 |
- c1, |
|
169 |
- c2 = NULL, |
|
170 |
- onlyPos = FALSE, |
|
171 |
- log2fcThreshold = NULL, |
|
172 |
- fdrThreshold = 1) { |
|
173 |
- |
|
174 |
- if (!(methods::is(celdaMod, "celda_C") || |
|
175 |
- methods::is(celdaMod, "celda_CG"))) { |
|
176 |
- stop("'celdaMod' should be an object of class celda_C or celda_CG") |
|
177 |
- } |
|
178 |
- if (is.null(c1)) { |
|
179 |
- stop("'c1' should be a numeric vector of cell cluster(s)") |
|
180 |
- } |
|
181 |
- |
|
182 |
- cdiff <- setdiff(c1, celdaClusters(celdaMod)$z) |
|
183 |
- |
|
184 |
- if (length(cdiff) > 0) { |
|
185 |
- warning("cluster ", cdiff, "in 'c1' does not exist in", |
|
186 |
- " 'celdaClusters(celdaMod)$z'!") |
|
187 |
- if (length(cdiff) == length(c1)) { |
|
188 |
- stop("All clusters in 'c1' does not exist in", |
|
189 |
- " 'celdaClusters(celdaMod)$z'!") |
|
190 |
- } |
|
191 |
- } |
|
192 |
- |
|
193 |
- compareCountMatrix(x, celdaMod) |
|
194 |
- |
|
195 |
- if (is.null(c2)) { |
|
196 |
- c2 <- sort(setdiff(unique(celdaClusters(celdaMod)$z), c1)) |
|
197 |
- } |
|
198 |
- if (length(c1) > 1) { |
|
199 |
- cells1 <- matrixNames(celdaMod)$column[which( |
|
200 |
- celdaClusters(celdaMod)$z %in% c1 |
|
201 |
- )] |
|
202 |
- } else { |
|
203 |
- cells1 <- matrixNames(celdaMod)$column[which( |
|
204 |
- celdaClusters(celdaMod)$z == c1 |
|
205 |
- )] |
|
206 |
- } |
|
207 |
- |
|
208 |
- if (length(c2) > 1) { |
|
209 |
- cells2 <- matrixNames(celdaMod)$column[which( |
|
210 |
- celdaClusters(celdaMod)$z %in% c2 |
|
211 |
- )] |
|
212 |
- } else { |
|
213 |
- cells2 <- matrixNames(celdaMod)$column[which( |
|
214 |
- celdaClusters(celdaMod)$z == c2 |
|
215 |
- )] |
|
216 |
- } |
|
217 |
- |
|
218 |
- mat <- x[, c(cells1, cells2)] |
|
219 |
- log_normalized_mat <- normalizeCounts(mat, |
|
220 |
- normalize = "cpm", |
|
221 |
- transformationFun = log1p |
|
222 |
- ) |
|
223 |
- cdat <- data.frame( |
|
224 |
- wellKey = c(cells1, cells2), |
|
225 |
- condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))), |
|
226 |
- ngeneson = rep("", (length(cells1) + length(cells2))), |
|
227 |
- stringsAsFactors = FALSE |
|
228 |
- ) |
|
229 |
- |
|
230 |
- sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat)) |
|
231 |
- cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0) |
|
232 |
- SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2) |
|
233 |
- cond <- factor(SummarizedExperiment::colData(sca)$condition) |
|
234 |
- cond <- stats::relevel(cond, "c2") |
|
235 |
- SummarizedExperiment::colData(sca)$condition <- cond |
|
236 |
- zlmCond <- MAST::zlm(~ condition + cngeneson, sca) |
|
237 |
- summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1") |
|
238 |
- summaryDt <- summaryCond$datatable |
|
239 |
- contrast <- |
|
240 |
- component <- |
|
241 |
- primerid <- |
|
242 |
- coef <- |
|
243 |
- ci.hi <- |
|
244 |
- ci.lo <- |
|
245 |
- `Pr(>Chisq)` <- |
|
246 |
- fdr <- NULL # Avoid NSE notes in check |
|
247 |
- |
|
248 |
- fcHurdle <- merge(summaryDt[contrast == "conditionc1" & |
|
249 |
- component == "H", .(primerid, `Pr(>Chisq)`)], |
|
250 |
- summaryDt[ |
|
251 |
- contrast == "conditionc1" & component == "logFC", |
|
252 |
- .(primerid, coef, ci.hi, ci.lo) |
|
253 |
- ], |
|
254 |
- by = "primerid" |
|
255 |
- ) |
|
256 |
- |
|
257 |
- fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")] |
|
258 |
- ### Some genes aren't outputted because log2FC gets NaN if one or both |
|
259 |
- ### clusters have 0 counts for a gene |
|
260 |
- ### and then they're discarded because NaN !> 0 |
|
261 |
- if (is.null(log2fcThreshold)) { |
|
262 |
- fcHurdleSig <- fcHurdle |
|
263 |
- } else { |
|
264 |
- fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold & |
|
265 |
- abs(coef) > log2fcThreshold], |
|
266 |
- data.table::as.data.table(S4Vectors::mcols(sca)), |
|
267 |
- by = "primerid" |
|
268 |
- ) |
|
269 |
- if (onlyPos) { |
|
270 |
- fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ] |
|
271 |
- } |
|
272 |
- } |
|
273 |
- fcHurdleSig <- fcHurdleSig[, -c(4, 5)] |
|
274 |
- names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC", |
|
275 |
- "FDR") |
|
276 |
- fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue, |
|
277 |
- decreasing = FALSE), ] |
|
278 |
- |
|
279 |
- return(fcHurdleSig) |
|
280 |
- } |
|
281 |
-) |
... | ... |
@@ -7,10 +7,10 @@ |
7 | 7 |
#' Rows represent features and columns represent cells. Must contain cluster |
8 | 8 |
#' labels in \code{celdaClusters(x, altExpName = altExpName)} if \code{x} is a |
9 | 9 |
#' \linkS4class{SingleCellExperiment} object. |
10 |
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
|
10 |
+#' @param useAssay A string specifying which \link{assay} |
|
11 | 11 |
#' slot to use if \code{x} is a |
12 | 12 |
#' \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts". |
13 |
-#' @param altExpName The name for the \link[SingleCellExperiment]{altExp} slot |
|
13 |
+#' @param altExpName The name for the \link{altExp} slot |
|
14 | 14 |
#' to use. Default "featureSubset". |
15 | 15 |
#' @param celdaMod Celda object of class `celda_C` or `celda_CG`. |
16 | 16 |
#' @param c1 Integer vector. Cell populations to include in group 1 for the |
... | ... |
@@ -27,6 +27,7 @@ |
27 | 27 |
#' @param fdrThreshold Numeric. A number between 0 and 1 that specifies the |
28 | 28 |
#' false discovery rate (FDR) threshold. Only features below this threshold |
29 | 29 |
#' will be returned. Default 1. |
30 |
+#' @param ... Ignored. Placeholder to prevent check warning. |
|
30 | 31 |
#' @return Data frame containing MAST results including statistics such as |
31 | 32 |
#' p-value, log2 fold change, and FDR. |
32 | 33 |
#' @export |
... | ... |
@@ -5,11 +5,13 @@ |
5 | 5 |
#' \linkS4class{SingleCellExperiment} |
6 | 6 |
#' with the matrix located in the assay slot under \code{useAssay}. |
7 | 7 |
#' Rows represent features and columns represent cells. Must contain cluster |
8 |
-#' labels in \code{celdaClusters(x)} if \code{x} is a |
|
8 |
+#' labels in \code{celdaClusters(x, altExpName = altExpName)} if \code{x} is a |
|
9 | 9 |
#' \linkS4class{SingleCellExperiment} object. |
10 | 10 |
#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
11 | 11 |
#' slot to use if \code{x} is a |
12 | 12 |
#' \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts". |
13 |
+#' @param altExpName The name for the \link[SingleCellExperiment]{altExp} slot |
|
14 |
+#' to use. Default "featureSubset". |
|
13 | 15 |
#' @param celdaMod Celda object of class `celda_C` or `celda_CG`. |
14 | 16 |
#' @param c1 Integer vector. Cell populations to include in group 1 for the |
15 | 17 |
#' differential expression analysis. |
... | ... |
@@ -48,6 +50,7 @@ setMethod("differentialExpression", |
48 | 50 |
signature(x = "SingleCellExperiment"), |
49 | 51 |
function(x, |
50 | 52 |
useAssay = "counts", |
53 |
+ altExpName = "featureSubset", |
|
51 | 54 |
c1, |
52 | 55 |
c2 = NULL, |
53 | 56 |
onlyPos = FALSE, |
... | ... |
@@ -58,36 +61,38 @@ setMethod("differentialExpression", |
58 | 61 |
stop("'c1' should be a numeric vector of cell cluster(s)") |
59 | 62 |
} |
60 | 63 |
|
61 |
- cdiff <- setdiff(c1, celdaClusters(x)) |
|
64 |
+ cdiff <- setdiff(c1, celdaClusters(x, altExpName = altExpName)) |
|
62 | 65 |
|
63 | 66 |
if (length(cdiff) > 0) { |
64 | 67 |
warning("cluster ", cdiff, "in 'c1' does not exist in", |
65 |
- " 'celdaClusters(x)'!") |
|
68 |
+ " 'celdaClusters(x, altExpName = altExpName)'!") |
|
66 | 69 |
if (length(cdiff) == length(c1)) { |
67 | 70 |
stop("All clusters in 'c1' does not exist in", |
68 |
- " 'celdaClusters(x)'!") |
|
71 |
+ " 'celdaClusters(x, altExpName = altExpName)'!") |
|
69 | 72 |
} |
70 | 73 |
} |
71 | 74 |
|
75 |
+ altExp <- SingleCellExperiment::altExp(sce, altExpName) |
|
72 | 76 |
counts <- SummarizedExperiment::assay(x, i = useAssay) |
73 | 77 |
|
74 | 78 |
if (is.null(c2)) { |
75 |
- c2 <- sort(setdiff(unique(celdaClusters(x)), c1)) |
|
79 |
+ c2 <- sort(setdiff(unique(celdaClusters(x, |
|
80 |
+ altExpName = altExpName)), c1)) |
|
76 | 81 |
} |
77 | 82 |
if (length(c1) > 1) { |
78 |
- cells1 <- SummarizedExperiment::colData(x)$colnames[ |
|
79 |
- which(celdaClusters(x) %in% c1)] |
|
83 |
+ cells1 <- SummarizedExperiment::colData(altExp)$colnames[ |
|
84 |
+ which(celdaClusters(x, altExpName = altExpName) %in% c1)] |
|
80 | 85 |
} else { |
81 |
- cells1 <- SummarizedExperiment::colData(x)$colnames[ |
|
82 |
- which(celdaClusters(x) == c1)] |
|
86 |
+ cells1 <- SummarizedExperiment::colData(altExp)$colnames[ |
|
87 |
+ which(celdaClusters(x, altExpName = altExpName) == c1)] |
|
83 | 88 |
} |
84 | 89 |
|
85 | 90 |
if (length(c2) > 1) { |
86 |
- cells2 <- SummarizedExperiment::colData(x)$colnames[ |
|
87 |
- which(celdaClusters(x) %in% c2)] |
|
91 |
+ cells2 <- SummarizedExperiment::colData(altExp)$colnames[ |
|
92 |
+ which(celdaClusters(x, altExpName = altExpName) %in% c2)] |
|
88 | 93 |
} else { |
89 |
- cells2 <- SummarizedExperiment::colData(x)$colnames[ |
|
90 |
- which(celdaClusters(x) == c2)] |
|
94 |
+ cells2 <- SummarizedExperiment::colData(altExp)$colnames[ |
|
95 |
+ which(celdaClusters(x, altExpName = altExpName) == c2)] |
|
91 | 96 |
} |
92 | 97 |
|
93 | 98 |
mat <- counts[, c(cells1, cells2)] |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
#' \linkS4class{SingleCellExperiment} |
6 | 6 |
#' with the matrix located in the assay slot under \code{useAssay}. |
7 | 7 |
#' Rows represent features and columns represent cells. Must contain cluster |
8 |
-#' labels in \code{clusters(x)} if \code{x} is a |
|
8 |
+#' labels in \code{celdaClusters(x)} if \code{x} is a |
|
9 | 9 |
#' \linkS4class{SingleCellExperiment} object. |
10 | 10 |
#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
11 | 11 |
#' slot to use if \code{x} is a |
... | ... |
@@ -58,35 +58,36 @@ setMethod("differentialExpression", |
58 | 58 |
stop("'c1' should be a numeric vector of cell cluster(s)") |
59 | 59 |
} |
60 | 60 |
|
61 |
- cdiff <- setdiff(c1, clusters(x)) |
|
61 |
+ cdiff <- setdiff(c1, celdaClusters(x)) |
|
62 | 62 |
|
63 | 63 |
if (length(cdiff) > 0) { |
64 | 64 |
warning("cluster ", cdiff, "in 'c1' does not exist in", |
65 |
- " 'clusters(x)'!") |
|
65 |
+ " 'celdaClusters(x)'!") |
|
66 | 66 |
if (length(cdiff) == length(c1)) { |
67 |
- stop("All clusters in 'c1' does not exist in 'clusters(x)'!") |
|
67 |
+ stop("All clusters in 'c1' does not exist in", |
|
68 |
+ " 'celdaClusters(x)'!") |
|
68 | 69 |
} |
69 | 70 |
} |
70 | 71 |
|
71 | 72 |
counts <- SummarizedExperiment::assay(x, i = useAssay) |
72 | 73 |
|
73 | 74 |
if (is.null(c2)) { |
74 |
- c2 <- sort(setdiff(unique(clusters(x)), c1)) |
|
75 |
+ c2 <- sort(setdiff(unique(celdaClusters(x)), c1)) |
|
75 | 76 |
} |
76 | 77 |
if (length(c1) > 1) { |
77 | 78 |
cells1 <- SummarizedExperiment::colData(x)$colnames[ |
78 |
- which(clusters(x) %in% c1)] |
|
79 |
+ which(celdaClusters(x) %in% c1)] |
|
79 | 80 |
} else { |
80 | 81 |
cells1 <- SummarizedExperiment::colData(x)$colnames[ |
81 |
- which(clusters(x) == c1)] |
|
82 |
+ which(celdaClusters(x) == c1)] |
|
82 | 83 |
} |
83 | 84 |
|
84 | 85 |
if (length(c2) > 1) { |
85 | 86 |
cells2 <- SummarizedExperiment::colData(x)$colnames[ |
86 |
- which(clusters(x) %in% c2)] |
|
87 |
+ which(celdaClusters(x) %in% c2)] |
|
87 | 88 |
} else { |
88 | 89 |
cells2 <- SummarizedExperiment::colData(x)$colnames[ |
89 |
- which(clusters(x) == c2)] |
|
90 |
+ which(celdaClusters(x) == c2)] |
|
90 | 91 |
} |
91 | 92 |
|
92 | 93 |
mat <- counts[, c(cells1, cells2)] |
... | ... |
@@ -172,39 +173,39 @@ setMethod("differentialExpression", |
172 | 173 |
stop("'c1' should be a numeric vector of cell cluster(s)") |
173 | 174 |
} |
174 | 175 |
|
175 |
- cdiff <- setdiff(c1, clusters(celdaMod)$z) |
|
176 |
+ cdiff <- setdiff(c1, celdaClusters(celdaMod)$z) |
|
176 | 177 |
|
177 | 178 |
if (length(cdiff) > 0) { |
178 | 179 |
warning("cluster ", cdiff, "in 'c1' does not exist in", |
179 |
- " 'clusters(celdaMod)$z'!") |
|
180 |
+ " 'celdaClusters(celdaMod)$z'!") |
|
180 | 181 |
if (length(cdiff) == length(c1)) { |
181 | 182 |
stop("All clusters in 'c1' does not exist in", |
182 |
- " 'clusters(celdaMod)$z'!") |
|
183 |
+ " 'celdaClusters(celdaMod)$z'!") |
|
183 | 184 |
} |
184 | 185 |
} |
185 | 186 |
|
186 | 187 |
compareCountMatrix(x, celdaMod) |
187 | 188 |
|
188 | 189 |
if (is.null(c2)) { |
189 |
- c2 <- sort(setdiff(unique(clusters(celdaMod)$z), c1)) |
|
190 |
+ c2 <- sort(setdiff(unique(celdaClusters(celdaMod)$z), c1)) |
|
190 | 191 |
} |
191 | 192 |
if (length(c1) > 1) { |
192 | 193 |
cells1 <- matrixNames(celdaMod)$column[which( |
193 |
- clusters(celdaMod)$z %in% c1 |
|
194 |
+ celdaClusters(celdaMod)$z %in% c1 |
|
194 | 195 |
)] |
195 | 196 |
} else { |
196 | 197 |
cells1 <- matrixNames(celdaMod)$column[which( |
197 |
- clusters(celdaMod)$z == c1 |
|
198 |
+ celdaClusters(celdaMod)$z == c1 |
|
198 | 199 |
)] |
199 | 200 |
} |
200 | 201 |
|
201 | 202 |
if (length(c2) > 1) { |
202 | 203 |
cells2 <- matrixNames(celdaMod)$column[which( |
203 |
- clusters(celdaMod)$z %in% c2 |
|
204 |
+ celdaClusters(celdaMod)$z %in% c2 |
|
204 | 205 |
)] |
205 | 206 |
} else { |
206 | 207 |
cells2 <- matrixNames(celdaMod)$column[which( |
207 |
- clusters(celdaMod)$z == c2 |
|
208 |
+ celdaClusters(celdaMod)$z == c2 |
|
208 | 209 |
)] |
209 | 210 |
} |
210 | 211 |
|
... | ... |
@@ -1,9 +1,15 @@ |
1 | 1 |
#' @title Differential expression for cell subpopulations using MAST |
2 | 2 |
#' @description Uses MAST to find differentially expressed features for |
3 | 3 |
#' specified cell subpopulations. |
4 |
-#' @param counts Integer matrix. Rows represent features and columns represent |
|
5 |
-#' cells. This matrix should be the same as the one used to generate |
|
6 |
-#' `celdaMod`. |
|
4 |
+#' @param x A numeric \link{matrix} of counts or a |
|
5 |
+#' \linkS4class{SingleCellExperiment} |
|
6 |
+#' with the matrix located in the assay slot under \code{useAssay}. |
|
7 |
+#' Rows represent features and columns represent cells. Must contain cluster |
|
8 |
+#' labels in \code{clusters(x)} if \code{x} is a |
|
9 |
+#' \linkS4class{SingleCellExperiment} object. |
|
10 |
+#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
|
11 |
+#' slot to use if \code{x} is a |
|
12 |
+#' \link[SingleCellExperiment]{SingleCellExperiment} object. Default "counts". |
|
7 | 13 |
#' @param celdaMod Celda object of class `celda_C` or `celda_CG`. |
8 | 14 |
#' @param c1 Integer vector. Cell populations to include in group 1 for the |
9 | 15 |
#' differential expression analysis. |
... | ... |
@@ -21,127 +27,248 @@ |
21 | 27 |
#' will be returned. Default 1. |
22 | 28 |
#' @return Data frame containing MAST results including statistics such as |
23 | 29 |
#' p-value, log2 fold change, and FDR. |
24 |
-#' @examples |
|
25 |
-#' data(celdaCGSim, celdaCGMod) |
|
26 |
-#' clusterDiffexpRes <- differentialExpression(celdaCGSim$counts, |
|
27 |
-#' celdaCGMod, |
|
28 |
-#' c1 = c(1, 2) |
|
29 |
-#' ) |
|
30 | 30 |
#' @export |
31 | 31 |
#' @rawNamespace import(data.table, except = c(melt, shift)) |
32 | 32 |
#' @importFrom MAST FromMatrix |
33 | 33 |
#' @importFrom MAST zlm |
34 | 34 |
#' @importFrom MAST summary |
35 | 35 |
#' @importFrom S4Vectors mcols |
36 |
-#' @importFrom SummarizedExperiment assay |
|
37 |
-#' @importFrom SummarizedExperiment colData |
|
38 |
-#' @importFrom SummarizedExperiment assayNames |
|
39 | 36 |
#' @importFrom plyr . |
40 | 37 |
#' @import SummarizedExperiment |
41 |
-differentialExpression <- function(counts, |
|
42 |
- celdaMod, |
|
43 |
- c1, |
|
44 |
- c2 = NULL, |
|
45 |
- onlyPos = FALSE, |
|
46 |
- log2fcThreshold = NULL, |
|
47 |
- fdrThreshold = 1) { |
|
48 |
- if (!is.matrix(counts)) { |
|
49 |
- stop("'counts' should be a numeric count matrix") |
|
50 |
- } |
|
51 |
- if (!(methods::is(celdaMod, "celda_C") || |
|
52 |
- methods::is(celdaMod, "celda_CG"))) { |
|
53 |
- stop("'celdaMod' should be an object of class celda_C or celda_CG") |
|
54 |
- } |
|
55 |
- if (is.null(c1)) { |
|
56 |
- stop("'c1' should be a numeric vector of cell cluster(s)") |
|
57 |
- } |
|
58 |
- compareCountMatrix(counts, celdaMod) |
|
59 |
- |
|
60 |
- if (is.null(c2)) { |
|
61 |
- c2 <- sort(setdiff(unique(clusters(celdaMod)$z), c1)) |
|
62 |
- } |
|
63 |
- if (length(c1) > 1) { |
|
64 |
- cells1 <- matrixNames(celdaMod)$column[which( |
|
65 |
- clusters(celdaMod)$z %in% c1 |
|
66 |
- )] |
|
67 |
- } else { |
|
68 |
- cells1 <- matrixNames(celdaMod)$column[which( |
|
69 |
- clusters(celdaMod)$z == c1 |
|
70 |
- )] |
|
71 |
- } |
|
72 |
- |
|
73 |
- if (length(c2) > 1) { |
|
74 |
- cells2 <- matrixNames(celdaMod)$column[which( |
|
75 |
- clusters(celdaMod)$z %in% c2 |
|
76 |
- )] |
|
77 |
- } else { |
|
78 |
- cells2 <- matrixNames(celdaMod)$column[which( |
|
79 |
- clusters(celdaMod)$z == c2 |
|
80 |
- )] |
|
81 |
- } |
|
82 |
- |
|
83 |
- mat <- counts[, c(cells1, cells2)] |
|
84 |
- log_normalized_mat <- normalizeCounts(mat, |
|
85 |
- normalize = "cpm", |
|
86 |
- transformationFun = log1p |
|
87 |
- ) |
|
88 |
- cdat <- data.frame( |
|
89 |
- wellKey = c(cells1, cells2), |
|
90 |
- condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))), |
|
91 |
- ngeneson = rep("", (length(cells1) + length(cells2))), |
|
92 |
- stringsAsFactors = FALSE |
|
93 |
- ) |
|
94 |
- |
|
95 |
- # explicitly load library SummarizedExperiment due to MAST package |
|
96 |
- # dependency error |
|
97 |
- # requireNamespace(SummarizedExperiment) |
|
98 |
- |
|
99 |
- sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat)) |
|
100 |
- cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0) |
|
101 |
- SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2) |
|
102 |
- cond <- factor(SummarizedExperiment::colData(sca)$condition) |
|
103 |
- cond <- stats::relevel(cond, "c2") |
|
104 |
- SummarizedExperiment::colData(sca)$condition <- cond |
|
105 |
- zlmCond <- MAST::zlm(~ condition + cngeneson, sca) |
|
106 |
- summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1") |
|
107 |
- summaryDt <- summaryCond$datatable |
|
108 |
- contrast <- |
|
109 |
- component <- |
|
110 |
- primerid <- |
|
111 |
- coef <- |
|
112 |
- ci.hi <- |
|
113 |
- ci.lo <- |
|
114 |
- `Pr(>Chisq)` <- |
|
115 |
- fdr <- NULL # Avoid NSE notes in check |
|
116 |
- |
|
117 |
- fcHurdle <- merge(summaryDt[contrast == "conditionc1" & |
|
118 |
- component == "H", .(primerid, `Pr(>Chisq)`)], |
|
119 |
- summaryDt[ |
|
120 |
- contrast == "conditionc1" & component == "logFC", |
|
121 |
- .(primerid, coef, ci.hi, ci.lo) |
|
122 |
- ], |
|
123 |
- by = "primerid" |
|
124 |
- ) |
|
125 |
- |
|
126 |
- fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")] |
|
127 |
- ### Some genes aren't outputted because log2FC gets NaN if one or both |
|
128 |
- ### clusters have 0 counts for a gene |
|
129 |
- ### and then they're discarded because NaN !> 0 |
|
130 |
- if (is.null(log2fcThreshold)) { |
|
131 |
- fcHurdleSig <- fcHurdle |
|
132 |
- } else { |
|
133 |
- fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold & |
|
134 |
- abs(coef) > log2fcThreshold], |
|
135 |
- data.table::as.data.table(S4Vectors::mcols(sca)), |
|
136 |
- by = "primerid" |
|
137 |
- ) |
|
138 |
- if (onlyPos) { |
|
139 |
- fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ] |
|
38 |
+setGeneric("differentialExpression", function(x, ...) { |
|
39 |
+ standardGeneric("differentialExpression")}) |
|
40 |
+ |
|
41 |
+ |
|
42 |
+#' @rdname differentialExpression |
|
43 |
+#' @examples |
|
44 |
+#' data(sceCeldaCG) |
|
45 |
+#' clusterDiffexpRes <- differentialExpression(sceCeldaCG, c1 = c(1, 2)) |
|
46 |
+#' @export |
|
47 |
+setMethod("differentialExpression", |
|
48 |
+ signature(x = "SingleCellExperiment"), |
|
49 |
+ function(x, |
|
50 |
+ useAssay = "counts", |
|
51 |
+ c1, |
|
52 |
+ c2 = NULL, |
|
53 |
+ onlyPos = FALSE, |
|
54 |
+ log2fcThreshold = NULL, |
|
55 |
+ fdrThreshold = 1) { |
|
56 |
+ |
|
57 |
+ if (is.null(c1)) { |
|
58 |
+ stop("'c1' should be a numeric vector of cell cluster(s)") |
|
59 |
+ } |
|
60 |
+ |
|
61 |
+ cdiff <- setdiff(c1, clusters(x)) |
|
62 |
+ |
|
63 |
+ if (length(cdiff) > 0) { |
|
64 |
+ warning("cluster ", cdiff, "in 'c1' does not exist in", |
|
65 |
+ " 'clusters(x)'!") |
|
66 |
+ if (length(cdiff) == length(c1)) { |
|
67 |
+ stop("All clusters in 'c1' does not exist in 'clusters(x)'!") |
|
68 |
+ } |
|
69 |
+ } |
|
70 |
+ |
|
71 |
+ counts <- SummarizedExperiment::assay(x, i = useAssay) |
|
72 |
+ |
|
73 |
+ if (is.null(c2)) { |
|
74 |
+ c2 <- sort(setdiff(unique(clusters(x)), c1)) |
|
75 |
+ } |
|
76 |
+ if (length(c1) > 1) { |
|
77 |
+ cells1 <- SummarizedExperiment::colData(x)$colnames[ |
|
78 |
+ which(clusters(x) %in% c1)] |
|
79 |
+ } else { |
|
80 |
+ cells1 <- SummarizedExperiment::colData(x)$colnames[ |
|
81 |
+ which(clusters(x) == c1)] |
|
82 |
+ } |
|
83 |
+ |
|
84 |
+ if (length(c2) > 1) { |
|
85 |
+ cells2 <- SummarizedExperiment::colData(x)$colnames[ |
|
86 |
+ which(clusters(x) %in% c2)] |
|
87 |
+ } else { |
|
88 |
+ cells2 <- SummarizedExperiment::colData(x)$colnames[ |
|
89 |
+ which(clusters(x) == c2)] |
|
90 |
+ } |
|
91 |
+ |
|
92 |
+ mat <- counts[, c(cells1, cells2)] |
|
93 |
+ log_normalized_mat <- normalizeCounts(mat, |
|
94 |
+ normalize = "cpm", |
|
95 |
+ transformationFun = log1p) |
|
96 |
+ cdat <- data.frame(wellKey = c(cells1, cells2), |
|
97 |
+ condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))), |
|
98 |
+ ngeneson = rep("", (length(cells1) + length(cells2))), |
|
99 |
+ stringsAsFactors = FALSE) |
|
100 |
+ |
|
101 |
+ sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat)) |
|
102 |
+ cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0) |
|
103 |
+ SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2) |
|
104 |
+ cond <- factor(SummarizedExperiment::colData(sca)$condition) |
|
105 |
+ cond <- stats::relevel(cond, "c2") |
|
106 |
+ SummarizedExperiment::colData(sca)$condition <- cond |
|
107 |
+ zlmCond <- MAST::zlm(~ condition + cngeneson, sca) |
|
108 |
+ summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1") |
|
109 |
+ summaryDt <- summaryCond$datatable |
|
110 |
+ contrast <- |
|
111 |
+ component <- |
|
112 |
+ primerid <- |
|
113 |
+ coef <- |
|
114 |
+ ci.hi <- |
|
115 |
+ ci.lo <- |
|
116 |
+ `Pr(>Chisq)` <- |
|
117 |
+ fdr <- NULL # Avoid NSE notes in check |
|
118 |
+ |
|
119 |
+ fcHurdle <- merge(summaryDt[contrast == "conditionc1" & |
|
120 |
+ component == "H", .(primerid, `Pr(>Chisq)`)], |
|
121 |
+ summaryDt[contrast == "conditionc1" & component == "logFC", |
|
122 |
+ .(primerid, coef, ci.hi, ci.lo)], by = "primerid") |
|
123 |
+ |
|
124 |
+ fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")] |
|
125 |
+ ### Some genes aren't outputted because log2FC gets NaN if one or both |
|
126 |
+ ### clusters have 0 counts for a gene |
|
127 |
+ ### and then they're discarded because NaN !> 0 |
|
128 |
+ if (is.null(log2fcThreshold)) { |
|
129 |
+ fcHurdleSig <- fcHurdle |
|
130 |
+ } else { |
|
131 |
+ fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold & |
|
132 |
+ abs(coef) > log2fcThreshold], |
|
133 |
+ data.table::as.data.table(S4Vectors::mcols(sca)), |
|
134 |
+ by = "primerid") |
|
135 |
+ if (onlyPos) { |
|
136 |
+ fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ] |
|
137 |
+ } |
|
138 |
+ } |
|
139 |
+ fcHurdleSig <- fcHurdleSig[, -c(4, 5)] |
|
140 |
+ names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC", |
|
141 |
+ "FDR") |
|
142 |
+ fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue, |
|
143 |
+ decreasing = FALSE), ] |
|
144 |
+ |
|
145 |
+ return(fcHurdleSig) |
|
140 | 146 |
} |
141 |
- } |
|
142 |
- fcHurdleSig <- fcHurdleSig[, -c(4, 5)] |
|
143 |
- names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC", "FDR") |
|
144 |
- fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue, decreasing = FALSE), ] |
|
147 |
+) |
|
145 | 148 |
|
146 |
- return(fcHurdleSig) |
|
147 |
-} |
|
149 |
+ |
|
150 |
+#' @rdname differentialExpression |
|
151 |
+#' @examples |
|
152 |
+#' data(celdaCGSim, celdaCGMod) |
|
153 |
+#' clusterDiffexpRes <- differentialExpression(celdaCGSim$counts, |
|
154 |
+#' celdaCGMod, |
|
155 |
+#' c1 = c(1, 2)) |
|
156 |
+#' @export |
|
157 |
+setMethod("differentialExpression", |
|
158 |
+ signature(x = "matrix"), |
|
159 |
+ function(x, |
|
160 |
+ celdaMod, |
|
161 |
+ c1, |
|
162 |
+ c2 = NULL, |
|
163 |
+ onlyPos = FALSE, |
|
164 |
+ log2fcThreshold = NULL, |
|
165 |
+ fdrThreshold = 1) { |
|
166 |
+ |
|
167 |
+ if (!(methods::is(celdaMod, "celda_C") || |
|
168 |
+ methods::is(celdaMod, "celda_CG"))) { |
|
169 |
+ stop("'celdaMod' should be an object of class celda_C or celda_CG") |
|
170 |
+ } |
|
171 |
+ if (is.null(c1)) { |
|
172 |
+ stop("'c1' should be a numeric vector of cell cluster(s)") |
|
173 |
+ } |
|
174 |
+ |
|
175 |
+ cdiff <- setdiff(c1, clusters(celdaMod)$z) |
|
176 |
+ |
|
177 |
+ if (length(cdiff) > 0) { |
|
178 |
+ warning("cluster ", cdiff, "in 'c1' does not exist in", |
|
179 |
+ " 'clusters(celdaMod)$z'!") |
|
180 |
+ if (length(cdiff) == length(c1)) { |
|
181 |
+ stop("All clusters in 'c1' does not exist in", |
|
182 |
+ " 'clusters(celdaMod)$z'!") |
|
183 |
+ } |
|
184 |
+ } |
|
185 |
+ |
|
186 |
+ compareCountMatrix(x, celdaMod) |
|
187 |
+ |
|
188 |
+ if (is.null(c2)) { |
|
189 |
+ c2 <- sort(setdiff(unique(clusters(celdaMod)$z), c1)) |
|
190 |
+ } |
|
191 |
+ if (length(c1) > 1) { |
|
192 |
+ cells1 <- matrixNames(celdaMod)$column[which( |
|
193 |
+ clusters(celdaMod)$z %in% c1 |
|
194 |
+ )] |
|
195 |
+ } else { |
|
196 |
+ cells1 <- matrixNames(celdaMod)$column[which( |
|
197 |
+ clusters(celdaMod)$z == c1 |
|
198 |
+ )] |
|
199 |
+ } |
|
200 |
+ |
|
201 |
+ if (length(c2) > 1) { |
|
202 |
+ cells2 <- matrixNames(celdaMod)$column[which( |
|
203 |
+ clusters(celdaMod)$z %in% c2 |
|
204 |
+ )] |
|
205 |
+ } else { |
|
206 |
+ cells2 <- matrixNames(celdaMod)$column[which( |
|
207 |
+ clusters(celdaMod)$z == c2 |
|
208 |
+ )] |
|
209 |
+ } |
|
210 |
+ |
|
211 |
+ mat <- x[, c(cells1, cells2)] |
|
212 |
+ log_normalized_mat <- normalizeCounts(mat, |
|
213 |
+ normalize = "cpm", |
|
214 |
+ transformationFun = log1p |
|
215 |
+ ) |
|
216 |
+ cdat <- data.frame( |
|
217 |
+ wellKey = c(cells1, cells2), |
|
218 |
+ condition = c(rep("c1", length(cells1)), rep("c2", length(cells2))), |
|
219 |
+ ngeneson = rep("", (length(cells1) + length(cells2))), |
|
220 |
+ stringsAsFactors = FALSE |
|
221 |
+ ) |
|
222 |
+ |
|
223 |
+ sca <- suppressMessages(MAST::FromMatrix(log_normalized_mat, cdat)) |
|
224 |
+ cdr2 <- colSums(SummarizedExperiment::assay(sca) > 0) |
|
225 |
+ SummarizedExperiment::colData(sca)$cngeneson <- scale(cdr2) |
|
226 |
+ cond <- factor(SummarizedExperiment::colData(sca)$condition) |
|
227 |
+ cond <- stats::relevel(cond, "c2") |
|
228 |
+ SummarizedExperiment::colData(sca)$condition <- cond |
|
229 |
+ zlmCond <- MAST::zlm(~ condition + cngeneson, sca) |
|
230 |
+ summaryCond <- MAST::summary(zlmCond, doLRT = "conditionc1") |
|
231 |
+ summaryDt <- summaryCond$datatable |
|
232 |
+ contrast <- |
|
233 |
+ component <- |
|
234 |
+ primerid <- |
|
235 |
+ coef <- |
|
236 |
+ ci.hi <- |
|
237 |
+ ci.lo <- |
|
238 |
+ `Pr(>Chisq)` <- |
|
239 |
+ fdr <- NULL # Avoid NSE notes in check |
|
240 |
+ |
|
241 |
+ fcHurdle <- merge(summaryDt[contrast == "conditionc1" & |
|
242 |
+ component == "H", .(primerid, `Pr(>Chisq)`)], |
|
243 |
+ summaryDt[ |
|
244 |
+ contrast == "conditionc1" & component == "logFC", |
|
245 |
+ .(primerid, coef, ci.hi, ci.lo) |
|
246 |
+ ], |
|
247 |
+ by = "primerid" |
|
248 |
+ ) |
|
249 |
+ |
|
250 |
+ fcHurdle[, fdr := stats::p.adjust(`Pr(>Chisq)`, "fdr")] |
|
251 |
+ ### Some genes aren't outputted because log2FC gets NaN if one or both |
|
252 |
+ ### clusters have 0 counts for a gene |
|
253 |
+ ### and then they're discarded because NaN !> 0 |
|
254 |
+ if (is.null(log2fcThreshold)) { |
|
255 |
+ fcHurdleSig <- fcHurdle |
|
256 |
+ } else { |
|
257 |
+ fcHurdleSig <- merge(fcHurdle[fdr < fdrThreshold & |
|
258 |
+ abs(coef) > log2fcThreshold], |
|
259 |
+ data.table::as.data.table(S4Vectors::mcols(sca)), |
|
260 |
+ by = "primerid" |
|
261 |
+ ) |
|
262 |
+ if (onlyPos) { |
|
263 |
+ fcHurdleSig <- fcHurdleSig[which(fcHurdleSig$log2fc > 0), ] |
|
264 |
+ } |
|
265 |
+ } |
|
266 |
+ fcHurdleSig <- fcHurdleSig[, -c(4, 5)] |
|
267 |
+ names(fcHurdleSig)[c(1, 2, 3, 4)] <- c("Gene", "Pvalue", "Log2_FC", |
|
268 |
+ "FDR") |
|
269 |
+ fcHurdleSig <- fcHurdleSig[order(fcHurdleSig$Pvalue, |
|
270 |
+ decreasing = FALSE), ] |
|
271 |
+ |
|
272 |
+ return(fcHurdleSig) |
|
273 |
+ } |
|
274 |
+) |
... | ... |
@@ -23,8 +23,10 @@ |
23 | 23 |
#' p-value, log2 fold change, and FDR. |
24 | 24 |
#' @examples |
25 | 25 |
#' data(celdaCGSim, celdaCGMod) |
26 |
-#' clusterDiffexpRes = differentialExpression(celdaCGSim$counts, |
|
27 |
-#' celdaCGMod, c1 = c(1, 2)) |
|
26 |
+#' clusterDiffexpRes <- differentialExpression(celdaCGSim$counts, |
|
27 |
+#' celdaCGMod, |
|
28 |
+#' c1 = c(1, 2) |
|
29 |
+#' ) |
|
28 | 30 |
#' @export |
29 | 31 |
#' @rawNamespace import(data.table, except = c(melt, shift)) |
30 | 32 |
#' @importFrom MAST FromMatrix |
... | ... |
@@ -37,98 +39,109 @@ |
37 | 39 |
#' @importFrom plyr . |
38 | 40 |
#' @import SummarizedExperiment |
39 | 41 |
differentialExpression <- function(counts, |
40 |
- celdaMod, |
|
41 |
- c1, |
|
42 |
- c2 = NULL, |
|
43 |