1 | 1 |
deleted file mode 100755 |
... | ... |
@@ -1,1154 +0,0 @@ |
1 |
-#' @title Celda models |
|
2 |
-#' @description List of available Celda models with correpsonding descriptions. |
|
3 |
-#' @export |
|
4 |
-#' @examples |
|
5 |
-#' celda() |
|
6 |
-#' @return None |
|
7 |
-celda <- function() { |
|
8 |
- message( |
|
9 |
- "celda_C: Clusters the columns of a count matrix containing", |
|
10 |
- " single-cell data into K subpopulations." |
|
11 |
- ) |
|
12 |
- message( |
|
13 |
- "celda_G: Clusters the rows of a count matrix containing", |
|
14 |
- " single-cell data into L modules." |
|
15 |
- ) |
|
16 |
- message( |
|
17 |
- "celda_CG: Clusters the rows and columns of a count matrix", |
|
18 |
- " containing single-cell data into L modules and K subpopulations,", |
|
19 |
- " respectively." |
|
20 |
- ) |
|
21 |
- message( |
|
22 |
- "celdaGridSearch: Run Celda with different combinations of", |
|
23 |
- " parameters and multiple chains in parallel." |
|
24 |
- ) |
|
25 |
-} |
|
26 |
- |
|
27 |
- |
|
28 |
-#' @title Get log-likelihood history |
|
29 |
-#' @description Retrieves the complete log-likelihood from all iterations of |
|
30 |
-#' Gibbs sampling used to generate a celdaModel. |
|
31 |
-#' @param celdaMod celdaModel. Options available in `celda::availableModels`. |
|
32 |
-#' @return Numeric. The log-likelihood at each step of Gibbs sampling used to |
|
33 |
-#' generate the model. |
|
34 |
-#' @examples |
|
35 |
-#' data(celdaCGMod) |
|
36 |
-#' logLikelihoodHistory(celdaCGMod) |
|
37 |
-#' @export |
|
38 |
-setGeneric( |
|
39 |
- "logLikelihoodHistory", |
|
40 |
- function(celdaMod) { |
|
41 |
- standardGeneric("logLikelihoodHistory") |
|
42 |
- } |
|
43 |
-) |
|
44 |
-#' @title Get log-likelihood history |
|
45 |
-#' @description Retrieves the complete log-likelihood from all iterations of |
|
46 |
-#' Gibbs sampling used to generate a celdaModel. |
|
47 |
-#' @param celdaMod celdaModel. Options available in `celda::availableModels`. |
|
48 |
-#' @return Numeric. The log-likelihood at each step of Gibbs sampling used to |
|
49 |
-#' generate the model. |
|
50 |
-#' @examples |
|
51 |
-#' data(celdaCGMod) |
|
52 |
-#' logLikelihoodHistory(celdaCGMod) |
|
53 |
-#' @export |
|
54 |
-setMethod("logLikelihoodHistory", |
|
55 |
- signature = c(celdaMod = "celdaModel"), |
|
56 |
- function(celdaMod) { |
|
57 |
- celdaMod@completeLogLik |
|
58 |
- } |
|
59 |
-) |
|
60 |
- |
|
61 |
- |
|
62 |
-#' @title Get the log-likelihood |
|
63 |
-#' @description Retrieves the final log-likelihood from all iterations of Gibbs |
|
64 |
-#' sampling used to generate a celdaModel. |
|
65 |
-#' @return Numeric. The log-likelihood at the final step of Gibbs sampling used |
|
66 |
-#' to generate the model. |
|
67 |
-#' @param celdaMod A celdaModel object of class celda_C, celda_G, or celda_CG. |
|
68 |
-#' @examples |
|
69 |
-#' data(celdaCGMod) |
|
70 |
-#' bestLogLikelihood(celdaCGMod) |
|
71 |
-#' @export |
|
72 |
-setGeneric( |
|
73 |
- "bestLogLikelihood", |
|
74 |
- function(celdaMod) { |
|
75 |
- standardGeneric("bestLogLikelihood") |
|
76 |
- } |
|
77 |
-) |
|
78 |
-#' @title Get the log-likelihood |
|
79 |
-#' @description Retrieves the final log-likelihood from all iterations of Gibbs |
|
80 |
-#' sampling used to generate a celdaModel. |
|
81 |
-#' @param celdaMod A celdaModel object of class celda_C, celda_G, or celda_CG. |
|
82 |
-#' @return Numeric. The log-likelihood at the final step of Gibbs sampling used |
|
83 |
-#' to generate the model. |
|
84 |
-#' @examples |
|
85 |
-#' data(celdaCGMod) |
|
86 |
-#' bestLogLikelihood(celdaCGMod) |
|
87 |
-#' @export |
|
88 |
-setMethod("bestLogLikelihood", |
|
89 |
- signature = c(celdaMod = "celdaModel"), |
|
90 |
- function(celdaMod) { |
|
91 |
- celdaMod@finalLogLik |
|
92 |
- } |
|
93 |
-) |
|
94 |
- |
|
95 |
- |
|
96 |
-setClass("celda_C", |
|
97 |
- representation(sampleLabel = "factor"), |
|
98 |
- contains = "celdaModel" |
|
99 |
-) |
|
100 |
- |
|
101 |
- |
|
102 |
-#' @title Get celda model from a celda |
|
103 |
-#' \link[SingleCellExperiment]{SingleCellExperiment} object |
|
104 |
-#' @description Return the celda model for \code{sce} returned by |
|
105 |
-#' \link{celda_C}, \link{celda_G} or \link{celda_CG}. |
|
106 |
-#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object |
|
107 |
-#' returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}. |
|
108 |
-#' @return Character. The celda model. Can be one of "celda_C", "celda_G", or |
|
109 |
-#' "celda_CG". |
|
110 |
-#' @examples |
|
111 |
-#' data(sceCeldaCG) |
|
112 |
-#' celdaModel(sceCeldaCG) |
|
113 |
-#' @export |
|
114 |
-setGeneric("celdaModel", |
|
115 |
- function(sce) { |
|
116 |
- standardGeneric("celdaModel") |
|
117 |
- }) |
|
118 |
-#' @rdname celdaModel |
|
119 |
-#' @export |
|
120 |
-setMethod("celdaModel", |
|
121 |
- signature(sce = "SingleCellExperiment"), |
|
122 |
- function(sce) { |
|
123 |
- tryCatch( |
|
124 |
- if (S4Vectors::metadata(sce)$celda_parameters$model %in% |
|
125 |
- c("celda_C", "celda_G", "celda_CG")) { |
|
126 |
- return(S4Vectors::metadata(sce)$celda_parameters$model) |
|
127 |
- } else { |
|
128 |
- stop("S4Vectors::metadata(sce)$celda_parameters$model must be", |
|
129 |
- " one of 'celda_C', 'celda_G', or 'celda_CG'") |
|
130 |
- }, |
|
131 |
- error = function(e) { |
|
132 |
- message("S4Vectors::metadata(sce)$celda_parameters$model must", |
|
133 |
- " exist! Try running celda model (celda_C, celda_CG, or", |
|
134 |
- " celda_G) first.") |
|
135 |
- stop(e) |
|
136 |
- }) |
|
137 |
- }) |
|
138 |
-setClass("celda_G", contains = "celdaModel") |
|
139 |
- |
|
140 |
-setClass("celda_CG", contains = c("celda_C", "celda_G")) |
|
141 |
- |
|
142 |
- |
|
143 |
-#' @title Get perplexity for every model in a celdaList |
|
144 |
-#' @description Returns perplexity for each model in a celdaList as calculated |
|
145 |
-#' by `perplexity().` |
|
146 |
-#' @param celdaList An object of class celdaList. |
|
147 |
-#' @return List. Contains one celdaModel object for each of the parameters |
|
148 |
-#' specified in the `runParams()` of the provided celda list. |
|
149 |
-#' @examples |
|
150 |
-#' data(celdaCGGridSearchRes) |
|
151 |
-#' celdaCGGridModelPerplexities <- celdaPerplexity(celdaCGGridSearchRes) |
|
152 |
-#' @export |
|
153 |
-setGeneric( |
|
154 |
- "celdaPerplexity", |
|
155 |
- function(celdaList) { |
|
156 |
- standardGeneric("celdaPerplexity") |
|
157 |
- } |
|
158 |
-) |
|
159 |
-#' @title Get perplexity for every model in a celdaList |
|
160 |
-#' @description Returns perplexity for each model in a celdaList as calculated |
|
161 |
-#' by `perplexity().` |
|
162 |
-#' @param celdaList An object of class celdaList. |
|
163 |
-#' @return List. Contains one celdaModel object for each of the parameters |
|
164 |
-#' specified in the `runParams()` of the provided celda list. |
|
165 |
-#' @examples |
|
166 |
-#' data(celdaCGGridSearchRes) |
|
167 |
-#' celdaCGGridModelPerplexities <- celdaPerplexity(celdaCGGridSearchRes) |
|
168 |
-#' @export |
|
169 |
-setMethod("celdaPerplexity", |
|
170 |
- signature = c(celdaList = "celdaList"), |
|
171 |
- function(celdaList) { |
|
172 |
- celdaList@perplexity |
|
173 |
- } |
|
174 |
-) |
|
175 |
- |
|
176 |
- |
|
177 |
-#' @title Append two celdaList objects |
|
178 |
-#' @description Returns a single celdaList representing the combination of two |
|
179 |
-#' provided celdaList objects. |
|
180 |
-#' @return A celdaList object. This object contains all resList entries and |
|
181 |
-#' runParam records from both lists. |
|
182 |
-#' @param list1 A celda_list object |
|
183 |
-#' @param list2 A celda_list object to be joined with list_1 |
|
184 |
-#' @examples |
|
185 |
-#' data(celdaCGGridSearchRes) |
|
186 |
-#' appendedList <- appendCeldaList( |
|
187 |
-#' celdaCGGridSearchRes, |
|
188 |
-#' celdaCGGridSearchRes |
|
189 |
-#' ) |
|
190 |
-#' @importFrom methods new |
|
191 |
-#' @export |
|
192 |
-appendCeldaList <- function(list1, list2) { |
|
193 |
- if (!is.element("celdaList", class(list1)) | |
|
194 |
- !is.element("celdaList", class(list2))) { |
|
195 |
- stop("Both parameters to appendCeldaList must be of class celdaList.") |
|
196 |
- } |
|
197 |
- if (!(countChecksum(list1) == countChecksum(list2))) { |
|
198 |
- warning( |
|
199 |
- "Provided lists have different countChecksums and may have", |
|
200 |
- " been generated from different count matrices. Using checksum", |
|
201 |
- " from first list..." |
|
202 |
- ) |
|
203 |
- } |
|
204 |
- newList <- methods::new( |
|
205 |
- "celdaList", |
|
206 |
- runParams = rbind(runParams(list1), runParams(list2)), |
|
207 |
- resList = c(resList(list1), resList(list2)), |
|
208 |
- countChecksum = countChecksum(list1), |
|
209 |
- perplexity = matrix(nrow = 0, ncol = 0) |
|
210 |
- ) |
|
211 |
- return(newList) |
|
212 |
-} |
|
213 |
- |
|
214 |
- |
|
215 |
-#' @title Get the MD5 hash of the count matrix from the celdaList |
|
216 |
-#' @description Returns the MD5 hash of the count matrix used to generate the |
|
217 |
-#' celdaList. |
|
218 |
-#' @param celdaList An object of class celdaList. |
|
219 |
-#' @return A character string of length 32 containing the MD5 digest of |
|
220 |
-#' the count matrix. |
|
221 |
-#' @examples |
|
222 |
-#' data(celdaCGGridSearchRes) |
|
223 |
-#' countChecksum <- countChecksum(celdaCGGridSearchRes) |
|
224 |
-#' @export |
|
225 |
-setGeneric( |
|
226 |
- "countChecksum", |
|
227 |
- function(celdaList) { |
|
228 |
- standardGeneric("countChecksum") |
|
229 |
- } |
|
230 |
-) |
|
231 |
-#' @title Get the MD5 hash of the count matrix from the celdaList |
|
232 |
-#' @description Returns the MD5 hash of the count matrix used to generate the |
|
233 |
-#' celdaList. |
|
234 |
-#' @param celdaList An object of class celdaList. |
|
235 |
-#' @return A character string of length 32 containing the MD5 digest of |
|
236 |
-#' the count matrix. |
|
237 |
-#' @examples |
|
238 |
-#' data(celdaCGGridSearchRes) |
|
239 |
-#' countChecksum <- countChecksum(celdaCGGridSearchRes) |
|
240 |
-#' @export |
|
241 |
-setMethod("countChecksum", |
|
242 |
- signature = c(celdaList = "celdaList"), |
|
243 |
- function(celdaList) { |
|
244 |
- celdaList@countChecksum |
|
245 |
- } |
|
246 |
-) |
|
247 |
- |
|
248 |
-############################################################################### |
|
249 |
-# Generics |
|
250 |
-############################################################################### |
|
251 |
- |
|
252 |
- |
|
253 |
-#' @title Plot celda Heatmap |
|
254 |
-#' @description Render a stylable heatmap of count data based on celda |
|
255 |
-#' clustering results. |
|
256 |
-#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object |
|
257 |
-#' returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}. |
|
258 |
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
|
259 |
-#' slot to use. Default "counts". |
|
260 |
-#' @param featureIx Integer vector. Select features for display in heatmap. If |
|
261 |
-#' NULL, no subsetting will be performed. Default NULL. \strong{Only used for |
|
262 |
-#' \code{sce} containing celda_C model result returned by \link{celda_C}.} |
|
263 |
-#' @param nfeatures Integer. Maximum number of features to select for each |
|
264 |
-#' gene module. Default 25. \strong{Only used for \code{sce} containing |
|
265 |
-#' celda_CG or celda_G model results returned by \link{celda_CG} or |
|
266 |
-#' \link{celda_G}.} |
|
267 |
-#' @param ... Additional parameters passed to \link{plotHeatmap}. |
|
268 |
-#' @seealso `celdaTsne()` for generating 2-dimensional tSNE coordinates |
|
269 |
-#' @examples |
|
270 |
-#' data(sceCeldaCG) |
|
271 |
-#' celdaHeatmap(sceCeldaCG) |
|
272 |
-#' @return list A list containing dendrogram information and the heatmap grob |
|
273 |
-#' @export |
|
274 |
-setGeneric("celdaHeatmap", |
|
275 |
- function(sce, ...) { |
|
276 |
- standardGeneric("celdaHeatmap") |
|
277 |
- }) |
|
278 |
- |
|
279 |
- |
|
280 |
-#' @export |
|
281 |
-#' @rdname celdaHeatmap |
|
282 |
-setMethod("celdaHeatmap", signature(sce = "SingleCellExperiment"), |
|
283 |
- function(sce, useAssay = "counts", featureIx = NULL, nfeatures = 25, ...) { |
|
284 |
- if (celdaModel(sce) == "celda_C") { |
|
285 |
- g <- .celdaHeatmapCelda_C(sce = sce, |
|
286 |
- useAssay = useAssay, |
|
287 |
- featureIx = featureIx, |
|
288 |
- ...) |
|
289 |
- return(g) |
|
290 |
- } else if (celdaModel(sce) == "celda_CG") { |
|
291 |
- g <- .celdaHeatmapCelda_CG(sce = sce, |
|
292 |
- useAssay = useAssay, |
|
293 |
- nfeatures = nfeatures, |
|
294 |
- ...) |
|
295 |
- return(g) |
|
296 |
- } else if (celdaModel(sce) == "celda_G") { |
|
297 |
- g <- .celdaHeatmapCelda_G(sce = sce, |
|
298 |
- useAssay = useAssay, |
|
299 |
- nfeatures = nfeatures, |
|
300 |
- ...) |
|
301 |
- return(g) |
|
302 |
- } else { |
|
303 |
- stop("S4Vectors::metadata(sce)$celda_parameters$model must be", |
|
304 |
- " one of 'celda_C', 'celda_G', or 'celda_CG'") |
|
305 |
- } |
|
306 |
- }) |
|
307 |
- |
|
308 |
- |
|
309 |
-#' @title Calculate the Log-likelihood of a celda model |
|
310 |
-#' @description Calculate the log-likelihood for cell population |
|
311 |
-#' and feature module cluster assignments on the count matrix, per celda model. |
|
312 |
-#' @param x A \linkS4class{SingleCellExperiment} object returned by |
|
313 |
-#' \link{celda_C}, \link{celda_G}, or \link{celda_CG}, with the matrix |
|
314 |
-#' located in the \code{useAssay} assay slot. |
|
315 |
-#' Rows represent features and columns represent cells. |
|
316 |
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
|
317 |
-#' slot to use. Default "counts". |
|
318 |
-#' @param celdaMod celda model object. Ignored if \code{x} is a |
|
319 |
-#' \linkS4class{SingleCellExperiment} object. |
|
320 |
-#' @return The log-likelihood of the cluster assignment for the |
|
321 |
-#' provided \linkS4class{SingleCellExperiment}. |
|
322 |
-#' @seealso `celda_C()` for clustering cells |
|
323 |
-#' @export |
|
324 |
-setGeneric("logLikelihood", |
|
325 |
- function(x, ...) { |
|
326 |
- standardGeneric("logLikelihood") |
|
327 |
- }) |
|
328 |
- |
|
329 |
- |
|
330 |
-#' @rdname logLikelihood |
|
331 |
-#' @examples |
|
332 |
-#' data(sceCeldaC, sceCeldaCG) |
|
333 |
-#' loglikC <- logLikelihood(sceCeldaC) |
|
334 |
-#' loglikCG <- logLikelihood(sceCeldaCG) |
|
335 |
-#' @export |
|
336 |
-setMethod("logLikelihood", signature(x = "SingleCellExperiment"), |
|
337 |
- function(x, useAssay = "counts") { |
|
338 |
- |
|
339 |
- counts <- SummarizedExperiment::assay(x, i = useAssay) |
|
340 |
- sampleLabel <- sampleLabel(x) |
|
341 |
- z <- celdaClusters(x) |
|
342 |
- y <- celdaModules(x) |
|
343 |
- K <- S4Vectors::metadata(x)$celda_parameters$K |
|
344 |
- L <- S4Vectors::metadata(x)$celda_parameters$L |
|
345 |
- alpha <- S4Vectors::metadata(x)$celda_parameters$alpha |
|
346 |
- beta <- S4Vectors::metadata(x)$celda_parameters$beta |
|
347 |
- delta = S4Vectors::metadata(x)$celda_parameters$delta |
|
348 |
- gamma = S4Vectors::metadata(x)$celda_parameters$gamma |
|
349 |
- |
|
350 |
- if (celdaModel(x) == "celda_C") { |
|
351 |
- ll <- .logLikelihoodcelda_C(counts = counts, |
|
352 |
- sampleLabel = sampleLabel, |
|
353 |
- z = z, |
|
354 |
- K = K, |
|
355 |
- alpha = alpha, |
|
356 |
- beta = beta) |
|
357 |
- } else if (celdaModel(x) == "celda_CG") { |
|
358 |
- ll <- .logLikelihoodcelda_CG(counts = counts, |
|
359 |
- sampleLabel = sampleLabel, |
|
360 |
- z = z, |
|
361 |
- y = y, |
|
362 |
- K = K, |
|
363 |
- L = L, |
|
364 |
- alpha = alpha, |
|
365 |
- beta = beta, |
|
366 |
- delta = delta, |
|
367 |
- gamma = gamma) |
|
368 |
- } else if (celdaModel(x) == "celda_G") { |
|
369 |
- ll <- .logLikelihoodcelda_G(counts = counts, |
|
370 |
- y = y, |
|
371 |
- L = L, |
|
372 |
- beta = beta, |
|
373 |
- delta = delta, |
|
374 |
- gamma = gamma) |
|
375 |
- } else { |
|
376 |
- stop("S4Vectors::metadata(x)$celda_parameters$model must be", |
|
377 |
- " one of 'celda_C', 'celda_G', or 'celda_CG'!") |
|
378 |
- } |
|
379 |
- return(ll) |
|
380 |
- } |
|
381 |
-) |
|
382 |
- |
|
383 |
- |
|
384 |
-#' @rdname logLikelihood |
|
385 |
-#' @export |
|
386 |
-setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_C"), |
|
387 |
- function(x, celdaMod) { |
|
388 |
- sampleLabel <- sampleLabel(celdaMod) |
|
389 |
- z = celdaClusters(celdaMod)$z |
|
390 |
- K = params(celdaMod)$K |
|
391 |
- alpha = params(celdaMod)$alpha |
|
392 |
- beta = params(celdaMod)$beta |
|
393 |
- |
|
394 |
- ll <- .logLikelihoodcelda_C(counts = x, |
|
395 |
- sampleLabel = sampleLabel, |
|
396 |
- z = z, |
|
397 |
- K = K, |
|
398 |
- alpha = alpha, |
|
399 |
- beta = beta) |
|
400 |
- return(ll) |
|
401 |
- } |
|
402 |
-) |
|
403 |
- |
|
404 |
- |
|
405 |
-#' @rdname logLikelihood |
|
406 |
-#' @export |
|
407 |
-setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_G"), |
|
408 |
- function(x, celdaMod) { |
|
409 |
- y <- celdaClusters(celdaMod)$y |
|
410 |
- L <- params(celdaMod)$L |
|
411 |
- beta = params(celdaMod)$beta |
|
412 |
- delta = params(celdaMod)$delta |
|
413 |
- gamma = params(celdaMod)$gamma |
|
414 |
- |
|
415 |
- ll <- .logLikelihoodcelda_G(counts = x, |
|
416 |
- y = y, |
|
417 |
- L = L, |
|
418 |
- beta = beta, |
|
419 |
- delta = delta, |
|
420 |
- gamma = gamma) |
|
421 |
- return(ll) |
|
422 |
- } |
|
423 |
-) |
|
424 |
- |
|
425 |
- |
|
426 |
-#' @rdname logLikelihood |
|
427 |
-#' @export |
|
428 |
-setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_CG"), |
|
429 |
- function(x, celdaMod) { |
|
430 |
- sampleLabel <- sampleLabel(celdaMod) |
|
431 |
- z <- celdaClusters(celdaMod)$z |
|
432 |
- y <- celdaClusters(celdaMod)$y |
|
433 |
- K <- params(celdaMod)$K |
|
434 |
- L <- params(celdaMod)$L |
|
435 |
- alpha = params(celdaMod)$alpha |
|
436 |
- beta = params(celdaMod)$beta |
|
437 |
- delta = params(celdaMod)$delta |
|
438 |
- gamma = params(celdaMod)$gamma |
|
439 |
- |
|
440 |
- ll <- .logLikelihoodcelda_CG(counts = x, |
|
441 |
- sampleLabel = sampleLabel, |
|
442 |
- z = z, |
|
443 |
- y = y, |
|
444 |
- K = K, |
|
445 |
- L = L, |
|
446 |
- alpha = alpha, |
|
447 |
- beta = beta, |
|
448 |
- delta = delta, |
|
449 |
- gamma = gamma) |
|
450 |
- return(ll) |
|
451 |
- } |
|
452 |
-) |
|
453 |
- |
|
454 |
- |
|
455 |
-#' @title Get the conditional probabilities of cell in subpopulations from celda |
|
456 |
-#' model |
|
457 |
-#' @description Calculate the conditional probability of each cell belonging to |
|
458 |
-#' each subpopulation given all other cell cluster assignments and/or |
|
459 |
-#' each feature belonging to each module given all other feature cluster |
|
460 |
-#' assignments in a celda model. |
|
461 |
-#' @param sce A \linkS4class{SingleCellExperiment} object returned by |
|
462 |
-#' \link{celda_C}, \link{celda_G}, or \link{celda_CG}, with the matrix |
|
463 |
-#' located in the \code{useAssay} assay slot. |
|
464 |
-#' Rows represent features and columns represent cells. |
|
465 |
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
|
466 |
-#' slot to use. Default "counts". |
|
467 |
-#' @param log Logical. If \code{FALSE}, then the normalized conditional |
|
468 |
-#' probabilities will be returned. If \code{TRUE}, then the unnormalized log |
|
469 |
-#' probabilities will be returned. Default \code{FALSE}. |
|
470 |
-#' @examples |
|
471 |
-#' data(sceCeldaCG) |
|
472 |
-#' clusterProb <- clusterProbability(sceCeldaCG, log = TRUE) |
|
473 |
-#' @return A list containging a matrix for the conditional cell subpopulation |
|
474 |
-#' cluster and/or feature module probabilities. |
|
475 |
-#' @export |
|
476 |
-setGeneric("clusterProbability", |
|
477 |
- function(sce, ...) { |
|
478 |
- standardGeneric("clusterProbability") |
|
479 |
- }) |
|
480 |
- |
|
481 |
- |
|
482 |
-#' @seealso `celda_C()` for clustering cells |
|
483 |
-#' @examples |
|
484 |
-#' data(sceCeldaC) |
|
485 |
-#' clusterProb <- clusterProbability(sceCeldaC) |
|
486 |
-#' @rdname clusterProbability |
|
487 |
-#' @export |
|
488 |
-setMethod("clusterProbability", signature(sce = "SingleCellExperiment"), |
|
489 |
- function(sce, useAssay = "counts", log = FALSE) { |
|
490 |
- |
|
491 |
- if (celdaModel(sce) == "celda_C") { |
|
492 |
- cp <- .clusterProbabilityCeldaC(sce = sce, |
|
493 |
- useAssay = useAssay, |
|
494 |
- log = log) |
|
495 |
- return(cp) |
|
496 |
- } else if (celdaModel(sce) == "celda_CG") { |
|
497 |
- cp <- .clusterProbabilityCeldaCG(sce = sce, |
|
498 |
- useAssay = useAssay, |
|
499 |
- log = log) |
|
500 |
- return(cp) |
|
501 |
- } else if (celdaModel(sce) == "celda_G") { |
|
502 |
- cp <- .clusterProbabilityCeldaG(sce = sce, |
|
503 |
- useAssay = useAssay, |
|
504 |
- log = log) |
|
505 |
- return(cp) |
|
506 |
- } else { |
|
507 |
- stop("S4Vectors::metadata(sce)$celda_parameters$model must be", |
|
508 |
- " one of 'celda_C', 'celda_G', or 'celda_CG'!") |
|
509 |
- } |
|
510 |
- }) |
|
511 |
- |
|
512 |
- |
|
513 |
-#' @title Simulate count data from the celda generative models. |
|
514 |
-#' @description This function generates a \linkS4class{SingleCellExperiment} |
|
515 |
-#' containing a simulated counts matrix in the \code{"counts"} assay slot, as |
|
516 |
-#' well as various parameters used in the simulation which can be |
|
517 |
-#' useful for running celda and are stored in \code{metadata} slot. The user |
|
518 |
-#' must provide the desired model (one of celda_C, celda_G, celda_CG) as well |
|
519 |
-#' as any desired tuning parameters for those model's simulation functions |
|
520 |
-#' as detailed below. |
|
521 |
-#' @param model Character. Options available in \code{celda::availableModels}. |
|
522 |
-#' Can be one of \code{"celda_CG"}, \code{"celda_C"}, or \code{"celda_G"}. |
|
523 |
-#' Default \code{"celda_CG"}. |
|
524 |
-#' @param S Integer. Number of samples to simulate. Default 5. Only used if |
|
525 |
-#' \code{model} is one of \code{"celda_CG"} or \code{"celda_C"}. |
|
526 |
-#' @param CRange Integer vector. A vector of length 2 that specifies the lower |
|
527 |
-#' and upper bounds of the number of cells to be generated in each sample. |
|
528 |
-#' Default c(50, 100). Only used if |
|
529 |
-#' \code{model} is one of \code{"celda_CG"} or \code{"celda_C"}. |
|
530 |
-#' @param NRange Integer vector. A vector of length 2 that specifies the lower |
|
531 |
-#' and upper bounds of the number of counts generated for each cell. Default |
|
532 |
-#' c(500, 1000). |
|
533 |
-#' @param C Integer. Number of cells to simulate. Default 100. Only used if |
|
534 |
-#' \code{model} is \code{"celda_G"}. |
|
535 |
-#' @param G Integer. The total number of features to be simulated. Default 100. |
|
536 |
-#' @param K Integer. Number of cell populations. Default 5. Only used if |
|
537 |
-#' \code{model} is one of \code{"celda_CG"} or \code{"celda_C"}. |
|
538 |
-#' @param L Integer. Number of feature modules. Default 10. Only used if |
|
539 |
-#' \code{model} is one of \code{"celda_CG"} or \code{"celda_G"}. |
|
540 |
-#' @param alpha Numeric. Concentration parameter for Theta. Adds a pseudocount |
|
541 |
-#' to each cell population in each sample. Default 1. Only used if |
|
542 |
-#' \code{model} is one of \code{"celda_CG"} or \code{"celda_C"}. |
|
543 |
-#' @param beta Numeric. Concentration parameter for Phi. Adds a pseudocount to |
|
544 |
-#' each feature module in each cell population. Default 1. |
|
545 |
-#' @param gamma Numeric. Concentration parameter for Eta. Adds a pseudocount to |
|
546 |
-#' the number of features in each module. Default 5. Only used if |
|
547 |
-#' \code{model} is one of \code{"celda_CG"} or \code{"celda_G"}. |
|
548 |
-#' @param delta Numeric. Concentration parameter for Psi. Adds a pseudocount to |
|
549 |
-#' each feature in each module. Default 1. Only used if |
|
550 |
-#' \code{model} is one of \code{"celda_CG"} or \code{"celda_G"}. |
|
551 |
-#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
|
552 |
-#' a default value of 12345 is used. If NULL, no calls to |
|
553 |
-#' \link[withr]{with_seed} are made. |
|
554 |
-#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object with |
|
555 |
-#' simulated count matrix stored in the "counts" assay slot. Function |
|
556 |
-#' parameter settings are stored in the \link[S4Vectors]{metadata} slot. For |
|
557 |
-#' \code{"celda_CG"} and \code{"celda_C"} models, |
|
558 |
-#' columns \code{celda_sample_label} and \code{celda_cell_cluster} in |
|
559 |
-#' \link[SummarizedExperiment]{colData} contain simulated sample labels and |
|
560 |
-#' cell population clusters. For \code{"celda_CG"} and \code{"celda_G"} |
|
561 |
-#' models, column \code{celda_feature_module} in |
|
562 |
-#' \link[SummarizedExperiment]{rowData} contains simulated gene modules. |
|
563 |
-#' @examples |
|
564 |
-#' sce <- simulateCells() |
|
565 |
-#' @export |
|
566 |
-simulateCells <- function( |
|
567 |
- model = c("celda_CG", "celda_C", "celda_G"), |
|
568 |
- S = 5, |
|
569 |
- CRange = c(50, 100), |
|
570 |
- NRange = c(500, 1000), |
|
571 |
- C = 100, |
|
572 |
- G = 100, |
|
573 |
- K = 5, |
|
574 |
- L = 10, |
|
575 |
- alpha = 1, |
|
576 |
- beta = 1, |
|
577 |
- gamma = 5, |
|
578 |
- delta = 1, |
|
579 |
- seed = 12345) { |
|
580 |
- |
|
581 |
- model <- match.arg(model) |
|
582 |
- |
|
583 |
- if (model == "celda_C") { |
|
584 |
- sce <- .simulateCellsMaincelda_C(model = model, |
|
585 |
- S = S, |
|
586 |
- CRange = CRange, |
|
587 |
- NRange = NRange, |
|
588 |
- G = G, |
|
589 |
- K = K, |
|
590 |
- alpha = alpha, |
|
591 |
- beta = beta, |
|
592 |
- seed = seed) |
|
593 |
- } else if (model == "celda_CG") { |
|
594 |
- sce <- .simulateCellsMaincelda_CG( |
|
595 |
- model = model, |
|
596 |
- S = S, |
|
597 |
- CRange = CRange, |
|
598 |
- NRange = NRange, |
|
599 |
- G = G, |
|
600 |
- K = K, |
|
601 |
- L = L, |
|
602 |
- alpha = alpha, |
|
603 |
- beta = beta, |
|
604 |
- gamma = gamma, |
|
605 |
- delta = delta, |
|
606 |
- seed = seed) |
|
607 |
- } else if (model == "celda_G") { |
|
608 |
- sce <- .simulateCellsMaincelda_G( |
|
609 |
- model = model, |
|
610 |
- C = C, |
|
611 |
- L = L, |
|
612 |
- NRange = NRange, |
|
613 |
- G = G, |
|
614 |
- beta = beta, |
|
615 |
- delta = delta, |
|
616 |
- gamma = gamma, |
|
617 |
- seed = seed) |
|
618 |
- } else { |
|
619 |
- stop("'model' must be one of 'celda_C', 'celda_G', or 'celda_CG'") |
|
620 |
- } |
|
621 |
- |
|
622 |
- return(sce) |
|
623 |
-} |
|
624 |
- |
|
625 |
- |
|
626 |
-#' @title Renders probability and relative expression heatmaps to visualize the |
|
627 |
-#' relationship between feature modules and cell populations. |
|
628 |
-#' @description It is often useful to visualize to what degree each feature |
|
629 |
-#' influences each cell cluster. This can also be useful for identifying |
|
630 |
-#' features which may be redundant or unassociated with cell clustering. |
|
631 |
-#' @param counts Integer matrix. Rows represent features and columns represent |
|
632 |
-#' cells. This matrix should be the same as the one used to generate |
|
633 |
-#' `celdaMod`. |
|
634 |
-#' @param celdaMod Celda object of class "celda_C" or "celda_CG". |
|
635 |
-#' @param ... Additional parameters. |
|
636 |
-#' @examples |
|
637 |
-#' data(celdaCGSim, celdaCGMod) |
|
638 |
-#' celdaProbabilityMap(celdaCGSim$counts, celdaCGMod) |
|
639 |
-#' @return A grob containing the specified plots |
|
640 |
-#' @export |
|
641 |
-setGeneric("celdaProbabilityMap", |
|
642 |
- signature = "celdaMod", |
|
643 |
- function(counts, celdaMod, ...) { |
|
644 |
- standardGeneric("celdaProbabilityMap") |
|
645 |
- } |
|
646 |
-) |
|
647 |
- |
|
648 |
- |
|
649 |
-#' @title Embeds cells in two dimensions using tSNE based on celda_CG results. |
|
650 |
-#' @param counts Integer matrix. Rows represent features and columns represent |
|
651 |
-#' cells. This matrix should be the same as the one used to generate |
|
652 |
-#' `celdaMod`. |
|
653 |
-#' @param celdaMod Celda object of class `celda_CG`. |
|
654 |
-#' @param maxCells Integer. Maximum number of cells to plot. Cells will be |
|
655 |
-#' randomly subsampled if ncol(counts) > maxCells. Larger numbers of cells |
|
656 |
-#' requires more memory. Default \code{25000}. |
|
657 |
-#' @param minClusterSize Integer. Do not subsample cell clusters below this |
|
658 |
-#' threshold. Default \code{100}. |
|
659 |
-#' @param initialDims integer. The number of dimensions that should be retained |
|
660 |
-#' in the initial PCA step. Default \code{20}. |
|
661 |
-#' @param modules Integer vector. Determines which features modules to use for |
|
662 |
-#' tSNE. If NULL, all modules will be used. Default NULL. |
|
663 |
-#' @param perplexity Numeric. Perplexity parameter for tSNE. Default \code{20}. |
|
664 |
-#' @param maxIter Integer. Maximum number of iterations in tSNE generation. |
|
665 |
-#' Default \code{2500}. |
|
666 |
-#' @param ... Additional parameters. |
|
667 |
-#' @return Numeric Matrix of dimension `ncol(counts)` x 2, colums representing |
|
668 |
-#' the "X" and "Y" coordinates in the data's t-SNE represetation. |
|
669 |
-#' @examples |
|
670 |
-#' data(celdaCGSim, celdaCGMod) |
|
671 |
-#' tsneRes <- celdaTsne(celdaCGSim$counts, celdaCGMod) |
|
672 |
-#' @export |
|
673 |
-setGeneric("celdaTsne", |
|
674 |
- signature = "celdaMod", |
|
675 |
- function(counts, |
|
676 |
- celdaMod, |
|
677 |
- maxCells = 25000, |
|
678 |
- minClusterSize = 100, |
|
679 |
- initialDims = 20, |
|
680 |
- modules = NULL, |
|
681 |
- perplexity = 20, |
|
682 |
- maxIter = 2500, |
|
683 |
- ...) { |
|
684 |
- # counts = processCounts(counts) |
|
685 |
- # compareCountMatrix(counts, celdaMod) |
|
686 |
- standardGeneric("celdaTsne") |
|
687 |
- } |
|
688 |
-) |
|
689 |
- |
|
690 |
- |
|
691 |
-#' @title Embeds cells in two dimensions using umap. |
|
692 |
-#' @param counts Integer matrix. Rows represent features and columns represent |
|
693 |
-#' cells. This matrix should be the same as the one used to generate |
|
694 |
-#' `celdaMod`. |
|
695 |
-#' @param celdaMod Celda object of class `celda_CG`. |
|
696 |
-#' @param maxCells Integer. Maximum number of cells to plot. Cells will be |
|
697 |
-#' randomly subsampled if ncol(counts) > maxCells. Larger numbers of cells |
|
698 |
-#' requires more memory. Default 25000. |
|
699 |
-#' @param minClusterSize Integer. Do not subsample cell clusters below this |
|
700 |
-#' threshold. Default 100. |
|
701 |
-#' @param modules Integer vector. Determines which features modules to use for |
|
702 |
-#' tSNE. If NULL, all modules will be used. Default NULL. |
|
703 |
-#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
|
704 |
-#' a default value of 12345 is used. If NULL, no calls to |
|
705 |
-#' \link[withr]{with_seed} are made. |
|
706 |
-#' the UMAP algorithm. |
|
707 |
-#' @param ... Additional parameters to `uwot::umap` |
|
708 |
-#' @return A two column matrix of UMAP coordinates#' @examples |
|
709 |
-#' data(celdaCGSim, celdaCGMod) |
|
710 |
-#' umapRes <- celdaUmap(celdaCGSim$counts, celdaCGMod) |
|
711 |
-#' @export |
|
712 |
-setGeneric("celdaUmap", |
|
713 |
- signature = "celdaMod", |
|
714 |
- function(counts, |
|
715 |
- celdaMod, |
|
716 |
- maxCells = NULL, |
|
717 |
- minClusterSize = 100, |
|
718 |
- modules = NULL, |
|
719 |
- seed = 12345, |
|
720 |
- ...) { |
|
721 |
- standardGeneric("celdaUmap") |
|
722 |
- } |
|
723 |
-) |
|
724 |
- |
|
725 |
- |
|
726 |
#' @title Obtain the gene module of a gene of interest
#' @description Reports, for each entry of a vector of gene names, the feature
#'  module it was assigned to by a celda_CG or celda_G model stored in
#'  \code{sce}. \code{feature} must match the rownames of \code{sce}.
#' @param sce A \linkS4class{SingleCellExperiment} object returned by
#'  \link{celda_G}, or \link{celda_CG}. Rows represent features and columns
#'  represent cells.
#' @param feature Character vector. Names of the features whose modules
#'  should be looked up. \code{feature} must match the rownames of \code{sce}.
#' @param exactMatch Logical. If \code{TRUE}, each entry of \code{feature} is
#'  compared for equality with the rownames of \code{sce}. Otherwise each
#'  entry is used as a pattern for \code{\link{grep}} and every matching
#'  rowname is looked up. Default \code{TRUE}.
#' @param ... Arguments passed on to the method selected for \code{sce}.
#' @return List with one entry per looked-up feature, named by feature. An
#'  entry holds the feature module of that feature, or a message string when
#'  no row of \code{sce} carries that name.
#' @export
setGeneric(
    name = "featureModuleLookup",
    def = function(sce, ...) {
        standardGeneric("featureModuleLookup")
    }
)
|
743 |
- |
|
744 |
- |
|
745 |
#' @examples
#' data(sceCeldaCG)
#' module <- featureModuleLookup(sce = sceCeldaCG,
#'  feature = c("Gene_1", "Gene_XXX"))
#' @export
#' @rdname featureModuleLookup
setMethod(
    f = "featureModuleLookup",
    signature = signature(sce = "SingleCellExperiment"),
    definition = function(sce,
        feature,
        exactMatch = TRUE) {

        # Pick the model-specific lookup helper from the model name recorded
        # in `sce`; only models that define feature modules are accepted.
        modelName <- celdaModel(sce)
        if (modelName == "celda_CG") {
            lookupFun <- .featureModuleLookupCG
        } else if (modelName == "celda_G") {
            lookupFun <- .featureModuleLookupG
        } else {
            stop("S4Vectors::metadata(sce)$celda_parameters$model must be",
                " one of 'celda_G', or 'celda_CG'")
        }

        featureList <- lookupFun(sce = sce, feature = feature,
            exactMatch = exactMatch)
        return(featureList)
    }
)
|
769 |
- |
|
770 |
- |
|
771 |
# Look up the feature module of each requested feature for a celda_CG `sce`.
#
# Arguments:
#   sce         Object providing `rownames()` (feature names) and
#               `celdaModules()` (one module label per row).
#   feature     Character vector of feature names, or of regular expressions
#               when `exactMatch` is not TRUE.
#   exactMatch  Logical. TRUE compares names for equality; anything else
#               expands every entry of `feature` to all rownames matched by
#               `grep()` before the lookup.
#
# Returns a list named by the looked-up feature names. Each entry holds the
# module label(s) of the rows carrying that name, or a message string when no
# row carries it. Empty input, or patterns that match nothing, give an empty
# list.
.featureModuleLookupCG <- function(sce,
    feature,
    exactMatch) {

    featureNames <- rownames(sce)

    if (!isTRUE(exactMatch)) {
        # Expand each pattern to the rownames it matches. unlist() collapses
        # patterns without any match, so they contribute nothing.
        feature <- unlist(
            lapply(feature, grep, x = featureNames, value = TRUE),
            use.names = FALSE
        )
    }

    # Iterate over the elements themselves: seq(length(feature)) is c(1, 0)
    # for empty input, which made the lookup fail with "argument is of length
    # zero" whenever nothing matched.
    moduleList <- lapply(feature, function(featureName) {
        if (featureName %in% featureNames) {
            # All rows with this name are returned (same as the celda_G
            # helper) instead of silently truncating to the first one.
            celdaModules(sce)[which(featureNames == featureName)]
        } else {
            paste0(
                "No feature was identified matching '",
                featureName,
                "'."
            )
        }
    })
    names(moduleList) <- feature
    return(moduleList)
}
|
801 |
- |
|
802 |
- |
|
803 |
# Look up the feature module of each requested feature for a celda_G `sce`.
#
# Arguments:
#   sce         Object providing `rownames()` (feature names) and
#               `celdaModules()` (one module label per row).
#   feature     Character vector of feature names, or of regular expressions
#               when `exactMatch` is not TRUE.
#   exactMatch  Logical. TRUE compares names for equality; anything else
#               expands every entry of `feature` to all rownames matched by
#               `grep()` before the lookup.
#
# Returns a list named by the looked-up feature names. Each entry holds the
# module label(s) of the rows carrying that name, or a message string when no
# row carries it. Empty input, or patterns that match nothing, give an empty
# list.
.featureModuleLookupG <- function(sce, feature, exactMatch) {
    featureNames <- rownames(sce)

    if (!isTRUE(exactMatch)) {
        # Replace the patterns by every rowname they match.
        feature <- unlist(
            lapply(
                feature,
                function(pattern) {
                    grep(pattern, featureNames, value = TRUE)
                }
            ),
            use.names = FALSE
        )
    }

    # seq_along() is empty for empty input, whereas seq(length(feature))
    # produced c(1, 0) and made the lookup fail when nothing matched.
    featList <- lapply(
        seq_along(feature),
        function(i) {
            if (feature[i] %in% featureNames) {
                return(celdaModules(sce)[which(featureNames == feature[i])])
            } else {
                return(paste0(
                    "No feature was identified matching '",
                    feature[i],
                    "'."
                ))
            }
        }
    )
    names(featList) <- feature
    return(featList)
}
|
831 |
- |
|
832 |
- |
|
833 |
#' @title Uniform Manifold Approximation and Projection (UMAP) dimension
#'  reduction for celda \code{sce} object
#' @description Computes a two-dimensional embedding of the cells with
#'  \link[uwot]{umap}, guided by a celda model. For celda_C \code{sce}
#'  objects, the number of features is first reduced by PCA on the normalized
#'  counts. For celda_CG \code{sce} objects, no PCA is used; UMAP is instead
#'  run on the square-root transformed module probabilities.
#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object
#'  returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
#'  slot to use. Default "counts".
#' @param maxCells Integer. Maximum number of cells to plot. If
#'  \code{ncol(sce) > maxCells}, cells are randomly subsampled. Larger numbers
#'  of cells require more memory. If NULL, no subsampling will be performed.
#'  Default NULL.
#' @param minClusterSize Integer. Cell clusters are not subsampled below this
#'  threshold. Default 100.
#' @param modules Integer vector. The feature modules to use for UMAP. If
#'  NULL, all modules will be used. Default NULL.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#'  a default value of 12345 is used. If NULL, no calls to
#'  \link[withr]{with_seed} are made.
#' @param nNeighbors The size of the local neighborhood used for manifold
#'  approximation. Larger values result in more global views of the manifold,
#'  while smaller values result in more local data being preserved.
#'  Default 30. See \link[uwot]{umap} for more information.
#' @param minDist The effective minimum distance between embedded points.
#'  Smaller values will result in a more clustered/clumped embedding where
#'  nearby points on the manifold are drawn closer together, while larger
#'  values will result in a more even dispersal of points. Default 0.75.
#'  See \link[uwot]{umap} for more information.
#' @param spread The effective scale of embedded points. In combination with
#'  \code{minDist}, this determines how clustered/clumped the embedded points
#'  are. Default 1. See \link[uwot]{umap} for more information.
#' @param pca Logical. Whether to perform dimensionality reduction with PCA
#'  before UMAP. Only works for celda_C \code{sce} objects.
#' @param initialDims Integer. Number of dimensions from PCA to use as
#'  input in UMAP. Default 50. Only works for celda_C \code{sce} objects.
#' @param cores Number of threads to use. Default 1.
#' @param ... Additional parameters to pass to \link[uwot]{umap}.
#' @examples
#' data(sceCeldaCG)
#' umapRes <- celdaUmap(sceCeldaCG)
#' @return \code{sce} with UMAP coordinates
#'  (columns "celda_UMAP1" & "celda_UMAP2") added to
#'  \code{\link[SummarizedExperiment]{colData}(sce)}.
#' @export
setGeneric(
    name = "celdaUmap",
    def = function(sce, ...) {
        standardGeneric("celdaUmap")
    }
)
|
888 |
- |
|
889 |
- |
|
890 |
-#' @rdname celdaUmap |
|
891 |
-#' @export |
|
892 |
-setMethod("celdaUmap", signature(sce = "SingleCellExperiment"), |
|
893 |