Browse code

sce vignettes

zhewa authored on 23/05/2020 07:26:57
Showing 1 changed files
1 1
deleted file mode 100755
... ...
@@ -1,1154 +0,0 @@
1
-#' @title Celda models
2
-#' @description List of available Celda models with corresponding descriptions.
3
-#' @export
4
-#' @examples
5
-#' celda()
6
-#' @return None
7
-celda <- function() {
8
-    message(
9
-        "celda_C: Clusters the columns of a count matrix containing",
10
-        " single-cell data into K subpopulations."
11
-    )
12
-    message(
13
-        "celda_G: Clusters the rows of a count matrix containing",
14
-        " single-cell data into L modules."
15
-    )
16
-    message(
17
-        "celda_CG: Clusters the rows and columns of a count matrix",
18
-        " containing single-cell data into L modules and K subpopulations,",
19
-        " respectively."
20
-    )
21
-    message(
22
-        "celdaGridSearch: Run Celda with different combinations of",
23
-        " parameters and multiple chains in parallel."
24
-    )
25
-}
26
-
27
-
28
-#' @title Get log-likelihood history
29
-#' @description Retrieves the complete log-likelihood from all iterations of
30
-#'  Gibbs sampling used to generate a celdaModel.
31
-#' @param celdaMod celdaModel. Options available in `celda::availableModels`.
32
-#' @return Numeric. The log-likelihood at each step of Gibbs sampling used to
33
-#'  generate the model.
34
-#' @examples
35
-#' data(celdaCGMod)
36
-#' logLikelihoodHistory(celdaCGMod)
37
-#' @export
38
-setGeneric(
39
-  "logLikelihoodHistory",
40
-  function(celdaMod) {
41
-    standardGeneric("logLikelihoodHistory")
42
-  }
43
-)
44
-#' @title Get log-likelihood history
45
-#' @description Retrieves the complete log-likelihood from all iterations of
46
-#'  Gibbs sampling used to generate a celdaModel.
47
-#' @param celdaMod celdaModel. Options available in `celda::availableModels`.
48
-#' @return Numeric. The log-likelihood at each step of Gibbs sampling used to
49
-#'  generate the model.
50
-#' @examples
51
-#' data(celdaCGMod)
52
-#' logLikelihoodHistory(celdaCGMod)
53
-#' @export
54
-setMethod("logLikelihoodHistory",
55
-  signature = c(celdaMod = "celdaModel"),
56
-  function(celdaMod) {
57
-    celdaMod@completeLogLik
58
-  }
59
-)
60
-
61
-
62
-#' @title Get the log-likelihood
63
-#' @description Retrieves the final log-likelihood from all iterations of Gibbs
64
-#'  sampling used to generate a celdaModel.
65
-#' @return Numeric. The log-likelihood at the final step of Gibbs sampling used
66
-#'  to generate the model.
67
-#' @param celdaMod A celdaModel object of class celda_C, celda_G, or celda_CG.
68
-#' @examples
69
-#' data(celdaCGMod)
70
-#' bestLogLikelihood(celdaCGMod)
71
-#' @export
72
-setGeneric(
73
-  "bestLogLikelihood",
74
-  function(celdaMod) {
75
-    standardGeneric("bestLogLikelihood")
76
-  }
77
-)
78
-#' @title Get the log-likelihood
79
-#' @description Retrieves the final log-likelihood from all iterations of Gibbs
80
-#'  sampling used to generate a celdaModel.
81
-#' @param celdaMod A celdaModel object of class celda_C, celda_G, or celda_CG.
82
-#' @return Numeric. The log-likelihood at the final step of Gibbs sampling used
83
-#'  to generate the model.
84
-#' @examples
85
-#' data(celdaCGMod)
86
-#' bestLogLikelihood(celdaCGMod)
87
-#' @export
88
-setMethod("bestLogLikelihood",
89
-  signature = c(celdaMod = "celdaModel"),
90
-  function(celdaMod) {
91
-    celdaMod@finalLogLik
92
-  }
93
-)
94
-
95
-
96
-setClass("celda_C",
97
-  representation(sampleLabel = "factor"),
98
-  contains = "celdaModel"
99
-)
100
-
101
-
102
-#' @title Get celda model from a celda
103
-#'  \link[SingleCellExperiment]{SingleCellExperiment} object
104
-#' @description Return the celda model for \code{sce} returned by
105
-#'  \link{celda_C}, \link{celda_G} or \link{celda_CG}.
106
-#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object
107
-#'  returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
108
-#' @return Character. The celda model. Can be one of "celda_C", "celda_G", or
109
-#'  "celda_CG".
110
-#' @examples
111
-#' data(sceCeldaCG)
112
-#' celdaModel(sceCeldaCG)
113
-#' @export
114
-setGeneric("celdaModel",
115
-    function(sce) {
116
-        standardGeneric("celdaModel")
117
-    })
118
-#' @rdname celdaModel
119
-#' @export
120
-setMethod("celdaModel",
121
-    signature(sce = "SingleCellExperiment"),
122
-    function(sce) {
123
-        tryCatch(
124
-            if (S4Vectors::metadata(sce)$celda_parameters$model %in%
125
-                    c("celda_C", "celda_G", "celda_CG")) {
126
-                return(S4Vectors::metadata(sce)$celda_parameters$model)
127
-            } else {
128
-                stop("S4Vectors::metadata(sce)$celda_parameters$model must be",
129
-                    " one of 'celda_C', 'celda_G', or 'celda_CG'")
130
-            },
131
-            error = function(e) {
132
-                message("S4Vectors::metadata(sce)$celda_parameters$model must",
133
-                    " exist! Try running celda model (celda_C, celda_CG, or",
134
-                    " celda_G) first.")
135
-                stop(e)
136
-            })
137
-    })
138
-setClass("celda_G", contains = "celdaModel")
139
-
140
-setClass("celda_CG", contains = c("celda_C", "celda_G"))
141
-
142
-
143
-#' @title Get perplexity for every model in a celdaList
144
-#' @description Returns perplexity for each model in a celdaList as calculated
145
-#'  by `perplexity()`.
146
-#' @param celdaList An object of class celdaList.
147
-#' @return The contents of the `perplexity` slot of the provided celdaList,
148
-#'  i.e. the perplexity values stored for its models.
149
-#' @examples
150
-#' data(celdaCGGridSearchRes)
151
-#' celdaCGGridModelPerplexities <- celdaPerplexity(celdaCGGridSearchRes)
152
-#' @export
153
-setGeneric(
154
-  "celdaPerplexity",
155
-  function(celdaList) {
156
-    standardGeneric("celdaPerplexity")
157
-  }
158
-)
159
-#' @title Get perplexity for every model in a celdaList
160
-#' @description Returns perplexity for each model in a celdaList as calculated
161
-#'  by `perplexity()`.
162
-#' @param celdaList An object of class celdaList.
163
-#' @return The contents of the `perplexity` slot of the provided celdaList,
164
-#'  i.e. the perplexity values stored for its models.
165
-#' @examples
166
-#' data(celdaCGGridSearchRes)
167
-#' celdaCGGridModelPerplexities <- celdaPerplexity(celdaCGGridSearchRes)
168
-#' @export
169
-setMethod("celdaPerplexity",
170
-  signature = c(celdaList = "celdaList"),
171
-  function(celdaList) {
172
-    celdaList@perplexity
173
-  }
174
-)
175
-
176
-
177
-#' @title Append two celdaList objects
178
-#' @description Returns a single celdaList representing the combination of two
179
-#'  provided celdaList objects.
180
-#' @return A celdaList object. This object contains all resList entries and
181
-#'  runParam records from both lists.
182
-#' @param list1 A celdaList object
183
-#' @param list2 A celdaList object to be joined with list1
184
-#' @examples
185
-#' data(celdaCGGridSearchRes)
186
-#' appendedList <- appendCeldaList(
187
-#'   celdaCGGridSearchRes,
188
-#'   celdaCGGridSearchRes
189
-#' )
190
-#' @importFrom methods new
191
-#' @export
192
-appendCeldaList <- function(list1, list2) {
193
-  if (!is.element("celdaList", class(list1)) |
194
-    !is.element("celdaList", class(list2))) {
195
-    stop("Both parameters to appendCeldaList must be of class celdaList.")
196
-  }
197
-  if (!(countChecksum(list1) == countChecksum(list2))) {
198
-    warning(
199
-      "Provided lists have different countChecksums and may have",
200
-      " been generated from different count matrices. Using checksum",
201
-      " from first list..."
202
-    )
203
-  }
204
-  newList <- methods::new(
205
-    "celdaList",
206
-    runParams = rbind(runParams(list1), runParams(list2)),
207
-    resList = c(resList(list1), resList(list2)),
208
-    countChecksum = countChecksum(list1),
209
-    perplexity = matrix(nrow = 0, ncol = 0)
210
-  )
211
-  return(newList)
212
-}
213
-
214
-
215
-#' @title Get the MD5 hash of the count matrix from the celdaList
216
-#' @description Returns the MD5 hash of the count matrix used to generate the
217
-#'  celdaList.
218
-#' @param celdaList An object of class celdaList.
219
-#' @return A character string of length 32 containing the MD5 digest of
220
-#'  the count matrix.
221
-#' @examples
222
-#' data(celdaCGGridSearchRes)
223
-#' countChecksum <- countChecksum(celdaCGGridSearchRes)
224
-#' @export
225
-setGeneric(
226
-  "countChecksum",
227
-  function(celdaList) {
228
-    standardGeneric("countChecksum")
229
-  }
230
-)
231
-#' @title Get the MD5 hash of the count matrix from the celdaList
232
-#' @description Returns the MD5 hash of the count matrix used to generate the
233
-#'  celdaList.
234
-#' @param celdaList An object of class celdaList.
235
-#' @return A character string of length 32 containing the MD5 digest of
236
-#'  the count matrix.
237
-#' @examples
238
-#' data(celdaCGGridSearchRes)
239
-#' countChecksum <- countChecksum(celdaCGGridSearchRes)
240
-#' @export
241
-setMethod("countChecksum",
242
-  signature = c(celdaList = "celdaList"),
243
-  function(celdaList) {
244
-    celdaList@countChecksum
245
-  }
246
-)
247
-
248
-###############################################################################
249
-# Generics
250
-###############################################################################
251
-
252
-
253
-#' @title Plot celda Heatmap
254
-#' @description Render a stylable heatmap of count data based on celda
255
-#'  clustering results.
256
-#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object
257
-#'  returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
258
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
259
-#'  slot to use. Default "counts".
260
-#' @param featureIx Integer vector. Select features for display in heatmap. If
261
-#'  NULL, no subsetting will be performed. Default NULL. \strong{Only used for
262
-#'  \code{sce} containing celda_C model result returned by \link{celda_C}.}
263
-#' @param nfeatures Integer. Maximum number of features to select for each
264
-#'  gene module. Default 25. \strong{Only used for \code{sce} containing
265
-#'  celda_CG or celda_G model results returned by \link{celda_CG} or
266
-#'  \link{celda_G}.}
267
-#' @param ... Additional parameters passed to \link{plotHeatmap}.
268
-#' @seealso `celdaTsne()` for generating 2-dimensional tSNE coordinates
269
-#' @examples
270
-#' data(sceCeldaCG)
271
-#' celdaHeatmap(sceCeldaCG)
272
-#' @return list A list containing dendrogram information and the heatmap grob
273
-#' @export
274
-setGeneric("celdaHeatmap",
275
-    function(sce, ...) {
276
-        standardGeneric("celdaHeatmap")
277
-    })
278
-
279
-
280
-#' @export
281
-#' @rdname celdaHeatmap
282
-setMethod("celdaHeatmap", signature(sce = "SingleCellExperiment"),
283
-    function(sce, useAssay = "counts", featureIx = NULL, nfeatures = 25, ...) {
284
-        if (celdaModel(sce) == "celda_C") {
285
-            g <- .celdaHeatmapCelda_C(sce = sce,
286
-                useAssay = useAssay,
287
-                featureIx = featureIx,
288
-                ...)
289
-            return(g)
290
-        } else if (celdaModel(sce) == "celda_CG") {
291
-            g <- .celdaHeatmapCelda_CG(sce = sce,
292
-                useAssay = useAssay,
293
-                nfeatures = nfeatures,
294
-                ...)
295
-            return(g)
296
-        } else if (celdaModel(sce) == "celda_G") {
297
-            g <- .celdaHeatmapCelda_G(sce = sce,
298
-                useAssay = useAssay,
299
-                nfeatures = nfeatures,
300
-                ...)
301
-            return(g)
302
-        } else {
303
-            stop("S4Vectors::metadata(sce)$celda_parameters$model must be",
304
-                " one of 'celda_C', 'celda_G', or 'celda_CG'")
305
-        }
306
-    })
307
-
308
-
309
-#' @title Calculate the Log-likelihood of a celda model
310
-#' @description Calculate the log-likelihood for cell population
311
-#'  and feature module cluster assignments on the count matrix, per celda model.
312
-#' @param x A \linkS4class{SingleCellExperiment} object returned by
313
-#'  \link{celda_C}, \link{celda_G}, or \link{celda_CG}, with the matrix
314
-#'  located in the \code{useAssay} assay slot.
315
-#'  Rows represent features and columns represent cells.
316
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
317
-#'  slot to use. Default "counts".
318
-#' @param celdaMod celda model object. Ignored if \code{x} is a
319
-#'  \linkS4class{SingleCellExperiment} object.
320
-#' @return The log-likelihood of the cluster assignment for the
321
-#'  provided \linkS4class{SingleCellExperiment}.
322
-#' @seealso `celda_C()` for clustering cells
323
-#' @export
324
-setGeneric("logLikelihood",
325
-    function(x, ...) {
326
-        standardGeneric("logLikelihood")
327
-    })
328
-
329
-
330
-#' @rdname logLikelihood
331
-#' @examples
332
-#' data(sceCeldaC, sceCeldaCG)
333
-#' loglikC <- logLikelihood(sceCeldaC)
334
-#' loglikCG <- logLikelihood(sceCeldaCG)
335
-#' @export
336
-setMethod("logLikelihood", signature(x = "SingleCellExperiment"),
337
-    function(x, useAssay = "counts") {
338
-
339
-        counts <- SummarizedExperiment::assay(x, i = useAssay)
340
-        sampleLabel <- sampleLabel(x)
341
-        z <- celdaClusters(x)
342
-        y <- celdaModules(x)
343
-        K <- S4Vectors::metadata(x)$celda_parameters$K
344
-        L <- S4Vectors::metadata(x)$celda_parameters$L
345
-        alpha <- S4Vectors::metadata(x)$celda_parameters$alpha
346
-        beta <- S4Vectors::metadata(x)$celda_parameters$beta
347
-        delta = S4Vectors::metadata(x)$celda_parameters$delta
348
-        gamma = S4Vectors::metadata(x)$celda_parameters$gamma
349
-
350
-        if (celdaModel(x) == "celda_C") {
351
-            ll <- .logLikelihoodcelda_C(counts = counts,
352
-                sampleLabel = sampleLabel,
353
-                z = z,
354
-                K = K,
355
-                alpha = alpha,
356
-                beta = beta)
357
-        } else if (celdaModel(x) == "celda_CG") {
358
-            ll <- .logLikelihoodcelda_CG(counts = counts,
359
-                sampleLabel = sampleLabel,
360
-                z = z,
361
-                y = y,
362
-                K = K,
363
-                L = L,
364
-                alpha = alpha,
365
-                beta = beta,
366
-                delta = delta,
367
-                gamma = gamma)
368
-        } else if (celdaModel(x) == "celda_G") {
369
-            ll <- .logLikelihoodcelda_G(counts = counts,
370
-                y = y,
371
-                L = L,
372
-                beta = beta,
373
-                delta = delta,
374
-                gamma = gamma)
375
-        } else {
376
-            stop("S4Vectors::metadata(x)$celda_parameters$model must be",
377
-                " one of 'celda_C', 'celda_G', or 'celda_CG'!")
378
-        }
379
-        return(ll)
380
-    }
381
-)
382
-
383
-
384
-#' @rdname logLikelihood
385
-#' @export
386
-setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_C"),
387
-    function(x, celdaMod) {
388
-        sampleLabel <- sampleLabel(celdaMod)
389
-        z = celdaClusters(celdaMod)$z
390
-        K = params(celdaMod)$K
391
-        alpha = params(celdaMod)$alpha
392
-        beta = params(celdaMod)$beta
393
-
394
-        ll <- .logLikelihoodcelda_C(counts = x,
395
-            sampleLabel = sampleLabel,
396
-            z = z,
397
-            K = K,
398
-            alpha = alpha,
399
-            beta = beta)
400
-        return(ll)
401
-    }
402
-)
403
-
404
-
405
-#' @rdname logLikelihood
406
-#' @export
407
-setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_G"),
408
-    function(x, celdaMod) {
409
-        y <- celdaClusters(celdaMod)$y
410
-        L <- params(celdaMod)$L
411
-        beta = params(celdaMod)$beta
412
-        delta = params(celdaMod)$delta
413
-        gamma = params(celdaMod)$gamma
414
-
415
-        ll <- .logLikelihoodcelda_G(counts = x,
416
-            y = y,
417
-            L = L,
418
-            beta = beta,
419
-            delta = delta,
420
-            gamma = gamma)
421
-        return(ll)
422
-    }
423
-)
424
-
425
-
426
-#' @rdname logLikelihood
427
-#' @export
428
-setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_CG"),
429
-    function(x, celdaMod) {
430
-        sampleLabel <- sampleLabel(celdaMod)
431
-        z <- celdaClusters(celdaMod)$z
432
-        y <- celdaClusters(celdaMod)$y
433
-        K <- params(celdaMod)$K
434
-        L <- params(celdaMod)$L
435
-        alpha = params(celdaMod)$alpha
436
-        beta = params(celdaMod)$beta
437
-        delta = params(celdaMod)$delta
438
-        gamma = params(celdaMod)$gamma
439
-
440
-        ll <- .logLikelihoodcelda_CG(counts = x,
441
-            sampleLabel = sampleLabel,
442
-            z = z,
443
-            y = y,
444
-            K = K,
445
-            L = L,
446
-            alpha = alpha,
447
-            beta = beta,
448
-            delta = delta,
449
-            gamma = gamma)
450
-        return(ll)
451
-    }
452
-)
453
-
454
-
455
-#' @title Get the conditional probabilities of cells in subpopulations from a
456
-#'  celda model
457
-#' @description Calculate the conditional probability of each cell belonging to
458
-#'  each subpopulation given all other cell cluster assignments and/or
459
-#'  each feature belonging to each module given all other feature cluster
460
-#'  assignments in a celda model.
461
-#' @param sce A \linkS4class{SingleCellExperiment} object returned by
462
-#'  \link{celda_C}, \link{celda_G}, or \link{celda_CG}, with the matrix
463
-#'  located in the \code{useAssay} assay slot.
464
-#'  Rows represent features and columns represent cells.
465
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
466
-#'  slot to use. Default "counts".
467
-#' @param log Logical. If \code{FALSE}, then the normalized conditional
468
-#'  probabilities will be returned. If \code{TRUE}, then the unnormalized log
469
-#'  probabilities will be returned. Default \code{FALSE}.
470
-#' @examples
471
-#' data(sceCeldaCG)
472
-#' clusterProb <- clusterProbability(sceCeldaCG, log = TRUE)
473
-#' @return A list containing a matrix for the conditional cell subpopulation
474
-#'  cluster and/or feature module probabilities.
475
-#' @export
476
-setGeneric("clusterProbability",
477
-    function(sce, ...) {
478
-        standardGeneric("clusterProbability")
479
-    })
480
-
481
-
482
-#' @seealso `celda_C()` for clustering cells
483
-#' @examples
484
-#' data(sceCeldaC)
485
-#' clusterProb <- clusterProbability(sceCeldaC)
486
-#' @rdname clusterProbability
487
-#' @export
488
-setMethod("clusterProbability", signature(sce = "SingleCellExperiment"),
489
-    function(sce, useAssay = "counts", log = FALSE) {
490
-
491
-        if (celdaModel(sce) == "celda_C") {
492
-            cp <- .clusterProbabilityCeldaC(sce = sce,
493
-                useAssay = useAssay,
494
-                log = log)
495
-            return(cp)
496
-        } else if (celdaModel(sce) == "celda_CG") {
497
-            cp <- .clusterProbabilityCeldaCG(sce = sce,
498
-                useAssay = useAssay,
499
-                log = log)
500
-            return(cp)
501
-        } else if (celdaModel(sce) == "celda_G") {
502
-            cp <- .clusterProbabilityCeldaG(sce = sce,
503
-                useAssay = useAssay,
504
-                log = log)
505
-            return(cp)
506
-        } else {
507
-            stop("S4Vectors::metadata(sce)$celda_parameters$model must be",
508
-                " one of 'celda_C', 'celda_G', or 'celda_CG'!")
509
-        }
510
-    })
511
-
512
-
513
-#' @title Simulate count data from the celda generative models.
514
-#' @description This function generates a \linkS4class{SingleCellExperiment}
515
-#'  containing a simulated counts matrix in the \code{"counts"} assay slot, as
516
-#'  well as various parameters used in the simulation which can be
517
-#'  useful for running celda and are stored in \code{metadata} slot. The user
518
-#'  must provide the desired model (one of celda_C, celda_G, celda_CG) as well
519
-#'  as any desired tuning parameters for that model's simulation functions
520
-#'  as detailed below.
521
-#' @param model Character. Options available in \code{celda::availableModels}.
522
-#'  Can be one of \code{"celda_CG"}, \code{"celda_C"}, or \code{"celda_G"}.
523
-#'  Default \code{"celda_CG"}.
524
-#' @param S Integer. Number of samples to simulate. Default 5. Only used if
525
-#'  \code{model} is one of \code{"celda_CG"} or \code{"celda_C"}.
526
-#' @param CRange Integer vector. A vector of length 2 that specifies the lower
527
-#'  and upper bounds of the number of cells to be generated in each sample.
528
-#'  Default c(50, 100). Only used if
529
-#'  \code{model} is one of \code{"celda_CG"} or \code{"celda_C"}.
530
-#' @param NRange Integer vector. A vector of length 2 that specifies the lower
531
-#'  and upper bounds of the number of counts generated for each cell. Default
532
-#'  c(500, 1000).
533
-#' @param C Integer. Number of cells to simulate. Default 100. Only used if
534
-#'  \code{model} is \code{"celda_G"}.
535
-#' @param G Integer. The total number of features to be simulated. Default 100.
536
-#' @param K Integer. Number of cell populations. Default 5. Only used if
537
-#'  \code{model} is one of \code{"celda_CG"} or \code{"celda_C"}.
538
-#' @param L Integer. Number of feature modules. Default 10. Only used if
539
-#'  \code{model} is one of \code{"celda_CG"} or \code{"celda_G"}.
540
-#' @param alpha Numeric. Concentration parameter for Theta. Adds a pseudocount
541
-#'  to each cell population in each sample. Default 1. Only used if
542
-#'  \code{model} is one of \code{"celda_CG"} or \code{"celda_C"}.
543
-#' @param beta Numeric. Concentration parameter for Phi. Adds a pseudocount to
544
-#'  each feature module in each cell population. Default 1.
545
-#' @param gamma Numeric. Concentration parameter for Eta. Adds a pseudocount to
546
-#'  the number of features in each module. Default 5. Only used if
547
-#'  \code{model} is one of \code{"celda_CG"} or \code{"celda_G"}.
548
-#' @param delta Numeric. Concentration parameter for Psi. Adds a pseudocount to
549
-#'  each feature in each module. Default 1. Only used if
550
-#'  \code{model} is one of \code{"celda_CG"} or \code{"celda_G"}.
551
-#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
552
-#'  a default value of 12345 is used. If NULL, no calls to
553
-#'  \link[withr]{with_seed} are made.
554
-#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object with
555
-#'  simulated count matrix stored in the "counts" assay slot. Function
556
-#'  parameter settings are stored in the \link[S4Vectors]{metadata} slot. For
557
-#'  \code{"celda_CG"} and \code{"celda_C"} models,
558
-#'  columns \code{celda_sample_label} and \code{celda_cell_cluster} in
559
-#'  \link[SummarizedExperiment]{colData} contain simulated sample labels and
560
-#'  cell population clusters. For \code{"celda_CG"} and \code{"celda_G"}
561
-#'  models, column \code{celda_feature_module} in
562
-#'  \link[SummarizedExperiment]{rowData} contains simulated gene modules.
563
-#' @examples
564
-#' sce <- simulateCells()
565
-#' @export
566
-simulateCells <- function(
567
-    model = c("celda_CG", "celda_C", "celda_G"),
568
-    S = 5,
569
-    CRange = c(50, 100),
570
-    NRange = c(500, 1000),
571
-    C = 100,
572
-    G = 100,
573
-    K = 5,
574
-    L = 10,
575
-    alpha = 1,
576
-    beta = 1,
577
-    gamma = 5,
578
-    delta = 1,
579
-    seed = 12345) {
580
-
581
-    model <- match.arg(model)
582
-
583
-    if (model == "celda_C") {
584
-        sce <- .simulateCellsMaincelda_C(model = model,
585
-            S = S,
586
-            CRange = CRange,
587
-            NRange = NRange,
588
-            G = G,
589
-            K = K,
590
-            alpha = alpha,
591
-            beta = beta,
592
-            seed = seed)
593
-    } else if (model == "celda_CG") {
594
-        sce <- .simulateCellsMaincelda_CG(
595
-            model = model,
596
-            S = S,
597
-            CRange = CRange,
598
-            NRange = NRange,
599
-            G = G,
600
-            K = K,
601
-            L = L,
602
-            alpha = alpha,
603
-            beta = beta,
604
-            gamma = gamma,
605
-            delta = delta,
606
-            seed = seed)
607
-    } else if (model == "celda_G") {
608
-        sce <- .simulateCellsMaincelda_G(
609
-            model = model,
610
-            C = C,
611
-            L = L,
612
-            NRange = NRange,
613
-            G = G,
614
-            beta = beta,
615
-            delta = delta,
616
-            gamma = gamma,
617
-            seed = seed)
618
-    } else {
619
-        stop("'model' must be one of 'celda_C', 'celda_G', or 'celda_CG'")
620
-    }
621
-
622
-    return(sce)
623
-}
624
-
625
-
626
-#' @title Renders probability and relative expression heatmaps to visualize the
627
-#'  relationship between feature modules and cell populations.
628
-#' @description It is often useful to visualize to what degree each feature
629
-#' influences each cell cluster. This can also be useful for identifying
630
-#' features which may be redundant or unassociated with cell clustering.
631
-#' @param counts Integer matrix. Rows represent features and columns represent
632
-#'  cells. This matrix should be the same as the one used to generate
633
-#'  `celdaMod`.
634
-#' @param celdaMod Celda object of class "celda_C" or "celda_CG".
635
-#' @param ... Additional parameters.
636
-#' @examples
637
-#' data(celdaCGSim, celdaCGMod)
638
-#' celdaProbabilityMap(celdaCGSim$counts, celdaCGMod)
639
-#' @return A grob containing the specified plots
640
-#' @export
641
-setGeneric("celdaProbabilityMap",
642
-  signature = "celdaMod",
643
-  function(counts, celdaMod, ...) {
644
-    standardGeneric("celdaProbabilityMap")
645
-  }
646
-)
647
-
648
-
649
-#' @title Embeds cells in two dimensions using tSNE based on celda_CG results.
650
-#' @param counts Integer matrix. Rows represent features and columns represent
651
-#'  cells. This matrix should be the same as the one used to generate
652
-#'  `celdaMod`.
653
-#' @param celdaMod Celda object of class `celda_CG`.
654
-#' @param maxCells Integer. Maximum number of cells to plot. Cells will be
655
-#'  randomly subsampled if ncol(counts) > maxCells. Larger numbers of cells
656
-#'  require more memory. Default \code{25000}.
657
-#' @param minClusterSize Integer. Do not subsample cell clusters below this
658
-#'  threshold. Default \code{100}.
659
-#' @param initialDims Integer. The number of dimensions that should be retained
660
-#'  in the initial PCA step. Default \code{20}.
661
-#' @param modules Integer vector. Determines which feature modules to use for
662
-#'  tSNE. If NULL, all modules will be used. Default NULL.
663
-#' @param perplexity Numeric. Perplexity parameter for tSNE. Default \code{20}.
664
-#' @param maxIter Integer. Maximum number of iterations in tSNE generation.
665
-#'  Default \code{2500}.
666
-#' @param ... Additional parameters.
667
-#' @return Numeric Matrix of dimension `ncol(counts)` x 2, columns representing
668
-#'  the "X" and "Y" coordinates in the data's t-SNE representation.
669
-#' @examples
670
-#' data(celdaCGSim, celdaCGMod)
671
-#' tsneRes <- celdaTsne(celdaCGSim$counts, celdaCGMod)
672
-#' @export
673
-setGeneric("celdaTsne",
674
-  signature = "celdaMod",
675
-  function(counts,
676
-           celdaMod,
677
-           maxCells = 25000,
678
-           minClusterSize = 100,
679
-           initialDims = 20,
680
-           modules = NULL,
681
-           perplexity = 20,
682
-           maxIter = 2500,
683
-           ...) {
684
-    # counts = processCounts(counts)
685
-    # compareCountMatrix(counts, celdaMod)
686
-    standardGeneric("celdaTsne")
687
-  }
688
-)
689
-
690
-
691
-#' @title Embeds cells in two dimensions using umap.
692
-#' @param counts Integer matrix. Rows represent features and columns represent
693
-#'  cells. This matrix should be the same as the one used to generate
694
-#'  `celdaMod`.
695
-#' @param celdaMod Celda object of class `celda_CG`.
696
-#' @param maxCells Integer. Maximum number of cells to plot. Cells will be
697
-#'  randomly subsampled if ncol(counts) > maxCells. Larger numbers of cells
698
-#'  require more memory. Default 25000.
699
-#' @param minClusterSize Integer. Do not subsample cell clusters below this
700
-#'  threshold. Default 100.
701
-#' @param modules Integer vector. Determines which feature modules to use for
702
-#'  UMAP. If NULL, all modules will be used. Default NULL.
703
-#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
704
-#'  a default value of 12345 is used. If NULL, no calls to
705
-#'  \link[withr]{with_seed} are made.
706
-#'  the UMAP algorithm.
707
-#' @param ... Additional parameters to `uwot::umap`
708
-#' @return A two column matrix of UMAP coordinates
-#' @examples
709
-#' data(celdaCGSim, celdaCGMod)
710
-#' umapRes <- celdaUmap(celdaCGSim$counts, celdaCGMod)
711
-#' @export
712
-setGeneric("celdaUmap",
713
-  signature = "celdaMod",
714
-  function(counts,
715
-           celdaMod,
716
-           maxCells = NULL,
717
-           minClusterSize = 100,
718
-           modules = NULL,
719
-           seed = 12345,
720
-           ...) {
721
-    standardGeneric("celdaUmap")
722
-  }
723
-)
724
-
725
-
726
-#' @title Obtain the gene module of a gene of interest
727
-#' @description This function will output the corresponding feature module for
728
-#'  a specified vector of genes from a celda_CG or celda_G celdaModel.
729
-#'  \code{feature} must match the rownames of \code{sce}.
730
-#' @param sce A \linkS4class{SingleCellExperiment} object returned by
731
-#'  \link{celda_G}, or \link{celda_CG}, with the matrix
732
-#'  located in the \code{useAssay} assay slot.
733
-#'  Rows represent features and columns represent cells.
734
-#' @param feature Character vector. Identify feature modules for the specified
735
-#'  feature names. \code{feature} must match the rownames of \code{sce}.
736
-#' @param exactMatch Logical. Whether to look for an exact match of the
737
-#'  feature name among the rownames of \code{sce}. Default \code{TRUE}.
738
-#' @return List. Each entry corresponds to the feature module determined for
739
-#' the provided features.
740
-#' @export
741
-setGeneric("featureModuleLookup",
742
-    function(sce, ...) {standardGeneric("featureModuleLookup")})
743
-
744
-
745
-#' @examples
746
-#' data(sceCeldaCG)
747
-#' module <- featureModuleLookup(sce = sceCeldaCG,
748
-#'     feature = c("Gene_1", "Gene_XXX"))
749
-#' @export
750
-#' @rdname featureModuleLookup
751
-setMethod("featureModuleLookup", signature(sce = "SingleCellExperiment"),
752
-    function(sce,
753
-        feature,
754
-        exactMatch = TRUE) {
755
-
756
-        if (celdaModel(sce) == "celda_CG") {
757
-            featureList <- .featureModuleLookupCG(sce = sce, feature = feature,
758
-                exactMatch = exactMatch)
759
-        } else if (celdaModel(sce) == "celda_G") {
760
-            featureList <- .featureModuleLookupG(sce = sce, feature = feature,
761
-                exactMatch = exactMatch)
762
-        } else {
763
-            stop("S4Vectors::metadata(sce)$celda_parameters$model must be",
764
-                " one of 'celda_G', or 'celda_CG'")
765
-        }
766
-        return(featureList)
767
-    }
768
-)
769
-
770
-
771
# Look up the feature module of each requested feature (celda_CG helper).
#
# Arguments:
#   sce        Object whose `rownames()` are the feature names and for which
#              `celdaModules()` returns one module label per row.
#   feature    Character vector of feature names. When `exactMatch` is not
#              TRUE, each entry is used as a `grep()` pattern against the
#              row names instead.
#   exactMatch Logical. Only `TRUE` requests exact name matching.
#
# Returns a list named by the (possibly pattern-expanded) features. Each entry
# is the module label of that feature, or a "No feature was identified ..."
# message string when the name is not among the row names. An empty `feature`,
# or patterns that match nothing, give an empty list.
.featureModuleLookupCG <- function(sce,
    feature,
    exactMatch) {

    featureNames <- rownames(sce)

    if (!isTRUE(exactMatch)) {
        # Expand every pattern into the row names it matches, keeping the
        # order of the patterns.
        feature <- unlist(
            lapply(feature, function(pattern) {
                featureNames[grep(pattern, featureNames)]
            }),
            use.names = FALSE
        )
    }

    moduleList <- vector("list", length(feature))
    # seq_along() rather than seq(length()): the latter yields c(1, 0) for
    # empty input and made the loop fail on `if (logical(0))`.
    for (i in seq_along(feature)) {
        if (feature[i] %in% featureNames) {
            moduleList[i] <- celdaModules(sce)[which(featureNames ==
                    feature[i])]
        } else {
            moduleList[i] <- paste0(
                "No feature was identified matching '",
                feature[i],
                "'."
            )
        }
    }
    names(moduleList) <- feature
    return(moduleList)
}
801
-
802
-
803
# Look up the feature module of each requested feature (celda_G helper).
#
# Arguments:
#   sce        Object whose `rownames()` are the feature names and for which
#              `celdaModules()` returns one module label per row.
#   feature    Character vector of feature names. When `exactMatch` is not
#              TRUE, each entry is used as a `grep()` pattern against the
#              row names instead.
#   exactMatch Logical. Only `TRUE` requests exact name matching.
#
# Returns a list named by the (possibly pattern-expanded) features. Each entry
# holds the module label(s) of the rows carrying that name, or a
# "No feature was identified ..." message string when the name is not among
# the row names. An empty `feature`, or patterns that match nothing, give an
# empty list.
.featureModuleLookupG <- function(sce, feature, exactMatch) {
    featureNames <- rownames(sce)

    if (!isTRUE(exactMatch)) {
        # Expand every pattern into the row names it matches, keeping the
        # order of the patterns.
        feature <- unlist(
            lapply(feature, function(pattern) {
                featureNames[grep(pattern, featureNames)]
            }),
            use.names = FALSE
        )
    }

    # Iterate over the elements themselves: indexing via seq(length(feature))
    # visited positions 1 and 0 for empty input and raised an error.
    featList <- lapply(
        feature,
        function(oneFeature) {
            if (oneFeature %in% featureNames) {
                celdaModules(sce)[which(featureNames == oneFeature)]
            } else {
                paste0(
                    "No feature was identified matching '",
                    oneFeature,
                    "'."
                )
            }
        }
    )
    names(featList) <- feature
    return(featList)
}
831
-
832
-
833
-#' @title Uniform Manifold Approximation and Projection (UMAP) dimension
834
-#'  reduction for celda \code{sce} object
835
-#' @description Embeds cells in two dimensions using \link[uwot]{umap} based on
836
-#'  a celda model. For celda_C \code{sce} objects, PCA on the normalized counts
837
-#'  is used to reduce the number of features before applying UMAP. For celda_CG
838
-#'  \code{sce} object, UMAP is run on module probabilities to reduce the number
839
-#'  of features instead of using PCA. Module probabilities are square-root
840
-#'  transformed before applying UMAP.
841
-#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object
842
-#'  returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
843
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay}
844
-#'  slot to use. Default "counts".
845
-#' @param maxCells Integer. Maximum number of cells to plot. Cells will be
846
-#'  randomly subsampled if \code{ncol(sce) > maxCells}. Larger numbers of cells
847
-#'  require more memory. If NULL, no subsampling will be performed.
848
-#'  Default NULL.
849
-#' @param minClusterSize Integer. Do not subsample cell clusters below this
850
-#'  threshold. Default 100.
851
-#' @param modules Integer vector. Determines which feature modules to use for
852
-#'  UMAP. If NULL, all modules will be used. Default NULL.
853
-#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
854
-#'  a default value of 12345 is used. If NULL, no calls to
855
-#'  \link[withr]{with_seed} are made.
856
-#' @param nNeighbors The size of local neighborhood used for
857
-#'   manifold approximation. Larger values result in more global
858
-#'   views of the manifold, while smaller values result in more
859
-#'   local data being preserved. Default 30.
860
-#'   See \link[uwot]{umap} for more information.
861
-#' @param minDist The effective minimum distance between embedded points.
862
-#'   Smaller values will result in a more clustered/clumped
863
-#'   embedding where nearby points on the manifold are drawn
864
-#'   closer together, while larger values will result in a more
865
-#'   even dispersal of points. Default 0.75.
866
-#'   See \link[uwot]{umap} for more information.
867
-#' @param spread The effective scale of embedded points. In combination with
868
-#'  \code{minDist}, this determines how clustered/clumped the
869
-#'   embedded points are. Default 1. See \link[uwot]{umap} for more information.
870
-#' @param pca Logical. Whether to perform
871
-#' dimensionality reduction with PCA before UMAP. Only works for celda_C
872
-#'  \code{sce} objects.
873
-#' @param initialDims Integer. Number of dimensions from PCA to use as
874
-#' input in UMAP. Default 50. Only works for celda_C \code{sce} objects.
875
-#' @param cores Number of threads to use. Default 1.
876
-#' @param ... Additional parameters to pass to \link[uwot]{umap}.
877
-#' @examples
878
-#' data(sceCeldaCG)
879
-#' umapRes <- celdaUmap(sceCeldaCG)
880
-#' @return \code{sce} with UMAP coordinates
881
-#'  (columns "celda_UMAP1" & "celda_UMAP2") added to
882
-#'  \code{\link[SummarizedExperiment]{colData}(sce)}.
883
-#' @export
884
setGeneric(
    name = "celdaUmap",
    # Dispatch is on `sce`; all remaining arguments reach the selected
    # method through `...`.
    def = function(sce, ...) standardGeneric("celdaUmap")
)
888
-
889
-
890
-#' @rdname celdaUmap
891
-#' @export
892
-setMethod("celdaUmap", signature(sce = "SingleCellExperiment"),
893