% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/recursiveSplit.R
\name{recursiveSplitCell}
\alias{recursiveSplitCell}
\alias{recursiveSplitCell,SingleCellExperiment-method}
\alias{recursiveSplitCell,matrix-method}
\title{Recursive cell splitting}
\usage{
recursiveSplitCell(x, ...)

\S4method{recursiveSplitCell}{SingleCellExperiment}(
  x,
  useAssay = "counts",
  altExpName = "featureSubset",
  sampleLabel = NULL,
  initialK = 5,
  maxK = 25,
  tempL = NULL,
  yInit = NULL,
  alpha = 1,
  beta = 1,
  delta = 1,
  gamma = 1,
  minCell = 3,
  reorder = TRUE,
  seed = 12345,
  perplexity = TRUE,
  logfile = NULL,
  verbose = TRUE
)

\S4method{recursiveSplitCell}{matrix}(
  x,
  useAssay = "counts",
  altExpName = "featureSubset",
  sampleLabel = NULL,
  initialK = 5,
  maxK = 25,
  tempL = NULL,
  yInit = NULL,
  alpha = 1,
  beta = 1,
  delta = 1,
  gamma = 1,
  minCell = 3,
  reorder = TRUE,
  seed = 12345,
  perplexity = TRUE,
  logfile = NULL,
  verbose = TRUE
)
}
\arguments{
\item{x}{A numeric \link{matrix} of counts or a
\linkS4class{SingleCellExperiment}
with the matrix located in the assay slot under \code{useAssay}.
Rows represent features and columns represent cells.}

\item{...}{Ignored. Placeholder to prevent check warning.}

\item{useAssay}{A string specifying the name of the
\link{assay}
slot to use. Default "counts".}

\item{altExpName}{The name for the \link{altExp} slot
to use. Default "featureSubset".}

\item{sampleLabel}{Vector or factor. Denotes the sample label for each cell
(column) in the count matrix.}

\item{initialK}{Integer. Minimum number of cell populations to try.}

\item{maxK}{Integer. Maximum number of cell populations to try.}

\item{tempL}{Integer. Number of temporary modules to identify and use in cell
splitting. Only used if \code{yInit = NULL}. Collapsing features to a
relatively smaller number of modules will increase the speed of clustering
and tend to produce better cell populations. This number should be larger
than the number of true modules expected in the dataset. Default NULL.}

\item{yInit}{Integer vector. Module labels for features. Cells will be
clustered using the \link{celda_CG} model based on the modules specified in
\code{yInit} rather than the counts of individual features. While the
features will be initialized to the module labels in \code{yInit}, the
labels will be allowed to move within each new model with a different K.}

\item{alpha}{Numeric. Concentration parameter for Theta. Adds a pseudocount
to each cell population in each sample. Default 1.}

\item{beta}{Numeric. Concentration parameter for Phi. Adds a pseudocount to
each feature in each cell (if \code{yInit} is NULL) or to each module in
each cell population (if \code{yInit} is set). Default 1.}

\item{delta}{Numeric. Concentration parameter for Psi. Adds a pseudocount
to each feature in each module. Only used if \code{yInit} is set. Default 1.}

\item{gamma}{Numeric. Concentration parameter for Eta. Adds a pseudocount
to the number of features in each module. Only used if \code{yInit} is set.
Default 1.}

\item{minCell}{Integer. Only attempt to split cell populations with at
least this many cells.}

\item{reorder}{Logical. Whether to reorder cell populations using
hierarchical clustering after each model has been created. If FALSE, cell
populations numbers will correspond to the split which created the cell
populations (i.e. 'K15' was created at split 15, 'K16' was created at split
16, etc.). Default TRUE.}

\item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility,
a default value of 12345 is used. If NULL, no calls to
\link[withr]{with_seed} are made.}

\item{perplexity}{Logical. Whether to calculate perplexity for each model.
If FALSE, then perplexity can be calculated later with
\link{resamplePerplexity}. Default TRUE.}

\item{logfile}{Character. Messages will be redirected to a file named
"logfile". If NULL, messages will be printed to stdout.  Default NULL.}

\item{verbose}{Logical. Whether to print log messages. Default TRUE.}
}
\value{
A \linkS4class{SingleCellExperiment} object. Function
 parameter settings and celda model results are stored in the
 \link{metadata} \code{"celda_grid_search"} slot. The models in
 the list will be of class \code{celda_C} if \code{yInit = NULL} or
 \code{celda_CG} if \code{zInit} is set.
}
\description{
Uses the \link{celda_C} model to cluster cells into
 population for range of possible K's. The cell population labels of the
 previous "K-1" model are used as the initial values in the current model
 with K cell populations. The best split of an existing cell population is
 found to create the K-th cluster. This procedure is much faster than
 randomly initializing each model with a different K. If module labels for
 each feature are given in 'yInit', the \link{celda_CG} model will be used to
 split cell populations based on those modules instead of individual
 features. Module labels will also be updated during sampling and thus
 may end up slightly different than \code{yInit}.
}
\examples{
data(sceCeldaCG)
## Create models that range from K = 3 to K = 7 by recursively splitting
## cell populations into two to produce \link{celda_C} cell clustering models
sce <- recursiveSplitCell(sceCeldaCG, initialK = 3, maxK = 7)

## Alternatively, first identify features modules using
## \link{recursiveSplitModule}
moduleSplit <- recursiveSplitModule(sceCeldaCG, initialL = 3, maxL = 15)
plotGridSearchPerplexity(moduleSplit)
moduleSplitSelect <- subsetCeldaList(moduleSplit, list(L = 10))

## Then use module labels for initialization in \link{recursiveSplitCell} to
## produce \link{celda_CG} bi-clustering models
cellSplit <- recursiveSplitCell(sceCeldaCG,
  initialK = 3, maxK = 7, yInit = celdaModules(moduleSplitSelect))
plotGridSearchPerplexity(cellSplit)
sce <- subsetCeldaList(cellSplit, list(K = 5, L = 10))
data(celdaCGSim, celdaCSim)
## Create models that range from K = 3 to K = 7 by recursively splitting
## cell populations into two to produce \link{celda_C} cell clustering models
sce <- recursiveSplitCell(celdaCSim$counts, initialK = 3, maxK = 7)

## Alternatively, first identify features modules using
## \link{recursiveSplitModule}
moduleSplit <- recursiveSplitModule(celdaCGSim$counts,
  initialL = 3, maxL = 15)
plotGridSearchPerplexity(moduleSplit)
moduleSplitSelect <- subsetCeldaList(moduleSplit, list(L = 10))

## Then use module labels for initialization in \link{recursiveSplitCell} to
## produce \link{celda_CG} bi-clustering models
cellSplit <- recursiveSplitCell(celdaCGSim$counts,
  initialK = 3, maxK = 7, yInit = celdaModules(moduleSplitSelect))
plotGridSearchPerplexity(cellSplit)
sce <- subsetCeldaList(cellSplit, list(K = 5, L = 10))
}
\seealso{
\link{recursiveSplitModule} for recursive splitting of feature
 modules.
}