#' @title t-Distributed Stochastic Neighbor Embedding (t-SNE) dimension
#'  reduction for celda \code{sce} object
#' @description Embeds cells in two dimensions using \link[Rtsne]{Rtsne} based
#'  on a celda model. For celda_C \code{sce} objects, PCA on the normalized
#'  counts is used to reduce the number of features before applying t-SNE. For
#'  celda_CG and celda_G \code{sce} objects, tSNE is run on module
#'  probabilities to reduce the number of features instead of using PCA.
#'  Module probabilities are square-root transformed before applying tSNE.
#' @param sce A \linkS4class{SingleCellExperiment} object
#'  returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
#' @param useAssay A string specifying which \link{assay}
#'  slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param maxCells Integer. Maximum number of cells to plot. Cells will be
#'  randomly subsampled if \code{ncol(counts) > maxCells}. Larger numbers of
#'  cells require more memory. If \code{NULL}, no subsampling will be
#'  performed. Default \code{NULL}.
#' @param minClusterSize Integer. Do not subsample cell clusters below this
#'  threshold. Default 100.
#' @param initialDims Integer. PCA will be used to reduce the dimensionality
#'  of the dataset. The top 'initialDims' principal components will be used
#'  for tSNE. Default 20.
#' @param modules Integer vector. Determines which feature modules to use for
#'  tSNE. If \code{NULL}, all modules will be used. Default \code{NULL}.
#' @param perplexity Numeric. Perplexity parameter for tSNE. Default 20.
#' @param maxIter Integer. Maximum number of iterations in tSNE generation.
#'  Default 2500.
#' @param normalize Character. Passed to \link{normalizeCounts} in
#'  normalization step. Divides counts by the library sizes for each
#'  cell. One of 'proportion', 'cpm', 'median', or 'mean'. 'proportion' uses
#'  the total counts for each cell as the library size. 'cpm' divides the
#'  library size of each cell by one million to produce counts per million.
#'  'median' divides the library size of each cell by the median library size
#'  across all cells. 'mean' divides the library size of each cell by the mean
#'  library size across all cells. Default 'proportion'.
#' @param scaleFactor Numeric. Sets the scale factor for cell-level
#'  normalization. This scale factor is multiplied to each cell after the
#'  library size of each cell had been adjusted in \code{normalize}. Default
#'  \code{NULL} which means no scale factor is applied.
#' @param transformationFun Function. Applies a transformation such as 'sqrt',
#'  'log', 'log2', 'log10', or 'log1p'. If \code{NULL}, no transformation will
#'  be applied. Occurs after applying normalization and scale factor. Default
#'  \code{sqrt}.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#'  a default value of 12345 is used. If NULL, no calls to
#'  \link[withr]{with_seed} are made.
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return \code{sce} with t-SNE coordinates
#'  (columns "celda_tSNE1" & "celda_tSNE2") added to
#'  \code{\link{reducedDim}(altExp(sce, altExpName), "celda_tSNE")}.
#' @export
setGeneric("celdaTsne",
    function(sce, ...) {
        standardGeneric("celdaTsne")
    })


#' @rdname celdaTsne
#' @examples
#' data(sceCeldaCG)
#' tsneRes <- celdaTsne(sceCeldaCG)
#' @export
setMethod("celdaTsne", signature(sce = "SingleCellExperiment"),
    function(sce,
        useAssay = "counts",
        altExpName = "featureSubset",
        maxCells = NULL,
        minClusterSize = 100,
        initialDims = 20,
        modules = NULL,
        perplexity = 20,
        maxIter = 2500,
        normalize = "proportion",
        scaleFactor = NULL,
        transformationFun = sqrt,
        seed = 12345) {

        # Single definition of the worker call so the seeded and unseeded
        # paths cannot drift apart.
        runTsne <- function() {
            .celdaTsne(sce = sce,
                useAssay = useAssay,
                altExpName = altExpName,
                maxCells = maxCells,
                minClusterSize = minClusterSize,
                initialDims = initialDims,
                modules = modules,
                perplexity = perplexity,
                maxIter = maxIter,
                normalize = normalize,
                scaleFactor = scaleFactor,
                transformationFun = transformationFun)
        }

        if (is.null(seed)) {
            sce <- runTsne()
        } else {
            # with_seed() returns the value of the evaluated code, so the
            # result is assigned here rather than inside the argument.
            sce <- withr::with_seed(seed, runTsne())
        }

        return(sce)
    })


# Dispatch to the model-specific t-SNE routine and store the result.
# Reads the celda model name via celdaModel(), runs the matching
# .celdaTsne* helper on the alternative experiment named by 'altExpName',
# writes the coordinates to reducedDim(altExp, "celda_tSNE"), and returns
# 'sce' with the updated alternative experiment.
.celdaTsne <- function(sce,
    useAssay,
    altExpName,
    maxCells,
    minClusterSize,
    initialDims,
    modules,
    perplexity,
    maxIter,
    normalize,
    scaleFactor,
    transformationFun) {

    celdaMod <- celdaModel(sce, altExpName = altExpName)
    altExp <- SingleCellExperiment::altExp(sce, altExpName)

    # Validate before dispatching so that a missing / NA / unexpected model
    # name gives the informative error below instead of a failure inside
    # the condition of an 'if'.
    validModels <- c("celda_C", "celda_CG", "celda_G")
    if (length(celdaMod) != 1L || !(celdaMod %in% validModels)) {
        stop("S4Vectors::metadata(altExp(sce, altExpName))$",
            "celda_parameters$model must be",
            " one of 'celda_C', 'celda_G', or 'celda_CG'")
    }

    res <- switch(as.character(celdaMod),
        celda_C = .celdaTsneC(sce = altExp,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            initialDims = initialDims,
            perplexity = perplexity,
            maxIter = maxIter,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun),
        celda_CG = .celdaTsneCG(sce = altExp,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            initialDims = initialDims,
            modules = modules,
            perplexity = perplexity,
            maxIter = maxIter,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun),
        celda_G = .celdaTsneG(sce = altExp,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            initialDims = initialDims,
            modules = modules,
            perplexity = perplexity,
            maxIter = maxIter,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun))

    SingleCellExperiment::reducedDim(altExp, "celda_tSNE") <- res
    SingleCellExperiment::altExp(sce, altExpName) <- altExp
    return(sce)
}


# Expand t-SNE coordinates computed on a subset of cells to all cells.
# @param coords Two-column matrix of t-SNE coordinates, one row per entry
#  of 'cellIx'.
# @param cellIx Index of the cells in 'sce' that 'coords' refers to.
# @param sce Object whose columns are the cells; only ncol() and colnames()
#  are used.
# @return Numeric matrix with ncol(sce) rows and columns "celda_tSNE1" and
#  "celda_tSNE2". Rows not listed in 'cellIx' are NA.
.tsneCoordinateMatrix <- function(coords, cellIx, sce) {
    # NA_real_ keeps the matrix numeric even when no row is filled in.
    final <- matrix(NA_real_,
        nrow = ncol(sce),
        ncol = 2,
        dimnames = list(colnames(sce), c("celda_tSNE1", "celda_tSNE2")))
    final[cellIx, ] <- coords
    return(final)
}


# t-SNE for celda_C models: counts are prepared by
# .prepareCountsForDimReductionCeldaC() and PCA is enabled in Rtsne.
.celdaTsneC <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    initialDims,
    perplexity,
    maxIter,
    normalize,
    scaleFactor,
    transformationFun) {

    preparedCountInfo <- .prepareCountsForDimReductionCeldaC(sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    res <- .calculateTsne(preparedCountInfo$norm,
        perplexity = perplexity,
        maxIter = maxIter,
        doPca = TRUE,
        initialDims = initialDims)

    return(.tsneCoordinateMatrix(res, preparedCountInfo$cellIx, sce))
}


# t-SNE for celda_CG models: input is prepared by
# .prepareCountsForDimReductionCeldaCG() and PCA is disabled in Rtsne.
.celdaTsneCG <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    initialDims,
    modules,
    perplexity,
    maxIter,
    normalize,
    scaleFactor,
    transformationFun) {

    preparedCountInfo <- .prepareCountsForDimReductionCeldaCG(sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        modules = modules,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    res <- .calculateTsne(preparedCountInfo$norm,
        perplexity = perplexity,
        maxIter = maxIter,
        doPca = FALSE,
        initialDims = initialDims)

    return(.tsneCoordinateMatrix(res, preparedCountInfo$cellIx, sce))
}


# t-SNE for celda_G models: input is prepared by
# .prepareCountsForDimReductionCeldaG() and PCA is disabled in Rtsne.
.celdaTsneG <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    initialDims,
    modules,
    perplexity,
    maxIter,
    normalize,
    scaleFactor,
    transformationFun) {

    preparedCountInfo <- .prepareCountsForDimReductionCeldaG(sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        modules = modules,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    res <- .calculateTsne(preparedCountInfo$norm,
        perplexity = perplexity,
        maxIter = maxIter,
        doPca = FALSE,
        initialDims = initialDims)

    return(.tsneCoordinateMatrix(res, preparedCountInfo$cellIx, sce))
}


# Run the t-SNE algorithm for dimensionality reduction
# All arguments are required; the callers supply the values.
# @param norm Normalized count matrix.
# @param perplexity Numeric. Perplexity passed to Rtsne.
# @param maxIter Numeric. Passed to Rtsne as 'max_iter'.
# @param doPca Logical. Whether to perform
#  dimensionality reduction with PCA before tSNE. Passed to Rtsne as 'pca'.
# @param initialDims Integer. Number of dimensions from PCA to use as
#  input in tSNE. Passed to Rtsne as 'initial_dims'.
# @return The 'Y' element of the Rtsne result (the embedding coordinates).
#' @importFrom Rtsne Rtsne
.calculateTsne <- function(norm,
    perplexity,
    maxIter,
    doPca,
    initialDims) {

    res <- Rtsne::Rtsne(
        norm,
        pca = doPca,
        max_iter = maxIter,
        perplexity = perplexity,
        check_duplicates = FALSE,
        is_distance = FALSE,
        initial_dims = initialDims)$Y

    return(res)
}