# Attempt to split each sufficiently large cell population into two and keep
# the single best split.
#
# For every cluster with more than `minCell` cells, a K = 2 `celda_C` model is
# fit on just that cluster's cells. If the sub-clustering actually produces two
# groups, the candidate relabeling assigns one sub-group the old label `i` and
# the other the new label `K`, and the full-data log-likelihood of that
# candidate is computed. The candidate with the highest log-likelihood wins.
#
# counts   Integer matrix (features x cells).
# z        Integer vector of current cell cluster labels.
# s        Integer vector of sample labels per cell.
# K        Total number of clusters after the split (index of the new cluster).
# minCell  Only clusters with more than this many cells are split candidates.
# alpha    Concentration parameter for Theta.
# beta     Concentration parameter for Phi.
#
# Returns list(ll = best log-likelihood found (-Inf if no valid split),
#              z  = cluster labels for that best split (input z if none)).
.singleSplitZ <- function(counts, z, s, K, minCell = 3, alpha = 1, beta = 1) {
  zTa <- tabulate(z, K)
  zToSplit <- which(zTa > minCell)
  bestZ <- z
  bestLl <- -Inf
  for (i in zToSplit) {
    # Fit a 2-population model on the cells of cluster i only.
    # NOTE(review): unlike .singleSplitY below, this call does not pass
    # nchains = 1 — presumably relies on .celda_C's default; confirm.
    clustLabel <- .celda_C(
      counts[, z == i, drop = FALSE],
      K = 2,
      zInitialize = "random",
      splitOnIter = -1,
      splitOnLast = FALSE,
      verbose = FALSE)
    if (length(unique(clusters(clustLabel)$z)) == 2) {
      ix <- z == i
      newZ <- z
      # Cells in sub-cluster 2 keep the old label i; the rest form the
      # new K-th cluster.
      newZ[ix] <- ifelse(clusters(clustLabel)$z == 2, i, K)
      ll <- logLikelihoodcelda_C(counts, s, newZ, K, alpha, beta)
      if (ll > bestLl) {
        bestZ <- newZ
        bestLl <- ll
      }
    }
  }
  return(list(ll = bestLl, z = bestZ))
}

# Attempt to split each sufficiently large feature module into two and keep
# the single best split. Mirrors .singleSplitZ but operates on feature module
# labels `y` using a L = 2 `celda_G` model per candidate module.
#
# counts      Integer matrix (features x cells).
# y           Integer vector of current feature module labels.
# L           Total number of modules after the split (index of new module).
# minFeature  Only modules with more than this many features are candidates.
# beta/delta/gamma  Concentration parameters for the celda_G likelihood.
#
# Returns list(ll = best log-likelihood found (-Inf if no valid split),
#              y  = module labels for that best split (input y if none)).
.singleSplitY <- function(counts,
    y,
    L,
    minFeature = 3,
    beta = 1,
    delta = 1,
    gamma = 1) {
  yTa <- tabulate(y, L)
  yToSplit <- which(yTa > minFeature)
  bestY <- y
  bestLl <- -Inf
  # previousY <- y
  for (i in yToSplit) {
    # Fit a 2-module model on the features of module i only.
    clustLabel <- .celda_G(counts[y == i, , drop = FALSE],
      L = 2,
      yInitialize = "random",
      splitOnIter = -1,
      splitOnLast = FALSE,
      nchains = 1,
      verbose = FALSE)
    if (length(unique(clusters(clustLabel)$y)) == 2) {
      ix <- y == i
      newY <- y
      # Features in sub-module 2 keep the old label i; the rest form the
      # new L-th module.
      newY[ix] <- ifelse(clusters(clustLabel)$y == 2, i, L)
      ll <- logLikelihoodcelda_G(counts, newY, L, beta, delta, gamma)
      if (ll > bestLl) {
        bestY <- newY
        bestLl <- ll
      }
    }
  }
  return(list(ll = bestLl, y = bestY))
}

#' @title Recursive cell splitting
#' @description Uses the `celda_C` model to cluster cells into population for
#' range of possible K's. The cell population labels of the previous "K-1"
#' model are used as the initial values in the current model with K cell
#' populations. The best split of an existing cell population is found to
#' create the K-th cluster. This procedure is much faster than randomly
#' initializing each model with a different K. If module labels for each
#' feature are given in 'yInit', the `celda_CG` model will be used to split
#' cell populations based on those modules instead of individual features.
#' Module labels will also be updated during sampling and thus may end up
#' slightly different than `yInit`.
#' @param counts Integer matrix. Rows represent features and columns represent
#' cells.
#' @param sampleLabel Vector or factor. Denotes the sample label for each cell
#' (column) in the count matrix.
#' @param initialK Integer. Minimum number of cell populations to try.
#' @param maxK Integer. Maximum number of cell populations to try.
#' @param tempL Integer. Number of temporary modules to identify and use in cell
#' splitting. Only used if `yInit = NULL`. Collapsing features to a relatively
#' smaller number of modules will increase the speed of clustering and tend to
#' produce better cell populations. This number should be larger than the
#' number of true modules expected in the dataset. Default NULL.
#' @param yInit Integer vector. Module labels for features. Cells will be
#' clustered using the `celda_CG` model based on the modules specified in
#' `yInit` rather than the counts of individual features. While the features
#' will be initialized to the module labels in `yInit`, the labels will be
#' allowed to move within each new model with a different K.
#' @param alpha Numeric. Concentration parameter for Theta. Adds a pseudocount
#' to each cell population in each sample. Default 1.
#' @param beta Numeric. Concentration parameter for Phi. Adds a pseudocount to
#' each feature in each cell (if `yInit` is NULL) or to each module in each
#' cell population (if `yInit` is set). Default 1.
#' @param delta Numeric. Concentration parameter for Psi. Adds a pseudocount
#' to each feature in each module. Only used if `yInit` is set. Default 1.
#' @param gamma Numeric. Concentration parameter for Eta. Adds a pseudocount
#' to the number of features in each module. Only used if `yInit` is set.
#' Default 1.
#' @param minCell Integer. Only attempt to split cell populations with at
#' least this many cells.
#' @param reorder Logical. Whether to reorder cell populations using
#' hierarchical clustering after each model has been created. If FALSE, cell
#' populations numbers will correspond to the split which created the cell
#' populations (i.e. 'K15' was created at split 15, 'K16' was created at split
#' 16, etc.). Default TRUE.
#' @param perplexity Logical. Whether to calculate perplexity for each model.
#' If FALSE, then perplexity can be calculated later with
#' `resamplePerplexity()`. Default TRUE.
#' @param verbose Logical. Whether to print log messages. Default TRUE.
#' @param logfile Character. Messages will be redirected to a file named
#' `logfile`. If NULL, messages will be printed to stdout. Default NULL.
#' @return Object of class `celda_list`, which contains results for all model
#' parameter combinations and summaries of the run parameters. The models in
#' the list will be of class `celda_C` if `yInit = NULL` or `celda_CG` if
#' `yInit` is set.
#' @seealso `recursiveSplitModule()` for recursive splitting of cell
#' populations.
#' @examples
#' data(celdaCGSim, celdaCSim)
#' ## Create models that range from K = 3 to K = 7 by recursively splitting
#' ## cell populations into two to produce `celda_C` cell clustering models
#' testZ <- recursiveSplitCell(celdaCSim$counts, initialK = 3, maxK = 7)
#'
#' ## Alternatively, first identify features modules using
#' ## `recursiveSplitModule()`
#' moduleSplit <- recursiveSplitModule(celdaCGSim$counts,
#'   initialL = 3, maxL = 15)
#' plotGridSearchPerplexity(moduleSplit)
#' moduleSplitSelect <- subsetCeldaList(moduleSplit, list(L = 10))
#'
#' ## Then use module labels for initialization in `recursiveSplitCell()` to
#' ## produce `celda_CG` bi-clustering models
#' cellSplit <- recursiveSplitCell(celdaCGSim$counts,
#'   initialK = 3, maxK = 7, yInit = clusters(moduleSplitSelect)$y)
#' plotGridSearchPerplexity(cellSplit)
#' celdaMod <- subsetCeldaList(cellSplit, list(K = 5, L = 10))
#' @export
recursiveSplitCell <- function(counts,
    sampleLabel = NULL,
    initialK = 5,
    maxK = 25,
    tempL = NULL,
    yInit = NULL,
    alpha = 1,
    beta = 1,
    delta = 1,
    gamma = 1,
    minCell = 3,
    reorder = TRUE,
    perplexity = TRUE,
    logfile = NULL,
    verbose = TRUE) {
  .logMessages(paste(rep("=", 50), collapse = ""),
    logfile = logfile, append = FALSE, verbose = verbose)
  .logMessages("Starting recursive cell population splitting.",
    logfile = logfile, append = TRUE, verbose = verbose)
  .logMessages(paste(rep("=", 50), collapse = ""),
    logfile = logfile, append = TRUE, verbose = verbose)

  startTime <- Sys.time()
  counts <- .processCounts(counts)
  countChecksum <- .createCountChecksum(counts)
  sampleLabel <- .processSampleLabels(sampleLabel, numCells = ncol(counts))
  s <- as.integer(sampleLabel)
  names <- list(row = rownames(counts),
    column = colnames(counts),
    sample = levels(sampleLabel))

  if (!is.null(yInit)) {
    # Create collapsed module matrix
    L <- length(unique(yInit))
    .logMessages(date(), ".. Collapsing to", L, "modules",
      append = TRUE, verbose = verbose, logfile = logfile)
    overallY <- .initializeCluster(L, nrow(counts), initial = yInit)
    countsY <- .rowSumByGroup(counts, overallY, L)

    # Create initial model with initialK and predefined y labels
    .logMessages(date(), ".. Initializing with", initialK, "populations",
      append = TRUE, verbose = verbose, logfile = logfile)
    modelInitial <- .celda_CG(counts,
      sampleLabel = s,
      K = as.integer(initialK),
      L = as.integer(L),
      zInitialize = "split",
      yInitialize = "predefined",
      nchains = 1,
      yInit = overallY,
      alpha = alpha,
      beta = beta,
      gamma = gamma,
      delta = delta,
      verbose = FALSE,
      reorder = reorder)
    currentK <- length(unique(clusters(modelInitial)$z)) + 1
    overallZ <- clusters(modelInitial)$z
    resList <- list(modelInitial)

    while (currentK <= maxK) {
      # previousY <- overallY
      # Find the best single split of an existing population, honoring the
      # user-supplied minCell (was previously hard-coded to 3).
      tempSplit <- .singleSplitZ(countsY, overallZ, s, currentK,
        minCell = minCell, alpha = alpha, beta = beta)
      tempModel <- .celda_CG(counts,
        sampleLabel = s,
        K = as.integer(currentK),
        L = as.integer(L),
        yInit = overallY,
        zInit = tempSplit$z,
        nchains = 1,
        zInitialize = "predefined",
        yInitialize = "predefined",
        splitOnLast = FALSE,
        stopIter = 5,
        alpha = alpha,
        beta = beta,
        gamma = gamma,
        delta = delta,
        verbose = FALSE,
        reorder = reorder)

      # Calculate new decomposed counts matrix with new module labels
      # overallY = clusters(tempModel)$y
      # p = .cGReDecomposeCounts(counts, overallY, previousY, countsY,
      # nByG, L = as.integer(L))
      # countsY = p$nTSByC

      # If the number of clusters is still "currentK", then keep the
      # reordering, otherwise keep the previous configuration
      if (length(unique(clusters(tempModel)$z)) == currentK) {
        overallZ <- clusters(tempModel)$z
      } else {
        overallZ <- tempSplit$z
        ll <- logLikelihoodcelda_CG(counts,
          s,
          overallZ,
          clusters(tempModel)$y,
          currentK,
          L,
          alpha,
          beta,
          delta,
          gamma)
        # Rebuild the model object around the pre-collapse labels so the
        # stored clustering and likelihood stay consistent.
        tempModel <- methods::new("celda_CG",
          clusters = list(z = overallZ, y = clusters(tempModel)$y),
          params = list(K = as.integer(currentK),
            L = as.integer(L),
            alpha = alpha,
            beta = beta,
            delta = delta,
            gamma = gamma,
            countChecksum = countChecksum),
          finalLogLik = ll,
          sampleLabel = sampleLabel,
          names = names)
      }

      resList <- c(resList, list(tempModel))
      .logMessages(date(), ".. Current cell population", currentK,
        "| logLik:", bestLogLikelihood(tempModel),
        append = TRUE, verbose = verbose, logfile = logfile)
      currentK <- length(unique(overallZ)) + 1
    }

    runK <- vapply(resList, function(mod) {
      params(mod)$K
    }, integer(1))
    runL <- vapply(resList, function(mod) {
      params(mod)$L
    }, integer(1))
    runParams <- data.frame(index = seq.int(1, length(resList)),
      L = as.integer(runL),
      K = as.integer(runK),
      stringsAsFactors = FALSE)
  } else if (!is.null(tempL)) {
    L <- tempL
    .logMessages(date(), ".. Collapsing to", L, "temporary modules",
      append = TRUE, verbose = verbose, logfile = logfile)
    tempY <- .initializeSplitY(counts,
      L = as.integer(L),
      tempK = max(100, maxK),
      minFeature = 3)
    tempY <- as.integer(as.factor(tempY))
    L <- length(unique(tempY)) # Recalculate in case some modules are empty
    countsY <- .rowSumByGroup(counts, tempY, L)

    # Create initial model with initialK
    .logMessages(date(), ".. Initializing with", initialK, "populations",
      append = TRUE, verbose = verbose, logfile = logfile)
    modelInitial <- .celda_C(countsY,
      sampleLabel = s,
      K = as.integer(initialK),
      zInitialize = "split",
      nchains = 1,
      alpha = alpha,
      beta = beta,
      verbose = FALSE,
      reorder = reorder)
    currentK <- length(unique(clusters(modelInitial)$z)) + 1
    overallZ <- clusters(modelInitial)$z
    # Recompute the likelihood on the full (uncollapsed) counts matrix.
    ll <- logLikelihoodcelda_C(counts, s, overallZ, currentK, alpha, beta)
    modelInitial@params$countChecksum <- countChecksum
    modelInitial@completeLogLik <- ll
    modelInitial@finalLogLik <- ll
    resList <- list(modelInitial)

    while (currentK <= maxK) {
      # Find next best split, then do a new celda_C run with that split
      tempSplit <- .singleSplitZ(countsY, overallZ, s, currentK,
        minCell = minCell, alpha = alpha, beta = beta)
      tempModel <- .celda_C(countsY,
        sampleLabel = s,
        K = as.integer(currentK),
        nchains = 1,
        zInitialize = "random",
        alpha = alpha,
        beta = beta,
        stopIter = 5,
        splitOnLast = FALSE,
        verbose = FALSE,
        zInit = tempSplit$z,
        reorder = reorder)

      # Handle rare cases where a population has no cells after running
      # the model
      if (length(unique(clusters(tempModel)$z)) == currentK) {
        overallZ <- clusters(tempModel)$z
      } else {
        overallZ <- tempSplit$z
      }

      # Need to change below line to use decompose counts to save time
      ll <- logLikelihoodcelda_C(counts, s, overallZ, currentK, alpha, beta)
      tempModel <- methods::new("celda_C",
        clusters = list(z = overallZ),
        params = list(K = as.integer(currentK),
          alpha = alpha,
          beta = beta,
          countChecksum = countChecksum),
        finalLogLik = ll,
        sampleLabel = sampleLabel,
        names = names)

      resList <- c(resList, list(tempModel))
      .logMessages(date(), ".. Current cell population", currentK,
        "| logLik:", bestLogLikelihood(tempModel),
        append = TRUE, verbose = verbose, logfile = logfile)
      currentK <- length(unique(overallZ)) + 1
    }

    runK <- vapply(resList, function(mod) {
      params(mod)$K
    }, integer(1))
    runParams <- data.frame(index = seq.int(1, length(resList)),
      K = as.integer(runK),
      stringsAsFactors = FALSE)
  } else {
    # Create initial model with initialK
    .logMessages(date(), ".. Initializing with", initialK, "populations",
      append = TRUE, verbose = verbose, logfile = logfile)
    modelInitial <- .celda_C(counts,
      sampleLabel = s,
      K = as.integer(initialK),
      zInitialize = "split",
      nchains = 1,
      alpha = alpha,
      beta = beta,
      verbose = FALSE,
      reorder = reorder)
    currentK <- length(unique(clusters(modelInitial)$z)) + 1
    overallZ <- clusters(modelInitial)$z
    resList <- list(modelInitial)

    while (currentK <= maxK) {
      tempSplit <- .singleSplitZ(counts, overallZ, s, currentK,
        minCell = minCell, alpha = alpha, beta = beta)
      tempModel <- .celda_C(counts,
        sampleLabel = s,
        K = as.integer(currentK),
        nchains = 1,
        zInitialize = "random",
        alpha = alpha,
        beta = beta,
        stopIter = 5,
        splitOnLast = FALSE,
        verbose = FALSE,
        zInit = tempSplit$z,
        reorder = reorder)

      if (length(unique(clusters(tempModel)$z)) == currentK) {
        overallZ <- clusters(tempModel)$z
      } else {
        # A population collapsed during sampling; fall back to the split
        # labels and rebuild a consistent model object.
        overallZ <- tempSplit$z
        ll <- logLikelihoodcelda_C(counts, s, overallZ, currentK,
          alpha, beta)
        tempModel <- methods::new("celda_C",
          clusters = list(z = overallZ),
          params = list(K = as.integer(currentK),
            alpha = alpha,
            beta = beta,
            countChecksum = countChecksum),
          finalLogLik = ll,
          sampleLabel = sampleLabel,
          names = names)
      }

      resList <- c(resList, list(tempModel))
      .logMessages(date(), ".. Current cell population", currentK,
        "| logLik:", bestLogLikelihood(tempModel),
        append = TRUE, verbose = verbose, logfile = logfile)
      currentK <- length(unique(overallZ)) + 1
    }

    runK <- vapply(resList, function(mod) {
      params(mod)$K
    }, integer(1))
    runParams <- data.frame(index = seq.int(1, length(resList)),
      K = as.integer(runK),
      stringsAsFactors = FALSE)
  }

  # Summarize parameters of different models
  logliks <- vapply(resList, function(mod) {
    bestLogLikelihood(mod)
  }, double(1))
  runParams <- data.frame(runParams,
    log_likelihood = logliks,
    stringsAsFactors = FALSE)

  celdaRes <- methods::new("celdaList",
    runParams = runParams,
    resList = resList,
    countChecksum = countChecksum)

  if (isTRUE(perplexity)) {
    # Use the user's logfile (was logfile = NULL, which skipped the log file)
    .logMessages(date(), ".. Calculating perplexity",
      append = TRUE, verbose = verbose, logfile = logfile)
    celdaRes <- resamplePerplexity(counts, celdaRes)
  }

  endTime <- Sys.time()
  .logMessages(paste(rep("=", 50), collapse = ""),
    logfile = logfile, append = TRUE, verbose = verbose)
  .logMessages("Completed recursive cell population splitting. Total time:",
    format(difftime(endTime, startTime)),
    logfile = logfile, append = TRUE, verbose = verbose)
  .logMessages(paste(rep("=", 50), collapse = ""),
    logfile = logfile, append = TRUE, verbose = verbose)

  return(celdaRes)
}

#' @title Recursive module splitting
#'
#' @description Uses the `celda_G` model to cluster features into modules for
#' a range of possible L's. The module labels of the previous "L-1" model are
#' used as the initial values in the current model with L modules. The best
#' split of an existing module is found to create the L-th module. This
#' procedure is much faster than randomly initializing each model with a
#' different L.
#' @param counts Integer matrix. Rows represent features and columns represent
#' cells.
#' @param initialL Integer. Minimum number of modules to try.
#' @param maxL Integer. Maximum number of modules to try.
#' @param tempK Integer. Number of temporary cell populations to identify and
#' use in module splitting. Only used if `zInit=NULL` Collapsing cells to a
#' relatively smaller number of cell populations will increase the speed of
#' module clustering and tend to produce better modules. This number should be
#' larger than the number of true cell populations expected in the dataset.
#' Default 100.
#' @param zInit Integer vector. Collapse cells to cell populations based on
#' labels in `zInit` and then perform module splitting. If NULL, no
#' collapsing will be performed unless `tempK` is specified. Default NULL.
#' @param sampleLabel Vector or factor. Denotes the sample label for each cell
#' (column) in the count matrix. Only used if `zInit` is set.
#' @param alpha Numeric. Concentration parameter for Theta. Adds a pseudocount
#' to each cell population in each sample. Only used if `zInit` is set.
#' Default 1.
#' @param beta Numeric. Concentration parameter for Phi. Adds a pseudocount
#' to each feature module in each cell. Default 1.
#' @param delta Numeric. Concentration parameter for Psi. Adds a pseudocount
#' to each feature in each module. Default 1.
#' @param gamma Numeric. Concentration parameter for Eta. Adds a pseudocount
#' to the number of features in each module. Default 1.
#' @param minFeature Integer. Only attempt to split modules with at least this
#' many features.
#' @param reorder Logical. Whether to reorder modules using hierarchical
#' clustering after each model has been created. If FALSE, module numbers will
#' correspond to the split which created the module (i.e. 'L15' was created at
#' split 15, 'L16' was created at split 16, etc.). Default TRUE.
#' @param perplexity Logical. Whether to calculate perplexity for each model.
#' If FALSE, then perplexity can be calculated later with
#' `resamplePerplexity()`. Default TRUE.
#' @param verbose Logical. Whether to print log messages. Default TRUE.
#' @param logfile Character. Messages will be redirected to a file named
#' `logfile`. If NULL, messages will be printed to stdout. Default NULL.
#' @return Object of class `celda_list`, which contains results for all model
#' parameter combinations and summaries of the run parameters. The models in
#' the list will be of class `celda_G` if `zInit=NULL` or `celda_CG` if
#' `zInit` is set.
#' @seealso `recursiveSplitCell()` for recursive splitting of cell populations.
#' @examples
#' data(celdaCGSim)
#' ## Create models that range from L=3 to L=20 by recursively splitting modules
#' ## into two
#' moduleSplit <- recursiveSplitModule(celdaCGSim$counts,
#'   initialL = 3, maxL = 20)
#'
#' ## Example results with perplexity
#' plotGridSearchPerplexity(moduleSplit)
#'
#' ## Select model for downstream analysis
#' celdaMod <- subsetCeldaList(moduleSplit, list(L = 10))
#' @export
recursiveSplitModule <- function(counts,
    initialL = 10,
    maxL = 100,
    tempK = 100,
    zInit = NULL,
    sampleLabel = NULL,
    alpha = 1,
    beta = 1,
    delta = 1,
    gamma = 1,
    minFeature = 3,
    reorder = TRUE,
    perplexity = TRUE,
    verbose = TRUE,
    logfile = NULL) {
  .logMessages(paste(rep("=", 50), collapse = ""),
    logfile = logfile, append = FALSE, verbose = verbose)
  .logMessages("Starting recursive module splitting.",
    logfile = logfile, append = TRUE, verbose = verbose)
  .logMessages(paste(rep("=", 50), collapse = ""),
    logfile = logfile, append = TRUE, verbose = verbose)

  startTime <- Sys.time()
  counts <- .processCounts(counts)
  countChecksum <- .createCountChecksum(counts)
  names <- list(row = rownames(counts), column = colnames(counts))
  sampleLabel <- .processSampleLabels(sampleLabel, numCells = ncol(counts))
  s <- as.integer(sampleLabel)

  if (!is.null(zInit)) {
    # Create collapsed cell population matrix from the predefined z labels
    K <- length(unique(zInit))
    .logMessages(date(), ".. Collapsing to", K, "cell populations",
      append = TRUE, verbose = verbose, logfile = logfile)
    overallZ <- .initializeCluster(N = K,
      len = ncol(counts),
      initial = zInit)
    countsZ <- .colSumByGroup(counts, overallZ, K)

    # Create initial model with initialL and predefined z labels
    .logMessages(date(), ".. Initializing with", initialL, "modules",
      append = TRUE, verbose = verbose, logfile = logfile)
    modelInitial <- .celda_CG(counts,
      sampleLabel = s,
      L = initialL,
      K = K,
      zInitialize = "predefined",
      yInitialize = "split",
      nchains = 1,
      zInit = overallZ,
      alpha = alpha,
      beta = beta,
      gamma = gamma,
      delta = delta,
      verbose = FALSE,
      reorder = reorder)
    currentL <- length(unique(clusters(modelInitial)$y)) + 1
    overallY <- clusters(modelInitial)$y
    resList <- list(modelInitial)

    while (currentL <= maxL) {
      # Allow features to cluster further with celda_CG. Honor the
      # user-supplied minFeature (was previously hard-coded to 3).
      tempSplit <- .singleSplitY(countsZ,
        overallY,
        currentL,
        minFeature = minFeature,
        beta = beta,
        delta = delta,
        gamma = gamma)
      tempModel <- .celda_CG(counts,
        L = currentL,
        K = K,
        stopIter = 5,
        splitOnIter = -1,
        splitOnLast = FALSE,
        nchains = 1,
        verbose = FALSE,
        yInitialize = "predefined",
        zInitialize = "predefined",
        yInit = tempSplit$y,
        zInit = overallZ,
        reorder = reorder)
      overallY <- clusters(tempModel)$y

      ## Add new model to results list and increment L
      ## (log to the user's logfile; was logfile = NULL)
      .logMessages(date(), ".. Created module", currentL,
        "| logLik:", bestLogLikelihood(tempModel),
        append = TRUE, verbose = verbose, logfile = logfile)
      resList <- c(resList, list(tempModel))
      currentL <- currentL + 1
    }

    runK <- vapply(resList, function(mod) {
      params(mod)$K
    }, integer(1))
    runL <- vapply(resList, function(mod) {
      params(mod)$L
    }, integer(1))
    runParams <- data.frame(index = seq.int(1, length(resList)),
      L = runL,
      K = runK,
      stringsAsFactors = FALSE)
  } else if (!is.null(tempK)) {
    K <- tempK
    .logMessages(date(), ".. Collapsing to", K, "temporary cell populations",
      append = TRUE, verbose = verbose, logfile = logfile)
    z <- .initializeSplitZ(counts, K = K, minCell = 3)
    countsZ <- .colSumByGroup(counts, z, length(unique(z)))

    .logMessages(date(), ".. Initializing with", initialL, "modules",
      append = TRUE, verbose = verbose, logfile = logfile)
    modelInitial <- .celda_G(countsZ,
      L = initialL,
      nchains = 1,
      verbose = FALSE)
    currentL <- length(unique(clusters(modelInitial)$y)) + 1
    overallY <- clusters(modelInitial)$y

    ## Decomposed counts for full count matrix
    p <- .cGDecomposeCounts(counts, overallY, currentL)
    nTSByC <- p$nTSByC
    nByTS <- p$nByTS
    nGByTS <- p$nGByTS
    nByG <- p$nByG
    nG <- p$nG
    nM <- p$nM
    resList <- list(modelInitial)

    while (currentL <= maxL) {
      # Allow features to cluster further
      previousY <- overallY
      tempSplit <- .singleSplitY(countsZ,
        overallY,
        currentL,
        minFeature = minFeature,
        beta = beta,
        delta = delta,
        gamma = gamma)
      tempModel <- .celda_G(countsZ,
        L = currentL,
        stopIter = 5,
        splitOnIter = -1,
        splitOnLast = FALSE,
        nchains = 1,
        verbose = FALSE,
        yInitialize = "predefined",
        yInit = tempSplit$y,
        reorder = reorder)
      overallY <- clusters(tempModel)$y

      # Adjust decomposed count matrices
      p <- .cGReDecomposeCounts(counts,
        overallY,
        previousY,
        nTSByC,
        nByG,
        L = currentL)
      nTSByC <- p$nTSByC
      nByTS <- p$nByTS
      nGByTS <- p$nGByTS
      previousY <- overallY

      ## Create the final model object with correct info on full counts
      ## matrix
      tempModel@finalLogLik <- .cGCalcLL(nTSByC = nTSByC,
        nByTS = nByTS,
        nByG = nByG,
        nGByTS = nGByTS,
        nM = nM,
        nG = nG,
        L = currentL,
        beta = beta,
        delta = delta,
        gamma = gamma)
      tempModel@completeLogLik <- bestLogLikelihood(tempModel)
      tempModel@params$countChecksum <- countChecksum
      tempModel@names <- names

      ## Add extra row/column for next round of L
      nTSByC <- rbind(nTSByC, rep(0L, ncol(nTSByC)))
      nByTS <- c(nByTS, 0L)
      nGByTS <- c(nGByTS, 0L)

      ## Add new model to results list and increment L
      .logMessages(date(), ".. Created module", currentL,
        "| logLik:", bestLogLikelihood(tempModel),
        append = TRUE, verbose = verbose, logfile = logfile)
      resList <- c(resList, list(tempModel))
      currentL <- currentL + 1
    }

    runL <- vapply(resList, function(mod) {
      params(mod)$L
    }, integer(1))
    runParams <- data.frame(index = seq.int(1, length(resList)),
      L = runL,
      stringsAsFactors = FALSE)
  } else {
    .logMessages(date(), ".. Initializing with", initialL, "modules",
      append = TRUE, verbose = verbose, logfile = logfile)
    modelInitial <- .celda_G(counts,
      L = initialL,
      maxIter = 20,
      nchains = 1,
      verbose = FALSE)
    overallY <- clusters(modelInitial)$y
    currentL <- length(unique(overallY)) + 1

    ## Perform splitting for y labels
    resList <- list(modelInitial)
    while (currentL <= maxL) {
      # Allow features to cluster further
      previousY <- overallY
      tempSplit <- .singleSplitY(counts,
        overallY,
        currentL,
        minFeature = minFeature,
        beta = beta,
        delta = delta,
        gamma = gamma)
      tempModel <- .celda_G(counts,
        L = currentL,
        stopIter = 5,
        splitOnIter = -1,
        splitOnLast = FALSE,
        nchains = 1,
        verbose = FALSE,
        yInitialize = "predefined",
        yInit = tempSplit$y,
        reorder = reorder)
      overallY <- clusters(tempModel)$y

      ## Add new model to results list and increment L
      .logMessages(date(), ".. Created module", currentL,
        "| logLik:", bestLogLikelihood(tempModel),
        append = TRUE, verbose = verbose, logfile = logfile)
      resList <- c(resList, list(tempModel))
      currentL <- currentL + 1
    }

    runL <- vapply(resList, function(mod) {
      params(mod)$L
    }, integer(1))
    runParams <- data.frame(index = seq.int(1, length(resList)),
      L = runL,
      stringsAsFactors = FALSE)
  }

  ## Summarize parameters of different models
  logliks <- vapply(resList, function(mod) {
    bestLogLikelihood(mod)
  }, double(1))
  runParams <- data.frame(runParams,
    log_likelihood = logliks,
    stringsAsFactors = FALSE)

  celdaRes <- methods::new("celdaList",
    runParams = runParams,
    resList = resList,
    countChecksum = countChecksum)

  if (isTRUE(perplexity)) {
    # Use the user's logfile (was logfile = NULL, which skipped the log file)
    .logMessages(date(), ".. Calculating perplexity",
      append = TRUE, verbose = verbose, logfile = logfile)
    celdaRes <- resamplePerplexity(counts, celdaRes)
  }

  endTime <- Sys.time()
  .logMessages(paste(rep("=", 50), collapse = ""),
    logfile = logfile, append = TRUE, verbose = verbose)
  .logMessages("Completed recursive module splitting. Total time:",
    format(difftime(endTime, startTime)),
    logfile = logfile, append = TRUE, verbose = verbose)
  .logMessages(paste(rep("=", 50), collapse = ""),
    logfile = logfile, append = TRUE, verbose = verbose)

  return(celdaRes)
}