... | ... |
@@ -11,7 +11,7 @@ |
11 | 11 |
#' Rows represent features and columns represent cells. Alternatively, |
12 | 12 |
#' any matrix-like object that can be coerced to a sparse matrix of class |
13 | 13 |
#' "dgCMatrix" can be directly used as input. The matrix will automatically be |
14 |
-#' converted to a \linkS4class{SingleCellExperiment} object. |
|
14 |
+#' converted to a \linkS4class{SingleCellExperiment} object. |
|
15 | 15 |
#' @param useAssay A string specifying the name of the |
16 | 16 |
#' \link{assay} slot to use. Default "counts". |
17 | 17 |
#' @param altExpName The name for the \link{altExp} slot |
... | ... |
@@ -223,7 +223,7 @@ setMethod("celda_CG", |
223 | 223 |
|
224 | 224 |
# Convert to sparse matrix |
225 | 225 |
x <- methods::as(x, "dgCMatrix") |
226 |
- |
|
226 |
+ |
|
227 | 227 |
ls <- list() |
228 | 228 |
ls[[useAssay]] <- x |
229 | 229 |
sce <- SingleCellExperiment::SingleCellExperiment(assays = ls) |
... | ... |
@@ -6,11 +6,12 @@ |
6 | 6 |
#' it exists. Otherwise, the \code{useAssay} |
7 | 7 |
#' \link{assay} slot in \code{x} will be used if |
8 | 8 |
#' \code{x} is a \linkS4class{SingleCellExperiment} object. |
9 |
-#' @param x A numeric \link{matrix} of counts or a |
|
10 |
-#' \linkS4class{SingleCellExperiment} |
|
11 |
-#' with the matrix located in the \link{assay} |
|
12 |
-#' slot under \code{useAssay} in \code{altExp(x, altExpName)}. |
|
13 |
-#' Rows represent features and columns represent cells. |
|
9 |
+#' @param x A \linkS4class{SingleCellExperiment} |
|
10 |
+#' with the matrix located in the assay slot under \code{useAssay}. |
|
11 |
+#' Rows represent features and columns represent cells. Alternatively, |
|
12 |
+#' any matrix-like object that can be coerced to a sparse matrix of class |
|
13 |
+#' "dgCMatrix" can be directly used as input. The matrix will automatically be |
|
14 |
+#' converted to a \linkS4class{SingleCellExperiment} object. |
|
14 | 15 |
#' @param useAssay A string specifying the name of the |
15 | 16 |
#' \link{assay} slot to use. Default "counts". |
16 | 17 |
#' @param altExpName The name for the \link{altExp} slot |
... | ... |
@@ -194,7 +195,7 @@ setMethod("celda_CG", |
194 | 195 |
#' nchains = 1) |
195 | 196 |
#' @export |
196 | 197 |
setMethod("celda_CG", |
197 |
- signature(x = "matrix"), |
|
198 |
+ signature(x = "ANY"), |
|
198 | 199 |
function(x, |
199 | 200 |
useAssay = "counts", |
200 | 201 |
altExpName = "featureSubset", |
... | ... |
@@ -220,6 +221,9 @@ setMethod("celda_CG", |
220 | 221 |
logfile = NULL, |
221 | 222 |
verbose = TRUE) { |
222 | 223 |
|
224 |
+ # Convert to sparse matrix |
|
225 |
+ x <- as(x, "dgCMatrix") |
|
226 |
+ |
|
223 | 227 |
ls <- list() |
224 | 228 |
ls[[useAssay]] <- x |
225 | 229 |
sce <- SingleCellExperiment::SingleCellExperiment(assays = ls) |
... | ... |
@@ -943,8 +943,8 @@ setMethod("celda_CG", |
943 | 943 |
counts <- .processCounts(counts) |
944 | 944 |
|
945 | 945 |
K <- S4Vectors::metadata(sce)$celda_parameters$K |
946 |
- z <- SummarizedExperiment::colData(sce)$celda_cell_cluster |
|
947 |
- y <- SummarizedExperiment::rowData(sce)$celda_feature_module |
|
946 |
+ z <- as.integer(SummarizedExperiment::colData(sce)$celda_cell_cluster) |
|
947 |
+ y <- as.integer(SummarizedExperiment::rowData(sce)$celda_feature_module) |
|
948 | 948 |
L <- S4Vectors::metadata(sce)$celda_parameters$L |
949 | 949 |
alpha <- S4Vectors::metadata(sce)$celda_parameters$alpha |
950 | 950 |
beta <- S4Vectors::metadata(sce)$celda_parameters$beta |
... | ... |
@@ -942,15 +942,29 @@ setMethod("celda_CG", |
942 | 942 |
counts <- SummarizedExperiment::assay(sce, i = useAssay) |
943 | 943 |
counts <- .processCounts(counts) |
944 | 944 |
|
945 |
+ K <- S4Vectors::metadata(sce)$celda_parameters$K |
|
946 |
+ z <- SummarizedExperiment::colData(sce)$celda_cell_cluster |
|
947 |
+ y <- SummarizedExperiment::rowData(sce)$celda_feature_module |
|
948 |
+ L <- S4Vectors::metadata(sce)$celda_parameters$L |
|
949 |
+ alpha <- S4Vectors::metadata(sce)$celda_parameters$alpha |
|
950 |
+ beta <- S4Vectors::metadata(sce)$celda_parameters$beta |
|
951 |
+ |
|
952 |
+ delta <- S4Vectors::metadata(sce)$celda_parameters$delta |
|
953 |
+ gamma <- S4Vectors::metadata(sce)$celda_parameters$gamma |
|
954 |
+ sampleLabel <- |
|
955 |
+ SummarizedExperiment::colData(sce)$celda_sample_label |
|
956 |
+ cNames <- colnames(sce) |
|
957 |
+ rNames <- rownames(sce) |
|
958 |
+ sNames <- S4Vectors::metadata(sce)$celda_parameters$sampleLevels |
|
959 |
+ |
|
945 | 960 |
## Checking if maxCells and minClusterSize will work |
946 | 961 |
if (!is.null(maxCells)) { |
947 | 962 |
if ((maxCells < ncol(counts)) & |
948 |
- (maxCells / minClusterSize < |
|
949 |
- S4Vectors::metadata(sce)$celda_parameters$K)) { |
|
963 |
+ (maxCells / minClusterSize < K)) { |
|
950 | 964 |
stop("Cannot distribute ", |
951 | 965 |
maxCells, |
952 | 966 |
" cells among ", |
953 |
- S4Vectors::metadata(sce)$celda_parameters$K, |
|
967 |
+ K, |
|
954 | 968 |
" clusters while maintaining a minumum of ", |
955 | 969 |
minClusterSize, |
956 | 970 |
" cells per cluster. Try increasing 'maxCells' or", |
... | ... |
@@ -960,7 +974,21 @@ setMethod("celda_CG", |
960 | 974 |
maxCells <- ncol(counts) |
961 | 975 |
} |
962 | 976 |
|
963 |
- fm <- factorizeMatrix(x = sce, useAssay = useAssay, type = "counts") |
|
977 |
+ fm <- .factorizeMatrixCG( |
|
978 |
+ counts = counts, |
|
979 |
+ K = K, |
|
980 |
+ z = z, |
|
981 |
+ y = y, |
|
982 |
+ L = L, |
|
983 |
+ alpha = alpha, |
|
984 |
+ beta = beta, |
|
985 |
+ delta = delta, |
|
986 |
+ gamma = gamma, |
|
987 |
+ sampleLabel = sampleLabel, |
|
988 |
+ cNames = cNames, |
|
989 |
+ rNames = rNames, |
|
990 |
+ sNames = sNames, |
|
991 |
+ type = "counts") |
|
964 | 992 |
modulesToUse <- seq(nrow(fm$counts$cell)) |
965 | 993 |
if (!is.null(modules)) { |
966 | 994 |
if (!all(modules %in% modulesToUse)) { |
... | ... |
@@ -976,8 +1004,7 @@ setMethod("celda_CG", |
976 | 1004 |
zInclude <- rep(TRUE, ncol(counts)) |
977 | 1005 |
|
978 | 1006 |
if (totalCellsToRemove > 0) { |
979 |
- zTa <- tabulate(SummarizedExperiment::colData(sce)$celda_cell_cluster, |
|
980 |
- S4Vectors::metadata(sce)$celda_parameters$K) |
|
1007 |
+ zTa <- tabulate(z, K) |
|
981 | 1008 |
|
982 | 1009 |
## Number of cells that can be sampled from each cluster without |
983 | 1010 |
## going below the minimum threshold |
... | ... |
@@ -995,11 +1022,7 @@ setMethod("celda_CG", |
995 | 1022 |
|
996 | 1023 |
## Perform sampling for each cluster |
997 | 1024 |
for (i in which(clusterNToSample > 0)) { |
998 |
- zInclude[sample( |
|
999 |
- which(SummarizedExperiment::colData(sce)$celda_cell_cluster == |
|
1000 |
- i), |
|
1001 |
- clusterNToSample[i] |
|
1002 |
- )] <- FALSE |
|
1025 |
+ zInclude[sample(which(z == i), clusterNToSample[i])] <- FALSE |
|
1003 | 1026 |
} |
1004 | 1027 |
} |
1005 | 1028 |
cellIx <- which(zInclude) |
... | ... |
@@ -68,7 +68,6 @@ |
68 | 68 |
#' @param logfile Character. Messages will be redirected to a file named |
69 | 69 |
#' `logfile`. If NULL, messages will be printed to stdout. Default NULL. |
70 | 70 |
#' @param verbose Logical. Whether to print log messages. Default TRUE. |
71 |
-#' @param ... Ignored. Placeholder to prevent check warning. |
|
72 | 71 |
#' @return A \linkS4class{SingleCellExperiment} object. Function |
73 | 72 |
#' parameter settings are stored in \link{metadata} |
74 | 73 |
#' \code{"celda_parameters"} in \link{altExp} slot. |
... | ... |
@@ -82,7 +81,31 @@ |
82 | 81 |
#' values of K/L and multiple chains in parallel. |
83 | 82 |
#' @import Rcpp RcppEigen |
84 | 83 |
#' @export |
85 |
-setGeneric("celda_CG", function(x, ...) { |
|
84 |
+setGeneric("celda_CG", |
|
85 |
+ function(x, |
|
86 |
+ useAssay = "counts", |
|
87 |
+ altExpName = "featureSubset", |
|
88 |
+ sampleLabel = NULL, |
|
89 |
+ K, |
|
90 |
+ L, |
|
91 |
+ alpha = 1, |
|
92 |
+ beta = 1, |
|
93 |
+ delta = 1, |
|
94 |
+ gamma = 1, |
|
95 |
+ algorithm = c("EM", "Gibbs"), |
|
96 |
+ stopIter = 10, |
|
97 |
+ maxIter = 200, |
|
98 |
+ splitOnIter = 10, |
|
99 |
+ splitOnLast = TRUE, |
|
100 |
+ seed = 12345, |
|
101 |
+ nchains = 3, |
|
102 |
+ zInitialize = c("split", "random", "predefined"), |
|
103 |
+ yInitialize = c("split", "random", "predefined"), |
|
104 |
+ countChecksum = NULL, |
|
105 |
+ zInit = NULL, |
|
106 |
+ yInit = NULL, |
|
107 |
+ logfile = NULL, |
|
108 |
+ verbose = TRUE) { |
|
86 | 109 |
standardGeneric("celda_CG")}) |
87 | 110 |
|
88 | 111 |
|
... | ... |
@@ -937,7 +937,7 @@ setMethod("celda_CG", |
937 | 937 |
maxCells <- ncol(counts) |
938 | 938 |
} |
939 | 939 |
|
940 |
- fm <- .factorizeMatrixCelda_CG(sce, useAssay, type = "counts") |
|
940 |
+ fm <- factorizeMatrix(x = sce, useAssay = useAssay, type = "counts") |
|
941 | 941 |
modulesToUse <- seq(nrow(fm$counts$cell)) |
942 | 942 |
if (!is.null(modules)) { |
943 | 943 |
if (!all(modules %in% modulesToUse)) { |
... | ... |
@@ -1041,11 +1041,11 @@ setMethod("celda_CG", |
1041 | 1041 |
SummarizedExperiment::colData(sce)["colnames"] <- |
1042 | 1042 |
celdaCGMod@names$column |
1043 | 1043 |
SummarizedExperiment::colData(sce)["celda_sample_label"] <- |
1044 |
- celdaCGMod@sampleLabel |
|
1044 |
+ as.factor(celdaCGMod@sampleLabel) |
|
1045 | 1045 |
SummarizedExperiment::colData(sce)["celda_cell_cluster"] <- |
1046 |
- celdaClusters(celdaCGMod)$z |
|
1046 |
+ as.factor(celdaClusters(celdaCGMod)$z) |
|
1047 | 1047 |
SummarizedExperiment::rowData(sce)["celda_feature_module"] <- |
1048 |
- celdaClusters(celdaCGMod)$y |
|
1048 |
+ as.factor(celdaClusters(celdaCGMod)$y) |
|
1049 | 1049 |
|
1050 | 1050 |
return(sce) |
1051 | 1051 |
} |
... | ... |
@@ -379,7 +379,7 @@ setMethod("celda_CG", |
379 | 379 |
|
380 | 380 |
startTime <- Sys.time() |
381 | 381 |
|
382 |
- #counts <- .processCounts(counts) |
|
382 |
+ counts <- .processCounts(counts) |
|
383 | 383 |
if (is.null(countChecksum)) { |
384 | 384 |
countChecksum <- .createCountChecksum(counts) |
385 | 385 |
} |
... | ... |
@@ -917,7 +917,7 @@ setMethod("celda_CG", |
917 | 917 |
transformationFun) { |
918 | 918 |
|
919 | 919 |
counts <- SummarizedExperiment::assay(sce, i = useAssay) |
920 |
- #counts <- .processCounts(counts) |
|
920 |
+ counts <- .processCounts(counts) |
|
921 | 921 |
|
922 | 922 |
## Checking if maxCells and minClusterSize will work |
923 | 923 |
if (!is.null(maxCells)) { |
... | ... |
@@ -871,6 +871,7 @@ setMethod("celda_CG", |
871 | 871 |
# @param y Numeric vector. Denotes feature module labels. |
872 | 872 |
# @param K Integer. Number of cell populations. |
873 | 873 |
# @param L Integer. Number of feature modules. |
874 |
+#' @importFrom Matrix colSums rowSums |
|
874 | 875 |
.cCGDecomposeCounts <- function(counts, s, z, y, K, L) { |
875 | 876 |
nS <- length(unique(s)) |
876 | 877 |
mCPByS <- matrix(as.integer(table(factor(z, levels = seq(K)), s)), |
... | ... |
@@ -880,14 +881,9 @@ setMethod("celda_CG", |
880 | 881 |
nTSByC <- .rowSumByGroup(counts, group = y, L = L) |
881 | 882 |
nGByCP <- .colSumByGroup(counts, group = z, K = K) |
882 | 883 |
nTSByCP <- .colSumByGroup(nTSByC, group = z, K = K) |
883 |
- |
|
884 |
- if (inherits(counts, "dgCMatrix")) { |
|
885 |
- nByC <- Matrix::colSums(counts) |
|
886 |
- nByG <- Matrix::rowSums(counts) |
|
887 |
- } else { |
|
888 |
- nByG <- .rowSums(counts, nrow(counts), ncol(counts)) |
|
889 |
- nByC <- .colSums(counts, nrow(counts), ncol(counts)) |
|
890 |
- } |
|
884 |
+ |
|
885 |
+ nByC <- colSums(counts) |
|
886 |
+ nByG <- rowSums(counts) |
|
891 | 887 |
nByTS <- .rowSumByGroup(matrix(nByG, ncol = 1), group = y, L = L) |
892 | 888 |
nCP <- .colSums(nTSByCP, nrow(nTSByCP), ncol(nTSByCP)) |
893 | 889 |
nGByTS <- tabulate(y, L) + 1 ## Add pseudogene to each module |
... | ... |
@@ -379,7 +379,7 @@ setMethod("celda_CG", |
379 | 379 |
|
380 | 380 |
startTime <- Sys.time() |
381 | 381 |
|
382 |
- counts <- .processCounts(counts) |
|
382 |
+ #counts <- .processCounts(counts) |
|
383 | 383 |
if (is.null(countChecksum)) { |
384 | 384 |
countChecksum <- .createCountChecksum(counts) |
385 | 385 |
} |
... | ... |
@@ -554,20 +554,7 @@ setMethod("celda_CG", |
554 | 554 |
mCPByS <- nextZ$mCPByS |
555 | 555 |
nTSByCP <- nextZ$nGByCP |
556 | 556 |
nCP <- nextZ$nCP |
557 |
- |
|
558 |
- # Also need to recalculate nGByCP based on new label |
|
559 |
- # This could be done by '.cCReDecomposeCounts', however that also |
|
560 |
- # calculates additional variables, so just the relevant code for nGByCP |
|
561 |
- # is here. |
|
562 |
- if (inherits(counts, "matrix") & is.integer(counts)) { |
|
563 |
- nGByCP <- .colSumByGroupChange(counts, nGByCP, z, previousZ, K) |
|
564 |
- } else if (inherits(counts, "matrix") & is.numeric(counts)) { |
|
565 |
- nGByCP <- .colSumByGroupChangeNumeric(counts, nGByCP, z, previousZ, K) |
|
566 |
- } else if (inherits(counts, "dgCMatrix")) { |
|
567 |
- nGByCP <- colSumByGroupChangeSparse(counts, nGByCP, group = z, |
|
568 |
- pgroup = previousZ) |
|
569 |
- } |
|
570 |
- |
|
557 |
+ nGByCP <- .colSumByGroupChange(counts, nGByCP, nextZ$z, z, K) |
|
571 | 558 |
z <- nextZ$z |
572 | 559 |
|
573 | 560 |
## Perform split on i-th iteration defined by splitOnIter |
... | ... |
@@ -642,20 +629,7 @@ setMethod("celda_CG", |
642 | 629 |
nTSByCP <- res$nTSByCP |
643 | 630 |
nByTS <- res$nByTS |
644 | 631 |
nGByTS <- res$nGByTS |
645 |
- |
|
646 |
- # Also need to recalculate nTSByC based on new label |
|
647 |
- # This could be done by '.cGReDecomposeCounts', however that also |
|
648 |
- # calculates additional variables, so just the relevant code for nTSByC |
|
649 |
- # is here. |
|
650 |
- if (inherits(counts, "matrix") & is.integer(counts)) { |
|
651 |
- nTSByC <- .rowSumByGroupChange(counts, nTSByC, y, previousY, L) |
|
652 |
- } else if (inherits(counts, "matrix") & is.numeric(counts)) { |
|
653 |
- nTSByC <- .rowSumByGroupChangeNumeric(counts, nTSByC, y, previousY, L) |
|
654 |
- } else if (inherits(counts, "dgCMatrix")) { |
|
655 |
- nTSByC <- rowSumByGroupChangeSparse(counts, nTSByC, y, previousY) |
|
656 |
- } else { |
|
657 |
- stop("'counts' must be an integer, numeric, or dgCMatrix matrix.") |
|
658 |
- } |
|
632 |
+ nTSByC <- .rowSumByGroup(counts, group = y, L = L) |
|
659 | 633 |
} |
660 | 634 |
|
661 | 635 |
if (K > 2 & iter != maxIter & |
... | ... |
@@ -902,30 +876,19 @@ setMethod("celda_CG", |
902 | 876 |
mCPByS <- matrix(as.integer(table(factor(z, levels = seq(K)), s)), |
903 | 877 |
ncol = nS |
904 | 878 |
) |
905 |
- if (inherits(counts, "matrix") & is.integer(counts)) { |
|
906 |
- nTSByC <- .rowSumByGroup(counts, group = y, L = L) |
|
907 |
- nGByCP <- .colSumByGroup(counts, group = z, K = K) |
|
908 |
- nTSByCP <- .colSumByGroup(nTSByC, group = z, K = K) |
|
909 |
- nByG <- .rowSums(counts, nrow(counts), ncol(counts)) |
|
910 |
- nByC <- .colSums(counts, nrow(counts), ncol(counts)) |
|
911 |
- nByTS <- .rowSumByGroup(matrix(nByG, ncol = 1), group = y, L = L) |
|
912 |
- } else if (inherits(counts, "matrix") & is.numeric(counts)) { |
|
913 |
- nTSByC <- .rowSumByGroupNumeric(counts, group = y, L = L) |
|
914 |
- nGByCP <- .colSumByGroupNumeric(counts, group = z, K = K) |
|
915 |
- nTSByCP <- .colSumByGroupNumeric(nTSByC, group = z, K = K) |
|
916 |
- nByG <- .rowSums(counts, nrow(counts), ncol(counts)) |
|
917 |
- nByC <- .colSums(counts, nrow(counts), ncol(counts)) |
|
918 |
- nByTS <- .rowSumByGroupNumeric(matrix(nByG, ncol = 1), group = y, L = L) |
|
919 |
- } else if (inherits(counts, "dgCMatrix")) { |
|
920 |
- nTSByC <- rowSumByGroupSparse(counts, group = y) |
|
921 |
- nGByCP <- .colSumByGroupSparse(counts, group = z) |
|
922 |
- nByTS <- .rowSumByGroupNumeric(matrix(nByG, ncol = 1), group = y, L = L) |
|
923 |
- nTSByCP <- colSumByGroupSparse(nTSByC, group = z) |
|
879 |
+ |
|
880 |
+ nTSByC <- .rowSumByGroup(counts, group = y, L = L) |
|
881 |
+ nGByCP <- .colSumByGroup(counts, group = z, K = K) |
|
882 |
+ nTSByCP <- .colSumByGroup(nTSByC, group = z, K = K) |
|
883 |
+ |
|
884 |
+ if (inherits(counts, "dgCMatrix")) { |
|
924 | 885 |
nByC <- Matrix::colSums(counts) |
925 | 886 |
nByG <- Matrix::rowSums(counts) |
926 | 887 |
} else { |
927 |
- stop("'counts' must be an integer, numeric, or dgCMatrix matrix.") |
|
888 |
+ nByG <- .rowSums(counts, nrow(counts), ncol(counts)) |
|
889 |
+ nByC <- .colSums(counts, nrow(counts), ncol(counts)) |
|
928 | 890 |
} |
891 |
+ nByTS <- .rowSumByGroup(matrix(nByG, ncol = 1), group = y, L = L) |
|
929 | 892 |
nCP <- .colSums(nTSByCP, nrow(nTSByCP), ncol(nTSByCP)) |
930 | 893 |
nGByTS <- tabulate(y, L) + 1 ## Add pseudogene to each module |
931 | 894 |
nG <- nrow(counts) |
... | ... |
@@ -554,7 +554,20 @@ setMethod("celda_CG", |
554 | 554 |
mCPByS <- nextZ$mCPByS |
555 | 555 |
nTSByCP <- nextZ$nGByCP |
556 | 556 |
nCP <- nextZ$nCP |
557 |
- nGByCP <- .colSumByGroupChange(counts, nGByCP, nextZ$z, z, K) |
|
557 |
+ |
|
558 |
+ # Also need to recalculate nGByCP based on new label |
|
559 |
+ # This could be done by '.cCReDecomposeCounts', however that also |
|
560 |
+ # calculates additional variables, so just the relevant code for nGByCP |
|
561 |
+ # is here. |
|
562 |
+ if (inherits(counts, "matrix") & is.integer(counts)) { |
|
563 |
+ nGByCP <- .colSumByGroupChange(counts, nGByCP, z, previousZ, K) |
|
564 |
+ } else if (inherits(counts, "matrix") & is.numeric(counts)) { |
|
565 |
+ nGByCP <- .colSumByGroupChangeNumeric(counts, nGByCP, z, previousZ, K) |
|
566 |
+ } else if (inherits(counts, "dgCMatrix")) { |
|
567 |
+ nGByCP <- colSumByGroupChangeSparse(counts, nGByCP, group = z, |
|
568 |
+ pgroup = previousZ) |
|
569 |
+ } |
|
570 |
+ |
|
558 | 571 |
z <- nextZ$z |
559 | 572 |
|
560 | 573 |
## Perform split on i-th iteration defined by splitOnIter |
... | ... |
@@ -629,7 +642,20 @@ setMethod("celda_CG", |
629 | 642 |
nTSByCP <- res$nTSByCP |
630 | 643 |
nByTS <- res$nByTS |
631 | 644 |
nGByTS <- res$nGByTS |
632 |
- nTSByC <- .rowSumByGroup(counts, group = y, L = L) |
|
645 |
+ |
|
646 |
+ # Also need to recalculate nTSByC based on new label |
|
647 |
+ # This could be done by '.cGReDecomposeCounts', however that also |
|
648 |
+ # calculates additional variables, so just the relevant code for nTSByC |
|
649 |
+ # is here. |
|
650 |
+ if (inherits(counts, "matrix") & is.integer(counts)) { |
|
651 |
+ nTSByC <- .rowSumByGroupChange(counts, nTSByC, y, previousY, L) |
|
652 |
+ } else if (inherits(counts, "matrix") & is.numeric(counts)) { |
|
653 |
+ nTSByC <- .rowSumByGroupChangeNumeric(counts, nTSByC, y, previousY, L) |
|
654 |
+ } else if (inherits(counts, "dgCMatrix")) { |
|
655 |
+ nTSByC <- rowSumByGroupChangeSparse(counts, nTSByC, y, previousY) |
|
656 |
+ } else { |
|
657 |
+ stop("'counts' must be an integer, numeric, or dgCMatrix matrix.") |
|
658 |
+ } |
|
633 | 659 |
} |
634 | 660 |
|
635 | 661 |
if (K > 2 & iter != maxIter & |
... | ... |
@@ -876,17 +902,32 @@ setMethod("celda_CG", |
876 | 902 |
mCPByS <- matrix(as.integer(table(factor(z, levels = seq(K)), s)), |
877 | 903 |
ncol = nS |
878 | 904 |
) |
879 |
- nTSByC <- .rowSumByGroup(counts, group = y, L = L) |
|
880 |
- nTSByCP <- .colSumByGroup(nTSByC, group = z, K = K) |
|
881 |
- nCP <- as.integer(colSums(nTSByCP)) |
|
882 |
- nByG <- as.integer(rowSums(counts)) |
|
883 |
- nByC <- as.integer(colSums(counts)) |
|
884 |
- nByTS <- as.integer(.rowSumByGroup(matrix(nByG, ncol = 1), |
|
885 |
- group = y, L = L |
|
886 |
- )) |
|
905 |
+ if (inherits(counts, "matrix") & is.integer(counts)) { |
|
906 |
+ nTSByC <- .rowSumByGroup(counts, group = y, L = L) |
|
907 |
+ nGByCP <- .colSumByGroup(counts, group = z, K = K) |
|
908 |
+ nTSByCP <- .colSumByGroup(nTSByC, group = z, K = K) |
|
909 |
+ nByG <- .rowSums(counts, nrow(counts), ncol(counts)) |
|
910 |
+ nByC <- .colSums(counts, nrow(counts), ncol(counts)) |
|
911 |
+ nByTS <- .rowSumByGroup(matrix(nByG, ncol = 1), group = y, L = L) |
|
912 |
+ } else if (inherits(counts, "matrix") & is.numeric(counts)) { |
|
913 |
+ nTSByC <- .rowSumByGroupNumeric(counts, group = y, L = L) |
|
914 |
+ nGByCP <- .colSumByGroupNumeric(counts, group = z, K = K) |
|
915 |
+ nTSByCP <- .colSumByGroupNumeric(nTSByC, group = z, K = K) |
|
916 |
+ nByG <- .rowSums(counts, nrow(counts), ncol(counts)) |
|
917 |
+ nByC <- .colSums(counts, nrow(counts), ncol(counts)) |
|
918 |
+ nByTS <- .rowSumByGroupNumeric(matrix(nByG, ncol = 1), group = y, L = L) |
|
919 |
+ } else if (inherits(counts, "dgCMatrix")) { |
|
920 |
+ nTSByC <- rowSumByGroupSparse(counts, group = y) |
|
921 |
+ nGByCP <- .colSumByGroupSparse(counts, group = z) |
|
922 |
+ nByTS <- .rowSumByGroupNumeric(matrix(nByG, ncol = 1), group = y, L = L) |
|
923 |
+ nTSByCP <- colSumByGroupSparse(nTSByC, group = z) |
|
924 |
+ nByC <- Matrix::colSums(counts) |
|
925 |
+ nByG <- Matrix::rowSums(counts) |
|
926 |
+ } else { |
|
927 |
+ stop("'counts' must be an integer, numeric, or dgCMatrix matrix.") |
|
928 |
+ } |
|
929 |
+ nCP <- .colSums(nTSByCP, nrow(nTSByCP), ncol(nTSByCP)) |
|
887 | 930 |
nGByTS <- tabulate(y, L) + 1 ## Add pseudogene to each module |
888 |
- nGByCP <- .colSumByGroup(counts, group = z, K = K) |
|
889 |
- |
|
890 | 931 |
nG <- nrow(counts) |
891 | 932 |
nM <- ncol(counts) |
892 | 933 |
|
... | ... |
@@ -917,7 +958,7 @@ setMethod("celda_CG", |
917 | 958 |
transformationFun) { |
918 | 959 |
|
919 | 960 |
counts <- SummarizedExperiment::assay(sce, i = useAssay) |
920 |
- counts <- .processCounts(counts) |
|
961 |
+ #counts <- .processCounts(counts) |
|
921 | 962 |
|
922 | 963 |
## Checking if maxCells and minClusterSize will work |
923 | 964 |
if (!is.null(maxCells)) { |
... | ... |
@@ -576,7 +576,7 @@ setMethod("celda_CG", |
576 | 576 |
|
577 | 577 |
if (L > 2 & iter != maxIter & |
578 | 578 |
(((numIterWithoutImprovement == stopIter & |
579 |
- !all(tempLl > ll)) & isTRUE(splitOnLast)) | |
|
579 |
+ !all(tempLl >= ll)) & isTRUE(splitOnLast)) | |
|
580 | 580 |
(splitOnIter > 0 & iter %% splitOnIter == 0 & |
581 | 581 |
isTRUE(doGeneSplit)))) { |
582 | 582 |
.logMessages(date(), |
... | ... |
@@ -62,7 +62,7 @@ |
62 | 62 |
#' @param zInit Integer vector. Sets initial starting values of z. 'zInit' |
63 | 63 |
#' is only used when `zInitialize = 'predfined'`. Default NULL. |
64 | 64 |
#' @param yInit Integer vector. Sets initial starting values of y. |
65 |
-#' 'yInit' is only be used when \code{yInitialize = "random"}. Default NULL. |
|
65 |
+#' 'yInit' is only be used when `yInitialize = "predefined"`. Default NULL. |
|
66 | 66 |
#' @param countChecksum Character. An MD5 checksum for the counts matrix. |
67 | 67 |
#' Default NULL. |
68 | 68 |
#' @param logfile Character. Messages will be redirected to a file named |
... | ... |
@@ -51,20 +51,18 @@ |
51 | 51 |
#' @param nchains Integer. Number of random cluster initializations. Default 3. |
52 | 52 |
#' @param zInitialize Chararacter. One of 'random', 'split', or 'predefined'. |
53 | 53 |
#' With 'random', cells are randomly assigned to a populations. With 'split', |
54 |
-#' cells will be split into sqrt(K) populations and then each popluation will |
|
54 |
+#' cells will be split into sqrt(K) populations and then each population will |
|
55 | 55 |
#' be subsequently split into another sqrt(K) populations. With 'predefined', |
56 | 56 |
#' values in \code{zInit} will be used to initialize \code{z}. Default 'split'. |
57 |
-#' @param yInitialize Chararacter. One of 'random', 'split', or 'predefined'. |
|
57 |
+#' @param yInitialize Character. One of 'random', 'split', or 'predefined'. |
|
58 | 58 |
#' With 'random', features are randomly assigned to a modules. With 'split', |
59 | 59 |
#' features will be split into sqrt(L) modules and then each module will be |
60 | 60 |
#' subsequently split into another sqrt(L) modules. With 'predefined', values |
61 | 61 |
#' in \code{yInit} will be used to initialize \code{y}. Default 'split'. |
62 |
-#' @param zInit Integer vector. Sets initial starting values of z. If NULL, |
|
63 |
-#' starting values for each cell will be randomly sampled from 1:K. 'zInit' |
|
64 |
-#' can only be used when \code{initialize = "random"}. Default NULL. |
|
65 |
-#' @param yInit Integer vector. Sets initial starting values of y. If NULL, |
|
66 |
-#' starting values for each feature will be randomly sampled from 1:L. |
|
67 |
-#' 'yInit' can only be used when \code{initialize = "random"}. Default NULL. |
|
62 |
+#' @param zInit Integer vector. Sets initial starting values of z. 'zInit' |
|
63 |
+#' is only used when `zInitialize = 'predfined'`. Default NULL. |
|
64 |
+#' @param yInit Integer vector. Sets initial starting values of y. |
|
65 |
+#' 'yInit' is only be used when \code{yInitialize = "random"}. Default NULL. |
|
68 | 66 |
#' @param countChecksum Character. An MD5 checksum for the counts matrix. |
69 | 67 |
#' Default NULL. |
70 | 68 |
#' @param logfile Character. Messages will be redirected to a file named |
... | ... |
@@ -909,33 +909,6 @@ setMethod("celda_CG", |
909 | 909 |
} |
910 | 910 |
|
911 | 911 |
|
912 |
-.reorderCeldaCG <- function(counts, res) { |
|
913 |
- # Reorder K |
|
914 |
- if (params(res)$K > 2 & isTRUE(length(unique(celdaClusters(res)$z)) > 1)) { |
|
915 |
- res@clusters$z <- as.integer(as.factor(celdaClusters(res)$z)) |
|
916 |
- fm <- factorizeMatrix(counts, res, type = "posterior") |
|
917 |
- uniqueZ <- sort(unique(celdaClusters(res)$z)) |
|
918 |
- d <- .cosineDist(fm$posterior$cellPopulation[, uniqueZ]) |
|
919 |
- h <- stats::hclust(d, method = "complete") |
|
920 |
- |
|
921 |
- res <- .recodeClusterZ(res, from = h$order, to = seq(length(h$order))) |
|
922 |
- } |
|
923 |
- |
|
924 |
- # Reorder L |
|
925 |
- if (params(res)$L > 2 & isTRUE(length(unique(celdaClusters(res)$y)) > 1)) { |
|
926 |
- res@clusters$y <- as.integer(as.factor(celdaClusters(res)$y)) |
|
927 |
- fm <- factorizeMatrix(counts, res, type = "posterior") |
|
928 |
- uniqueY <- sort(unique(celdaClusters(res)$y)) |
|
929 |
- cs <- prop.table(t(fm$posterior$cellPopulation[uniqueY, ]), 2) |
|
930 |
- d <- .cosineDist(cs) |
|
931 |
- h <- stats::hclust(d, method = "complete") |
|
932 |
- |
|
933 |
- res <- .recodeClusterY(res, from = h$order, to = seq(length(h$order))) |
|
934 |
- } |
|
935 |
- return(res) |
|
936 |
-} |
|
937 |
- |
|
938 |
- |
|
939 | 912 |
.prepareCountsForDimReductionCeldaCG <- function(sce, |
940 | 913 |
useAssay, |
941 | 914 |
maxCells, |
... | ... |
@@ -1,19 +1,19 @@ |
1 | 1 |
#' @title Cell and feature clustering with Celda |
2 | 2 |
#' @description Clusters the rows and columns of a count matrix containing |
3 | 3 |
#' single-cell data into L modules and K subpopulations, respectively. The |
4 |
-#' \code{useAssay} \link[SummarizedExperiment]{assay} slot in |
|
5 |
-#' \code{altExpName} \link[SingleCellExperiment]{altExp} slot will be used if |
|
4 |
+#' \code{useAssay} \link{assay} slot in |
|
5 |
+#' \code{altExpName} \link{altExp} slot will be used if |
|
6 | 6 |
#' it exists. Otherwise, the \code{useAssay} |
7 |
-#' \link[SummarizedExperiment]{assay} slot in \code{x} will be used if |
|
7 |
+#' \link{assay} slot in \code{x} will be used if |
|
8 | 8 |
#' \code{x} is a \linkS4class{SingleCellExperiment} object. |
9 | 9 |
#' @param x A numeric \link{matrix} of counts or a |
10 | 10 |
#' \linkS4class{SingleCellExperiment} |
11 |
-#' with the matrix located in the \link[SummarizedExperiment]{assay} |
|
11 |
+#' with the matrix located in the \link{assay} |
|
12 | 12 |
#' slot under \code{useAssay} in \code{altExp(x, altExpName)}. |
13 | 13 |
#' Rows represent features and columns represent cells. |
14 | 14 |
#' @param useAssay A string specifying the name of the |
15 |
-#' \link[SummarizedExperiment]{assay} slot to use. Default "counts". |
|
16 |
-#' @param altExpName The name for the \link[SingleCellExperiment]{altExp} slot |
|
15 |
+#' \link{assay} slot to use. Default "counts". |
|
16 |
+#' @param altExpName The name for the \link{altExp} slot |
|
17 | 17 |
#' to use. Default "featureSubset". |
18 | 18 |
#' @param sampleLabel Vector or factor. Denotes the sample label for each cell |
19 | 19 |
#' (column) in the count matrix. |
... | ... |
@@ -35,11 +35,12 @@ |
35 | 35 |
#' likelihood to stop inference. Default 10. |
36 | 36 |
#' @param maxIter Integer. Maximum number of iterations of Gibbs sampling to |
37 | 37 |
#' perform. Default 200. |
38 |
-#' @param splitOnIter Integer. On every `splitOnIter` iteration, a heuristic |
|
38 |
+#' @param splitOnIter Integer. On every \code{splitOnIter} iteration, |
|
39 |
+#' a heuristic |
|
39 | 40 |
#' will be applied to determine if a cell population or feature module should |
40 | 41 |
#' be reassigned and another cell population or feature module should be split |
41 | 42 |
#' into two clusters. To disable splitting, set to -1. Default 10. |
42 |
-#' @param splitOnLast Integer. After `stopIter` iterations have been |
|
43 |
+#' @param splitOnLast Integer. After \code{stopIter} iterations have been |
|
43 | 44 |
#' performed without improvement, a heuristic will be applied to determine if |
44 | 45 |
#' a cell population or feature module should be reassigned and another cell |
45 | 46 |
#' population or feature module should be split into two clusters. If a split |
... | ... |
@@ -52,32 +53,32 @@ |
52 | 53 |
#' With 'random', cells are randomly assigned to a populations. With 'split', |
53 | 54 |
#' cells will be split into sqrt(K) populations and then each popluation will |
54 | 55 |
#' be subsequently split into another sqrt(K) populations. With 'predefined', |
55 |
-#' values in `zInit` will be used to initialize `z`. Default 'split'. |
|
56 |
+#' values in \code{zInit} will be used to initialize \code{z}. Default 'split'. |
|
56 | 57 |
#' @param yInitialize Chararacter. One of 'random', 'split', or 'predefined'. |
57 | 58 |
#' With 'random', features are randomly assigned to a modules. With 'split', |
58 | 59 |
#' features will be split into sqrt(L) modules and then each module will be |
59 | 60 |
#' subsequently split into another sqrt(L) modules. With 'predefined', values |
60 |
-#' in `yInit` will be used to initialize `y`. Default 'split'. |
|
61 |
+#' in \code{yInit} will be used to initialize \code{y}. Default 'split'. |
|
61 | 62 |
#' @param zInit Integer vector. Sets initial starting values of z. If NULL, |
62 | 63 |
#' starting values for each cell will be randomly sampled from 1:K. 'zInit' |
63 |
-#' can only be used when `initialize' = 'random'`. Default NULL. |
|
64 |
+#' can only be used when \code{initialize = "random"}. Default NULL. |
|
64 | 65 |
#' @param yInit Integer vector. Sets initial starting values of y. If NULL, |
65 | 66 |
#' starting values for each feature will be randomly sampled from 1:L. |
66 |
-#' 'yInit' can only be used when `initialize = 'random'`. Default NULL. |
|
67 |
-#' @param countChecksum Character. An MD5 checksum for the `counts` matrix. |
|
67 |
+#' 'yInit' can only be used when \code{initialize = "random"}. Default NULL. |
|
68 |
+#' @param countChecksum Character. An MD5 checksum for the counts matrix. |
|
68 | 69 |
#' Default NULL. |
69 | 70 |
#' @param logfile Character. Messages will be redirected to a file named |
70 | 71 |
#' `logfile`. If NULL, messages will be printed to stdout. Default NULL. |
71 | 72 |
#' @param verbose Logical. Whether to print log messages. Default TRUE. |
72 | 73 |
#' @param ... Ignored. Placeholder to prevent check warning. |
73 | 74 |
#' @return A \linkS4class{SingleCellExperiment} object. Function |
74 |
-#' parameter settings are stored in \link[S4Vectors]{metadata} |
|
75 |
-#' \code{"celda_parameters"} in \link[SingleCellExperiment]{altExp} slot. |
|
76 |
-#' In \link[SingleCellExperiment]{altExp} slot, |
|
75 |
+#' parameter settings are stored in \link{metadata} |
|
76 |
+#' \code{"celda_parameters"} in \link{altExp} slot. |
|
77 |
+#' In \link{altExp} slot, |
|
77 | 78 |
#' columns \code{celda_sample_label} and \code{celda_cell_cluster} in |
78 |
-#' \link[SummarizedExperiment]{colData} contain sample labels and celda cell |
|
79 |
+#' \link{colData} contain sample labels and celda cell |
|
79 | 80 |
#' population clusters. Column \code{celda_feature_module} in |
80 |
-#' \link[SummarizedExperiment]{rowData} contains feature modules. |
|
81 |
+#' \link{rowData} contains feature modules. |
|
81 | 82 |
#' @seealso \link{celda_G} for feature clustering and \link{celda_C} for |
82 | 83 |
#' clustering cells. \link{celdaGridSearch} can be used to run multiple |
83 | 84 |
#' values of K/L and multiple chains in parallel. |
... | ... |
@@ -69,6 +69,7 @@ |
69 | 69 |
#' @param logfile Character. Messages will be redirected to a file named |
70 | 70 |
#' `logfile`. If NULL, messages will be printed to stdout. Default NULL. |
71 | 71 |
#' @param verbose Logical. Whether to print log messages. Default TRUE. |
72 |
+#' @param ... Ignored. Placeholder to prevent check warning. |
|
72 | 73 |
#' @return A \linkS4class{SingleCellExperiment} object. Function |
73 | 74 |
#' parameter settings are stored in \link[S4Vectors]{metadata} |
74 | 75 |
#' \code{"celda_parameters"} in \link[SingleCellExperiment]{altExp} slot. |
... | ... |
@@ -81,7 +81,6 @@ |
81 | 81 |
#' clustering cells. \link{celdaGridSearch} can be used to run multiple |
82 | 82 |
#' values of K/L and multiple chains in parallel. |
83 | 83 |
#' @import Rcpp RcppEigen |
84 |
-#' @rawNamespace import(gridExtra, except = c(combine)) |
|
85 | 84 |
#' @export |
86 | 85 |
setGeneric("celda_CG", function(x, ...) { |
87 | 86 |
standardGeneric("celda_CG")}) |
... | ... |
@@ -1,13 +1,20 @@ |
1 | 1 |
#' @title Cell and feature clustering with Celda |
2 | 2 |
#' @description Clusters the rows and columns of a count matrix containing |
3 |
-#' single-cell data into L modules and K subpopulations, respectively. |
|
3 |
+#' single-cell data into L modules and K subpopulations, respectively. The |
|
4 |
+#' \code{useAssay} \link[SummarizedExperiment]{assay} slot in |
|
5 |
+#' \code{altExpName} \link[SingleCellExperiment]{altExp} slot will be used if |
|
6 |
+#' it exists. Otherwise, the \code{useAssay} |
|
7 |
+#' \link[SummarizedExperiment]{assay} slot in \code{x} will be used if |
|
8 |
+#' \code{x} is a \linkS4class{SingleCellExperiment} object. |
|
4 | 9 |
#' @param x A numeric \link{matrix} of counts or a |
5 | 10 |
#' \linkS4class{SingleCellExperiment} |
6 |
-#' with the matrix located in the assay slot under \code{useAssay}. |
|
11 |
+#' with the matrix located in the \link[SummarizedExperiment]{assay} |
|
12 |
+#' slot under \code{useAssay} in \code{altExp(x, altExpName)}. |
|
7 | 13 |
#' Rows represent features and columns represent cells. |
8 |
-#' @param useAssay A string specifying which \link[SummarizedExperiment]{assay} |
|
9 |
-#' slot to use if \code{x} is a |
|
10 |
-#' \linkS4class{SingleCellExperiment} object. Default "counts". |
|
14 |
+#' @param useAssay A string specifying the name of the |
|
15 |
+#' \link[SummarizedExperiment]{assay} slot to use. Default "counts". |
|
16 |
+#' @param altExpName The name for the \link[SingleCellExperiment]{altExp} slot |
|
17 |
+#' to use. Default "featureSubset". |
|
11 | 18 |
#' @param sampleLabel Vector or factor. Denotes the sample label for each cell |
12 | 19 |
#' (column) in the count matrix. |
13 | 20 |
#' @param K Integer. Number of cell populations. |
... | ... |
@@ -62,23 +69,17 @@ |
62 | 69 |
#' @param logfile Character. Messages will be redirected to a file named |
63 | 70 |
#' `logfile`. If NULL, messages will be printed to stdout. Default NULL. |
64 | 71 |
#' @param verbose Logical. Whether to print log messages. Default TRUE. |
65 |
-#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object. Function |
|
66 |
-#' parameter settings are stored in the \link[S4Vectors]{metadata} |
|
67 |
-#' \code{"celda_parameters"} slot. |
|
68 |
-#' Columns \code{celda_sample_label} and \code{celda_cell_cluster} in |
|
72 |
+#' @return A \linkS4class{SingleCellExperiment} object. Function |
|
73 |
+#' parameter settings are stored in \link[S4Vectors]{metadata} |
|
74 |
+#' \code{"celda_parameters"} in \link[SingleCellExperiment]{altExp} slot. |
|
75 |
+#' In \link[SingleCellExperiment]{altExp} slot, |
|
76 |
+#' columns \code{celda_sample_label} and \code{celda_cell_cluster} in |
|
69 | 77 |
#' \link[SummarizedExperiment]{colData} contain sample labels and celda cell |
70 | 78 |
#' population clusters. Column \code{celda_feature_module} in |
71 | 79 |
#' \link[SummarizedExperiment]{rowData} contains feature modules. |
72 | 80 |
#' @seealso \link{celda_G} for feature clustering and \link{celda_C} for |
73 | 81 |
#' clustering cells. \link{celdaGridSearch} can be used to run multiple |
74 | 82 |
#' values of K/L and multiple chains in parallel. |
75 |
-#' @examples |
|
76 |
-#' data(celdaCGSim) |
|
77 |
-#' sce <- celda_CG(celdaCGSim$counts, |
|
78 |
-#' K = celdaCGSim$K, |
|
79 |
-#' L = celdaCGSim$L, |
|
80 |
-#' sampleLabel = celdaCGSim$sampleLabel, |
|
81 |
-#' nchains = 1) |
|
82 | 83 |
#' @import Rcpp RcppEigen |
83 | 84 |
#' @rawNamespace import(gridExtra, except = c(combine)) |
84 | 85 |
#' @export |
... | ... |
@@ -92,6 +93,7 @@ setMethod("celda_CG", |
92 | 93 |
signature(x = "SingleCellExperiment"), |
93 | 94 |
function(x, |
94 | 95 |
useAssay = "counts", |
96 |
+ altExpName = "featureSubset", |
|
95 | 97 |
sampleLabel = NULL, |
96 | 98 |
K, |
97 | 99 |
L, |
... | ... |
@@ -115,12 +117,24 @@ setMethod("celda_CG", |
115 | 117 |
verbose = TRUE) { |
116 | 118 |
|
117 | 119 |
xClass <- "SingleCellExperiment" |
118 |
- counts <- SummarizedExperiment::assay(x, i = useAssay) |
|
119 | 120 |
|
120 |
- sce <- .celdaCGWithSeed(counts = counts, |
|
121 |
+ if (!altExpName %in% SingleCellExperiment::altExpNames(x)) { |
|
122 |
+ stop(altExpName, " not in 'altExpNames(x)'. Run ", |
|
123 |
+ "selectFeatures(x) first!") |
|
124 |
+ } |
|
125 |
+ |
|
126 |
+ altExp <- SingleCellExperiment::altExp(x, altExpName) |
|
127 |
+ |
|
128 |
+ if (!useAssay %in% SummarizedExperiment::assayNames(altExp)) { |
|
129 |
+ stop(useAssay, " not in assayNames(altExp(x, altExpName))") |
|
130 |
+ } |
|
131 |
+ |
|
132 |
+ counts <- SummarizedExperiment::assay(altExp, i = useAssay) |
|
133 |
+ |
|
134 |
+ altExp <- .celdaCGWithSeed(counts = counts, |
|
121 | 135 |
xClass = xClass, |
122 | 136 |
useAssay = useAssay, |
123 |
- sce = x, |
|
137 |
+ sce = altExp, |
|
124 | 138 |
sampleLabel = sampleLabel, |
125 | 139 |
K = K, |
126 | 140 |
L = L, |
... | ... |
@@ -142,16 +156,26 @@ setMethod("celda_CG", |
142 | 156 |
yInit = yInit, |
143 | 157 |
logfile = logfile, |
144 | 158 |
verbose = verbose) |
145 |
- return(sce) |
|
159 |
+ SingleCellExperiment::altExp(x, altExpName) <- altExp |
|
160 |
+ return(x) |
|
146 | 161 |
} |
147 | 162 |
) |
148 | 163 |
|
149 | 164 |
|
150 | 165 |
#' @rdname celda_CG |
166 |
+#' @examples |
|
167 |
+#' data(celdaCGSim) |
|
168 |
+#' sce <- celda_CG(celdaCGSim$counts, |
|
169 |
+#' K = celdaCGSim$K, |
|
170 |
+#' L = celdaCGSim$L, |
|
171 |
+#' sampleLabel = celdaCGSim$sampleLabel, |
|
172 |
+#' nchains = 1) |
|
151 | 173 |
#' @export |
152 | 174 |
setMethod("celda_CG", |
153 | 175 |
signature(x = "matrix"), |
154 | 176 |
function(x, |
177 |
+ useAssay = "counts", |
|
178 |
+ altExpName = "featureSubset", |
|
155 | 179 |
sampleLabel = NULL, |
156 | 180 |
K, |
157 | 181 |
L, |
... | ... |
@@ -174,14 +198,16 @@ setMethod("celda_CG", |
174 | 198 |
logfile = NULL, |
175 | 199 |
verbose = TRUE) { |
176 | 200 |
|
201 |
+ ls <- list() |
|
202 |
+ ls[[useAssay]] <- x |
|
203 |
+ sce <- SingleCellExperiment::SingleCellExperiment(assays = ls) |
|
204 |
+ SingleCellExperiment::altExp(sce, altExpName) <- sce |
|
177 | 205 |
xClass <- "matrix" |
178 |
- useAssay <- NULL |
|
179 |
- sce <- SingleCellExperiment::SingleCellExperiment( |
|
180 |
- assays = list(counts = x)) |
|
181 |
- sce <- .celdaCGWithSeed(counts = x, |
|
206 |
+ |
|
207 |
+ altExp <- .celdaCGWithSeed(counts = x, |
|
182 | 208 |
xClass = xClass, |
183 | 209 |
useAssay = useAssay, |
184 |
- sce = sce, |
|
210 |
+ sce = SingleCellExperiment::altExp(sce, altExpName), |
|
185 | 211 |
sampleLabel = sampleLabel, |
186 | 212 |
K = K, |
187 | 213 |
L = L, |
... | ... |
@@ -203,6 +229,7 @@ setMethod("celda_CG", |
203 | 229 |
yInit = yInit, |
204 | 230 |
logfile = logfile, |
205 | 231 |
verbose = verbose) |
232 |
+ SingleCellExperiment::altExp(sce, altExpName) <- altExp |
|
206 | 233 |
return(sce) |
207 | 234 |
} |
208 | 235 |
) |
... | ... |
@@ -938,9 +965,7 @@ setMethod("celda_CG", |
938 | 965 |
maxCells <- ncol(counts) |
939 | 966 |
} |
940 | 967 |
|
941 |
- fm <- factorizeMatrix(sce, |
|
942 |
- useAssay, |
|
943 |
- type = "counts") |
|
968 |
+ fm <- .factorizeMatrixCelda_CG(sce, useAssay, type = "counts") |
|
944 | 969 |
modulesToUse <- seq(nrow(fm$counts$cell)) |
945 | 970 |
if (!is.null(modules)) { |
946 | 971 |
if (!all(modules %in% modulesToUse)) { |
... | ... |
@@ -956,7 +981,7 @@ setMethod("celda_CG", |
956 | 981 |
zInclude <- rep(TRUE, ncol(counts)) |
957 | 982 |
|
958 | 983 |
if (totalCellsToRemove > 0) { |
959 |
- zTa <- tabulate(celdaClusters(sce), |
|
984 |
+ zTa <- tabulate(SummarizedExperiment::colData(sce)$celda_cell_cluster, |
|
960 | 985 |
S4Vectors::metadata(sce)$celda_parameters$K) |
961 | 986 |
|
962 | 987 |
## Number of cells that can be sampled from each cluster without |
... | ... |
@@ -976,7 +1001,8 @@ setMethod("celda_CG", |
976 | 1001 |
## Perform sampling for each cluster |
977 | 1002 |
for (i in which(clusterNToSample > 0)) { |
978 | 1003 |
zInclude[sample( |
979 |
- which(celdaClusters(sce) == i), |
|
1004 |
+ which(SummarizedExperiment::colData(sce)$celda_cell_cluster == |
|
1005 |
+ i), |
|
980 | 1006 |
clusterNToSample[i] |
981 | 1007 |
)] <- FALSE |
982 | 1008 |
} |
... | ... |
@@ -912,7 +912,10 @@ setMethod("celda_CG", |
912 | 912 |
useAssay, |
913 | 913 |
maxCells, |
914 | 914 |
minClusterSize, |
915 |
- modules) { |
|
915 |
+ modules, |
|
916 |
+ normalize, |
|
917 |
+ scaleFactor, |
|
918 |
+ transformationFun) { |
|
916 | 919 |
|
917 | 920 |
counts <- SummarizedExperiment::assay(sce, i = useAssay) |
918 | 921 |
counts <- .processCounts(counts) |
... | ... |
@@ -981,9 +984,9 @@ setMethod("celda_CG", |
981 | 984 |
cellIx <- which(zInclude) |
982 | 985 |
|
983 | 986 |
norm <- t(normalizeCounts(fm$counts$cell[modulesToUse, cellIx], |
984 |
- normalize = "proportion", |
|
985 |
- transformationFun = sqrt |
|
986 |
- )) |
|
987 |
+ normalize = normalize, |
|
988 |
+ scaleFactor = scaleFactor, |
|
989 |
+ transformationFun = transformationFun)) |
|
987 | 990 |
return(list(norm = norm, cellIx = cellIx)) |
988 | 991 |
} |
989 | 992 |
|
... | ... |
@@ -82,7 +82,8 @@ |
82 | 82 |
#' @import Rcpp RcppEigen |
83 | 83 |
#' @rawNamespace import(gridExtra, except = c(combine)) |
84 | 84 |
#' @export |
85 |
-setGeneric("celda_CG", function(x, ...) {standardGeneric("celda_CG")}) |
|
85 |
+setGeneric("celda_CG", function(x, ...) { |
|
86 |
+ standardGeneric("celda_CG")}) |
|
86 | 87 |
|
87 | 88 |
|
88 | 89 |
#' @rdname celda_CG |
... | ... |
@@ -740,7 +740,7 @@ setMethod("celda_CG", |
740 | 740 |
|
741 | 741 |
## Peform reordering on final Z and Y assigments: |
742 | 742 |
bestResult <- methods::new("celda_CG", |
743 |
- celdaClusters = list(z = zBest, y = yBest), |
|
743 |
+ clusters = list(z = zBest, y = yBest), |
|
744 | 744 |
params = list( |
745 | 745 |
K = as.integer(K), |
746 | 746 |
L = as.integer(L), |
... | ... |
@@ -882,10 +882,10 @@ setMethod("celda_CG", |
882 | 882 |
|
883 | 883 |
.reorderCeldaCG <- function(counts, res) { |
884 | 884 |
# Reorder K |
885 |
- if (params(res)$K > 2 & isTRUE(length(unique(res@celdaClusters$z)) > 1)) { |
|
886 |
- res@celdaClusters$z <- as.integer(as.factor(res@celdaClusters$z)) |
|
885 |
+ if (params(res)$K > 2 & isTRUE(length(unique(celdaClusters(res)$z)) > 1)) { |
|
886 |
+ res@clusters$z <- as.integer(as.factor(celdaClusters(res)$z)) |
|