Browse code

Fixed bug related to factors/integers for z/y cluster labels within internal functions

Joshua D. Campbell authored on 22/10/2022 00:27:29
Showing 6 changed files

... ...
@@ -162,9 +162,9 @@ recodeClusterZ <- function(sce, from, to, altExpName = "featureSubset") {
162 162
         stop("Provided 'sce' argument does not have a 'celda_cell_cluster'",
163 163
             " column in 'colData(altExp(sce, altExpName))'")
164 164
     }
165
-    new.clusters <- plyr::mapvalues(celdaClusters(sce,
166
-                                                  altExpName = altExpName),
167
-                                    from, to)
165
+    z <- as.integer(celdaClusters(sce, altExpName = altExpName))
166
+    new.clusters <- plyr::mapvalues(z,
167
+                                    from = from, to = to)
168 168
     new.clusters <- factor(new.clusters, levels =
169 169
                              sort(as.numeric(unique(new.clusters))))
170 170
 
... ...
@@ -181,8 +181,12 @@ recodeClusterZ <- function(sce, from, to, altExpName = "featureSubset") {
181 181
     if (is.null(celdaClusters(celdaMod)$z)) {
182 182
         stop("Provided celdaMod argument does not have a z attribute")
183 183
     }
184
-    celdaMod@clusters$z <- plyr::mapvalues(celdaClusters(celdaMod)$z,
185
-        from, to)
184
+    z <- as.integer(celdaClusters(celdaMod)$z)
185
+    new.clusters <- plyr::mapvalues(z, from = from, to = to)
186
+    new.clusters <- factor(new.clusters, levels =
187
+                             sort(as.numeric(unique(new.clusters))))
188
+    
189
+    celdaMod@clusters$z <- new.clusters
186 190
     return(celdaMod)
187 191
 }
188 192
 
... ...
@@ -215,14 +219,12 @@ recodeClusterY <- function(sce, from, to, altExpName = "featureSubset") {
215 219
         stop("Provided 'sce' argument does not have a 'celda_feature_module'",
216 220
             " column in 'rowData(altExp(sce, altExpName))'")
217 221
     }
218
-    new.clusters <- plyr::mapvalues(celdaModules(sce,
219
-                                                  altExpName = altExpName),
220
-                                  from, to)
222
+    y <- as.integer(celdaModules(sce, altExpName = altExpName))
223
+    new.clusters <- plyr::mapvalues(y, from = from, to = to)
221 224
     new.clusters <- factor(new.clusters, levels =
222 225
                              sort(as.numeric(unique(new.clusters))))
223
-
224
-    celdaModules(sce, altExpName = altExpName) <- plyr::mapvalues(
225
-        celdaModules(sce, altExpName = altExpName), from, to)
226
+    
227
+    celdaModules(sce, altExpName = altExpName) <- new.clusters
226 228
     return(sce)
227 229
 }
228 230
 
... ...
@@ -235,8 +237,13 @@ recodeClusterY <- function(sce, from, to, altExpName = "featureSubset") {
235 237
     if (is.null(celdaClusters(celdaMod)$y)) {
236 238
         stop("Provided celdaMod argument does not have a y attribute")
237 239
     }
238
-    celdaMod@clusters$y <- plyr::mapvalues(celdaClusters(celdaMod)$y,
239
-        from, to)
240
+    y <- as.integer(celdaClusters(celdaMod)$y)
241
+    
242
+    new.clusters <- plyr::mapvalues(y, from = from, to = to)
243
+    new.clusters <- factor(new.clusters, levels =
244
+                             sort(as.numeric(unique(new.clusters))))
245
+    
246
+    celdaMod@clusters$y <- new.clusters
240 247
     return(celdaMod)
241 248
 }
242 249
 
... ...
@@ -152,8 +152,8 @@ setMethod("factorizeMatrix", signature(x = "ANY", celdaMod = "celda_CG"),
152 152
         counts <- .processCounts(x)
153 153
         compareCountMatrix(counts, celdaMod)
154 154
 
155
-        z <- celdaClusters(celdaMod)$z
156
-        y <- celdaClusters(celdaMod)$y
155
+        z <- as.integer(celdaClusters(celdaMod)$z)
156
+        y <- as.integer(celdaClusters(celdaMod)$y)
157 157
         # Sometimes, fewer clusters get returned by celda_C/G
158 158
         # Taking the max(z)/max(y) rather than
159 159
         # the original K/L will prevent errors
... ...
@@ -318,7 +318,7 @@ setMethod("factorizeMatrix", signature(x = "ANY", celdaMod = "celda_C"),
318 318
         counts <- .processCounts(x)
319 319
         compareCountMatrix(counts, celdaMod)
320 320
 
321
-        z <- celdaClusters(celdaMod)$z
321
+        z <- as.integer(celdaClusters(celdaMod)$z)
322 322
         # Sometimes, fewer clusters get returned by celda_C
323 323
         # Taking the max(z) rather than the
324 324
         # original K will prevent errors
... ...
@@ -430,7 +430,7 @@ setMethod("factorizeMatrix", signature(x = "ANY", celdaMod = "celda_G"),
430 430
         counts <- .processCounts(x)
431 431
         compareCountMatrix(counts, celdaMod)
432 432
 
433
-        y <- celdaClusters(celdaMod)$y
433
+        y <- as.integer(celdaClusters(celdaMod)$y)
434 434
         # Sometimes, fewer clusters get returned by celda_G
435 435
         # Taking the max(y) rather than the original
436 436
         # L will prevent errors
... ...
@@ -85,7 +85,7 @@ setMethod("logLikelihood", signature(x = "SingleCellExperiment"),
85 85
 setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_C"),
86 86
     function(x, celdaMod) {
87 87
         sampleLabel <- sampleLabel(celdaMod)
88
-        z <- celdaClusters(celdaMod)$z
88
+        z <- as.integer(celdaClusters(celdaMod)$z)
89 89
         K <- params(celdaMod)$K
90 90
         alpha <- params(celdaMod)$alpha
91 91
         beta <- params(celdaMod)$beta
... ...
@@ -105,7 +105,7 @@ setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_C"),
105 105
 #' @export
106 106
 setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_G"),
107 107
     function(x, celdaMod) {
108
-        y <- celdaClusters(celdaMod)$y
108
+        y <- as.integer(celdaClusters(celdaMod)$y)
109 109
         L <- params(celdaMod)$L
110 110
         beta <- params(celdaMod)$beta
111 111
         delta <- params(celdaMod)$delta
... ...
@@ -127,8 +127,8 @@ setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_G"),
127 127
 setMethod("logLikelihood", signature(x = "matrix", celdaMod = "celda_CG"),
128 128
     function(x, celdaMod) {
129 129
         sampleLabel <- sampleLabel(celdaMod)
130
-        z <- celdaClusters(celdaMod)$z
131
-        y <- celdaClusters(celdaMod)$y
130
+        z <- as.integer(celdaClusters(celdaMod)$z)
131
+        y <- as.integer(celdaClusters(celdaMod)$y)
132 132
         K <- params(celdaMod)$K
133 133
         L <- params(celdaMod)$L
134 134
         alpha <- params(celdaMod)$alpha
... ...
@@ -534,7 +534,7 @@ setMethod("recursiveSplitCell",
534 534
       verbose = FALSE,
535 535
       reorder = reorder)
536 536
     currentK <- length(unique(celdaClusters(modelInitial)$z)) + 1
537
-    overallZ <- celdaClusters(modelInitial)$z
537
+    overallZ <- as.integer(celdaClusters(modelInitial)$z)
538 538
     resList <- list(modelInitial)
539 539
     while (currentK <= maxK) {
540 540
       # previousY <- overallY
... ...
@@ -574,14 +574,14 @@ setMethod("recursiveSplitCell",
574 574
       # If the number of clusters is still "currentK", then keep the
575 575
       # reordering, otherwise keep the previous configuration
576 576
       if (length(unique(celdaClusters(tempModel)$z)) == currentK) {
577
-        overallZ <- celdaClusters(tempModel)$z
577
+        overallZ <- as.integer(celdaClusters(tempModel)$z)
578 578
       } else {
579 579
         overallZ <- tempSplit$z
580 580
         ll <- .logLikelihoodcelda_CG(
581 581
           counts,
582 582
           s,
583 583
           overallZ,
584
-          celdaClusters(tempModel)$y,
584
+          as.integer(celdaClusters(tempModel)$y),
585 585
           currentK,
586 586
           L,
587 587
           alpha,
... ...
@@ -590,7 +590,7 @@ setMethod("recursiveSplitCell",
590 590
           gamma
591 591
         )
592 592
         tempModel <- methods::new("celda_CG",
593
-          clusters = list(z = overallZ, y = celdaClusters(tempModel)$y),
593
+          clusters = list(z = overallZ, y = as.integer(celdaClusters(tempModel)$y)),
594 594
           params = list(
595 595
             K = as.integer(currentK),
596 596
             L = as.integer(L),
... ...
@@ -670,7 +670,7 @@ setMethod("recursiveSplitCell",
670 670
       reorder = reorder
671 671
     )
672 672
     currentK <- length(unique(celdaClusters(modelInitial)$z)) + 1
673
-    overallZ <- celdaClusters(modelInitial)$z
673
+    overallZ <- as.integer(celdaClusters(modelInitial)$z)
674 674
     ll <- .logLikelihoodcelda_C(
675 675
       counts, s, overallZ, currentK,
676 676
       alpha, beta
... ...
@@ -707,7 +707,7 @@ setMethod("recursiveSplitCell",
707 707
       # Handle rare cases where a population has no cells after running
708 708
       # the model
709 709
       if (length(unique(celdaClusters(tempModel)$z)) == currentK) {
710
-        overallZ <- celdaClusters(tempModel)$z
710
+        overallZ <- as.integer(celdaClusters(tempModel)$z)
711 711
       } else {
712 712
         overallZ <- tempSplit$z
713 713
       }
... ...
@@ -772,7 +772,7 @@ setMethod("recursiveSplitCell",
772 772
       reorder = reorder
773 773
     )
774 774
     currentK <- length(unique(celdaClusters(modelInitial)$z)) + 1
775
-    overallZ <- celdaClusters(modelInitial)$z
775
+    overallZ <- as.integer(celdaClusters(modelInitial)$z)
776 776
     resList <- list(modelInitial)
777 777
     while (currentK <= maxK) {
778 778
       tempSplit <- .singleSplitZ(counts,
... ...
@@ -798,7 +798,7 @@ setMethod("recursiveSplitCell",
798 798
       )
799 799
 
800 800
       if (length(unique(celdaClusters(tempModel)$z)) == currentK) {
801
-        overallZ <- celdaClusters(tempModel)$z
801
+        overallZ <- as.integer(celdaClusters(tempModel)$z)
802 802
       } else {
803 803
         overallZ <- tempSplit$z
804 804
         ll <-
... ...
@@ -1336,7 +1336,7 @@ setMethod("recursiveSplitModule",
1336 1336
       verbose = FALSE,
1337 1337
       reorder = reorder)
1338 1338
     currentL <- length(unique(celdaClusters(modelInitial)$y)) + 1
1339
-    overallY <- celdaClusters(modelInitial)$y
1339
+    overallY <- as.integer(celdaClusters(modelInitial)$y)
1340 1340
 
1341 1341
     resList <- list(modelInitial)
1342 1342
     while (currentL <= maxL) {
... ...
@@ -1365,7 +1365,7 @@ setMethod("recursiveSplitModule",
1365 1365
         zInit = overallZ,
1366 1366
         reorder = reorder
1367 1367
       )
1368
-      overallY <- celdaClusters(tempModel)$y
1368
+      overallY <- as.integer(celdaClusters(tempModel)$y)
1369 1369
 
1370 1370
       ## Add new model to results list and increment L
1371 1371
       .logMessages(
... ...
@@ -1428,8 +1428,8 @@ setMethod("recursiveSplitModule",
1428 1428
     )
1429 1429
     modelInitial@params$countChecksum <- countChecksum
1430 1430
 
1431
-    currentL <- length(unique(celdaClusters(modelInitial)$y)) + 1
1432
-    overallY <- celdaClusters(modelInitial)$y
1431
+    currentL <- length(unique(as.integer(celdaClusters(modelInitial)$y))) + 1
1432
+    overallY <- as.integer(celdaClusters(modelInitial)$y)
1433 1433
 
1434 1434
     ## Decomposed counts for full count matrix
1435 1435
     p <- .cGDecomposeCounts(counts, overallY, currentL)
... ...
@@ -1465,7 +1465,7 @@ setMethod("recursiveSplitModule",
1465 1465
         yInit = tempSplit$y,
1466 1466
         reorder = reorder
1467 1467
       )
1468
-      overallY <- celdaClusters(tempModel)$y
1468
+      overallY <- as.integer(celdaClusters(tempModel)$y)
1469 1469
 
1470 1470
       # Adjust decomposed count matrices
1471 1471
       p <- .cGReDecomposeCounts(counts,
... ...
@@ -1543,7 +1543,7 @@ setMethod("recursiveSplitModule",
1543 1543
       nchains = 1,
1544 1544
       verbose = FALSE
1545 1545
     )
1546
-    overallY <- celdaClusters(modelInitial)$y
1546
+    overallY <- as.integer(celdaClusters(modelInitial)$y)
1547 1547
     currentL <- length(unique(overallY)) + 1
1548 1548
 
1549 1549
     ## Perform splitting for y labels
... ...
@@ -1572,7 +1572,7 @@ setMethod("recursiveSplitModule",
1572 1572
         yInit = tempSplit$y,
1573 1573
         reorder = reorder
1574 1574
       )
1575
-      overallY <- celdaClusters(tempModel)$y
1575
+      overallY <- as.integer(celdaClusters(tempModel)$y)
1576 1576
 
1577 1577
       ## Add new model to results list and increment L
1578 1578
       .logMessages(
... ...
@@ -47,7 +47,7 @@
47 47
       verbose = FALSE,
48 48
       reorder = FALSE
49 49
     )
50
-    clustSplit[[i]] <- celdaClusters(clustLabel)$z
50
+    clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$z)
51 51
   }
52 52
 
53 53
   ## Find second best assignment give current assignments for each cell
... ...
@@ -205,7 +205,7 @@
205 205
       verbose = FALSE,
206 206
       reorder = FALSE
207 207
     )
208
-    clustSplit[[i]] <- celdaClusters(clustLabel)$z
208
+    clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$z)
209 209
   }
210 210
 
211 211
   ## Find second best assignment give current assignments for each cell
... ...
@@ -399,7 +399,7 @@
399 399
         verbose = FALSE,
400 400
         reorder = FALSE
401 401
       )
402
-      tempZ[ix] <- celdaClusters(clustLabel)$z + currentTopZ
402
+      tempZ[ix] <- as.integer(celdaClusters(clustLabel)$z) + currentTopZ
403 403
     }
404 404
     currentTopZ <- max(tempZ, na.rm = TRUE)
405 405
   }
... ...
@@ -443,7 +443,7 @@
443 443
       verbose = FALSE,
444 444
       reorder = FALSE
445 445
     )
446
-    clustSplit[[i]] <- celdaClusters(clustLabel)$y
446
+    clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$y)
447 447
   }
448 448
 
449 449
   ## Find second best assignment give current assignments for each cell
... ...
@@ -637,7 +637,7 @@
637 637
       verbose = FALSE,
638 638
       reorder = FALSE
639 639
     )
640
-    clustSplit[[i]] <- celdaClusters(clustLabel)$y
640
+    clustSplit[[i]] <- as.integer(celdaClusters(clustLabel)$y)
641 641
   }
642 642
 
643 643
   ## Find second best assignment give current assignments for each cell
... ...
@@ -6,6 +6,11 @@
6 6
 
7 7
 using namespace Rcpp;
8 8
 
9
+#ifdef RCPP_USE_GLOBAL_ROSTREAM
10
+Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
11
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
12
+#endif
13
+
9 14
 // decontXEM
10 15
 Rcpp::List decontXEM(const Eigen::MappedSparseMatrix<double>& counts, const NumericVector& counts_colsums, const NumericVector& theta, const bool& estimate_eta, const NumericMatrix& eta, const NumericMatrix& phi, const IntegerVector& z, const bool& estimate_delta, const NumericVector& delta, const double& pseudocount);
11 16
 RcppExport SEXP _celda_decontXEM(SEXP countsSEXP, SEXP counts_colsumsSEXP, SEXP thetaSEXP, SEXP estimate_etaSEXP, SEXP etaSEXP, SEXP phiSEXP, SEXP zSEXP, SEXP estimate_deltaSEXP, SEXP deltaSEXP, SEXP pseudocountSEXP) {