Browse code

Merge pull request #356 from yuan-yin-truly/adding_background_batch

Adding background batch

Joshua D. Campbell authored on 15/01/2022 19:02:30 • GitHub committed on 15/01/2022 19:02:30
Showing 4 changed files

... ...
@@ -165,9 +165,9 @@ recodeClusterZ <- function(sce, from, to, altExpName = "featureSubset") {
165 165
     new.clusters <- plyr::mapvalues(celdaClusters(sce,
166 166
                                                   altExpName = altExpName),
167 167
                                     from, to)
168
-    new.clusters <- factor(new.clusters, levels = 
168
+    new.clusters <- factor(new.clusters, levels =
169 169
                              sort(as.numeric(unique(new.clusters))))
170
-    
170
+
171 171
     celdaClusters(sce, altExpName = altExpName) <- new.clusters
172 172
     return(sce)
173 173
 }
... ...
@@ -218,9 +218,9 @@ recodeClusterY <- function(sce, from, to, altExpName = "featureSubset") {
218 218
     new.clusters <- plyr::mapvalues(celdaModules(sce,
219 219
                                                   altExpName = altExpName),
220 220
                                   from, to)
221
-    new.clusters <- factor(new.clusters, levels = 
221
+    new.clusters <- factor(new.clusters, levels =
222 222
                              sort(as.numeric(unique(new.clusters))))
223
-  
223
+
224 224
     celdaModules(sce, altExpName = altExpName) <- plyr::mapvalues(
225 225
         celdaModules(sce, altExpName = altExpName), from, to)
226 226
     return(sce)
... ...
@@ -26,13 +26,17 @@
26 26
 #' should be considered different batches. Default NULL.
27 27
 #' @param background A numeric matrix of counts or a
28 28
 #' \linkS4class{SingleCellExperiment} with the matrix located in the assay
29
-#' slot under \code{assayName}. It should have the same structure as \code{x}
30
-#' except it contains the matrix of empty droplets instead of cells. When
31
-#' supplied, empirical distribution of transcripts from these empty droplets
29
+#' slot under \code{assayName}. It should have the same data format as \code{x}
30
+#' except it contains the empty droplets instead of cells. When supplied,
31
+#' empirical distribution of transcripts from these empty droplets
32 32
 #' will be used as the contamination distribution. Default NULL.
33 33
 #' @param bgAssayName Character. Name of the assay to use if \code{background}
34 34
 #' is a \linkS4class{SingleCellExperiment}. Default to same as
35 35
 #' \code{assayName}.
36
+#' @param bgBatch Numeric or character vector. Batch labels for
37
+#' \code{background}. Its unique values should be the same as those in
38
+#' \code{batch}, such that each batch of cells have their corresponding batch
39
+#' of empty droplets as background, pointed by this parameter. Default to NULL.
36 40
 #' @param maxIter Integer. Maximum iterations of the EM algorithm. Default 500.
37 41
 #' @param convergence Numeric. The EM algorithm will be stopped if the maximum
38 42
 #' difference in the contamination estimates between the previous and
... ...
@@ -150,6 +154,7 @@ setMethod("decontX", "SingleCellExperiment", function(x,
150 154
                                                       batch = NULL,
151 155
                                                       background = NULL,
152 156
                                                       bgAssayName = NULL,
157
+                                                      bgBatch = NULL,
153 158
                                                       maxIter = 500,
154 159
                                                       delta = c(10, 10),
155 160
                                                       estimateDelta = TRUE,
... ...
@@ -163,8 +168,16 @@ setMethod("decontX", "SingleCellExperiment", function(x,
163 168
   countsBackground <- NULL
164 169
   if (!is.null(background)) {
165 170
     # Remove cells with the same ID between x and the background matrix
166
-    background <- .checkBackground(x = x, background = background,
167
-                                   logfile = logfile, verbose = verbose)
171
+    # Also update bgBatch when background is updated and bgBatch is not null
172
+    temp <- .checkBackground(x = x,
173
+                             background = background,
174
+                             bgBatch = bgBatch,
175
+                             logfile = logfile,
176
+                             verbose = verbose)
177
+
178
+    background <- temp$background
179
+    bgBatch <- temp$bgBatch
180
+
168 181
     if (is.null(bgAssayName)) {
169 182
       bgAssayName <- assayName
170 183
     }
... ...
@@ -178,6 +191,7 @@ setMethod("decontX", "SingleCellExperiment", function(x,
178 191
     z = z,
179 192
     batch = batch,
180 193
     countsBackground = countsBackground,
194
+    batchBackground = bgBatch,
181 195
     maxIter = maxIter,
182 196
     convergence = convergence,
183 197
     iterLogLik = iterLogLik,
... ...
@@ -230,6 +244,7 @@ setMethod("decontX", "ANY", function(x,
230 244
                                      z = NULL,
231 245
                                      batch = NULL,
232 246
                                      background = NULL,
247
+                                     bgBatch = NULL,
233 248
                                      maxIter = 500,
234 249
                                      delta = c(10, 10),
235 250
                                      estimateDelta = TRUE,
... ...
@@ -244,8 +259,16 @@ setMethod("decontX", "ANY", function(x,
244 259
   countsBackground <- NULL
245 260
   if (!is.null(background)) {
246 261
     # Remove cells with the same ID between x and the background matrix
247
-    background <- .checkBackground(x = x, background = background,
248
-                                   logfile = logfile, verbose = verbose)
262
+    # Also update bgBatch when background is updated and bgBatch is not null
263
+    temp <- .checkBackground(x = x,
264
+                             background = background,
265
+                             bgBatch = bgBatch,
266
+                             logfile = logfile,
267
+                             verbose = verbose)
268
+
269
+    background <- temp$background
270
+    bgBatch <- temp$bgBatch
271
+
249 272
   }
250 273
 
251 274
   .decontX(
... ...
@@ -253,6 +276,7 @@ setMethod("decontX", "ANY", function(x,
253 276
     z = z,
254 277
     batch = batch,
255 278
     countsBackground = countsBackground,
279
+    batchBackground = bgBatch,
256 280
     maxIter = maxIter,
257 281
     convergence = convergence,
258 282
     iterLogLik = iterLogLik,
... ...
@@ -335,6 +359,7 @@ setMethod(
335 359
                      z = NULL,
336 360
                      batch = NULL,
337 361
                      countsBackground = NULL,
362
+                     batchBackground = NULL,
338 363
                      maxIter = 200,
339 364
                      convergence = 0.001,
340 365
                      iterLogLik = 10,
... ...
@@ -365,6 +390,7 @@ setMethod(
365 390
   runParams <- list(
366 391
     z = z,
367 392
     batch = batch,
393
+    batchBackground = batchBackground,
368 394
     maxIter = maxIter,
369 395
     delta = delta,
370 396
     estimateDelta = estimateDelta,
... ...
@@ -382,7 +408,7 @@ setMethod(
382 408
   nC <- ncol(counts)
383 409
   allCellNames <- colnames(counts)
384 410
 
385
-  ## Set up final deconaminated matrix
411
+  ## Set up final decontaminated matrix
386 412
   estRmat <- Matrix::Matrix(
387 413
     data = 0,
388 414
     ncol = totalCells,
... ...
@@ -394,8 +420,32 @@ setMethod(
394 420
   ## Generate batch labels if none were supplied
395 421
   if (is.null(batch)) {
396 422
     batch <- rep("all_cells", nC)
423
+
424
+    # If batch null, bgBatch has to be null
425
+    if (!is.null(batchBackground)) {
426
+      stop(
427
+        "When experiment default to no bacth, background should ",
428
+        "also default to no batch."
429
+      )
430
+    }
431
+
432
+    if (!is.null(countsBackground)) {
433
+      batchBackground <- rep("all_cells", ncol(countsBackground))
434
+    }
435
+  } else {
436
+
437
+    # If batch not null and countsBackground supplied,
438
+    # user has to supply batchBackground as well
439
+    if (!is.null(countsBackground) & is.null(batchBackground)) {
440
+      stop(
441
+        "Cell batch, and background are supplied. Please also ",
442
+        "supply background batch."
443
+      )
444
+    }
445
+
397 446
   }
398 447
   runParams$batch <- batch
448
+  runParams$batchBackground <- batchBackground
399 449
   batchIndex <- unique(batch)
400 450
 
401 451
   ## Set result lists upfront for all cells from different batches
... ...
@@ -428,6 +478,7 @@ setMethod(
428 478
 
429 479
     zBat <- NULL
430 480
     countsBat <- counts[, batch == bat]
481
+    bgBat <- countsBackground[, batchBackground == bat]
431 482
 
432 483
     ## Convert to sparse matrix
433 484
     if (!inherits(countsBat, "dgCMatrix")) {
... ...
@@ -440,9 +491,9 @@ setMethod(
440 491
       )
441 492
       countsBat <- methods::as(countsBat, "dgCMatrix")
442 493
     }
443
-    if (!is.null(countsBackground)) {
444
-      if (!inherits(countsBackground, "dgCMatrix")) {
445
-        countsBackground <- methods::as(countsBackground, "dgCMatrix")
494
+    if (!is.null(bgBat)) {
495
+      if (!inherits(bgBat, "dgCMatrix")) {
496
+        bgBat <- methods::as(bgBat, "dgCMatrix")
446 497
       }
447 498
     }
448 499
 
... ...
@@ -454,7 +505,7 @@ setMethod(
454 505
         counts = countsBat,
455 506
         z = zBat,
456 507
         batch = bat,
457
-        countsBackground = countsBackground,
508
+        countsBackground = bgBat,
458 509
         maxIter = maxIter,
459 510
         delta = delta,
460 511
         estimateDelta = estimateDelta,
... ...
@@ -473,7 +524,7 @@ setMethod(
473 524
           counts = countsBat,
474 525
           z = zBat,
475 526
           batch = bat,
476
-          countsBackground = countsBackground,
527
+          countsBackground = bgBat,
477 528
           maxIter = maxIter,
478 529
           delta = delta,
479 530
           estimateDelta = estimateDelta,
... ...
@@ -489,7 +540,7 @@ setMethod(
489 540
     }
490 541
 
491 542
     ## Try to convert class of new matrix to class of original matrix
492
-    
543
+
493 544
     .logMessages(
494 545
       date(),
495 546
       ".. Calculating final decontaminated matrix",
... ...
@@ -561,7 +612,7 @@ setMethod(
561 612
       append = TRUE,
562 613
       verbose = verbose
563 614
     )
564
-    
615
+
565 616
     ## Determine class of seed in DelayedArray
566 617
     seed.class <- unique(DelayedArray::seedApply(counts, class))[[1]]
567 618
     if (seed.class == "HDF5ArraySeed") {
... ...
@@ -1367,9 +1418,11 @@ simulateContamination <- function(C = 300,
1367 1418
 }
1368 1419
 
1369 1420
 
1370
-.checkBackground <- function(x, background, logfile = NULL, verbose = FALSE) {
1421
+.checkBackground <- function(x, background, bgBatch,
1422
+                             logfile = NULL, verbose = FALSE) {
1371 1423
   # Remove background barcodes that have already appeared in x
1372
-  if(!is.null(colnames(background))) {
1424
+  # If bgBatch param is supplied, also remove duplicate bgBatch
1425
+  if (!is.null(colnames(background))) {
1373 1426
     dupBarcode <- colnames(background) %in% colnames(x)
1374 1427
   } else {
1375 1428
     dupBarcode <- FALSE
... ...
@@ -1379,7 +1432,7 @@ simulateContamination <- function(C = 300,
1379 1432
             " Please ensure that no true cells are included in the background ",
1380 1433
             "matrix. Otherwise, results will be incorrect.")
1381 1434
   }
1382
-  
1435
+
1383 1436
   if (any(dupBarcode)) {
1384 1437
     .logMessages(
1385 1438
       date(),
... ...
@@ -1392,6 +1445,20 @@ simulateContamination <- function(C = 300,
1392 1445
       verbose = verbose
1393 1446
     )
1394 1447
     background <- background[, !(dupBarcode), drop = FALSE]
1448
+
1449
+    if (!is.null(bgBatch)) {
1450
+      if (length(bgBatch) != length(dupBarcode)) {
1451
+        stop(
1452
+          "Length of bgBatch must be equal to the number of columns",
1453
+          "of background matrix."
1454
+          )
1455
+      }
1456
+      bgBatch <- bgBatch[!(dupBarcode)]
1457
+    }
1395 1458
   }
1396
-  return(background)
1459
+
1460
+  re <- list(background = background,
1461
+            bgBatch = bgBatch)
1462
+
1463
+  return(re)
1397 1464
 }
... ...
@@ -15,6 +15,7 @@ decontX(x, ...)
15 15
   batch = NULL,
16 16
   background = NULL,
17 17
   bgAssayName = NULL,
18
+  bgBatch = NULL,
18 19
   maxIter = 500,
19 20
   delta = c(10, 10),
20 21
   estimateDelta = TRUE,
... ...
@@ -32,6 +33,7 @@ decontX(x, ...)
32 33
   z = NULL,
33 34
   batch = NULL,
34 35
   background = NULL,
36
+  bgBatch = NULL,
35 37
   maxIter = 500,
36 38
   delta = c(10, 10),
37 39
   estimateDelta = TRUE,
... ...
@@ -72,15 +74,20 @@ should be considered different batches. Default NULL.}
72 74
 
73 75
 \item{background}{A numeric matrix of counts or a
74 76
 \linkS4class{SingleCellExperiment} with the matrix located in the assay
75
-slot under \code{assayName}. It should have the same structure as \code{x}
76
-except it contains the matrix of empty droplets instead of cells. When
77
-supplied, empirical distribution of transcripts from these empty droplets
77
+slot under \code{assayName}. It should have the same data format as \code{x}
78
+except it contains the empty droplets instead of cells. When supplied,
79
+empirical distribution of transcripts from these empty droplets
78 80
 will be used as the contamination distribution. Default NULL.}
79 81
 
80 82
 \item{bgAssayName}{Character. Name of the assay to use if \code{background}
81 83
 is a \linkS4class{SingleCellExperiment}. Default to same as
82 84
 \code{assayName}.}
83 85
 
86
+\item{bgBatch}{Numeric or character vector. Batch labels for
87
+\code{background}. Its unique values should be the same as those in
88
+\code{batch}, such that each batch of cells have their corresponding batch
89
+of empty droplets as background, pointed by this parameter. Default to NULL.}
90
+
84 91
 \item{maxIter}{Integer. Maximum iterations of the EM algorithm. Default 500.}
85 92
 
86 93
 \item{delta}{Numeric Vector of length 2. Concentration parameters for
... ...
@@ -6,6 +6,11 @@
6 6
 
7 7
 using namespace Rcpp;
8 8
 
9
+#ifdef RCPP_USE_GLOBAL_ROSTREAM
10
+Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
11
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
12
+#endif
13
+
9 14
 // decontXEM
10 15
 Rcpp::List decontXEM(const Eigen::MappedSparseMatrix<double>& counts, const NumericVector& counts_colsums, const NumericVector& theta, const bool& estimate_eta, const NumericMatrix& eta, const NumericMatrix& phi, const IntegerVector& z, const bool& estimate_delta, const NumericVector& delta, const double& pseudocount);
11 16
 RcppExport SEXP _celda_decontXEM(SEXP countsSEXP, SEXP counts_colsumsSEXP, SEXP thetaSEXP, SEXP estimate_etaSEXP, SEXP etaSEXP, SEXP phiSEXP, SEXP zSEXP, SEXP estimate_deltaSEXP, SEXP deltaSEXP, SEXP pseudocountSEXP) {