Adding background batch
... | ... |
@@ -165,9 +165,9 @@ recodeClusterZ <- function(sce, from, to, altExpName = "featureSubset") { |
165 | 165 |
new.clusters <- plyr::mapvalues(celdaClusters(sce, |
166 | 166 |
altExpName = altExpName), |
167 | 167 |
from, to) |
168 |
- new.clusters <- factor(new.clusters, levels = |
|
168 |
+ new.clusters <- factor(new.clusters, levels = |
|
169 | 169 |
sort(as.numeric(unique(new.clusters)))) |
170 |
- |
|
170 |
+ |
|
171 | 171 |
celdaClusters(sce, altExpName = altExpName) <- new.clusters |
172 | 172 |
return(sce) |
173 | 173 |
} |
... | ... |
@@ -218,9 +218,9 @@ recodeClusterY <- function(sce, from, to, altExpName = "featureSubset") { |
218 | 218 |
new.clusters <- plyr::mapvalues(celdaModules(sce, |
219 | 219 |
altExpName = altExpName), |
220 | 220 |
from, to) |
221 |
- new.clusters <- factor(new.clusters, levels = |
|
221 |
+ new.clusters <- factor(new.clusters, levels = |
|
222 | 222 |
sort(as.numeric(unique(new.clusters)))) |
223 |
- |
|
223 |
+ |
|
224 | 224 |
celdaModules(sce, altExpName = altExpName) <- plyr::mapvalues( |
225 | 225 |
celdaModules(sce, altExpName = altExpName), from, to) |
226 | 226 |
return(sce) |
... | ... |
@@ -26,13 +26,17 @@ |
26 | 26 |
#' should be considered different batches. Default NULL. |
27 | 27 |
#' @param background A numeric matrix of counts or a |
28 | 28 |
#' \linkS4class{SingleCellExperiment} with the matrix located in the assay |
29 |
-#' slot under \code{assayName}. It should have the same structure as \code{x} |
|
30 |
-#' except it contains the matrix of empty droplets instead of cells. When |
|
31 |
-#' supplied, empirical distribution of transcripts from these empty droplets |
|
29 |
+#' slot under \code{assayName}. It should have the same data format as \code{x} |
|
30 |
+#' except it contains the empty droplets instead of cells. When supplied, |
|
31 |
+#' empirical distribution of transcripts from these empty droplets |
|
32 | 32 |
#' will be used as the contamination distribution. Default NULL. |
33 | 33 |
#' @param bgAssayName Character. Name of the assay to use if \code{background} |
34 | 34 |
#' is a \linkS4class{SingleCellExperiment}. Default to same as |
35 | 35 |
#' \code{assayName}. |
36 |
+#' @param bgBatch Numeric or character vector. Batch labels for |
|
37 |
+#' \code{background}. Its unique values should be the same as those in |
|
38 |
+#' \code{batch}, such that each batch of cells have their corresponding batch |
|
39 |
+#' of empty droplets as background, pointed by this parameter. Default to NULL. |
|
36 | 40 |
#' @param maxIter Integer. Maximum iterations of the EM algorithm. Default 500. |
37 | 41 |
#' @param convergence Numeric. The EM algorithm will be stopped if the maximum |
38 | 42 |
#' difference in the contamination estimates between the previous and |
... | ... |
@@ -150,6 +154,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
150 | 154 |
batch = NULL, |
151 | 155 |
background = NULL, |
152 | 156 |
bgAssayName = NULL, |
157 |
+ bgBatch = NULL, |
|
153 | 158 |
maxIter = 500, |
154 | 159 |
delta = c(10, 10), |
155 | 160 |
estimateDelta = TRUE, |
... | ... |
@@ -163,8 +168,16 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
163 | 168 |
countsBackground <- NULL |
164 | 169 |
if (!is.null(background)) { |
165 | 170 |
# Remove cells with the same ID between x and the background matrix |
166 |
- background <- .checkBackground(x = x, background = background, |
|
167 |
- logfile = logfile, verbose = verbose) |
|
171 |
+ # Also update bgBatch when background is updated and bgBatch is not null |
|
172 |
+ temp <- .checkBackground(x = x, |
|
173 |
+ background = background, |
|
174 |
+ bgBatch = bgBatch, |
|
175 |
+ logfile = logfile, |
|
176 |
+ verbose = verbose) |
|
177 |
+ |
|
178 |
+ background <- temp$background |
|
179 |
+ bgBatch <- temp$bgBatch |
|
180 |
+ |
|
168 | 181 |
if (is.null(bgAssayName)) { |
169 | 182 |
bgAssayName <- assayName |
170 | 183 |
} |
... | ... |
@@ -178,6 +191,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
178 | 191 |
z = z, |
179 | 192 |
batch = batch, |
180 | 193 |
countsBackground = countsBackground, |
194 |
+ batchBackground = bgBatch, |
|
181 | 195 |
maxIter = maxIter, |
182 | 196 |
convergence = convergence, |
183 | 197 |
iterLogLik = iterLogLik, |
... | ... |
@@ -230,6 +244,7 @@ setMethod("decontX", "ANY", function(x, |
230 | 244 |
z = NULL, |
231 | 245 |
batch = NULL, |
232 | 246 |
background = NULL, |
247 |
+ bgBatch = NULL, |
|
233 | 248 |
maxIter = 500, |
234 | 249 |
delta = c(10, 10), |
235 | 250 |
estimateDelta = TRUE, |
... | ... |
@@ -244,8 +259,16 @@ setMethod("decontX", "ANY", function(x, |
244 | 259 |
countsBackground <- NULL |
245 | 260 |
if (!is.null(background)) { |
246 | 261 |
# Remove cells with the same ID between x and the background matrix |
247 |
- background <- .checkBackground(x = x, background = background, |
|
248 |
- logfile = logfile, verbose = verbose) |
|
262 |
+ # Also update bgBatch when background is updated and bgBatch is not null |
|
263 |
+ temp <- .checkBackground(x = x, |
|
264 |
+ background = background, |
|
265 |
+ bgBatch = bgBatch, |
|
266 |
+ logfile = logfile, |
|
267 |
+ verbose = verbose) |
|
268 |
+ |
|
269 |
+ background <- temp$background |
|
270 |
+ bgBatch <- temp$bgBatch |
|
271 |
+ |
|
249 | 272 |
} |
250 | 273 |
|
251 | 274 |
.decontX( |
... | ... |
@@ -253,6 +276,7 @@ setMethod("decontX", "ANY", function(x, |
253 | 276 |
z = z, |
254 | 277 |
batch = batch, |
255 | 278 |
countsBackground = countsBackground, |
279 |
+ batchBackground = bgBatch, |
|
256 | 280 |
maxIter = maxIter, |
257 | 281 |
convergence = convergence, |
258 | 282 |
iterLogLik = iterLogLik, |
... | ... |
@@ -335,6 +359,7 @@ setMethod( |
335 | 359 |
z = NULL, |
336 | 360 |
batch = NULL, |
337 | 361 |
countsBackground = NULL, |
362 |
+ batchBackground = NULL, |
|
338 | 363 |
maxIter = 200, |
339 | 364 |
convergence = 0.001, |
340 | 365 |
iterLogLik = 10, |
... | ... |
@@ -365,6 +390,7 @@ setMethod( |
365 | 390 |
runParams <- list( |
366 | 391 |
z = z, |
367 | 392 |
batch = batch, |
393 |
+ batchBackground = batchBackground, |
|
368 | 394 |
maxIter = maxIter, |
369 | 395 |
delta = delta, |
370 | 396 |
estimateDelta = estimateDelta, |
... | ... |
@@ -382,7 +408,7 @@ setMethod( |
382 | 408 |
nC <- ncol(counts) |
383 | 409 |
allCellNames <- colnames(counts) |
384 | 410 |
|
385 |
- ## Set up final deconaminated matrix |
|
411 |
+ ## Set up final decontaminated matrix |
|
386 | 412 |
estRmat <- Matrix::Matrix( |
387 | 413 |
data = 0, |
388 | 414 |
ncol = totalCells, |
... | ... |
@@ -394,8 +420,32 @@ setMethod( |
394 | 420 |
## Generate batch labels if none were supplied |
395 | 421 |
if (is.null(batch)) { |
396 | 422 |
batch <- rep("all_cells", nC) |
423 |
+ |
|
424 |
+ # If batch null, bgBatch has to be null |
|
425 |
+ if (!is.null(batchBackground)) { |
|
426 |
+ stop( |
|
427 |
+ "When experiment default to no bacth, background should ", |
|
428 |
+ "also default to no batch." |
|
429 |
+ ) |
|
430 |
+ } |
|
431 |
+ |
|
432 |
+ if (!is.null(countsBackground)) { |
|
433 |
+ batchBackground <- rep("all_cells", ncol(countsBackground)) |
|
434 |
+ } |
|
435 |
+ } else { |
|
436 |
+ |
|
437 |
+ # If batch not null and countsBackground supplied, |
|
438 |
+ # user has to supply batchBackground as well |
|
439 |
+ if (!is.null(countsBackground) & is.null(batchBackground)) { |
|
440 |
+ stop( |
|
441 |
+ "Cell batch, and background are supplied. Please also ", |
|
442 |
+ "supply background batch." |
|
443 |
+ ) |
|
444 |
+ } |
|
445 |
+ |
|
397 | 446 |
} |
398 | 447 |
runParams$batch <- batch |
448 |
+ runParams$batchBackground <- batchBackground |
|
399 | 449 |
batchIndex <- unique(batch) |
400 | 450 |
|
401 | 451 |
## Set result lists upfront for all cells from different batches |
... | ... |
@@ -428,6 +478,7 @@ setMethod( |
428 | 478 |
|
429 | 479 |
zBat <- NULL |
430 | 480 |
countsBat <- counts[, batch == bat] |
481 |
+ bgBat <- countsBackground[, batchBackground == bat] |
|
431 | 482 |
|
432 | 483 |
## Convert to sparse matrix |
433 | 484 |
if (!inherits(countsBat, "dgCMatrix")) { |
... | ... |
@@ -440,9 +491,9 @@ setMethod( |
440 | 491 |
) |
441 | 492 |
countsBat <- methods::as(countsBat, "dgCMatrix") |
442 | 493 |
} |
443 |
- if (!is.null(countsBackground)) { |
|
444 |
- if (!inherits(countsBackground, "dgCMatrix")) { |
|
445 |
- countsBackground <- methods::as(countsBackground, "dgCMatrix") |
|
494 |
+ if (!is.null(bgBat)) { |
|
495 |
+ if (!inherits(bgBat, "dgCMatrix")) { |
|
496 |
+ bgBat <- methods::as(bgBat, "dgCMatrix") |
|
446 | 497 |
} |
447 | 498 |
} |
448 | 499 |
|
... | ... |
@@ -454,7 +505,7 @@ setMethod( |
454 | 505 |
counts = countsBat, |
455 | 506 |
z = zBat, |
456 | 507 |
batch = bat, |
457 |
- countsBackground = countsBackground, |
|
508 |
+ countsBackground = bgBat, |
|
458 | 509 |
maxIter = maxIter, |
459 | 510 |
delta = delta, |
460 | 511 |
estimateDelta = estimateDelta, |
... | ... |
@@ -473,7 +524,7 @@ setMethod( |
473 | 524 |
counts = countsBat, |
474 | 525 |
z = zBat, |
475 | 526 |
batch = bat, |
476 |
- countsBackground = countsBackground, |
|
527 |
+ countsBackground = bgBat, |
|
477 | 528 |
maxIter = maxIter, |
478 | 529 |
delta = delta, |
479 | 530 |
estimateDelta = estimateDelta, |
... | ... |
@@ -489,7 +540,7 @@ setMethod( |
489 | 540 |
} |
490 | 541 |
|
491 | 542 |
## Try to convert class of new matrix to class of original matrix |
492 |
- |
|
543 |
+ |
|
493 | 544 |
.logMessages( |
494 | 545 |
date(), |
495 | 546 |
".. Calculating final decontaminated matrix", |
... | ... |
@@ -561,7 +612,7 @@ setMethod( |
561 | 612 |
append = TRUE, |
562 | 613 |
verbose = verbose |
563 | 614 |
) |
564 |
- |
|
615 |
+ |
|
565 | 616 |
## Determine class of seed in DelayedArray |
566 | 617 |
seed.class <- unique(DelayedArray::seedApply(counts, class))[[1]] |
567 | 618 |
if (seed.class == "HDF5ArraySeed") { |
... | ... |
@@ -1367,9 +1418,11 @@ simulateContamination <- function(C = 300, |
1367 | 1418 |
} |
1368 | 1419 |
|
1369 | 1420 |
|
1370 |
-.checkBackground <- function(x, background, logfile = NULL, verbose = FALSE) { |
|
1421 |
+.checkBackground <- function(x, background, bgBatch, |
|
1422 |
+ logfile = NULL, verbose = FALSE) { |
|
1371 | 1423 |
# Remove background barcodes that have already appeared in x |
1372 |
- if(!is.null(colnames(background))) { |
|
1424 |
+ # If bgBatch param is supplied, also remove duplicate bgBatch |
|
1425 |
+ if (!is.null(colnames(background))) { |
|
1373 | 1426 |
dupBarcode <- colnames(background) %in% colnames(x) |
1374 | 1427 |
} else { |
1375 | 1428 |
dupBarcode <- FALSE |
... | ... |
@@ -1379,7 +1432,7 @@ simulateContamination <- function(C = 300, |
1379 | 1432 |
" Please ensure that no true cells are included in the background ", |
1380 | 1433 |
"matrix. Otherwise, results will be incorrect.") |
1381 | 1434 |
} |
1382 |
- |
|
1435 |
+ |
|
1383 | 1436 |
if (any(dupBarcode)) { |
1384 | 1437 |
.logMessages( |
1385 | 1438 |
date(), |
... | ... |
@@ -1392,6 +1445,20 @@ simulateContamination <- function(C = 300, |
1392 | 1445 |
verbose = verbose |
1393 | 1446 |
) |
1394 | 1447 |
background <- background[, !(dupBarcode), drop = FALSE] |
1448 |
+ |
|
1449 |
+ if (!is.null(bgBatch)) { |
|
1450 |
+ if (length(bgBatch) != length(dupBarcode)) { |
|
1451 |
+ stop( |
|
1452 |
+ "Length of bgBatch must be equal to the number of columns", |
|
1453 |
+ "of background matrix." |
|
1454 |
+ ) |
|
1455 |
+ } |
|
1456 |
+ bgBatch <- bgBatch[!(dupBarcode)] |
|
1457 |
+ } |
|
1395 | 1458 |
} |
1396 |
- return(background) |
|
1459 |
+ |
|
1460 |
+ re <- list(background = background, |
|
1461 |
+ bgBatch = bgBatch) |
|
1462 |
+ |
|
1463 |
+ return(re) |
|
1397 | 1464 |
} |
... | ... |
@@ -15,6 +15,7 @@ decontX(x, ...) |
15 | 15 |
batch = NULL, |
16 | 16 |
background = NULL, |
17 | 17 |
bgAssayName = NULL, |
18 |
+ bgBatch = NULL, |
|
18 | 19 |
maxIter = 500, |
19 | 20 |
delta = c(10, 10), |
20 | 21 |
estimateDelta = TRUE, |
... | ... |
@@ -32,6 +33,7 @@ decontX(x, ...) |
32 | 33 |
z = NULL, |
33 | 34 |
batch = NULL, |
34 | 35 |
background = NULL, |
36 |
+ bgBatch = NULL, |
|
35 | 37 |
maxIter = 500, |
36 | 38 |
delta = c(10, 10), |
37 | 39 |
estimateDelta = TRUE, |
... | ... |
@@ -72,15 +74,20 @@ should be considered different batches. Default NULL.} |
72 | 74 |
|
73 | 75 |
\item{background}{A numeric matrix of counts or a |
74 | 76 |
\linkS4class{SingleCellExperiment} with the matrix located in the assay |
75 |
-slot under \code{assayName}. It should have the same structure as \code{x} |
|
76 |
-except it contains the matrix of empty droplets instead of cells. When |
|
77 |
-supplied, empirical distribution of transcripts from these empty droplets |
|
77 |
+slot under \code{assayName}. It should have the same data format as \code{x} |
|
78 |
+except it contains the empty droplets instead of cells. When supplied, |
|
79 |
+empirical distribution of transcripts from these empty droplets |
|
78 | 80 |
will be used as the contamination distribution. Default NULL.} |
79 | 81 |
|
80 | 82 |
\item{bgAssayName}{Character. Name of the assay to use if \code{background} |
81 | 83 |
is a \linkS4class{SingleCellExperiment}. Default to same as |
82 | 84 |
\code{assayName}.} |
83 | 85 |
|
86 |
+\item{bgBatch}{Numeric or character vector. Batch labels for |
|
87 |
+\code{background}. Its unique values should be the same as those in |
|
88 |
+\code{batch}, such that each batch of cells have their corresponding batch |
|
89 |
+of empty droplets as background, pointed by this parameter. Default to NULL.} |
|
90 |
+ |
|
84 | 91 |
\item{maxIter}{Integer. Maximum iterations of the EM algorithm. Default 500.} |
85 | 92 |
|
86 | 93 |
\item{delta}{Numeric Vector of length 2. Concentration parameters for |
... | ... |
@@ -6,6 +6,11 @@ |
6 | 6 |
|
7 | 7 |
using namespace Rcpp; |
8 | 8 |
|
9 |
+#ifdef RCPP_USE_GLOBAL_ROSTREAM |
|
10 |
+Rcpp::Rostream<true>& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); |
|
11 |
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); |
|
12 |
+#endif |
|
13 |
+ |
|
9 | 14 |
// decontXEM |
10 | 15 |
Rcpp::List decontXEM(const Eigen::MappedSparseMatrix<double>& counts, const NumericVector& counts_colsums, const NumericVector& theta, const bool& estimate_eta, const NumericMatrix& eta, const NumericMatrix& phi, const IntegerVector& z, const bool& estimate_delta, const NumericVector& delta, const double& pseudocount); |
11 | 16 |
RcppExport SEXP _celda_decontXEM(SEXP countsSEXP, SEXP counts_colsumsSEXP, SEXP thetaSEXP, SEXP estimate_etaSEXP, SEXP etaSEXP, SEXP phiSEXP, SEXP zSEXP, SEXP estimate_deltaSEXP, SEXP deltaSEXP, SEXP pseudocountSEXP) { |