... | ... |
@@ -165,9 +165,9 @@ recodeClusterZ <- function(sce, from, to, altExpName = "featureSubset") { |
165 | 165 |
new.clusters <- plyr::mapvalues(celdaClusters(sce, |
166 | 166 |
altExpName = altExpName), |
167 | 167 |
from, to) |
168 |
- new.clusters <- factor(new.clusters, levels = |
|
168 |
+ new.clusters <- factor(new.clusters, levels = |
|
169 | 169 |
sort(as.numeric(unique(new.clusters)))) |
170 |
- |
|
170 |
+ |
|
171 | 171 |
celdaClusters(sce, altExpName = altExpName) <- new.clusters |
172 | 172 |
return(sce) |
173 | 173 |
} |
... | ... |
@@ -218,9 +218,9 @@ recodeClusterY <- function(sce, from, to, altExpName = "featureSubset") { |
218 | 218 |
new.clusters <- plyr::mapvalues(celdaModules(sce, |
219 | 219 |
altExpName = altExpName), |
220 | 220 |
from, to) |
221 |
- new.clusters <- factor(new.clusters, levels = |
|
221 |
+ new.clusters <- factor(new.clusters, levels = |
|
222 | 222 |
sort(as.numeric(unique(new.clusters)))) |
223 |
- |
|
223 |
+ |
|
224 | 224 |
celdaModules(sce, altExpName = altExpName) <- plyr::mapvalues( |
225 | 225 |
celdaModules(sce, altExpName = altExpName), from, to) |
226 | 226 |
return(sce) |
... | ... |
@@ -26,13 +26,17 @@ |
26 | 26 |
#' should be considered different batches. Default NULL. |
27 | 27 |
#' @param background A numeric matrix of counts or a |
28 | 28 |
#' \linkS4class{SingleCellExperiment} with the matrix located in the assay |
29 |
-#' slot under \code{assayName}. It should have the same structure as \code{x} |
|
30 |
-#' except it contains the matrix of empty droplets instead of cells. When |
|
31 |
-#' supplied, empirical distribution of transcripts from these empty droplets |
|
29 |
+#' slot under \code{assayName}. It should have the same data format as \code{x} |
|
30 |
+#' except it contains the empty droplets instead of cells. When supplied, |
|
31 |
+#' empirical distribution of transcripts from these empty droplets |
|
32 | 32 |
#' will be used as the contamination distribution. Default NULL. |
33 | 33 |
#' @param bgAssayName Character. Name of the assay to use if \code{background} |
34 | 34 |
#' is a \linkS4class{SingleCellExperiment}. Default to same as |
35 | 35 |
#' \code{assayName}. |
36 |
+#' @param bgBatch Numeric or character vector. Batch labels for |
|
37 |
+#' \code{background}. Its unique values should be the same as those in |
|
38 |
+#' \code{batch}, such that each batch of cells have their corresponding batch |
|
39 |
+#' of empty droplets as background, pointed by this parameter. Default to NULL. |
|
36 | 40 |
#' @param maxIter Integer. Maximum iterations of the EM algorithm. Default 500. |
37 | 41 |
#' @param convergence Numeric. The EM algorithm will be stopped if the maximum |
38 | 42 |
#' difference in the contamination estimates between the previous and |
... | ... |
@@ -150,6 +154,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
150 | 154 |
batch = NULL, |
151 | 155 |
background = NULL, |
152 | 156 |
bgAssayName = NULL, |
157 |
+ bgBatch = NULL, |
|
153 | 158 |
maxIter = 500, |
154 | 159 |
delta = c(10, 10), |
155 | 160 |
estimateDelta = TRUE, |
... | ... |
@@ -163,8 +168,16 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
163 | 168 |
countsBackground <- NULL |
164 | 169 |
if (!is.null(background)) { |
165 | 170 |
# Remove cells with the same ID between x and the background matrix |
166 |
- background <- .checkBackground(x = x, background = background, |
|
167 |
- logfile = logfile, verbose = verbose) |
|
171 |
+ # Also update bgBatch when background is updated and bgBatch is not null |
|
172 |
+ temp <- .checkBackground(x = x, |
|
173 |
+ background = background, |
|
174 |
+ bgBatch = bgBatch, |
|
175 |
+ logfile = logfile, |
|
176 |
+ verbose = verbose) |
|
177 |
+ |
|
178 |
+ background <- temp$background |
|
179 |
+ bgBatch <- temp$bgBatch |
|
180 |
+ |
|
168 | 181 |
if (is.null(bgAssayName)) { |
169 | 182 |
bgAssayName <- assayName |
170 | 183 |
} |
... | ... |
@@ -178,6 +191,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
178 | 191 |
z = z, |
179 | 192 |
batch = batch, |
180 | 193 |
countsBackground = countsBackground, |
194 |
+ batchBackground = bgBatch, |
|
181 | 195 |
maxIter = maxIter, |
182 | 196 |
convergence = convergence, |
183 | 197 |
iterLogLik = iterLogLik, |
... | ... |
@@ -230,6 +244,7 @@ setMethod("decontX", "ANY", function(x, |
230 | 244 |
z = NULL, |
231 | 245 |
batch = NULL, |
232 | 246 |
background = NULL, |
247 |
+ bgBatch = NULL, |
|
233 | 248 |
maxIter = 500, |
234 | 249 |
delta = c(10, 10), |
235 | 250 |
estimateDelta = TRUE, |
... | ... |
@@ -244,10 +259,18 @@ setMethod("decontX", "ANY", function(x, |
244 | 259 |
countsBackground <- NULL |
245 | 260 |
if (!is.null(background)) { |
246 | 261 |
# Remove cells with the same ID between x and the background matrix |
247 |
- background <- .checkBackground(x = x, background = background, |
|
248 |
- logfile = logfile, verbose = verbose) |
|
249 |
- |
|
262 |
+ # Also update bgBatch when background is updated and bgBatch is not null |
|
263 |
+ temp <- .checkBackground(x = x, |
|
264 |
+ background = background, |
|
265 |
+ bgBatch = bgBatch, |
|
266 |
+ logfile = logfile, |
|
267 |
+ verbose = verbose) |
|
268 |
+ |
|
269 |
+ background <- temp$background |
|
250 | 270 |
countsBackground <- background |
271 |
+ |
|
272 |
+ bgBatch <- temp$bgBatch |
|
273 |
+ |
|
251 | 274 |
} |
252 | 275 |
|
253 | 276 |
.decontX( |
... | ... |
@@ -255,6 +278,7 @@ setMethod("decontX", "ANY", function(x, |
255 | 278 |
z = z, |
256 | 279 |
batch = batch, |
257 | 280 |
countsBackground = countsBackground, |
281 |
+ batchBackground = bgBatch, |
|
258 | 282 |
maxIter = maxIter, |
259 | 283 |
convergence = convergence, |
260 | 284 |
iterLogLik = iterLogLik, |
... | ... |
@@ -337,6 +361,7 @@ setMethod( |
337 | 361 |
z = NULL, |
338 | 362 |
batch = NULL, |
339 | 363 |
countsBackground = NULL, |
364 |
+ batchBackground = NULL, |
|
340 | 365 |
maxIter = 200, |
341 | 366 |
convergence = 0.001, |
342 | 367 |
iterLogLik = 10, |
... | ... |
@@ -367,6 +392,7 @@ setMethod( |
367 | 392 |
runParams <- list( |
368 | 393 |
z = z, |
369 | 394 |
batch = batch, |
395 |
+ batchBackground = batchBackground, |
|
370 | 396 |
maxIter = maxIter, |
371 | 397 |
delta = delta, |
372 | 398 |
estimateDelta = estimateDelta, |
... | ... |
@@ -384,7 +410,7 @@ setMethod( |
384 | 410 |
nC <- ncol(counts) |
385 | 411 |
allCellNames <- colnames(counts) |
386 | 412 |
|
387 |
- ## Set up final deconaminated matrix |
|
413 |
+ ## Set up final decontaminated matrix |
|
388 | 414 |
estRmat <- Matrix::Matrix( |
389 | 415 |
data = 0, |
390 | 416 |
ncol = totalCells, |
... | ... |
@@ -396,8 +422,32 @@ setMethod( |
396 | 422 |
## Generate batch labels if none were supplied |
397 | 423 |
if (is.null(batch)) { |
398 | 424 |
batch <- rep("all_cells", nC) |
425 |
+ |
|
426 |
+ # If batch null, bgBatch has to be null |
|
427 |
+ if (!is.null(batchBackground)) { |
|
428 |
+ stop( |
|
429 |
+ "When experiment default to no bacth, background should ", |
|
430 |
+ "also default to no batch." |
|
431 |
+ ) |
|
432 |
+ } |
|
433 |
+ |
|
434 |
+ if (!is.null(countsBackground)) { |
|
435 |
+ batchBackground <- rep("all_cells", ncol(countsBackground)) |
|
436 |
+ } |
|
437 |
+ } else { |
|
438 |
+ |
|
439 |
+ # If batch not null and countsBackground supplied, |
|
440 |
+ # user has to supply batchBackground as well |
|
441 |
+ if (!is.null(countsBackground) & is.null(batchBackground)) { |
|
442 |
+ stop( |
|
443 |
+ "Cell batch, and background are supplied. Please also ", |
|
444 |
+ "supply background batch." |
|
445 |
+ ) |
|
446 |
+ } |
|
447 |
+ |
|
399 | 448 |
} |
400 | 449 |
runParams$batch <- batch |
450 |
+ runParams$batchBackground <- batchBackground |
|
401 | 451 |
batchIndex <- unique(batch) |
402 | 452 |
|
403 | 453 |
## Set result lists upfront for all cells from different batches |
... | ... |
@@ -430,6 +480,7 @@ setMethod( |
430 | 480 |
|
431 | 481 |
zBat <- NULL |
432 | 482 |
countsBat <- counts[, batch == bat] |
483 |
+ bgBat <- countsBackground[, batchBackground == bat] |
|
433 | 484 |
|
434 | 485 |
## Convert to sparse matrix |
435 | 486 |
if (!inherits(countsBat, "dgCMatrix")) { |
... | ... |
@@ -442,9 +493,9 @@ setMethod( |
442 | 493 |
) |
443 | 494 |
countsBat <- methods::as(countsBat, "dgCMatrix") |
444 | 495 |
} |
445 |
- if (!is.null(countsBackground)) { |
|
446 |
- if (!inherits(countsBackground, "dgCMatrix")) { |
|
447 |
- countsBackground <- methods::as(countsBackground, "dgCMatrix") |
|
496 |
+ if (!is.null(bgBat)) { |
|
497 |
+ if (!inherits(bgBat, "dgCMatrix")) { |
|
498 |
+ bgBat <- methods::as(bgBat, "dgCMatrix") |
|
448 | 499 |
} |
449 | 500 |
} |
450 | 501 |
|
... | ... |
@@ -456,7 +507,7 @@ setMethod( |
456 | 507 |
counts = countsBat, |
457 | 508 |
z = zBat, |
458 | 509 |
batch = bat, |
459 |
- countsBackground = countsBackground, |
|
510 |
+ countsBackground = bgBat, |
|
460 | 511 |
maxIter = maxIter, |
461 | 512 |
delta = delta, |
462 | 513 |
estimateDelta = estimateDelta, |
... | ... |
@@ -475,7 +526,7 @@ setMethod( |
475 | 526 |
counts = countsBat, |
476 | 527 |
z = zBat, |
477 | 528 |
batch = bat, |
478 |
- countsBackground = countsBackground, |
|
529 |
+ countsBackground = bgBat, |
|
479 | 530 |
maxIter = maxIter, |
480 | 531 |
delta = delta, |
481 | 532 |
estimateDelta = estimateDelta, |
... | ... |
@@ -491,7 +542,7 @@ setMethod( |
491 | 542 |
} |
492 | 543 |
|
493 | 544 |
## Try to convert class of new matrix to class of original matrix |
494 |
- |
|
545 |
+ |
|
495 | 546 |
.logMessages( |
496 | 547 |
date(), |
497 | 548 |
".. Calculating final decontaminated matrix", |
... | ... |
@@ -563,7 +614,7 @@ setMethod( |
563 | 614 |
append = TRUE, |
564 | 615 |
verbose = verbose |
565 | 616 |
) |
566 |
- |
|
617 |
+ |
|
567 | 618 |
## Determine class of seed in DelayedArray |
568 | 619 |
seed.class <- unique(DelayedArray::seedApply(counts, class))[[1]] |
569 | 620 |
if (seed.class == "HDF5ArraySeed") { |
... | ... |
@@ -1369,9 +1420,11 @@ simulateContamination <- function(C = 300, |
1369 | 1420 |
} |
1370 | 1421 |
|
1371 | 1422 |
|
1372 |
-.checkBackground <- function(x, background, logfile = NULL, verbose = FALSE) { |
|
1423 |
+.checkBackground <- function(x, background, bgBatch, |
|
1424 |
+ logfile = NULL, verbose = FALSE) { |
|
1373 | 1425 |
# Remove background barcodes that have already appeared in x |
1374 |
- if(!is.null(colnames(background))) { |
|
1426 |
+ # If bgBatch param is supplied, also remove duplicate bgBatch |
|
1427 |
+ if (!is.null(colnames(background))) { |
|
1375 | 1428 |
dupBarcode <- colnames(background) %in% colnames(x) |
1376 | 1429 |
} else { |
1377 | 1430 |
dupBarcode <- FALSE |
... | ... |
@@ -1381,7 +1434,7 @@ simulateContamination <- function(C = 300, |
1381 | 1434 |
" Please ensure that no true cells are included in the background ", |
1382 | 1435 |
"matrix. Otherwise, results will be incorrect.") |
1383 | 1436 |
} |
1384 |
- |
|
1437 |
+ |
|
1385 | 1438 |
if (any(dupBarcode)) { |
1386 | 1439 |
.logMessages( |
1387 | 1440 |
date(), |
... | ... |
@@ -1394,6 +1447,20 @@ simulateContamination <- function(C = 300, |
1394 | 1447 |
verbose = verbose |
1395 | 1448 |
) |
1396 | 1449 |
background <- background[, !(dupBarcode), drop = FALSE] |
1450 |
+ |
|
1451 |
+ if (!is.null(bgBatch)) { |
|
1452 |
+ if (length(bgBatch) != length(dupBarcode)) { |
|
1453 |
+ stop( |
|
1454 |
+ "Length of bgBatch must be equal to the number of columns", |
|
1455 |
+ "of background matrix." |
|
1456 |
+ ) |
|
1457 |
+ } |
|
1458 |
+ bgBatch <- bgBatch[!(dupBarcode)] |
|
1459 |
+ } |
|
1397 | 1460 |
} |
1398 |
- return(background) |
|
1461 |
+ |
|
1462 |
+ re <- list(background = background, |
|
1463 |
+ bgBatch = bgBatch) |
|
1464 |
+ |
|
1465 |
+ return(re) |
|
1399 | 1466 |
} |
... | ... |
@@ -15,6 +15,7 @@ decontX(x, ...) |
15 | 15 |
batch = NULL, |
16 | 16 |
background = NULL, |
17 | 17 |
bgAssayName = NULL, |
18 |
+ bgBatch = NULL, |
|
18 | 19 |
maxIter = 500, |
19 | 20 |
delta = c(10, 10), |
20 | 21 |
estimateDelta = TRUE, |
... | ... |
@@ -32,6 +33,7 @@ decontX(x, ...) |
32 | 33 |
z = NULL, |
33 | 34 |
batch = NULL, |
34 | 35 |
background = NULL, |
36 |
+ bgBatch = NULL, |
|
35 | 37 |
maxIter = 500, |
36 | 38 |
delta = c(10, 10), |
37 | 39 |
estimateDelta = TRUE, |
... | ... |
@@ -72,15 +74,20 @@ should be considered different batches. Default NULL.} |
72 | 74 |
|
73 | 75 |
\item{background}{A numeric matrix of counts or a |
74 | 76 |
\linkS4class{SingleCellExperiment} with the matrix located in the assay |
75 |
-slot under \code{assayName}. It should have the same structure as \code{x} |
|
76 |
-except it contains the matrix of empty droplets instead of cells. When |
|
77 |
-supplied, empirical distribution of transcripts from these empty droplets |
|
77 |
+slot under \code{assayName}. It should have the same data format as \code{x} |
|
78 |
+except it contains the empty droplets instead of cells. When supplied, |
|
79 |
+empirical distribution of transcripts from these empty droplets |
|
78 | 80 |
will be used as the contamination distribution. Default NULL.} |
79 | 81 |
|
80 | 82 |
\item{bgAssayName}{Character. Name of the assay to use if \code{background} |
81 | 83 |
is a \linkS4class{SingleCellExperiment}. Default to same as |
82 | 84 |
\code{assayName}.} |
83 | 85 |
|
86 |
+\item{bgBatch}{Numeric or character vector. Batch labels for |
|
87 |
+\code{background}. Its unique values should be the same as those in |
|
88 |
+\code{batch}, such that each batch of cells have their corresponding batch |
|
89 |
+of empty droplets as background, pointed by this parameter. Default to NULL.} |
|
90 |
+ |
|
84 | 91 |
\item{maxIter}{Integer. Maximum iterations of the EM algorithm. Default 500.} |
85 | 92 |
|
86 | 93 |
\item{delta}{Numeric Vector of length 2. Concentration parameters for |