... | ... |
@@ -493,11 +493,11 @@ setMethod( |
493 | 493 |
append = TRUE, |
494 | 494 |
verbose = verbose |
495 | 495 |
) |
496 |
- countsBat <- methods::as(countsBat, "dgCMatrix") |
|
496 |
+ countsBat <- methods::as(countsBat, "CsparseMatrix") |
|
497 | 497 |
} |
498 | 498 |
if (!is.null(bgBat)) { |
499 | 499 |
if (!inherits(bgBat, "dgCMatrix")) { |
500 |
- bgBat <- methods::as(bgBat, "dgCMatrix") |
|
500 |
+ bgBat <- methods::as(bgBat, "CsparseMatrix") |
|
501 | 501 |
} |
502 | 502 |
} |
503 | 503 |
|
... | ... |
@@ -144,6 +144,8 @@ setGeneric("decontX", function(x, ...) standardGeneric("decontX")) |
144 | 144 |
|
145 | 145 |
#' @export |
146 | 146 |
#' @rdname decontX |
147 |
+#' @importClassesFrom SingleCellExperiment SingleCellExperiment |
|
148 |
+#' @importClassesFrom Matrix dgCMatrix |
|
147 | 149 |
setMethod("decontX", "SingleCellExperiment", function(x, |
148 | 150 |
assayName = "counts", |
149 | 151 |
z = NULL, |
... | ... |
@@ -246,6 +246,8 @@ setMethod("decontX", "ANY", function(x, |
246 | 246 |
# Remove cells with the same ID between x and the background matrix |
247 | 247 |
background <- .checkBackground(x = x, background = background, |
248 | 248 |
logfile = logfile, verbose = verbose) |
249 |
+ |
|
250 |
+ countsBackground <- background |
|
249 | 251 |
} |
250 | 252 |
|
251 | 253 |
.decontX( |
... | ... |
@@ -27,13 +27,13 @@ |
27 | 27 |
#' @param background A numeric matrix of counts or a |
28 | 28 |
#' \linkS4class{SingleCellExperiment} with the matrix located in the assay |
29 | 29 |
#' slot under \code{assayName}. It should have the same data format as \code{x} |
30 |
-#' except it contains the empty droplets instead of cells. When supplied, |
|
30 |
+#' except it contains the empty droplets instead of cells. When supplied, |
|
31 | 31 |
#' empirical distribution of transcripts from these empty droplets |
32 | 32 |
#' will be used as the contamination distribution. Default NULL. |
33 | 33 |
#' @param bgAssayName Character. Name of the assay to use if \code{background} |
34 | 34 |
#' is a \linkS4class{SingleCellExperiment}. Default to same as |
35 | 35 |
#' \code{assayName}. |
36 |
-#' @param bgBatch Numeric or character vector. Batch labels for |
|
36 |
+#' @param bgBatch Numeric or character vector. Batch labels for |
|
37 | 37 |
#' \code{background}. Its unique values should be the same as those in |
38 | 38 |
#' \code{batch}, such that each batch of cells have their corresponding batch |
39 | 39 |
#' of empty droplets as background, pointed by this parameter. Default to NULL. |
... | ... |
@@ -174,10 +174,10 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
174 | 174 |
bgBatch = bgBatch, |
175 | 175 |
logfile = logfile, |
176 | 176 |
verbose = verbose) |
177 |
- |
|
177 |
+ |
|
178 | 178 |
background <- temp$background |
179 | 179 |
bgBatch <- temp$bgBatch |
180 |
- |
|
180 |
+ |
|
181 | 181 |
if (is.null(bgAssayName)) { |
182 | 182 |
bgAssayName <- assayName |
183 | 183 |
} |
... | ... |
@@ -265,10 +265,10 @@ setMethod("decontX", "ANY", function(x, |
265 | 265 |
bgBatch = bgBatch, |
266 | 266 |
logfile = logfile, |
267 | 267 |
verbose = verbose) |
268 |
- |
|
268 |
+ |
|
269 | 269 |
background <- temp$background |
270 | 270 |
bgBatch <- temp$bgBatch |
271 |
- |
|
271 |
+ |
|
272 | 272 |
} |
273 | 273 |
|
274 | 274 |
.decontX( |
... | ... |
@@ -420,9 +420,9 @@ setMethod( |
420 | 420 |
## Generate batch labels if none were supplied |
421 | 421 |
if (is.null(batch)) { |
422 | 422 |
batch <- rep("all_cells", nC) |
423 |
- |
|
423 |
+ |
|
424 | 424 |
# If batch null, bgBatch has to be null |
425 |
- if (!is.null(batchBackground)){ |
|
425 |
+ if (!is.null(batchBackground)) { |
|
426 | 426 |
stop( |
427 | 427 |
"When experiment default to no bacth, background should ", |
428 | 428 |
"also default to no batch." |
... | ... |
@@ -436,7 +436,7 @@ setMethod( |
436 | 436 |
|
437 | 437 |
# If batch not null and countsBackground supplied, |
438 | 438 |
# user has to supply batchBackground as well |
439 |
- if (!is.null(countsBackground) & is.null(batchBackground)){ |
|
439 |
+ if (!is.null(countsBackground) & is.null(batchBackground)) { |
|
440 | 440 |
stop( |
441 | 441 |
"Cell batch, and background are supplied. Please also ", |
442 | 442 |
"supply background batch." |
... | ... |
@@ -540,7 +540,7 @@ setMethod( |
540 | 540 |
} |
541 | 541 |
|
542 | 542 |
## Try to convert class of new matrix to class of original matrix |
543 |
- |
|
543 |
+ |
|
544 | 544 |
.logMessages( |
545 | 545 |
date(), |
546 | 546 |
".. Calculating final decontaminated matrix", |
... | ... |
@@ -612,7 +612,7 @@ setMethod( |
612 | 612 |
append = TRUE, |
613 | 613 |
verbose = verbose |
614 | 614 |
) |
615 |
- |
|
615 |
+ |
|
616 | 616 |
## Determine class of seed in DelayedArray |
617 | 617 |
seed.class <- unique(DelayedArray::seedApply(counts, class))[[1]] |
618 | 618 |
if (seed.class == "HDF5ArraySeed") { |
... | ... |
@@ -1422,7 +1422,7 @@ simulateContamination <- function(C = 300, |
1422 | 1422 |
logfile = NULL, verbose = FALSE) { |
1423 | 1423 |
# Remove background barcodes that have already appeared in x |
1424 | 1424 |
# If bgBatch param is supplied, also remove duplicate bgBatch |
1425 |
- if(!is.null(colnames(background))) { |
|
1425 |
+ if (!is.null(colnames(background))) { |
|
1426 | 1426 |
dupBarcode <- colnames(background) %in% colnames(x) |
1427 | 1427 |
} else { |
1428 | 1428 |
dupBarcode <- FALSE |
... | ... |
@@ -1432,7 +1432,7 @@ simulateContamination <- function(C = 300, |
1432 | 1432 |
" Please ensure that no true cells are included in the background ", |
1433 | 1433 |
"matrix. Otherwise, results will be incorrect.") |
1434 | 1434 |
} |
1435 |
- |
|
1435 |
+ |
|
1436 | 1436 |
if (any(dupBarcode)) { |
1437 | 1437 |
.logMessages( |
1438 | 1438 |
date(), |
... | ... |
@@ -1445,8 +1445,8 @@ simulateContamination <- function(C = 300, |
1445 | 1445 |
verbose = verbose |
1446 | 1446 |
) |
1447 | 1447 |
background <- background[, !(dupBarcode), drop = FALSE] |
1448 |
- |
|
1449 |
- if(!is.null(bgBatch)){ |
|
1448 |
+ |
|
1449 |
+ if (!is.null(bgBatch)) { |
|
1450 | 1450 |
if (length(bgBatch) != length(dupBarcode)) { |
1451 | 1451 |
stop( |
1452 | 1452 |
"Length of bgBatch must be equal to the number of columns", |
... | ... |
@@ -1456,9 +1456,9 @@ simulateContamination <- function(C = 300, |
1456 | 1456 |
bgBatch <- bgBatch[!(dupBarcode)] |
1457 | 1457 |
} |
1458 | 1458 |
} |
1459 |
- |
|
1460 |
- re = list(background = background, |
|
1459 |
+ |
|
1460 |
+ re <- list(background = background, |
|
1461 | 1461 |
bgBatch = bgBatch) |
1462 |
- |
|
1462 |
+ |
|
1463 | 1463 |
return(re) |
1464 | 1464 |
} |
... | ... |
@@ -424,7 +424,7 @@ setMethod( |
424 | 424 |
# If batch null, bgBatch has to be null |
425 | 425 |
if (!is.null(batchBackground)){ |
426 | 426 |
stop( |
427 |
- "When experiment default to no bacth, background should", |
|
427 |
+ "When experiment default to no bacth, background should ", |
|
428 | 428 |
"also default to no batch." |
429 | 429 |
) |
430 | 430 |
} |
... | ... |
@@ -432,6 +432,17 @@ setMethod( |
432 | 432 |
if (!is.null(countsBackground)) { |
433 | 433 |
batchBackground <- rep("all_cells", ncol(countsBackground)) |
434 | 434 |
} |
435 |
+ } else { |
|
436 |
+ |
|
437 |
+ # If batch not null and countsBackground supplied, |
|
438 |
+ # user has to supply batchBackground as well |
|
439 |
+ if (!is.null(countsBackground) & is.null(batchBackground)){ |
|
440 |
+ stop( |
|
441 |
+ "Cell batch, and background are supplied. Please also ", |
|
442 |
+ "supply background batch." |
|
443 |
+ ) |
|
444 |
+ } |
|
445 |
+ |
|
435 | 446 |
} |
436 | 447 |
runParams$batch <- batch |
437 | 448 |
runParams$batchBackground <- batchBackground |
... | ... |
@@ -421,6 +421,14 @@ setMethod( |
421 | 421 |
if (is.null(batch)) { |
422 | 422 |
batch <- rep("all_cells", nC) |
423 | 423 |
|
424 |
+ # If batch null, bgBatch has to be null |
|
425 |
+ if (!is.null(batchBackground)){ |
|
426 |
+ stop( |
|
427 |
+ "When experiment default to no bacth, background should", |
|
428 |
+ "also default to no batch." |
|
429 |
+ ) |
|
430 |
+ } |
|
431 |
+ |
|
424 | 432 |
if (!is.null(countsBackground)) { |
425 | 433 |
batchBackground <- rep("all_cells", ncol(countsBackground)) |
426 | 434 |
} |
... | ... |
@@ -426,6 +426,7 @@ setMethod( |
426 | 426 |
} |
427 | 427 |
} |
428 | 428 |
runParams$batch <- batch |
429 |
+ runParams$batchBackground <- batchBackground |
|
429 | 430 |
batchIndex <- unique(batch) |
430 | 431 |
|
431 | 432 |
## Set result lists upfront for all cells from different batches |
... | ... |
@@ -458,7 +459,7 @@ setMethod( |
458 | 459 |
|
459 | 460 |
zBat <- NULL |
460 | 461 |
countsBat <- counts[, batch == bat] |
461 |
- bgBat <- countsBackground[, batchBackgound == bat] |
|
462 |
+ bgBat <- countsBackground[, batchBackground == bat] |
|
462 | 463 |
|
463 | 464 |
## Convert to sparse matrix |
464 | 465 |
if (!inherits(countsBat, "dgCMatrix")) { |
... | ... |
@@ -458,7 +458,7 @@ setMethod( |
458 | 458 |
|
459 | 459 |
zBat <- NULL |
460 | 460 |
countsBat <- counts[, batch == bat] |
461 |
- bgBat <- countsBackground[, bactchBackgound == bat] |
|
461 |
+ bgBat <- countsBackground[, batchBackgound == bat] |
|
462 | 462 |
|
463 | 463 |
## Convert to sparse matrix |
464 | 464 |
if (!inherits(countsBat, "dgCMatrix")) { |
... | ... |
@@ -168,6 +168,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
168 | 168 |
countsBackground <- NULL |
169 | 169 |
if (!is.null(background)) { |
170 | 170 |
# Remove cells with the same ID between x and the background matrix |
171 |
+ # Also update bgBatch when background is updated and bgBatch is not null |
|
171 | 172 |
temp <- .checkBackground(x = x, |
172 | 173 |
background = background, |
173 | 174 |
bgBatch = bgBatch, |
... | ... |
@@ -258,6 +259,7 @@ setMethod("decontX", "ANY", function(x, |
258 | 259 |
countsBackground <- NULL |
259 | 260 |
if (!is.null(background)) { |
260 | 261 |
# Remove cells with the same ID between x and the background matrix |
262 |
+ # Also update bgBatch when background is updated and bgBatch is not null |
|
261 | 263 |
temp <- .checkBackground(x = x, |
262 | 264 |
background = background, |
263 | 265 |
bgBatch = bgBatch, |
... | ... |
@@ -168,10 +168,14 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
168 | 168 |
countsBackground <- NULL |
169 | 169 |
if (!is.null(background)) { |
170 | 170 |
# Remove cells with the same ID between x and the background matrix |
171 |
- background <- .checkBackground(x = x, background = background, |
|
172 |
- logfile = logfile, verbose = verbose) |
|
171 |
+ temp <- .checkBackground(x = x, |
|
172 |
+ background = background, |
|
173 |
+ bgBatch = bgBatch, |
|
174 |
+ logfile = logfile, |
|
175 |
+ verbose = verbose) |
|
173 | 176 |
|
174 |
- # TODO: Does bgBatch needs to be checked? |
|
177 |
+ background <- temp$background |
|
178 |
+ bgBatch <- temp$bgBatch |
|
175 | 179 |
|
176 | 180 |
if (is.null(bgAssayName)) { |
177 | 181 |
bgAssayName <- assayName |
... | ... |
@@ -254,10 +258,14 @@ setMethod("decontX", "ANY", function(x, |
254 | 258 |
countsBackground <- NULL |
255 | 259 |
if (!is.null(background)) { |
256 | 260 |
# Remove cells with the same ID between x and the background matrix |
257 |
- background <- .checkBackground(x = x, background = background, |
|
258 |
- logfile = logfile, verbose = verbose) |
|
261 |
+ temp <- .checkBackground(x = x, |
|
262 |
+ background = background, |
|
263 |
+ bgBatch = bgBatch, |
|
264 |
+ logfile = logfile, |
|
265 |
+ verbose = verbose) |
|
259 | 266 |
|
260 |
- # TODO: Does bgBatch needs to be checked? |
|
267 |
+ background <- temp$background |
|
268 |
+ bgBatch <- temp$bgBatch |
|
261 | 269 |
|
262 | 270 |
} |
263 | 271 |
|
... | ... |
@@ -1388,8 +1396,10 @@ simulateContamination <- function(C = 300, |
1388 | 1396 |
} |
1389 | 1397 |
|
1390 | 1398 |
|
1391 |
-.checkBackground <- function(x, background, logfile = NULL, verbose = FALSE) { |
|
1399 |
+.checkBackground <- function(x, background, bgBatch, |
|
1400 |
+ logfile = NULL, verbose = FALSE) { |
|
1392 | 1401 |
# Remove background barcodes that have already appeared in x |
1402 |
+ # If bgBatch param is supplied, also remove duplicate bgBatch |
|
1393 | 1403 |
if(!is.null(colnames(background))) { |
1394 | 1404 |
dupBarcode <- colnames(background) %in% colnames(x) |
1395 | 1405 |
} else { |
... | ... |
@@ -1413,6 +1423,20 @@ simulateContamination <- function(C = 300, |
1413 | 1423 |
verbose = verbose |
1414 | 1424 |
) |
1415 | 1425 |
background <- background[, !(dupBarcode), drop = FALSE] |
1426 |
+ |
|
1427 |
+ if(!is.null(bgBatch)){ |
|
1428 |
+ if (length(bgBatch) != length(dupBarcode)) { |
|
1429 |
+ stop( |
|
1430 |
+ "Length of bgBatch must be equal to the number of columns", |
|
1431 |
+ "of background matrix." |
|
1432 |
+ ) |
|
1433 |
+ } |
|
1434 |
+ bgBatch <- bgBatch[!(dupBarcode)] |
|
1435 |
+ } |
|
1416 | 1436 |
} |
1417 |
- return(background) |
|
1437 |
+ |
|
1438 |
+ re = list(background = background, |
|
1439 |
+ bgBatch = bgBatch) |
|
1440 |
+ |
|
1441 |
+ return(re) |
|
1418 | 1442 |
} |
... | ... |
@@ -170,7 +170,8 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
170 | 170 |
# Remove cells with the same ID between x and the background matrix |
171 | 171 |
background <- .checkBackground(x = x, background = background, |
172 | 172 |
logfile = logfile, verbose = verbose) |
173 |
- # Does bgBatch needs to be checked? |
|
173 |
+ |
|
174 |
+ # TODO: Does bgBatch needs to be checked? |
|
174 | 175 |
|
175 | 176 |
if (is.null(bgAssayName)) { |
176 | 177 |
bgAssayName <- assayName |
... | ... |
@@ -255,6 +256,9 @@ setMethod("decontX", "ANY", function(x, |
255 | 256 |
# Remove cells with the same ID between x and the background matrix |
256 | 257 |
background <- .checkBackground(x = x, background = background, |
257 | 258 |
logfile = logfile, verbose = verbose) |
259 |
+ |
|
260 |
+ # TODO: Does bgBatch needs to be checked? |
|
261 |
+ |
|
258 | 262 |
} |
259 | 263 |
|
260 | 264 |
.decontX( |
... | ... |
@@ -407,9 +411,9 @@ setMethod( |
407 | 411 |
if (is.null(batch)) { |
408 | 412 |
batch <- rep("all_cells", nC) |
409 | 413 |
|
410 |
- # When no batch, batchBackground can have max 1 batch, depending on if |
|
411 |
- # countsBackground supplied |
|
412 |
- batchBackground <- rep("all_cells", ncol(countsBackground)) |
|
414 |
+ if (!is.null(countsBackground)) { |
|
415 |
+ batchBackground <- rep("all_cells", ncol(countsBackground)) |
|
416 |
+ } |
|
413 | 417 |
} |
414 | 418 |
runParams$batch <- batch |
415 | 419 |
batchIndex <- unique(batch) |
... | ... |
@@ -26,17 +26,17 @@ |
26 | 26 |
#' should be considered different batches. Default NULL. |
27 | 27 |
#' @param background A numeric matrix of counts or a |
28 | 28 |
#' \linkS4class{SingleCellExperiment} with the matrix located in the assay |
29 |
-#' slot under \code{assayName}. It should have the same structure as \code{x} |
|
30 |
-#' except it contains the matrix of empty droplets instead of cells. When |
|
31 |
-#' supplied, empirical distribution of transcripts from these empty droplets |
|
29 |
+#' slot under \code{assayName}. It should have the same data format as \code{x} |
|
30 |
+#' except it contains the empty droplets instead of cells. When supplied, |
|
31 |
+#' empirical distribution of transcripts from these empty droplets |
|
32 | 32 |
#' will be used as the contamination distribution. Default NULL. |
33 | 33 |
#' @param bgAssayName Character. Name of the assay to use if \code{background} |
34 | 34 |
#' is a \linkS4class{SingleCellExperiment}. Default to same as |
35 | 35 |
#' \code{assayName}. |
36 |
-#' @param bgBatch Numeric or chracter vector. Batch labels for |
|
37 |
-#' \code{background}. Its unique values should be the same as in \code{batch}, |
|
38 |
-#' such that each batch of cells have their corresponding background batch. |
|
39 |
-#' Default to NULL. |
|
36 |
+#' @param bgBatch Numeric or character vector. Batch labels for |
|
37 |
+#' \code{background}. Its unique values should be the same as those in |
|
38 |
+#' \code{batch}, such that each batch of cells have their corresponding batch |
|
39 |
+#' of empty droplets as background, pointed by this parameter. Default to NULL. |
|
40 | 40 |
#' @param maxIter Integer. Maximum iterations of the EM algorithm. Default 500. |
41 | 41 |
#' @param convergence Numeric. The EM algorithm will be stopped if the maximum |
42 | 42 |
#' difference in the contamination estimates between the previous and |
... | ... |
@@ -170,6 +170,8 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
170 | 170 |
# Remove cells with the same ID between x and the background matrix |
171 | 171 |
background <- .checkBackground(x = x, background = background, |
172 | 172 |
logfile = logfile, verbose = verbose) |
173 |
+ # Does bgBatch needs to be checked? |
|
174 |
+ |
|
173 | 175 |
if (is.null(bgAssayName)) { |
174 | 176 |
bgAssayName <- assayName |
175 | 177 |
} |
... | ... |
@@ -392,7 +394,7 @@ setMethod( |
392 | 394 |
nC <- ncol(counts) |
393 | 395 |
allCellNames <- colnames(counts) |
394 | 396 |
|
395 |
- ## Set up final deconaminated matrix |
|
397 |
+ ## Set up final decontaminated matrix |
|
396 | 398 |
estRmat <- Matrix::Matrix( |
397 | 399 |
data = 0, |
398 | 400 |
ncol = totalCells, |
... | ... |
@@ -404,6 +406,9 @@ setMethod( |
404 | 406 |
## Generate batch labels if none were supplied |
405 | 407 |
if (is.null(batch)) { |
406 | 408 |
batch <- rep("all_cells", nC) |
409 |
+ |
|
410 |
+ # When no batch, batchBackground can have max 1 batch, depending on if |
|
411 |
+ # countsBackground supplied |
|
407 | 412 |
batchBackground <- rep("all_cells", ncol(countsBackground)) |
408 | 413 |
} |
409 | 414 |
runParams$batch <- batch |
... | ... |
@@ -439,6 +444,7 @@ setMethod( |
439 | 444 |
|
440 | 445 |
zBat <- NULL |
441 | 446 |
countsBat <- counts[, batch == bat] |
447 |
+ bgBat <- countsBackground[, bactchBackgound == bat] |
|
442 | 448 |
|
443 | 449 |
## Convert to sparse matrix |
444 | 450 |
if (!inherits(countsBat, "dgCMatrix")) { |
... | ... |
@@ -451,9 +457,9 @@ setMethod( |
451 | 457 |
) |
452 | 458 |
countsBat <- methods::as(countsBat, "dgCMatrix") |
453 | 459 |
} |
454 |
- if (!is.null(countsBackground)) { |
|
455 |
- if (!inherits(countsBackground, "dgCMatrix")) { |
|
456 |
- countsBackground <- methods::as(countsBackground, "dgCMatrix") |
|
460 |
+ if (!is.null(bgBat)) { |
|
461 |
+ if (!inherits(bgBat, "dgCMatrix")) { |
|
462 |
+ bgBat <- methods::as(bgBat, "dgCMatrix") |
|
457 | 463 |
} |
458 | 464 |
} |
459 | 465 |
|
... | ... |
@@ -465,7 +471,7 @@ setMethod( |
465 | 471 |
counts = countsBat, |
466 | 472 |
z = zBat, |
467 | 473 |
batch = bat, |
468 |
- countsBackground = countsBackground, |
|
474 |
+ countsBackground = bgBat, |
|
469 | 475 |
maxIter = maxIter, |
470 | 476 |
delta = delta, |
471 | 477 |
estimateDelta = estimateDelta, |
... | ... |
@@ -484,7 +490,7 @@ setMethod( |
484 | 490 |
counts = countsBat, |
485 | 491 |
z = zBat, |
486 | 492 |
batch = bat, |
487 |
- countsBackground = countsBackground, |
|
493 |
+ countsBackground = bgBat, |
|
488 | 494 |
maxIter = maxIter, |
489 | 495 |
delta = delta, |
490 | 496 |
estimateDelta = estimateDelta, |
... | ... |
@@ -154,6 +154,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
154 | 154 |
batch = NULL, |
155 | 155 |
background = NULL, |
156 | 156 |
bgAssayName = NULL, |
157 |
+ bgBatch = NULL, |
|
157 | 158 |
maxIter = 500, |
158 | 159 |
delta = c(10, 10), |
159 | 160 |
estimateDelta = TRUE, |
... | ... |
@@ -182,6 +183,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
182 | 183 |
z = z, |
183 | 184 |
batch = batch, |
184 | 185 |
countsBackground = countsBackground, |
186 |
+ batchBackground = bgBatch, |
|
185 | 187 |
maxIter = maxIter, |
186 | 188 |
convergence = convergence, |
187 | 189 |
iterLogLik = iterLogLik, |
... | ... |
@@ -234,6 +236,7 @@ setMethod("decontX", "ANY", function(x, |
234 | 236 |
z = NULL, |
235 | 237 |
batch = NULL, |
236 | 238 |
background = NULL, |
239 |
+ bgBatch = NULL, |
|
237 | 240 |
maxIter = 500, |
238 | 241 |
delta = c(10, 10), |
239 | 242 |
estimateDelta = TRUE, |
... | ... |
@@ -257,6 +260,7 @@ setMethod("decontX", "ANY", function(x, |
257 | 260 |
z = z, |
258 | 261 |
batch = batch, |
259 | 262 |
countsBackground = countsBackground, |
263 |
+ batchBackground = bgBatch, |
|
260 | 264 |
maxIter = maxIter, |
261 | 265 |
convergence = convergence, |
262 | 266 |
iterLogLik = iterLogLik, |
... | ... |
@@ -339,6 +343,7 @@ setMethod( |
339 | 343 |
z = NULL, |
340 | 344 |
batch = NULL, |
341 | 345 |
countsBackground = NULL, |
346 |
+ batchBackground = NULL, |
|
342 | 347 |
maxIter = 200, |
343 | 348 |
convergence = 0.001, |
344 | 349 |
iterLogLik = 10, |
... | ... |
@@ -369,6 +374,7 @@ setMethod( |
369 | 374 |
runParams <- list( |
370 | 375 |
z = z, |
371 | 376 |
batch = batch, |
377 |
+ batchBackground = batchBackground, |
|
372 | 378 |
maxIter = maxIter, |
373 | 379 |
delta = delta, |
374 | 380 |
estimateDelta = estimateDelta, |
... | ... |
@@ -398,6 +404,7 @@ setMethod( |
398 | 404 |
## Generate batch labels if none were supplied |
399 | 405 |
if (is.null(batch)) { |
400 | 406 |
batch <- rep("all_cells", nC) |
407 |
+ batchBackground <- rep("all_cells", ncol(countsBackground)) |
|
401 | 408 |
} |
402 | 409 |
runParams$batch <- batch |
403 | 410 |
batchIndex <- unique(batch) |
... | ... |
@@ -33,6 +33,10 @@ |
33 | 33 |
#' @param bgAssayName Character. Name of the assay to use if \code{background} |
34 | 34 |
#' is a \linkS4class{SingleCellExperiment}. Default to same as |
35 | 35 |
#' \code{assayName}. |
36 |
+#' @param bgBatch Numeric or chracter vector. Batch labels for |
|
37 |
+#' \code{background}. Its unique values should be the same as in \code{batch}, |
|
38 |
+#' such that each batch of cells have their corresponding background batch. |
|
39 |
+#' Default to NULL. |
|
36 | 40 |
#' @param maxIter Integer. Maximum iterations of the EM algorithm. Default 500. |
37 | 41 |
#' @param convergence Numeric. The EM algorithm will be stopped if the maximum |
38 | 42 |
#' difference in the contamination estimates between the previous and |
... | ... |
@@ -546,7 +546,7 @@ setMethod( |
546 | 546 |
|
547 | 547 |
.logMessages( |
548 | 548 |
date(), |
549 |
- ".. Converting decontaminated matrix to ", class(counts), |
|
549 |
+ ".. Converting decontaminated matrix to", class(counts), |
|
550 | 550 |
logfile = logfile, |
551 | 551 |
append = TRUE, |
552 | 552 |
verbose = verbose |
... | ... |
@@ -1372,14 +1372,16 @@ simulateContamination <- function(C = 300, |
1372 | 1372 |
|
1373 | 1373 |
if (any(dupBarcode)) { |
1374 | 1374 |
.logMessages( |
1375 |
+ date(), |
|
1376 |
+ ".. ", |
|
1375 | 1377 |
sum(dupBarcode), |
1376 |
- " columns in the background matrix were removed as they were found in", |
|
1378 |
+ " cells in the background matrix were removed as they were found in", |
|
1377 | 1379 |
" the filtered matrix.", |
1378 | 1380 |
logfile = logfile, |
1379 | 1381 |
append = TRUE, |
1380 | 1382 |
verbose = verbose |
1381 | 1383 |
) |
1382 |
- background <- background[, !(dupBarcode)] |
|
1384 |
+ background <- background[, !(dupBarcode), drop = FALSE] |
|
1383 | 1385 |
} |
1384 | 1386 |
return(background) |
1385 | 1387 |
} |
... | ... |
@@ -495,22 +495,21 @@ setMethod( |
495 | 495 |
z = as.integer(res$z), |
496 | 496 |
pseudocount = 1e-20 |
497 | 497 |
) |
498 |
- |
|
498 |
+ |
|
499 | 499 |
# Speed up sparse matrix value assignment by cbind -> order recovery |
500 |
- |
|
501 |
- allCol <- paste0("col_", 1:ncol(estRmat)) |
|
500 |
+ allCol <- paste0("col_", seq_len(ncol(estRmat))) |
|
502 | 501 |
colnames(estRmat) <- allCol |
503 |
- |
|
502 |
+ |
|
504 | 503 |
subCol <- paste0("col_", which(batch == bat)) |
505 | 504 |
colnames(estRmat.temp) <- subCol |
506 |
- |
|
505 |
+ |
|
507 | 506 |
estRmat <- estRmat[, !(allCol %in% subCol)] |
508 | 507 |
estRmat <- cbind(estRmat, estRmat.temp) |
509 |
- |
|
508 |
+ |
|
510 | 509 |
# Recover order |
511 | 510 |
estRmat <- estRmat[, allCol] |
512 |
- |
|
513 |
- #(Old method) estRmat[seq(nrow(counts)), which(batch == bat)] <- estRmat.temp |
|
511 |
+ |
|
512 |
+ ##estRmat[seq(nrow(counts)), which(batch == bat)] <- estRmat.temp |
|
514 | 513 |
dimnames(estRmat) <- list(geneNames, allCellNames) |
515 | 514 |
|
516 | 515 |
resBatch[[bat]] <- list( |
... | ... |
@@ -495,7 +495,22 @@ setMethod( |
495 | 495 |
z = as.integer(res$z), |
496 | 496 |
pseudocount = 1e-20 |
497 | 497 |
) |
498 |
- estRmat[seq(nrow(counts)), which(batch == bat)] <- estRmat.temp |
|
498 |
+ |
|
499 |
+ # Speed up sparse matrix value assignment by cbind -> order recovery |
|
500 |
+ |
|
501 |
+ allCol <- paste0("col_", 1:ncol(estRmat)) |
|
502 |
+ colnames(estRmat) <- allCol |
|
503 |
+ |
|
504 |
+ subCol <- paste0("col_", which(batch == bat)) |
|
505 |
+ colnames(estRmat.temp) <- subCol |
|
506 |
+ |
|
507 |
+ estRmat <- estRmat[, !(allCol %in% subCol)] |
|
508 |
+ estRmat <- cbind(estRmat, estRmat.temp) |
|
509 |
+ |
|
510 |
+ # Recover order |
|
511 |
+ estRmat <- estRmat[, allCol] |
|
512 |
+ |
|
513 |
+ #(Old method) estRmat[seq(nrow(counts)), which(batch == bat)] <- estRmat.temp |
|
499 | 514 |
dimnames(estRmat) <- list(geneNames, allCellNames) |
500 | 515 |
|
501 | 516 |
resBatch[[bat]] <- list( |
... | ... |
@@ -487,6 +487,17 @@ setMethod( |
487 | 487 |
) |
488 | 488 |
) |
489 | 489 |
} |
490 |
+ |
|
491 |
+ ## Try to convert class of new matrix to class of original matrix |
|
492 |
+ |
|
493 |
+ .logMessages( |
|
494 |
+ date(), |
|
495 |
+ ".. Calculating final decontaminated matrix", |
|
496 |
+ logfile = logfile, |
|
497 |
+ append = TRUE, |
|
498 |
+ verbose = verbose |
|
499 |
+ ) |
|
500 |
+ |
|
490 | 501 |
estRmat.temp <- calculateNativeMatrix( |
491 | 502 |
counts = countsBat, |
492 | 503 |
theta = res$theta, |
... | ... |
@@ -495,6 +506,8 @@ setMethod( |
495 | 506 |
z = as.integer(res$z), |
496 | 507 |
pseudocount = 1e-20 |
497 | 508 |
) |
509 |
+ |
|
510 |
+ # This part needs to be optimized |
|
498 | 511 |
estRmat[seq(nrow(counts)), which(batch == bat)] <- estRmat.temp |
499 | 512 |
dimnames(estRmat) <- list(geneNames, allCellNames) |
500 | 513 |
|
... | ... |
@@ -528,19 +541,17 @@ setMethod( |
528 | 541 |
"z" = returnZ |
529 | 542 |
) |
530 | 543 |
|
531 |
- ## Try to convert class of new matrix to class of original matrix |
|
532 |
- if (inherits(counts, "dgCMatrix")) { |
|
544 |
+ |
|
545 |
+ if (inherits(counts, c("DelayedMatrix", "DelayedArray"))) { |
|
546 |
+ |
|
533 | 547 |
.logMessages( |
534 | 548 |
date(), |
535 |
- ".. Finalizing decontaminated matrix", |
|
549 |
+ ".. Converting decontaminated matrix to ", class(counts), |
|
536 | 550 |
logfile = logfile, |
537 | 551 |
append = TRUE, |
538 | 552 |
verbose = verbose |
539 | 553 |
) |
540 |
- } |
|
541 |
- |
|
542 |
- if (inherits(counts, c("DelayedMatrix", "DelayedArray"))) { |
|
543 |
- |
|
554 |
+ |
|
544 | 555 |
## Determine class of seed in DelayedArray |
545 | 556 |
seed.class <- unique(DelayedArray::seedApply(counts, class))[[1]] |
546 | 557 |
if (seed.class == "HDF5ArraySeed") { |
... | ... |
@@ -162,22 +162,9 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
162 | 162 |
verbose = TRUE) { |
163 | 163 |
countsBackground <- NULL |
164 | 164 |
if (!is.null(background)) { |
165 |
- # Remove background barcodes that have already appeared in x |
|
166 |
- dupBarcode <- background$Barcode %in% x$Barcode |
|
167 |
- |
|
168 |
- if (any(dupBarcode)) { |
|
169 |
- .logMessages( |
|
170 |
- sum(dupBarcode), |
|
171 |
- " columns in background removed as they are found in filtered matrix", |
|
172 |
- logfile = logfile, |
|
173 |
- append = TRUE, |
|
174 |
- verbose = verbose |
|
175 |
- ) |
|
176 |
- } |
|
177 |
- |
|
178 |
- background <- background[, !(dupBarcode)] |
|
179 |
- |
|
180 |
- |
|
165 |
+ # Remove cells with the same ID between x and the background matrix |
|
166 |
+ background <- .checkBackground(x = x, background = background, |
|
167 |
+ logfile = logfile, verbose = verbose) |
|
181 | 168 |
if (is.null(bgAssayName)) { |
182 | 169 |
bgAssayName <- assayName |
183 | 170 |
} |
... | ... |
@@ -256,20 +243,9 @@ setMethod("decontX", "ANY", function(x, |
256 | 243 |
|
257 | 244 |
countsBackground <- NULL |
258 | 245 |
if (!is.null(background)) { |
259 |
- # Remove background barcodes that have already appeared in x |
|
260 |
- dupBarcode <- colnames(background) %in% colnames(x) |
|
261 |
- |
|
262 |
- if (any(dupBarcode)) { |
|
263 |
- .logMessages( |
|
264 |
- sum(dupBarcode), |
|
265 |
- " columns in background removed as they are found in filtered matrix", |
|
266 |
- logfile = logfile, |
|
267 |
- append = TRUE, |
|
268 |
- verbose = verbose |
|
269 |
- ) |
|
270 |
- } |
|
271 |
- |
|
272 |
- countsBackground <- background[, !(dupBarcode)] |
|
246 |
+ # Remove cells with the same ID between x and the background matrix |
|
247 |
+ background <- .checkBackground(x = x, background = background, |
|
248 |
+ logfile = logfile, verbose = verbose) |
|
273 | 249 |
} |
274 | 250 |
|
275 | 251 |
.decontX( |
... | ... |
@@ -1368,3 +1344,31 @@ simulateContamination <- function(C = 300, |
1368 | 1344 |
) |
1369 | 1345 |
) |
1370 | 1346 |
} |
1347 |
+ |
|
1348 |
+ |
|
1349 |
+.checkBackground <- function(x, background, logfile = NULL, verbose = FALSE) { |
|
1350 |
+ # Remove background barcodes that have already appeared in x |
|
1351 |
+ if(!is.null(colnames(background))) { |
|
1352 |
+ dupBarcode <- colnames(background) %in% colnames(x) |
|
1353 |
+ } else { |
|
1354 |
+ dupBarcode <- FALSE |
|
1355 |
+ warning("No column names were found for the 'background' matrix. ", |
|
1356 |
+ "No checking was performed between the ids in the 'backgroud' ", |
|
1357 |
+ "matrix and 'x'.", |
|
1358 |
+ " Please ensure that no true cells are included in the background ", |
|
1359 |
+ "matrix. Otherwise, results will be incorrect.") |
|
1360 |
+ } |
|
1361 |
+ |
|
1362 |
+ if (any(dupBarcode)) { |
|
1363 |
+ .logMessages( |
|
1364 |
+ sum(dupBarcode), |
|
1365 |
+ " columns in the background matrix were removed as they were found in", |
|
1366 |
+ " the filtered matrix.", |
|
1367 |
+ logfile = logfile, |
|
1368 |
+ append = TRUE, |
|
1369 |
+ verbose = verbose |
|
1370 |
+ ) |
|
1371 |
+ background <- background[, !(dupBarcode)] |
|
1372 |
+ } |
|
1373 |
+ return(background) |
|
1374 |
+} |
... | ... |
@@ -1014,23 +1014,17 @@ addLogLikelihood <- function(llA, llB) { |
1014 | 1014 |
} |
1015 | 1015 |
sce <- scater::logNormCounts(sce, log = TRUE) |
1016 | 1016 |
|
1017 |
- if (nrow(sce) <= varGenes) { |
|
1018 |
- topVariableGenes <- seq_len(nrow(sce)) |
|
1019 |
- } else if (nrow(sce) > varGenes) { |
|
1020 |
- sce.var <- scran::modelGeneVar(sce) |
|
1021 |
- topVariableGenes <- order(sce.var$bio, |
|
1022 |
- decreasing = TRUE |
|
1023 |
- )[seq(varGenes)] |
|
1024 |
- } |
|
1025 |
- sce <- sce[topVariableGenes, ] |
|
1026 |
- |
|
1027 | 1017 |
if (!is.null(seed)) { |
1028 | 1018 |
with_seed( |
1029 | 1019 |
seed, |
1030 |
- resUmap <- scater::calculateUMAP(sce, n_threads = 1) |
|
1020 |
+ resUmap <- scater::calculateUMAP(sce, ntop = varGenes, |
|
1021 |
+ n_threads = 1, |
|
1022 |
+ exprs_values = "logcounts") |
|
1031 | 1023 |
) |
1032 | 1024 |
} else { |
1033 |
- resUmap <- scater::calculateUMAP(sce, n_threads = 1) |
|
1025 |
+ resUmap <- scater::calculateUMAP(sce, ntop = varGenes, |
|
1026 |
+ n_threads = 1, |
|
1027 |
+ exprs_values = "logcounts") |
|
1034 | 1028 |
} |
1035 | 1029 |
|
1036 | 1030 |
z <- NULL |
... | ... |
@@ -205,7 +205,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
205 | 205 |
|
206 | 206 |
## Add results into column annotation |
207 | 207 |
SummarizedExperiment::colData(x)$decontX_contamination <- result$contamination |
208 |
- SummarizedExperiment::colData(x)$decontX_clusters <- result$z |
|
208 |
+ SummarizedExperiment::colData(x)$decontX_clusters <- as.factor(result$z) |
|
209 | 209 |
|
210 | 210 |
## Put estimated UMAPs into SCE |
211 | 211 |
batchIndex <- unique(result$runParams$batch) |
... | ... |
@@ -338,11 +338,13 @@ setGeneric("decontXcounts<-", function(object, ..., value) { |
338 | 338 |
}) |
339 | 339 |
|
340 | 340 |
#' @export |
341 |
+#' @rdname decontXcounts |
|
341 | 342 |
setMethod("decontXcounts", "SingleCellExperiment", GET_FUN("decontXcounts")) |
342 | 343 |
|
343 | 344 |
#' @export |
344 |
-setReplaceMethod( |
|
345 |
- "decontXcounts", c("SingleCellExperiment", "ANY"), |
|
345 |
+#' @rdname decontXcounts |
|
346 |
+setMethod( |
|
347 |
+ "decontXcounts<-", c("SingleCellExperiment", "ANY"), |
|
346 | 348 |
SET_FUN("decontXcounts") |
347 | 349 |
) |
348 | 350 |
|
... | ... |
@@ -204,8 +204,8 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
204 | 204 |
) |
205 | 205 |
|
206 | 206 |
## Add results into column annotation |
207 |
- colData(x)$decontX_contamination <- result$contamination |
|
208 |
- colData(x)$decontX_clusters <- result$z |
|
207 |
+ SummarizedExperiment::colData(x)$decontX_contamination <- result$contamination |
|
208 |
+ SummarizedExperiment::colData(x)$decontX_clusters <- result$z |
|
209 | 209 |
|
210 | 210 |
## Put estimated UMAPs into SCE |
211 | 211 |
batchIndex <- unique(result$runParams$batch) |
... | ... |
@@ -296,14 +296,14 @@ setMethod("decontX", "ANY", function(x, |
296 | 296 |
GET_FUN <- function(exprs_values, ...) { |
297 | 297 |
(exprs_values) # To ensure evaluation |
298 | 298 |
function(object, ...) { |
299 |
- assay(object, i = exprs_values, ...) |
|
299 |
+ SummarizedExperiment::assay(object, i = exprs_values, ...) |
|
300 | 300 |
} |
301 | 301 |
} |
302 | 302 |
|
303 | 303 |
SET_FUN <- function(exprs_values, ...) { |
304 | 304 |
(exprs_values) # To ensure evaluation |
305 | 305 |
function(object, ..., value) { |
306 |
- assay(object, i = exprs_values, ...) <- value |
|
306 |
+ SummarizedExperiment::assay(object, i = exprs_values, ...) <- value |
|
307 | 307 |
object |
308 | 308 |
} |
309 | 309 |
} |
... | ... |
@@ -168,7 +168,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
168 | 168 |
if (any(dupBarcode)) { |
169 | 169 |
.logMessages( |
170 | 170 |
sum(dupBarcode), |
171 |
- " columns in background removed because they are found in filtered matrix", |
|
171 |
+ " columns in background removed as they are found in filtered matrix", |
|
172 | 172 |
logfile = logfile, |
173 | 173 |
append = TRUE, |
174 | 174 |
verbose = verbose |
... | ... |
@@ -262,7 +262,7 @@ setMethod("decontX", "ANY", function(x, |
262 | 262 |
if (any(dupBarcode)) { |
263 | 263 |
.logMessages( |
264 | 264 |
sum(dupBarcode), |
265 |
- " columns in background removed because they are found in filtered matrix", |
|
265 |
+ " columns in background removed as they are found in filtered matrix", |
|
266 | 266 |
logfile = logfile, |
267 | 267 |
append = TRUE, |
268 | 268 |
verbose = verbose |
... | ... |
@@ -271,7 +271,7 @@ setMethod("decontX", "ANY", function(x, |
271 | 271 |
|
272 | 272 |
countsBackground <- background[, !(dupBarcode)] |
273 | 273 |
} |
274 |
- |
|
274 |
+ |
|
275 | 275 |
.decontX( |
276 | 276 |
counts = x, |
277 | 277 |
z = z, |
... | ... |
@@ -30,6 +30,9 @@ |
30 | 30 |
#' except it contains the matrix of empty droplets instead of cells. When |
31 | 31 |
#' supplied, empirical distribution of transcripts from these empty droplets |
32 | 32 |
#' will be used as the contamination distribution. Default NULL. |
33 |
+#' @param bgAssayName Character. Name of the assay to use if \code{background} |
|
34 |
+#' is a \linkS4class{SingleCellExperiment}. Default to same as |
|
35 |
+#' \code{assayName}. |
|
33 | 36 |
#' @param maxIter Integer. Maximum iterations of the EM algorithm. Default 500. |
34 | 37 |
#' @param convergence Numeric. The EM algorithm will be stopped if the maximum |
35 | 38 |
#' difference in the contamination estimates between the previous and |
... | ... |
@@ -146,6 +149,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
146 | 149 |
z = NULL, |
147 | 150 |
batch = NULL, |
148 | 151 |
background = NULL, |
152 |
+ bgAssayName = NULL, |
|
149 | 153 |
maxIter = 500, |
150 | 154 |
delta = c(10, 10), |
151 | 155 |
estimateDelta = TRUE, |
... | ... |
@@ -172,7 +176,12 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
172 | 176 |
} |
173 | 177 |
|
174 | 178 |
background <- background[, !(dupBarcode)] |
175 |
- countsBackground <- SummarizedExperiment::assay(background, i = assayName) |
|
179 |
+ |
|
180 |
+ |
|
181 |
+ if (is.null(bgAssayName)) { |
|
182 |
+ bgAssayName <- assayName |
|
183 |
+ } |
|
184 |
+ countsBackground <- SummarizedExperiment::assay(background, i = bgAssayName) |
|
176 | 185 |
} |
177 | 186 |
|
178 | 187 |
mat <- SummarizedExperiment::assay(x, i = assayName) |
... | ... |
@@ -160,7 +160,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
160 | 160 |
if (!is.null(background)) { |
161 | 161 |
# Remove background barcodes that have already appeared in x |
162 | 162 |
dupBarcode <- background$Barcode %in% x$Barcode |
163 |
- |
|
163 |
+ |
|
164 | 164 |
if (any(dupBarcode)) { |
165 | 165 |
.logMessages( |
166 | 166 |
sum(dupBarcode), |
... | ... |
@@ -170,7 +170,7 @@ setMethod("decontX", "SingleCellExperiment", function(x, |
170 | 170 |
verbose = verbose |
171 | 171 |
) |
172 | 172 |
} |
173 |
- |
|
173 |
+ |
|
174 | 174 |
background <- background[, !(dupBarcode)] |
175 | 175 |
countsBackground <- SummarizedExperiment::assay(background, i = assayName) |
176 | 176 |
} |
... | ... |
@@ -249,7 +249,7 @@ setMethod("decontX", "ANY", function(x, |
249 | 249 |
if (!is.null(background)) { |
250 | 250 |
# Remove background barcodes that have already appeared in x |
251 | 251 |
dupBarcode <- colnames(background) %in% colnames(x) |
252 |
- |
|
252 |
+ |
|
253 | 253 |
if (any(dupBarcode)) { |
254 | 254 |
.logMessages( |
255 | 255 |
sum(dupBarcode), |
... | ... |
@@ -259,7 +259,7 @@ setMethod("decontX", "ANY", function(x, |
259 | 259 |