... | ... |
@@ -1,5 +1,6 @@ |
1 |
-# Todo |
|
2 |
-# combine methods |
|
1 |
+# Contig -- always equalize to cell |
|
2 |
+# Cluster -- equalize to contig |
|
3 |
+# Cell -- equalize after calling |
|
3 | 4 |
|
4 | 5 |
valid_KeyedTbl = function(tbl, keys){ |
5 | 6 |
tbl_nm = deparse(substitute(tbl)) |
... | ... |
@@ -25,6 +26,7 @@ valid_KeyedTbl = function(tbl, keys){ |
25 | 26 |
#' @param cell_pk character vector naming fields in `cell_tbl` that uniquely identify a cell barcode |
26 | 27 |
#' @param cluster_tbl A data frame that provide cluster assignments for each contig |
27 | 28 |
#' @param cluster_pk If `cluster_tbl` was provided, a character vector naming fields in `cluster_tbl` that uniquely identify a cluster |
29 |
+#' @param equalize `logical`. Should the contig, cells and clusters be equalized by taking the intersection of their common keys? |
|
28 | 30 |
#' @return \code{ContigCellDB} |
29 | 31 |
#' |
30 | 32 |
#' @section Accessors/mutators: |
... | ... |
@@ -82,9 +84,10 @@ ContigCellDB = function(contig_tbl, contig_pk, cell_tbl, cell_pk, cluster_tbl, c |
82 | 84 |
} |
83 | 85 |
|
84 | 86 |
#' @describeIn ContigCellDB-fun provide defaults that correspond to identifiers in 10X VDJ data |
87 |
+#' @param ... passed to [ContigCellDB()] |
|
85 | 88 |
#' @export |
86 |
-ContigCellDB_10XVDJ = function(contig_tbl, contig_pk = c('barcode', 'contig_id'), cell_pk = 'barcode'){ |
|
87 |
- ContigCellDB(contig_tbl = contig_tbl, contig_pk = contig_pk, cell_pk = cell_pk) |
|
89 |
+ContigCellDB_10XVDJ = function(contig_tbl, contig_pk = c('barcode', 'contig_id'), cell_pk = 'barcode', ...){ |
|
90 |
+ ContigCellDB(contig_tbl = contig_tbl, contig_pk = contig_pk, cell_pk = cell_pk, ...) |
|
88 | 91 |
} |
89 | 92 |
|
90 | 93 |
access_cdb = function(x, name){ |
... | ... |
@@ -104,7 +107,7 @@ replace_cdb = function(x, name, value){ |
104 | 107 |
} |
105 | 108 |
if(name %in% c('contig_tbl','contig_pk')){ |
106 | 109 |
valid_KeyedTbl(x$contig_tbl, x$contig_pk) |
107 |
- x = equalize_ccdb(x) |
|
110 |
+ x = equalize_ccdb(x, contig = TRUE, cell = FALSE, cluster = TRUE) |
|
108 | 111 |
} |
109 | 112 |
if(name %in% c('cell_tbl','cell_pk')){ |
110 | 113 |
valid_KeyedTbl(x$cell_tbl, x$cell_pk) |
... | ... |
@@ -112,7 +115,7 @@ replace_cdb = function(x, name, value){ |
112 | 115 |
} |
113 | 116 |
if(name %in% c('cluster_tbl','cluster_pk')){ |
114 | 117 |
valid_KeyedTbl(x$cluster_tbl, x$cluster_pk) |
115 |
- x = equalize_ccdb(x, contig = TRUE, cell = FALSE, cluster = FALSE) |
|
118 |
+ x = equalize_ccdb(x, contig = TRUE, cell = FALSE, cluster = TRUE) |
|
116 | 119 |
} |
117 | 120 |
invisible(x) |
118 | 121 |
} |
... | ... |
@@ -141,7 +144,7 @@ setMethod("$", signature = c(x = 'ContigCellDB'), access_cdb) |
141 | 144 |
#' ccdb_ex |
142 | 145 |
#' ccdb_ex$contig_tbl = dplyr::filter(ccdb_ex$contig_tbl, pop == 'b6') |
143 | 146 |
#' ccdb_ex |
144 |
-setReplaceMethod("$", signature = c(x = 'ContigCellDB'), replace_cdb) |
|
147 |
+setReplaceMethod("$", signature = c(x = 'ContigCellDB'), function(x, name, value) replace_cdb(x, name, value)) |
|
145 | 148 |
|
146 | 149 |
setMethod('show', signature = c(object = 'ContigCellDB'), function(object){ |
147 | 150 |
cat(class(object), "of", nrow(object$contig_tbl), "contigs") |
... | ... |
@@ -200,20 +203,20 @@ equalize_ccdb = function(x, cell = TRUE, contig = TRUE, cluster = TRUE){ |
200 | 203 |
if(cell) x@cell_tbl = semi_join(x$cell_tbl, x$contig_tbl, by = x$cell_pk) |
201 | 204 |
if(contig) x@contig_tbl = semi_join(x$contig_tbl, x$cell_tbl, by = x$cell_pk) |
202 | 205 |
if(nrow(x$cluster_tbl) > 0 && cluster) x@cluster_tbl = semi_join(x$cluster_tbl, x$contig_tbl, by = x$cluster_pk) |
203 |
- x@equalized = TRUE |
|
206 |
+ x@equalized = (cell & contig & cluster) | x@equalized |
|
204 | 207 |
x |
205 | 208 |
} |
206 | 209 |
|
207 | 210 |
replace_cluster_tbl = function(ccdb, cluster_tbl, contig_tbl, cluster_pk){ |
208 | 211 |
if(nrow(ccdb$cluster_tbl)>0 && !missing(cluster_pk)){ |
209 |
- warning("Replacing `cluster_tbl` with ", paste(ccdb$cluster_pk, sep = ', ', '.')) |
|
212 |
+ warning("Replacing `cluster_tbl` with ", paste(ccdb$cluster_pk, sep = ', '), '.') |
|
210 | 213 |
} |
211 | 214 |
if(!missing(cluster_pk)) ccdb@cluster_pk = cluster_pk |
212 | 215 |
ccdb@cluster_tbl = cluster_tbl |
213 | 216 |
if(!missing(contig_tbl)){ |
217 |
+ if(nrow(contig_tbl) != nrow(ccdb@contig_tbl)) stop("Length mismatch; this is a bug!") |
|
214 | 218 |
ccdb@contig_tbl = contig_tbl |
215 | 219 |
valid_KeyedTbl(ccdb$contig_tbl, ccdb$contig_pk) |
216 |
- ccdb = equalize_ccdb(ccdb, cell = FALSE, cluster = FALSE, contig = TRUE) |
|
217 | 220 |
} |
218 | 221 |
valid_KeyedTbl(ccdb$cluster_tbl, ccdb$cluster_pk) |
219 | 222 |
validObject(ccdb) |
... | ... |
@@ -271,7 +274,7 @@ mutate_cdb <- function(ccdb, ..., tbl='contig_tbl'){ |
271 | 274 |
#' splat = split_cdb(ccdb_ex, 'chain', 'contig_tbl') |
272 | 275 |
#' stopifnot(all(splat$TRA$contig_tbl$chain == 'TRA')) |
273 | 276 |
#' stopifnot(all(splat$TRB$contig_tbl$chain == 'TRB')) |
274 |
-split_cdb = function(ccdb, fields, tbl = 'contig_tbl', drop = FALSE){ |
|
277 |
+split_cdb = function(ccdb, fields, tbl = 'contig_tbl', drop = FALSE, equalize = TRUE){ |
|
275 | 278 |
thetbl = access_cdb(ccdb, tbl) |
276 | 279 |
if(!is.character(fields)){ |
277 | 280 |
stop('`field` must be a character naming fields in `tbl`') |
... | ... |
@@ -281,7 +284,8 @@ split_cdb = function(ccdb, fields, tbl = 'contig_tbl', drop = FALSE){ |
281 | 284 |
} |
282 | 285 |
split_tbl = split(thetbl, thetbl[fields], drop = drop) |
283 | 286 |
out = purrr::map(split_tbl, function(tt){ |
284 |
- replace_cdb(ccdb, tbl, tt) |
|
287 |
+ tmp = replace_cdb(ccdb, tbl, tt) |
|
288 |
+ if(equalize) equalize_ccdb(tmp) |
|
285 | 289 |
}) |
286 | 290 |
out |
287 | 291 |
} |
... | ... |
@@ -293,7 +297,7 @@ rbind.ContigCellDB <- function(..., deparse.level=1) |
293 | 297 |
.bind_rows_ccdb(objects[[1L]], objects[-1L]) |
294 | 298 |
} |
295 | 299 |
|
296 |
-#' #' Combine `ContigCellDB` along rows (contigs, cells or clusters) |
|
300 |
+#' Combine `ContigCellDB` along rows (contigs, cells or clusters) |
|
297 | 301 |
#' |
298 | 302 |
#' The union of the rows in each of the objects is taken, |
299 | 303 |
#' thus removing any rows that has an exact duplicate. This |
... | ... |
@@ -324,7 +328,5 @@ setMethod('rbind', 'ContigCellDB', rbind.ContigCellDB) |
324 | 328 |
for(tt in c('contig_pk', 'cell_pk', 'cluster_pk')){ |
325 | 329 |
pks[[tt]] = purrr::reduce(purrr::map(all_objs, access_cdb, name = tt), union) |
326 | 330 |
} |
327 |
- |
|
328 |
- |
|
329 | 331 |
ContigCellDB(tbls$contig_tbl, pks$contig_pk, tbls$cell_tbl, pks$cell_pk, tbls$cluster_tbl, pks$cluster_pk) |
330 | 332 |
} |
... | ... |
@@ -31,7 +31,7 @@ test_that('Can rbind', { |
31 | 31 |
expect_equal(unite, ccdb_ex) |
32 | 32 |
|
33 | 33 |
ccdb_ex = cluster_germline(ccdb_ex, segment_keys = 'sample') |
34 |
- splat_cell = split_cdb(ccdb_ex, c('sample', 'pop'), 'cell_tbl', drop = TRUE) |
|
34 |
+ splat_cell = split_cdb(ccdb_ex, c('sample', 'pop'), 'cell_tbl', drop = TRUE, equalize = TRUE) |
|
35 | 35 |
unite_cell = do.call(rbind, splat_cell) |
36 | 36 |
expect_equal(unite_cell, ccdb_ex) |
37 | 37 |
|
... | ... |
@@ -3,7 +3,7 @@ context('Cluster Logistic Testing') |
3 | 3 |
data(ccdb_ex) |
4 | 4 |
ccdb_ex = cluster_germline(ccdb_ex) %>% fine_clustering('cdr3_nt', 'DNA') |
5 | 5 |
ccdb_ex = canonicalize_cluster(ccdb_ex) |
6 |
-ccdb_ex_trb = filter_cdb(ccdb_ex, chain == 'TRB') %>% canonicalize_cell(, contig_filter_args = chain == 'TRB', contig_fields = c('chain', 'v_gene', 'j_gene')) |
|
6 |
+ccdb_ex_trb = filter_cdb(ccdb_ex, chain == 'TRB') %>% equalize_ccdb() %>% canonicalize_cell(, contig_filter_args = chain == 'TRB', contig_fields = c('chain', 'v_gene', 'j_gene')) |
|
7 | 7 |
|
8 | 8 |
theclust = filter(ccdb_ex$cluster_tbl, chain == "TRB", v_gene == "TRBV13-2", j_gene == "TRBJ2-7") %>% mutate( x_ = 1) |
9 | 9 |
|
... | ... |
@@ -15,16 +15,18 @@ theform = ~ pop + (1|sample) |
15 | 15 |
test_that('Manual glmer testing equals cluster_logistic_test', { |
16 | 16 |
if(!requireNamespace('lme4')) skip('Install lme4') |
17 | 17 |
library(lme4) |
18 |
- # test against a manually data -- TRUE if the cell was in the cluster, false otherwise |
|
18 |
+ # test against a manual data -- TRUE if the cell was in the cluster, false otherwise |
|
19 | 19 |
manual_glmer = glmer(update(theform, x_ ~ .) , data = manual_cell, family = 'binomial') |
20 | 20 |
automatic1 = cluster_logistic_test(theform, ccdb = ccdb_ex_trb, cluster_whitelist = theclust, keep_fit = TRUE, silent = TRUE) |
21 | 21 |
expect_equal(fixef(automatic1$fit[[1]]), fixef(manual_glmer)) |
22 | 22 |
|
23 |
+ # Adding additional clusters doesn't change results |
|
23 | 24 |
wl2 = tibble(cluster_idx = c(theclust$cluster_idx, ccdb_ex_trb$cluster_tbl$cluster_idx[1:3])) |
24 | 25 |
automatic2 = cluster_logistic_test(theform, ccdb = ccdb_ex_trb, cluster_whitelist = wl2, keep_fit = TRUE, silent = TRUE) |
25 | 26 |
expect_equal(unique(automatic2$cluster_idx), wl2$cluster_idx) |
26 | 27 |
expect_equal(semi_join(automatic2, theclust, by = 'cluster_idx') %>% select(term:p.value), automatic1 %>% select(term:p.value)) |
27 | 28 |
|
29 |
+ |
|
28 | 30 |
automatic3 = cluster_test_by(ccdb_ex, formula = theform, cluster_whitelist = wl2, silent = TRUE) |
29 | 31 |
expect_false(any(automatic3$chain == 'TRA')) |
30 | 32 |
expect_equal(semi_join(automatic3, theclust, by = 'cluster_idx') %>% select(term:p.value), automatic1 %>% select(term:p.value)) |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
--- |
2 |
-title: "Combining Repertoire with Expression Using Nested colData" |
|
2 |
+title: "Combining Repertoire with Expression with SingleCellExperiment" |
|
3 | 3 |
output: BiocStyle::html_document |
4 | 4 |
vignette: > |
5 |
- %\VignetteIndexEntry{Clustering repertoire via CDR3 sequences} |
|
5 |
+ %\VignetteIndexEntry{Combining Repertoire with Expression with SingleCellExperiment} |
|
6 | 6 |
%\VignetteEngine{knitr::rmarkdown} |
7 | 7 |
%\VignetteEncoding{UTF-8} |
8 | 8 |
--- |
... | ... |
@@ -15,7 +15,6 @@ knitr::opts_chunk$set( |
15 | 15 |
|
16 | 16 |
```{r setup} |
17 | 17 |
library(CellaRepertorium) |
18 |
-library(MultiAssayExperiment) |
|
19 | 18 |
library(SingleCellExperiment) |
20 | 19 |
library(dplyr) |
21 | 20 |
library(ggplot2) |
... | ... |
@@ -62,7 +61,7 @@ sce = SingleCellExperiment(assay = expression, colData = all_bc) |
62 | 61 |
```{r} |
63 | 62 |
ccdb2 = ContigCellDB(ccdb_ex$contig_tbl, contig_pk = ccdb_ex$contig_pk, cell_tbl = colData(sce), cell_pk = ccdb_ex$cell_pk, equalize = FALSE) |
64 | 63 |
|
65 |
-ccdb2 = cdhit_ccdb(ccdb2, 'cdr3', type = 'AA', cluster_name = 'aa80', identity = .8) |
|
64 |
+ccdb2 = cdhit_ccdb(ccdb2, 'cdr3', type = 'AA', cluster_name = 'aa80', identity = .8, min_length = 5) |
|
66 | 65 |
ccdb2 = fine_clustering(ccdb2, sequence_key = 'cdr3', type = 'AA', keep_clustering_details = FALSE) |
67 | 66 |
|
68 | 67 |
``` |
... | ... |
@@ -73,9 +72,9 @@ Key is to construct with `equalize = FALSE`, which will allow some cells to lack |
73 | 72 |
# Chain pairings |
74 | 73 |
|
75 | 74 |
```{r} |
76 |
-colData(sce)$alpha = canonicalize_cell(ccdb2, chain == 'TRA', contig_fields = c('chain', 'v_gene','d_gene', 'j_gene', 'AA97')) |
|
75 |
+colData(sce)$alpha = canonicalize_cell(ccdb2, chain == 'TRA', contig_fields = c('chain', 'v_gene','d_gene', 'j_gene', 'aa80')) |
|
77 | 76 |
|
78 |
-colData(sce)$beta = canonicalize_cell(ccdb2, chain == 'TRB', contig_fields = c('chain', 'v_gene','d_gene', 'j_gene', 'AA97')) |
|
77 |
+colData(sce)$beta = canonicalize_cell(ccdb2, chain == 'TRB', contig_fields = c('chain', 'v_gene','d_gene', 'j_gene', 'aa80')) |
|
79 | 78 |
|
80 | 79 |
``` |
81 | 80 |
|