Browse code

Doc polishing

Andrew McDavid authored on 21/06/2019 15:24:37
Showing 76 changed files

... ...
@@ -57,3 +57,4 @@ NeedsCompilation: yes
57 57
 RoxygenNote: 6.1.1
58 58
 URL: https://github.com/amcdavid/CellaRepertorium
59 59
 BugReports: https://github.com/amcdavid/CellaRepertorium/issues
60
+Roxygen: list(markdown = TRUE)
... ...
@@ -28,18 +28,39 @@ valid_KeyedTbl = function(tbl, keys){
28 28
 #' @param cell_pk character vector naming fields in `cell_tbl` that uniquely identify a cell barcode
29 29
 #' @param cluster_tbl A data frame that provide cluster assignments for each contig
30 30
 #' @param cluster_pk If `cluster_tbl` was provided, a character vector naming fields in `cluster_tbl` that uniquely identify a cluster
31
-#'
32 31
 #' @return \code{ContigCellDB}
32
+#'
33
+#' @section Accessors/mutators:
34
+#' See \code{\link[=ContigCellDB-mutate]{$,ContigCellDB-method}} for more on how to access and mutate slots.
35
+#' At the moment, there is not a good way to combine objects without manually touching slots with `@`,
36
+#' but a `rbind` method is in the offing.
33 37
 #' @export
34 38
 #' @importFrom S4Vectors List SimpleList
35 39
 #' @importFrom tibble as_tibble
36 40
 #' @importFrom methods new slot slot<- validObject
37 41
 #' @rdname ContigCellDB-fun
38
-#'
42
+#' @seealso \code{\link[=ContigCellDB-mutate]{$,ContigCellDB-method}}
39 43
 #' @examples
40 44
 #' data(contigs_qc)
41
-#' ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'),
45
+#' contigs_qc
46
+#'
47
+#' cdb = ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'),
42 48
 #'  cell_pk = c('barcode', 'pop', 'sample'))
49
+#'  cdb
50
+#'
51
+#'  # everything that was in contigs_qc
52
+#'  cdb$contig_tbl
53
+#'
54
+#'  # Only the cell_pk are included by default (until clustering/canonicalization)
55
+#'  cdb$cell_tbl
56
+#'
57
+#'  # Empty, since no cluster_pk was specified
58
+#'  cdb$cluster_tbl
59
+#'
60
+#'  # Keys
61
+#'  cdb$contig_pk
62
+#'  cdb$cell_pk
63
+#'  cdb$cluster_pk
43 64
 ContigCellDB = function(contig_tbl, contig_pk, cell_tbl, cell_pk, cluster_tbl, cluster_pk = character()){
44 65
     valid_KeyedTbl(contig_tbl, contig_pk)
45 66
     equalized = FALSE
... ...
@@ -73,9 +94,9 @@ ContigCellDB_10XVDJ = function(contig_tbl, contig_pk = c('barcode', 'contig_id')
73 94
 #' @param x A ContigCellDB object
74 95
 #' @param name a slot of a ContigCellDB object (one of  `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)
75 96
 #'
76
-#' @return Slot of ContigCellDB
97
+#' @return Update or return a slot of [ContigCellDB()]
77 98
 #' @export
78
-#'
99
+#' @aliases ContigCellDB-mutate
79 100
 #' @examples
80 101
 #' ccdb_ex$contig_tbl
81 102
 #' ccdb_ex$cell_tbl
... ...
@@ -88,13 +109,8 @@ setMethod("$", signature = c(x = 'ContigCellDB'), function(x, name){
88 109
     }
89 110
 })
90 111
 
91
-#' Access public members of ContigCellDB object
92
-#'
93
-#' @param x A ContigCellDB object
94
-#' @param name Name of a slot for a ContigCellDB object (one of  `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)
95 112
 #' @param value The value assigned to a slot of ContigCellDB object
96
-#'
97
-#' @return A ContigCellDB object
113
+#' @rdname cash-ContigCellDB-method
98 114
 #' @export
99 115
 #'
100 116
 #' @examples
... ...
@@ -19,7 +19,6 @@ globalVariables('cluster_idx')
19 19
 #' You may need to lower it below 5 for AAseq with identity less than .7.
20 20
 #' @param min_length Minimum length for sequences to be clustered.  An error if something smaller is passed.
21 21
 #' @param s fraction of shorter sequence covered by alignment.
22
-#' @param name program name (?)
23 22
 #' @param showProgress show a status bar
24 23
 #' @param only_index if TRUE only return the integer cluster indices, otherwise return a tibble.
25 24
 #' @param ... other arguments that can be passed to cdhit, see https://github.com/weizhongli/cdhit/wiki/3.-User's-Guide#CDHIT for details.  These will override any default values.
... ...
@@ -41,8 +40,9 @@ globalVariables('cluster_idx')
41 40
 #' cdhit(aaseq,identity = 1, G = 0, aL = .9, aS = .9,  only_index = TRUE)[1:10]
42 41
 #' # a tibble
43 42
 #' tbl = cdhit(aaseq, identity = 1, G = 0, aL = .9, aS = .9, only_index = FALSE)
44
-cdhit = function(seqs, identity = NULL, kmerSize = NULL, min_length = 6, s = 1, name = 'CD-Hit', only_index = FALSE, showProgress = interactive(), ...) {
43
+cdhit = function(seqs, identity = NULL, kmerSize = NULL, min_length = 6, s = 1, only_index = FALSE, showProgress = interactive(), ...) {
45 44
     if(any(width(seqs) < min_length)) stop("Some sequences shorter than `min_length`; remove these or decrease min_length")
45
+    name = 'CD-Hit'
46 46
     uopts = list(...)
47 47
     options = list()
48 48
     options$i <- tempfile()
... ...
@@ -78,12 +78,17 @@ cdhit = function(seqs, identity = NULL, kmerSize = NULL, min_length = 6, s = 1,
78 78
         dplyr::group_by(cluster_idx) %>% dplyr::mutate(n_cluster = dplyr::n())
79 79
 }
80 80
 
81
-##' @describeIn cdhit Run `cdhit` on `ClusterContigDB` object
82
-##' @param object An object of class `ClusterContigDB`
81
+
82
+##' Use [cdhit()] to cluster a [ContigCellDB()]
83
+##'
84
+##' @param object An object of class [ContigCellDB()]
83 85
 ##' @param sequence_key `character` naming the column in the `contig_tbl` containing the sequence to be clustered
84 86
 ##' @param type one of 'DNA' or 'AA'
85 87
 ##' @param cluster_name `character` specifying key, and name for the clustering.
88
+##' @return [ContigCellDB()]
89
+##' @inheritDotParams cdhit -seqs -only_index
86 90
 ##' @export
91
+##' @seealso [cdhit()]
87 92
 ##' @examples
88 93
 ##' res = CellaRepertorium:::cdhit_ccdb(ccdb_ex, 'cdr3_nt', type = 'DNA',
89 94
 ##' cluster_name = 'DNA97', identity = .965, min_length = 12, G = 1)
... ...
@@ -1,11 +1,11 @@
1 1
 
2 2
 #' Cluster contigs by germline properties
3 3
 #'
4
-#' @param ccdb `ContigCellDB`
4
+#' @param ccdb [ContigCellDB()]
5 5
 #' @param segment_keys fields in `contig_tbl` that identify a cluster
6 6
 #' @param cluster_name name of cluster to be added to `cluster_tbl`
7 7
 #'
8
-#' @return `ContigCellDB`
8
+#' @return [ContigCellDB()]
9 9
 #' @export
10 10
 #'
11 11
 #' @examples
... ...
@@ -24,16 +24,16 @@ globalVariables(c('fc', 'd(medoid)', 'is_medoid', 'n_cluster'))
24 24
 # Also canonicalize..
25 25
 #' Perform additional clustering of sequences within groups
26 26
 #'
27
-#' @param ccdb A ContigCellDB object
27
+#' @param ccdb A [ContigCellDB()] object
28 28
 #' @param sequence_key `character` naming column in `contig_tbl` with sequence
29 29
 #' @param type 'AA' or 'DNA'
30 30
 #' @param max_affinity `numeric` naming the maximal affinity for the sparse affinity matrix that is constructed.  Not currently used.
31 31
 #' @param keep_clustering_details `logical` -- should output of `fine_cluster_seqs` be kept as a list column
32
-#' @param ... passed to `fine_cluster_seqs`
32
+#' @inheritDotParams fine_cluster_seqs -seqs -type -cluster_fun -cluster_method
33 33
 #' @importFrom dplyr select bind_cols
34 34
 #'
35 35
 #' @example inst/examples/small_cluster_example.R
36
-#' @return `ContigCellDB` object with updated `contig_tbl` and `cluster_tbl`
36
+#' @return [ContigCellDB()] object with updated `contig_tbl` and `cluster_tbl`
37 37
 #' @export
38 38
 fine_clustering = function(ccdb, sequence_key, type, max_affinity = NULL, keep_clustering_details = FALSE, ...){
39 39
     contig_tbl = ccdb$contig_tbl
... ...
@@ -89,23 +89,25 @@ left_join_warn = function(x, y, by, overwrite = FALSE, join = left_join, ...){
89 89
 
90 90
 #' Find a canonical contig to represent a cluster
91 91
 #'
92
-#' @param ccdb `ContigCellDB`
93
-#' @param contig_filter_args an expression passed to dplyr::filter.  Unlike `filter`, multiple criteria must be `&` together, rather than using commas to separate.
94
-#' that act on `ccdb$contig_tbl``
92
+#' @param ccdb [ContigCellDB()]
93
+#' @param contig_filter_args an expression passed to [dplyr::filter()].
94
+#' Unlike `filter`, multiple criteria must be `&` together, rather than using
95
+#' commas to separate. These act on `ccdb$contig_tbl`
95 96
 #' @param tie_break_keys (optional) `character` naming fields in `contig_tbl`
96 97
 #' that are used sort the contig table in descending order.
97 98
 #' Used to break ties if `contig_filter_args` does not return a unique contig
98 99
 #' for each cluster
99 100
 #' @param order The rank order of the contig, based on `tie_break_keys`
100
-#' to return
101
+#' to return.  If `tie_break_keys` included an ordered factor (such as chain)
102
+#' this could be used to return the second chain.
101 103
 #' @param representative an optional field from `contig_tbl` that will be made
102 104
 #' unique. Serve as a surrogate `cluster_pk`.
103 105
 #' @param contig_fields Optional fields from `contig_tbl` that will be copied into
104 106
 #' the `cluster_tbl` from the canonical contig.
105 107
 #'
106
-#' @return `ContigCellDB`
108
+#' @return [ContigCellDB()]
107 109
 #' @export
108
-#' @seealso canonicalize_cell
110
+#' @seealso [canonicalize_cell()]
109 111
 #' @example inst/examples/small_cluster_example.R
110 112
 canonicalize_cluster = function(ccdb, contig_filter_args = is_medoid,
111 113
 tie_break_keys = character(), order = 1, representative = ccdb$cluster_pk[1], contig_fields = c('cdr3', 'cdr3_nt', 'chain', 'v_gene', 'd_gene', 'j_gene')){
... ...
@@ -140,20 +142,20 @@ tie_break_keys = character(), order = 1, representative = ccdb$cluster_pk[1], co
140 142
 #' @param type character either `AA` or `DNA` specifying type of `seqs`
141 143
 #' @param big_memory_brute attempt to cluster more than 4000 sequences?  Clustering is quadratic, so this will take a long time and might exhaust memory
142 144
 #' @param method one of 'substitutionMatrix' or 'levenshtein'
143
-#' @param substitution_matrix a character vector naming a substition matrix available in Biostrings, or a substitution matrix itself
145
+#' @param substitution_matrix a character vector naming a substitution matrix available in Biostrings, or a  substitution matrix itself
144 146
 #' @param cluster_fun `character`, one of "hclust" or "none", determining if distance matrices should also be clustered with `hclust`
145 147
 #' @param cluster_method character passed to `hclust`
146 148
 #'
147
-#' @seealso hclust, stringDist
148
-#' @return `list` containing
149
+#' @seealso [hclust()], [Biostrings::stringDist()]
150
+#' @return `list`
149 151
 #' @export
150 152
 #' @import Biostrings
151 153
 #' @examples
152 154
 #' fasta_path = system.file('extdata', 'demo.fasta', package='CellaRepertorium')
153 155
 #' aaseq = Biostrings::readAAStringSet(fasta_path)[1:100]
154
-#' cls = fine_cluster_seqs(aaseq)
156
+#' cls = fine_cluster_seqs(aaseq, cluster_fun = 'hclust')
155 157
 #' plot(cls$cluster)
156
-fine_cluster_seqs = function(seqs, type = 'AA', big_memory_brute = FALSE, method = 'levenshtein', substitution_matrix = 'BLOSUM100', cluster_fun = 'hclust', cluster_method = 'complete'){
158
+fine_cluster_seqs = function(seqs, type = 'AA', big_memory_brute = FALSE, method = 'levenshtein', substitution_matrix = 'BLOSUM100', cluster_fun = 'none', cluster_method = 'complete'){
157 159
     if(length(seqs) > 4000 & !big_memory_brute) stop("Won't try to cluster ", length(seqs), " sequences unless `big_memory_brute` = TRUE.  (Make sure you actually have lots of memory!)")
158 160
     type = match.arg(type, choices = c('AA', 'DNA'))
159 161
     cluster_fun = match.arg(cluster_fun, c('hclust', 'none'))
... ...
@@ -204,7 +206,7 @@ fine_cluster_seqs = function(seqs, type = 'AA', big_memory_brute = FALSE, method
204 206
 #' Calculate the entropy of a vector
205 207
 #'
206 208
 #' @param v categorical vector
207
-#' @param pseudo_count number of pseudo counts to add on, to stablize empty categories
209
+#' @param pseudo_count number of pseudo counts to add on, to stabilize empty categories
208 210
 #' @param na.action how to handle NA values
209 211
 #'
210 212
 #' @return the sample entropy
... ...
@@ -3,8 +3,13 @@
3 3
 #' The details of how these are generated are shown in the vignette mouse_tcell_qc
4 4
 #' and are serialized to serve as an examples for other vignettes and documentation.
5 5
 #' @format A data frame of 3399 contigs and 22 fields,
6
-#'  all except 4 are originally defined in https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/output/annotation#contig
7
-#'  The following fields were defined ex post facto. anno_file: Path to original csv file, pop: Mouse strain. sample: An artificial "replicate" from the original data defined by subsampling with replacement, celltype:The putative cell type of the contig.
6
+#'  all except 4 are originally defined in <https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/output/annotation#contig>
7
+#'  The following fields were defined ex post facto.
8
+#'
9
+#'  1. `anno_file`: Path to original csv file
10
+#'  2. `pop`: Mouse strain.
11
+#'  3. `sample`: An artificial "replicate" from the original data defined by subsampling with replacement
12
+#'  4. `celltype`: The putative cell type of the contig.
8 13
 #'
9 14
 "contigs_qc"
10 15
 
... ...
@@ -6,7 +6,7 @@ globalVariables(c('prev'))
6 6
 #' If order = 1 then the canonical chain-cluster will be the most prevalent, and if order = 2, it will be the 2nd most prevalent, and so on.  Ties are broken arbitrarily (possibly by lexicographic order of `cluster_idx`).
7 7
 #' @param tbl `data.frame` containing columns specified in `cell_identifiers`, `cluster_idx` and optionally `chain_identifiers`
8 8
 #' @param cell_identifiers `character` vector specifying columns in `tbl` that identify a cell
9
-#' @param cluster_idx `character` specifying the column in `tbl` that identifies a clsuter
9
+#' @param cluster_idx `character` specifying the column in `tbl` that identifies a cluster
10 10
 #' @param order return the 1st, 2nd, 3rd, etc, most common chain-cluster
11 11
 #'
12 12
 #' @return `data.frame` with columns from `cell_identifiers` and a single `cluster_idx` for each cell
... ...
@@ -38,14 +38,14 @@ canonicalize_by_chain = function(tbl,  cell_identifiers = 'barcode', sort_factor
38 38
 
39 39
 #' Find a canonical contig to represent a cell
40 40
 #'
41
-#' Using filtering in `...` and sorting in `tie_break_keys` and `order` find a
41
+#' Using filtering in `contig_filter_args` and sorting in `tie_break_keys` and `order` find a
42 42
 #' single, canonical contig to represent each cell
43 43
 #' Fields in `contig_fields` will be copied over to the `cell_tbl`.
44 44
 #' @inheritParams canonicalize_cluster
45 45
 #'
46 46
 #' @return `ContigCellDB` with additional fields in `cell_tbl`
47 47
 #' @export
48
-#' @seealso canonicalize_cluster
48
+#' @seealso [canonicalize_cluster()]
49 49
 #' @examples
50 50
 #' # Report beta chain with highest umi-count, breaking ties with reads
51 51
 #' beta = canonicalize_cell(ccdb_ex, chain == 'TRB',
... ...
@@ -136,7 +136,7 @@ get_canonical_representative = function(seqs, medoid_idx, warn_if_distinct = FAL
136 136
 #' @return list of tables.  The `cell_tbl` is keyed by the `cell_identifiers`, with fields "cluster_idx.1", "cluster_idx.2", etc, IDing the contigs present in each cell. "cluster_idx.1_fct" and "cluster_idx.2_fct" cast these fields to factors and are reordered to maximize the number of pairs along the diagonal. The `idx1_tbl` and `idx2_tbl` report information (passed in about the `cluster_idx` by `feature_tbl`.)  The `cluster_pair_tbl` reports all pairings found of contigs, and the number of times observed.
137 137
 #' @export
138 138
 #'
139
-#' @seealso canonicalize_by_prevalence, canonicalize_by_chain
139
+#' @seealso [canonicalize_by_prevalence()], [canonicalize_by_chain()]
140 140
 #' @importFrom tibble as_data_frame
141 141
 #' @importFrom dplyr bind_rows left_join ungroup summarize anti_join
142 142
 #' @importFrom stringr str_length str_c
... ...
@@ -1,4 +1,4 @@
1
-#' Calculate number of cluster-subject singletons
1
+#' Calculate number of cluster-subject singletons for the purposes of permutation testing
2 2
 #'
3 3
 #' @param cluster_idx factor-like cluster variable
4 4
 #' @param subject factor-like subject
... ...
@@ -6,7 +6,7 @@
6 6
 #' @return average number of singletons
7 7
 #' @export
8 8
 #'
9
-#' @seealso cluster_permute_test
9
+#' @seealso [cluster_permute_test()]
10 10
 purity = function(cluster_idx, subject) {
11 11
     n_label_cluster = dplyr::bind_cols(cluster_idx = cluster_idx, subject = subject) %>%
12 12
         group_by(cluster_idx, subject) %>% summarize(n = n()) %>% ungroup()
... ...
@@ -31,7 +31,7 @@ purity = function(cluster_idx, subject) {
31 31
 #' @param alternative `character` naming the direction `statistic` should be fall under the alternative hypothesis
32 32
 #'
33 33
 #' @return a list containing the observed value of the statistic, its expectation (under independence), a p-value, and the monte carlo standard error (of the expected value).
34
-#' @seealso purity
34
+#' @seealso [purity()]
35 35
 #' @export
36 36
 #'
37 37
 #' @examples
... ...
@@ -27,29 +27,34 @@ Requires R >= 3.5.
27 27
 ## Data requirements and package structure
28 28
 
29 29
 The fundamental unit this package operates on is the **contig**, which is a section of contiguously stitched reads from a single **cell**.  Each contig belongs to one (and only one) cell, however, cells generate multiple contigs.  
30
+<img src = man/figures/contig_schematic.png />
30 31
 
31
-```{r, echo = FALSE}
32
-knitr::include_graphics('vignettes/img/contig_schematic.png')
33
-```
34 32
 
35
-Contigs can also belong to a **cluster**.  Because of these two many-to-one mappings, these data can be thought as a series of ragged arrays.  The links between them mean they are relational data. A `ContigCellDB` object wraps each of these objects as a sequence of three `data.frames` (well, `tibbles`, actually).   `ContigCellDB` also tracks columns (the primary keys) that unique identify each row in each of these tables.  The `contig_tbl` is the `tibble` containing **contigs**, the `cell_tbl` contains the **cells**, and the `cluster_tbl` contains the **clusters**.  
33
+Contigs can also belong to a **cluster**.  Because of these two many-to-one mappings, these data can be thought as a series of ragged arrays.  The links between them mean they are relational data. A `ContigCellDB()` object wraps each of these objects as a sequence of three `data.frames` (well, `dplyr::tibble()`, actually).   `ContigCellDB()` also tracks columns (the primary keys) that unique identify each row in each of these tables.  The `contig_tbl` is the `tibble` containing **contigs**, the `cell_tbl` contains the **cells**, and the `cluster_tbl` contains the **clusters**.  
36 34
 
37
-The `contig_pk`, `cell_pk` and `cluster_pk` identify the columns that identify a contig, cell and cluster, respectively.  These will serve as foreign keys that link the three tables together.
35
+The `contig_pk`, `cell_pk` and `cluster_pk` specify the columns that identify a contig, cell and cluster, respectively.  These will serve as foreign keys that link the three tables together.
38 36
 The tables are kept in sync so that subsetting the contigs will subset the cells, and clusters, and vice-versa.
39 37
 
40
-```{r, echo = FALSE}
41
-knitr::include_graphics('vignettes/img/table_schematic.png')
42
-```
38
+<img src = man/figures/table_schematic.png />
43 39
 
44
-Of course, each of these tables can contain many other columns that will serve as covariates for various analyses, such as the CDR3 sequence, or the identity of the V, D and J regions.  Various derived quantities that describe cells and clusters can also be calculated, and added to these tables, such as the medoid of a cluster --  a contig that  minimizes the average distance to all other clusters. 
40
+Of course, each of these tables can contain many other columns that will serve as covariates for various analyses, such as the **CDR3** sequence, or the identity of the **V**, **D** and **J** regions.  Various derived quantities that describe cells and clusters can also be calculated, and added to these tables, such as the **medoid** of a cluster --  a contig that minimizes the average distance to all other clusters. 
45 41
 
46
-## Functions
42
+## Some functions of interest
47 43
 
44
+Mainly, this package seeks to enforce proper schema of single cell repertoire 
45
+data and stay out the user's way for various summaries they might conduct.  
48 46
 [a screencap of something interesting?]
49 47
 
50
-*  `cdhit_ccdb`: An R interface to CDhit, which was originally ported by Thomas Lin Pedersen.
51
-*  `fine_clustering`: clustering CDR3 by edit distances (possibly using empirical amino acid substitution matrices)
52
-*  `cluster_permute_test`: permutation tests of cluster statistics
53
-*  `pairing_tables`: Generate pairings of contigs within each cell in a way that they can be plotted
54 48
 
49
+However, there are a variety of specialized functions, as well:
50
+*  `cdhit_ccdb()`: An R interface to CDhit, which was originally ported by Thomas Lin Pedersen.
51
+*  `fine_clustering()`: clustering CDR3 by edit distances (possibly using empirical amino acid substitution matrices)
52
+*  `canonicalize_cell()`: Return a single contig for each cell, e.g., for combining VDJ information with 5'-based single cell expressoin
53
+*  `cluster_permute_test()`: permutation tests of cluster statistics
54
+*  `pairing_tables()`: Generate pairings of contigs within each cell in a way that they can be plotted
55
+
56
+## Inferfacing related packages for clonal analyses
55 57
 
58
+*  To combine repertoire information with expression of endogenuous mRNAs, this package has been used with `SingleCellExperiment::SingleCellExperiment()` and [Seurat](https://satijalab.org/seurat/) after generating various cell canonicalizations.
59
+*  Functionality is under development facilitate submitting actual contig `fasta` to tools such as the IGMT's [HighV-QUEST](http://imgt.org/HighV-QUEST/home.action)
60
+*  Many tools from the [Immcantation](https://alakazam.readthedocs.io/en/version-0.2.11/) suite can work directly on `ContigCellDB()` objects.
... ...
@@ -18,19 +18,18 @@ Requires R \>= 3.5.
18 18
 The fundamental unit this package operates on is the **contig**, which
19 19
 is a section of contiguously stitched reads from a single **cell**. Each
20 20
 contig belongs to one (and only one) cell, however, cells generate
21
-multiple contigs.
22
-
23
-![](vignettes/img/contig_schematic.png)<!-- -->
21
+multiple contigs.  
22
+<img src = man/figures/contig_schematic.png />
24 23
 
25 24
 Contigs can also belong to a **cluster**. Because of these two
26 25
 many-to-one mappings, these data can be thought as a series of ragged
27 26
 arrays. The links between them mean they are relational data. A
28
-`ContigCellDB` object wraps each of these objects as a sequence of three
29
-`data.frames` (well, `tibbles`, actually). `ContigCellDB` also tracks
30
-columns (the primary keys) that unique identify each row in each of
31
-these tables. The `contig_tbl` is the `tibble` containing **contigs**,
32
-the `cell_tbl` contains the **cells**, and the `cluster_tbl` contains
33
-the **clusters**.
27
+`ContigCellDB()` object wraps each of these objects as a sequence of
28
+three `data.frames` (well, `tibbles`, actually). `ContigCellDB` also
29
+tracks columns (the primary keys) that unique identify each row in each
30
+of these tables. The `contig_tbl` is the `tibble` containing
31
+**contigs**, the `cell_tbl` contains the **cells**, and the
32
+`cluster_tbl` contains the **clusters**.
34 33
 
35 34
 The `contig_pk`, `cell_pk` and `cluster_pk` identify the columns that
36 35
 identify a contig, cell and cluster, respectively. These will serve as
... ...
@@ -38,7 +37,7 @@ foreign keys that link the three tables together. The tables are kept in
38 37
 sync so that subsetting the contigs will subset the cells, and clusters,
39 38
 and vice-versa.
40 39
 
41
-![](vignettes/img/table_schematic.png)<!-- -->
40
+<img src = man/figures/table_schematic.png />
42 41
 
43 42
 Of course, each of these tables can contain many other columns that will
44 43
 serve as covariates for various analyses, such as the CDR3 sequence, or
... ...
@@ -12,13 +12,14 @@ reference:
12 12
   contents:
13 13
    - '`cluster_germline`'
14 14
    - '`fine_clustering`'
15
-   - '`cdhit`'
15
+   - '`cdhit_ccdb`'
16 16
    - '`pairing_tables`'
17 17
 - title: Canonicalization
18 18
   desc: "Methods to return single contigs for cells or clusters"
19 19
   contents:
20 20
    - '`canonicalize_cell`'
21 21
    - '`canonicalize_cluster`'
22
+   - '`ig_chain_recode`'
22 23
 - title: Statistical testing
23 24
   contents:
24 25
    - '`cluster_permute_test`'
... ...
@@ -27,15 +28,12 @@ reference:
27 28
   contents:
28 29
    - '`ccdb_ex`'
29 30
    - '`contigs_qc`'
30
-   - '`canonicalize_by_prevalence`'
31 31
 - title: Internal or WIP
32 32
   desc: "Functions that may be made internal, removed, or with interfaces subject to change."
33 33
   contents:
34
-  - '`cluster_germline`'
35
-  - '`cluster_permute_test`'
34
+  - '`canonicalize_by_prevalence`'
36 35
   - '`.cluster_permute_test`'
37 36
   - '`entropy`'
38
-  - '`ig_chain_recode`'
39 37
   - '`fancy_name_contigs`'
40 38
   - '`fine_cluster_seqs`'
41 39
   - '`get_canonical_representative`'
... ...
@@ -30,7 +30,7 @@
30 30
       </button>
31 31
       <span class="navbar-brand">
32 32
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
33
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
33
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
34 34
       </span>
35 35
     </div>
36 36
 
... ...
@@ -134,7 +134,7 @@
134 134
 <h1 class="hasAnchor">
135 135
 <a href="#cluster-cdr3-protein-sequences" class="anchor"></a>Cluster CDR3 protein sequences</h1>
136 136
 <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb4-1" data-line-number="1"></a>
137
-<a class="sourceLine" id="cb4-2" data-line-number="2">aa80 =<span class="st"> </span>CellaRepertorium<span class="op">:::</span><span class="kw"><a href="../reference/cdhit.html">cdhit_ccdb</a></span>(cdb, <span class="st">'cdr3'</span>, <span class="dt">type =</span> <span class="st">'AA'</span>, <span class="dt">cluster_name =</span> <span class="st">'aa80'</span>, <span class="dt">identity =</span> <span class="fl">.8</span>)</a>
137
+<a class="sourceLine" id="cb4-2" data-line-number="2">aa80 =<span class="st"> </span><span class="kw"><a href="../reference/cdhit_ccdb.html">cdhit_ccdb</a></span>(cdb, <span class="st">'cdr3'</span>, <span class="dt">type =</span> <span class="st">'AA'</span>, <span class="dt">cluster_name =</span> <span class="st">'aa80'</span>, <span class="dt">identity =</span> <span class="fl">.8</span>)</a>
138 138
 <a class="sourceLine" id="cb4-3" data-line-number="3">aa80 =<span class="st"> </span><span class="kw"><a href="../reference/fine_clustering.html">fine_clustering</a></span>(aa80, <span class="dt">sequence_key =</span> <span class="st">'cdr3'</span>, <span class="dt">type =</span> <span class="st">'AA'</span>, <span class="dt">keep_clustering_details =</span> <span class="ot">TRUE</span>)</a>
139 139
 <a class="sourceLine" id="cb4-4" data-line-number="4"><span class="co">#&gt; Calculating intradistances on 988 clusters.</span></a>
140 140
 <a class="sourceLine" id="cb4-5" data-line-number="5"><span class="co">#&gt; Summarizing</span></a>
... ...
@@ -146,12 +146,12 @@
146 146
 <a class="sourceLine" id="cb4-11" data-line-number="11"><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(aa80<span class="op">$</span>cluster_tbl <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(n_cluster<span class="op">&gt;</span><span class="dv">1</span>) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://tidyr.tidyverse.org/reference/gather.html">gather</a></span>(key, value, <span class="op">-</span>aa80, <span class="op">-</span>fc) , <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> value))<span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/facet_wrap.html">facet_wrap</a></span>(<span class="op">~</span>key, <span class="dt">scales =</span> <span class="st">'free'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_histogram.html">geom_histogram</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/scale_continuous.html">scale_y_sqrt</a></span>()</a>
147 147
 <a class="sourceLine" id="cb4-12" data-line-number="12"><span class="co">#&gt; `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</span></a></code></pre></div>
148 148
 <p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-4-1.png" width="700"></p>
149
-<p>We cluster the CDR3 translated amino acid residues with the program <a href="http://weizhongli-lab.org/cdhit_suite/cgi-bin/index.cgi?cmd=cd-hit">CD-HIT</a>. A sequence is included in a cluster if it matches by 100% similiarity and has the same CDR3 length. Note that this can and should be relaxed – especially in the beta chain we see “near clones” that only differ by a residue or two, seemingly in stylized places.</p>
149
+<p>We cluster the CDR3 translated amino acid residues with the program <a href="http://weizhongli-lab.org/cdhit_suite/cgi-bin/index.cgi?cmd=cd-hit">CD-HIT</a>. A sequence is included in a cluster if it matches by 100% similarity and has the same CDR3 length. Note that this can and should be relaxed – especially in the beta chain we see “near clones” that only differ by a residue or two, seemingly in stylized places.</p>
150 150
 </div>
151 151
 <div id="cluster-cdr3-dna-sequences" class="section level1">
152 152
 <h1 class="hasAnchor">
153 153
 <a href="#cluster-cdr3-dna-sequences" class="anchor"></a>Cluster CDR3 DNA sequences</h1>
154
-<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb5-1" data-line-number="1">cdb =<span class="st"> </span>CellaRepertorium<span class="op">:::</span><span class="kw"><a href="../reference/cdhit.html">cdhit_ccdb</a></span>(cdb, <span class="st">'cdr3_nt'</span>, <span class="dt">type =</span> <span class="st">'DNA'</span>, <span class="dt">cluster_name =</span> <span class="st">'DNA97'</span>, <span class="dt">identity =</span> <span class="fl">.965</span>, <span class="dt">min_length =</span> MIN_CDR3_AA<span class="op">*</span><span class="dv">3-1</span>, <span class="dt">G =</span> <span class="dv">1</span>)</a>
154
+<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb5-1" data-line-number="1">cdb =<span class="st"> </span><span class="kw"><a href="../reference/cdhit_ccdb.html">cdhit_ccdb</a></span>(cdb, <span class="st">'cdr3_nt'</span>, <span class="dt">type =</span> <span class="st">'DNA'</span>, <span class="dt">cluster_name =</span> <span class="st">'DNA97'</span>, <span class="dt">identity =</span> <span class="fl">.965</span>, <span class="dt">min_length =</span> MIN_CDR3_AA<span class="op">*</span><span class="dv">3-1</span>, <span class="dt">G =</span> <span class="dv">1</span>)</a>
155 155
 <a class="sourceLine" id="cb5-2" data-line-number="2">cdb =<span class="st"> </span><span class="kw"><a href="../reference/fine_clustering.html">fine_clustering</a></span>(cdb, <span class="dt">sequence_key =</span> <span class="st">'cdr3_nt'</span>, <span class="dt">type =</span> <span class="st">'DNA'</span>)</a>
156 156
 <a class="sourceLine" id="cb5-3" data-line-number="3"><span class="co">#&gt; Calculating intradistances on 1342 clusters.</span></a>
157 157
 <a class="sourceLine" id="cb5-4" data-line-number="4"><span class="co">#&gt; Summarizing</span></a>
... ...
@@ -163,7 +163,7 @@
163 163
 <div id="cluster-by-v-j-identity" class="section level1">
164 164
 <h1 class="hasAnchor">
165 165
 <a href="#cluster-by-v-j-identity" class="anchor"></a>Cluster by V-J identity</h1>
166
-<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb6-1" data-line-number="1">germline_cluster =<span class="st"> </span>CellaRepertorium<span class="op">:::</span><span class="kw"><a href="../reference/cluster_germline.html">cluster_germline</a></span>(cdb, <span class="dt">segment_keys =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="st">'v_gene'</span>, <span class="st">'j_gene'</span>, <span class="st">'chain'</span>), <span class="dt">cluster_name =</span> <span class="st">'segment_idx'</span>)</a>
166
+<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb6-1" data-line-number="1">germline_cluster =<span class="st"> </span><span class="kw"><a href="../reference/cluster_germline.html">cluster_germline</a></span>(cdb, <span class="dt">segment_keys =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="st">'v_gene'</span>, <span class="st">'j_gene'</span>, <span class="st">'chain'</span>), <span class="dt">cluster_name =</span> <span class="st">'segment_idx'</span>)</a>
167 167
 <a class="sourceLine" id="cb6-2" data-line-number="2"><span class="co">#&gt; Warning in replace_cluster_tbl(ccdb, cluster_tbl, cl_con_tbl, cluster_pk =</span></a>
168 168
 <a class="sourceLine" id="cb6-3" data-line-number="3"><span class="co">#&gt; cluster_name): Replacing `cluster_tbl` with DNA97, .</span></a></code></pre></div>
169 169
 <p>We can cluster by any other feature of the contigs. Here we cluster each contig based on the chain and V-J genes. This gives us the set of observed V-J pairings:</p>
... ...
@@ -178,53 +178,26 @@
178 178
 <p>Number of pairs</p>
179 179
 <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb8-1" data-line-number="1"><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(germline_cluster<span class="op">$</span>cluster_tbl <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(chain <span class="op">==</span><span class="st"> 'TRB'</span>), <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> v_gene, <span class="dt">y =</span> j_gene, <span class="dt">fill =</span> avg_distance)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_tile.html">geom_tile</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/theme.html">theme</a></span>(<span class="dt">axis.text.x =</span> <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/element.html">element_text</a></span>(<span class="dt">angle =</span> <span class="dv">90</span>))</a></code></pre></div>
180 180
 <p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-8-1.png" width="700"></p>
181
-<p>Average Levenstein distance of CDR3 within each pair</p>
181
+<p>Average Levenstein distance of CDR3 within each pair. This might be turned into a z-score by fitting a weighted linear model with sum-to-zero contrasts and returning the pearson residuals. This could determine if a pairing has an unexpected small, or large, within cluster distance.</p>
182 182
 </div>
183
-<div id="some-simple-phylogenetic-relationship" class="section level1">
183
+<div id="oligo-clusters" class="section level1">
184 184
 <h1 class="hasAnchor">
185
-<a href="#some-simple-phylogenetic-relationship" class="anchor"></a>Some simple phylogenetic relationship</h1>
186
-<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb9-1" data-line-number="1"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/library">library</a></span>(ggdendro)</a>
187
-<a class="sourceLine" id="cb9-2" data-line-number="2"></a>
188
-<a class="sourceLine" id="cb9-3" data-line-number="3"><span class="co"># This should be turned into a function in the package somehow</span></a>
189
-<a class="sourceLine" id="cb9-4" data-line-number="4"><span class="co"># But plot arguments will be super-variable</span></a>
190
-<a class="sourceLine" id="cb9-5" data-line-number="5"><span class="co"># Maybe just return the `hc` object?</span></a>
191
-<a class="sourceLine" id="cb9-6" data-line-number="6">dendro_plot =<span class="st"> </span><span class="cf">function</span>(ccdb, idx, <span class="dt">method =</span> <span class="st">'complete'</span>){</a>
192
-<a class="sourceLine" id="cb9-7" data-line-number="7">    h =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(ccdb<span class="op">$</span>cluster_tbl, <span class="op">!!</span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/tidyeval.html">sym</a></span>(ccdb<span class="op">$</span>cluster_pk) <span class="op">==</span><span class="st"> </span>idx) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/pull.html">pull</a></span>(fc) <span class="op">%&gt;%</span><span class="st"> </span>.[[<span class="dv">1</span>]]</a>
193
-<a class="sourceLine" id="cb9-8" data-line-number="8">    quer =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(ccdb<span class="op">$</span>contig_tbl, <span class="op">!!</span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/tidyeval.html">sym</a></span>(ccdb<span class="op">$</span>cluster_pk) <span class="op">==</span><span class="st"> </span>idx)</a>
194
-<a class="sourceLine" id="cb9-9" data-line-number="9">    hc =<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/stats/topics/hclust">hclust</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/stats/topics/dist">as.dist</a></span>(h<span class="op">$</span>distance_mat), <span class="dt">method =</span> method) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/ggdendro/topics/dendro_data">dendro_data</a></span>(<span class="dt">type =</span> <span class="st">"rectangle"</span>)</a>
195
-<a class="sourceLine" id="cb9-10" data-line-number="10">    hc<span class="op">$</span>labels =<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/cbind">cbind</a></span>(hc<span class="op">$</span>labels, quer)</a>
196
-<a class="sourceLine" id="cb9-11" data-line-number="11">   <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(hc<span class="op">$</span>segments, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x=</span>x, <span class="dt">y=</span>y)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_segment.html">geom_segment</a></span>(<span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">xend=</span>xend, <span class="dt">yend=</span>yend)) <span class="op">+</span><span class="st"> </span></a>
197
-<a class="sourceLine" id="cb9-12" data-line-number="12"><span class="st">  </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_classic</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_text.html">geom_text</a></span>(<span class="dt">data =</span> hc<span class="op">$</span>labels, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">color =</span> sample, <span class="dt">label =</span> fancy_name), <span class="dt">size =</span> <span class="dv">3</span>, <span class="dt">angle =</span> <span class="dv">60</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/scale_continuous.html">scale_x_continuous</a></span>(<span class="dt">breaks =</span> <span class="ot">NULL</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">ylab</a></span>(<span class="st">'AA Distance'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">xlab</a></span>(<span class="st">''</span>)</a>
198
-<a class="sourceLine" id="cb9-13" data-line-number="13">}</a>
199
-<a class="sourceLine" id="cb9-14" data-line-number="14"></a>
200
-<a class="sourceLine" id="cb9-15" data-line-number="15">MIN_OLIGO =<span class="st"> </span><span class="dv">7</span></a>
201
-<a class="sourceLine" id="cb9-16" data-line-number="16">to_plot =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(aa80<span class="op">$</span>cluster_tbl, n_cluster <span class="op">&gt;=</span><span class="st"> </span>MIN_OLIGO)</a>
202
-<a class="sourceLine" id="cb9-17" data-line-number="17"></a>
203
-<a class="sourceLine" id="cb9-18" data-line-number="18"><span class="kw"><a href="https://purrr.tidyverse.org/reference/map.html">map</a></span>(to_plot<span class="op">$</span>aa80, <span class="op">~</span><span class="st"> </span><span class="kw">dendro_plot</span>(aa80, .))</a>
204
-<a class="sourceLine" id="cb9-19" data-line-number="19"><span class="co">#&gt; [[1]]</span></a></code></pre></div>
205
-<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-9-1.png" width="700"></p>
206
-<pre><code>#&gt; 
207
-#&gt; [[2]]</code></pre>
208
-<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-9-2.png" width="700"></p>
209
-<pre><code>#&gt; 
210
-#&gt; [[3]]</code></pre>
211
-<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-9-3.png" width="700"></p>
212
-<pre><code>#&gt; 
213
-#&gt; [[4]]</code></pre>
214
-<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-9-4.png" width="700"></p>
215
-<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb13-1" data-line-number="1">aa80 =<span class="st"> </span><span class="kw"><a href="../reference/canonicalize_cluster.html">canonicalize_cluster</a></span>(aa80, <span class="dt">representative =</span> <span class="st">'cdr3'</span>, <span class="dt">contig_fields =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="st">'cdr3'</span>, <span class="st">'cdr3_nt'</span>, <span class="st">'chain'</span>, <span class="st">'v_gene'</span>, <span class="st">'d_gene'</span>, <span class="st">'j_gene'</span>))</a></code></pre></div>
216
-<p>Pull the fields listed in <code>contig_fields</code> into the <code>cluster_tbl</code>, using the values found in the medoid contig</p>
217
-<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb14-1" data-line-number="1">oligo_clusters =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(aa80<span class="op">$</span>cluster_tbl, n_cluster <span class="op">&gt;=</span><span class="st"> </span>MIN_OLIGO)</a>
218
-<a class="sourceLine" id="cb14-2" data-line-number="2">oligo_contigs =<span class="st"> </span>aa80</a>
219
-<a class="sourceLine" id="cb14-3" data-line-number="3">oligo_contigs<span class="op">$</span>contig_tbl =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">semi_join</a></span>(oligo_contigs<span class="op">$</span>contig_tbl, oligo_clusters, <span class="dt">by =</span> <span class="st">'aa80'</span>)</a>
220
-<a class="sourceLine" id="cb14-4" data-line-number="4">oligo_contigs</a>
221
-<a class="sourceLine" id="cb14-5" data-line-number="5"><span class="co">#&gt; ContigCellDB of 54 contigs; 54 cells; and 4 clusters.</span></a>
222
-<a class="sourceLine" id="cb14-6" data-line-number="6"><span class="co">#&gt; Contigs keyed by barcode, pop, sample, contig_id; cells keyed by barcode, pop, sample.</span></a></code></pre></div>
185
+<a href="#oligo-clusters" class="anchor"></a>Oligo clusters</h1>
186
+<p>Next, we will examine the clusters that are found in many contigs. First we will get a canonical contig to represent each cluster. This will be the medoid contig, by default.</p>
187
+<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb9-1" data-line-number="1">aa80 =<span class="st"> </span><span class="kw"><a href="../reference/canonicalize_cluster.html">canonicalize_cluster</a></span>(aa80, <span class="dt">representative =</span> <span class="st">'cdr3'</span>, <span class="dt">contig_fields =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="st">'cdr3'</span>, <span class="st">'cdr3_nt'</span>, <span class="st">'chain'</span>, <span class="st">'v_gene'</span>, <span class="st">'d_gene'</span>, <span class="st">'j_gene'</span>))</a></code></pre></div>
188
+<p><code>aa80</code> now includes the fields listed in <code>contig_fields</code> in the <code>cluster_tbl</code>, using the values found in the medoid contig.</p>
189
+<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb10-1" data-line-number="1">MIN_OLIGO =<span class="st"> </span><span class="dv">7</span></a>
190
+<a class="sourceLine" id="cb10-2" data-line-number="2">oligo_clusters =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(aa80<span class="op">$</span>cluster_tbl, n_cluster <span class="op">&gt;=</span><span class="st"> </span>MIN_OLIGO)</a>
191
+<a class="sourceLine" id="cb10-3" data-line-number="3">oligo_contigs =<span class="st"> </span>aa80</a>
192
+<a class="sourceLine" id="cb10-4" data-line-number="4">oligo_contigs<span class="op">$</span>contig_tbl =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">semi_join</a></span>(oligo_contigs<span class="op">$</span>contig_tbl, oligo_clusters, <span class="dt">by =</span> <span class="st">'aa80'</span>)</a>
193
+<a class="sourceLine" id="cb10-5" data-line-number="5">oligo_contigs</a>
194
+<a class="sourceLine" id="cb10-6" data-line-number="6"><span class="co">#&gt; ContigCellDB of 54 contigs; 54 cells; and 4 clusters.</span></a>
195
+<a class="sourceLine" id="cb10-7" data-line-number="7"><span class="co">#&gt; Contigs keyed by barcode, pop, sample, contig_id; cells keyed by barcode, pop, sample.</span></a></code></pre></div>
223 196
 <p>Get contigs/cells/clusters found at least 7 times (across contigs). Note that replacing <code>contig_tbl</code> with the subset selected with the <code>semi_join</code> also automatically subsetted the <code>cell_tbl</code> and <code>cluster_tbl</code>.</p>
224
-<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb15-1" data-line-number="1">oligo_clusters =<span class="st"> </span>oligo_contigs<span class="op">$</span>contig_tbl <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/group_by.html">group_by</a></span>(aa80) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/summarise.html">summarize</a></span>(<span class="st">`</span><span class="dt">n subjects observed</span><span class="st">`</span> =<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/length">length</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/unique">unique</a></span>(sample))) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(oligo_clusters)</a>
225
-<a class="sourceLine" id="cb15-2" data-line-number="2"><span class="co">#&gt; Joining, by = "aa80"</span></a>
226
-<a class="sourceLine" id="cb15-3" data-line-number="3"></a>
227
-<a class="sourceLine" id="cb15-4" data-line-number="4">knitr<span class="op">::</span><span class="kw"><a href="https://www.rdocumentation.org/packages/knitr/topics/kable">kable</a></span>(oligo_clusters <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span>(aa80<span class="op">:</span>cdr3, chain<span class="op">:</span>j_gene, avg_distance, n_cluster))</a></code></pre></div>
197
+<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb11-1" data-line-number="1">oligo_clusters =<span class="st"> </span>oligo_contigs<span class="op">$</span>contig_tbl <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/group_by.html">group_by</a></span>(aa80) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/summarise.html">summarize</a></span>(<span class="st">`</span><span class="dt">n subjects observed</span><span class="st">`</span> =<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/length">length</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/unique">unique</a></span>(sample))) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(oligo_clusters)</a>
198
+<a class="sourceLine" id="cb11-2" data-line-number="2"><span class="co">#&gt; Joining, by = "aa80"</span></a>
199
+<a class="sourceLine" id="cb11-3" data-line-number="3"></a>
200
+<a class="sourceLine" id="cb11-4" data-line-number="4">knitr<span class="op">::</span><span class="kw"><a href="https://www.rdocumentation.org/packages/knitr/topics/kable">kable</a></span>(oligo_clusters <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span>(aa80<span class="op">:</span>cdr3, chain<span class="op">:</span>j_gene, avg_distance, n_cluster))</a></code></pre></div>
228 201
 <table class="table">
229 202
 <thead><tr class="header">
230 203
 <th align="right">aa80</th>
... ...
@@ -285,21 +258,48 @@
285 258
 </tbody>
286 259
 </table>
287 260
 <p>Report some statistics about these expanded clusters.</p>
288
-</div>
289
-<div id="oligo-clusters" class="section level1">
290
-<h1 class="hasAnchor">
291
-<a href="#oligo-clusters" class="anchor"></a>Oligo clusters</h1>
292
-<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb16-1" data-line-number="1">oligo_plot =<span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(oligo_contigs<span class="op">$</span>contig_tbl, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> representative, <span class="dt">fill =</span> chain)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_bar.html">geom_bar</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/coord_flip.html">coord_flip</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/scale_brewer.html">scale_fill_brewer</a></span>(<span class="dt">type =</span> <span class="st">'qual'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_minimal</a></span>()</a>
293
-<a class="sourceLine" id="cb16-2" data-line-number="2">oligo_plot</a></code></pre></div>
294
-<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-13-1.png" width="700"></p>
261
+<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb12-1" data-line-number="1">oligo_plot =<span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(oligo_contigs<span class="op">$</span>contig_tbl, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> representative, <span class="dt">fill =</span> chain)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_bar.html">geom_bar</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/coord_flip.html">coord_flip</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/scale_brewer.html">scale_fill_brewer</a></span>(<span class="dt">type =</span> <span class="st">'qual'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_minimal</a></span>()</a>
262
+<a class="sourceLine" id="cb12-2" data-line-number="2">oligo_plot</a></code></pre></div>
263
+<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-12-1.png" width="700"></p>
295 264
 <p>These always come from a single chain.</p>
296
-<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb17-1" data-line-number="1">oligo_plot <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">fill =</span>   sample) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/facet_wrap.html">facet_wrap</a></span>(<span class="op">~</span>pop)</a></code></pre></div>
297
-<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-14-1.png" width="700"></p>
265
+<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb13-1" data-line-number="1">oligo_plot <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">fill =</span>   sample) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/facet_wrap.html">facet_wrap</a></span>(<span class="op">~</span>pop)</a></code></pre></div>
266
+<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-13-1.png" width="700"></p>
298 267
 <p>But come from multiple populations and samples.</p>
268
+<div id="some-simple-phylogenetic-relationships" class="section level2">
269
+<h2 class="hasAnchor">
270
+<a href="#some-simple-phylogenetic-relationships" class="anchor"></a>Some simple phylogenetic relationships</h2>
271
+<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb14-1" data-line-number="1"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/library">library</a></span>(ggdendro)</a>
272
+<a class="sourceLine" id="cb14-2" data-line-number="2"></a>
273
+<a class="sourceLine" id="cb14-3" data-line-number="3"><span class="co"># This should be turned into a function in the package somehow</span></a>
274
+<a class="sourceLine" id="cb14-4" data-line-number="4"><span class="co"># But plot arguments will be super-variable</span></a>
275
+<a class="sourceLine" id="cb14-5" data-line-number="5"><span class="co"># Maybe just return the `hc` object?</span></a>
276
+<a class="sourceLine" id="cb14-6" data-line-number="6">dendro_plot =<span class="st"> </span><span class="cf">function</span>(ccdb, idx, <span class="dt">method =</span> <span class="st">'complete'</span>){</a>
277
+<a class="sourceLine" id="cb14-7" data-line-number="7">    h =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(ccdb<span class="op">$</span>cluster_tbl, <span class="op">!!</span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/tidyeval.html">sym</a></span>(ccdb<span class="op">$</span>cluster_pk) <span class="op">==</span><span class="st"> </span>idx) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/pull.html">pull</a></span>(fc) <span class="op">%&gt;%</span><span class="st"> </span>.[[<span class="dv">1</span>]]</a>
278
+<a class="sourceLine" id="cb14-8" data-line-number="8">    quer =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(ccdb<span class="op">$</span>contig_tbl, <span class="op">!!</span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/tidyeval.html">sym</a></span>(ccdb<span class="op">$</span>cluster_pk) <span class="op">==</span><span class="st"> </span>idx)</a>
279
+<a class="sourceLine" id="cb14-9" data-line-number="9">    hc =<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/stats/topics/hclust">hclust</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/stats/topics/dist">as.dist</a></span>(h<span class="op">$</span>distance_mat), <span class="dt">method =</span> method) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/ggdendro/topics/dendro_data">dendro_data</a></span>(<span class="dt">type =</span> <span class="st">"rectangle"</span>)</a>
280
+<a class="sourceLine" id="cb14-10" data-line-number="10">    hc<span class="op">$</span>labels =<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/cbind">cbind</a></span>(hc<span class="op">$</span>labels, quer)</a>
281
+<a class="sourceLine" id="cb14-11" data-line-number="11">   <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(hc<span class="op">$</span>segments, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x=</span>x, <span class="dt">y=</span>y)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_segment.html">geom_segment</a></span>(<span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">xend=</span>xend, <span class="dt">yend=</span>yend)) <span class="op">+</span><span class="st"> </span></a>
282
+<a class="sourceLine" id="cb14-12" data-line-number="12"><span class="st">  </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_classic</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_text.html">geom_text</a></span>(<span class="dt">data =</span> hc<span class="op">$</span>labels, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">color =</span> sample, <span class="dt">label =</span> fancy_name), <span class="dt">size =</span> <span class="dv">3</span>, <span class="dt">angle =</span> <span class="dv">60</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/scale_continuous.html">scale_x_continuous</a></span>(<span class="dt">breaks =</span> <span class="ot">NULL</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">ylab</a></span>(<span class="st">'AA Distance'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">xlab</a></span>(<span class="st">''</span>)</a>
283
+<a class="sourceLine" id="cb14-13" data-line-number="13">}</a>
284
+<a class="sourceLine" id="cb14-14" data-line-number="14"></a>
285
+<a class="sourceLine" id="cb14-15" data-line-number="15">to_plot =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(aa80<span class="op">$</span>cluster_tbl, n_cluster <span class="op">&gt;=</span><span class="st"> </span>MIN_OLIGO)</a>
286
+<a class="sourceLine" id="cb14-16" data-line-number="16"></a>
287
+<a class="sourceLine" id="cb14-17" data-line-number="17"><span class="kw"><a href="https://purrr.tidyverse.org/reference/map.html">map</a></span>(to_plot<span class="op">$</span>aa80, <span class="op">~</span><span class="st"> </span><span class="kw">dendro_plot</span>(aa80, .))</a>
288
+<a class="sourceLine" id="cb14-18" data-line-number="18"><span class="co">#&gt; [[1]]</span></a></code></pre></div>
289
+<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-14-1.png" width="700"></p>
290
+<pre><code>#&gt; 
291
+#&gt; [[2]]</code></pre>
292
+<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-14-2.png" width="700"></p>
293
+<pre><code>#&gt; 
294
+#&gt; [[3]]</code></pre>
295
+<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-14-3.png" width="700"></p>
296
+<pre><code>#&gt; 
297
+#&gt; [[4]]</code></pre>
298
+<p><img src="cdr3_clustering_files/figure-html/unnamed-chunk-14-4.png" width="700"></p>
299 299
 </div>
300
-<div id="formal-testing-for-frequency-differences" class="section level1">
301
-<h1 class="hasAnchor">
302
-<a href="#formal-testing-for-frequency-differences" class="anchor"></a>Formal testing for frequency differences</h1>
300
+<div id="formal-testing-for-frequency-differences" class="section level2">
301
+<h2 class="hasAnchor">
302
+<a href="#formal-testing-for-frequency-differences" class="anchor"></a>Formal testing for frequency differences</h2>
303 303
 <div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb18-1" data-line-number="1"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/library">library</a></span>(lme4)</a>
304 304
 <a class="sourceLine" id="cb18-2" data-line-number="2"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/library">library</a></span>(broom)</a>
305 305
 <a class="sourceLine" id="cb18-3" data-line-number="3">per_chain_sample =<span class="st"> </span>good_cluster_cells <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/group_by.html">group_by</a></span>(sample, pop, chain) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/summarise.html">summarize</a></span>(<span class="dt">total_cells =</span> <span class="kw"><a href="https://dplyr.tidyverse.org/reference/n.html">n</a></span>(), <span class="dt">weeks_premature =</span> weeks_premature[<span class="dv">1</span>])</a>
... ...
@@ -316,9 +316,11 @@
316 316
 <a class="sourceLine" id="cb19-3" data-line-number="3"><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(mm_outj, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> representative, <span class="dt">ymin =</span> ci_lo, <span class="dt">ymax =</span> ci_hi, <span class="dt">y =</span> <span class="kw">clamp</span>(estimate))) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_linerange.html">geom_pointrange</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/facet_wrap.html">facet_wrap</a></span>(<span class="op">~</span>term, <span class="dt">scales =</span> <span class="st">'free'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/coord_flip.html">coord_flip</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_minimal</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_abline.html">geom_hline</a></span>(<span class="dt">yintercept =</span> <span class="dv">0</span>, <span class="dt">lty =</span> <span class="dv">2</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">xlab</a></span>(<span class="st">"Isomorph"</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">ylab</a></span>(<span class="st">"log odds of isomorph"</span>)</a></code></pre></div>
317 317
 <p>We test if the binomial rate of clone expression differs between CD31+/- or term, for each clone.</p>
318 318
 </div>
319
+</div>
319 320
 <div id="clonal-pairs" class="section level1">
320 321
 <h1 class="hasAnchor">
321 322
 <a href="#clonal-pairs" class="anchor"></a>Clonal pairs</h1>
323
+<p>Next, we can examine the pairing between <span class="math inline">\(\alpha-\beta\)</span> chains and see if any pairs are found more than once.</p>
322 324
 <div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb20-1" data-line-number="1">class_colors =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/reexports.html">tibble</a></span>(<span class="dt">chain =</span>  <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/unique">unique</a></span>(aa80<span class="op">$</span>cluster_tbl<span class="op">$</span>chain)) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate</a></span>(<span class="dt">class_color =</span>  RColorBrewer<span class="op">::</span><span class="kw"><a href="https://www.rdocumentation.org/packages/RColorBrewer/topics/ColorBrewer">brewer.pal</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/length">length</a></span>(chain),<span class="st">"Set1"</span>)[<span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/seq">seq_along</a></span>(chain)])</a>
323 325
 <a class="sourceLine" id="cb20-2" data-line-number="2"><span class="co">#&gt; Warning in RColorBrewer::brewer.pal(length(chain), "Set1"): minimal value for n is 3, returning requested palette with 3 different levels</span></a>
324 326
 <a class="sourceLine" id="cb20-3" data-line-number="3"></a>
... ...
@@ -328,6 +330,7 @@
328 330
 <a class="sourceLine" id="cb20-7" data-line-number="7"><span class="co">#&gt; `forcats::fct_explicit_na`</span></a>
329 331
 <a class="sourceLine" id="cb20-8" data-line-number="8"><span class="co">#&gt; Warning: Column `representative` joining factors with different levels,</span></a>
330 332
 <a class="sourceLine" id="cb20-9" data-line-number="9"><span class="co">#&gt; coercing to character vector</span></a></code></pre></div>
333
+<p><code>pairing_tables</code> finds all contig combinations of order <code>table_order</code> across cells. Among those combinations that occur at least <code>min_expansion</code> times, the expanded combinations and and any other combinations that shared an expanded combo.</p>
331 334
 <div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb21-1" data-line-number="1">pairs_plt =<span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(pairing_list<span class="op">$</span>cell_tbl, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> cluster_idx<span class="fl">.1</span>_fct, <span class="dt">y =</span> cluster_idx<span class="fl">.2</span>_fct, <span class="dt">color =</span> sample, <span class="dt">shape =</span> pop)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_jitter.html">geom_jitter</a></span>(<span class="dt">width =</span> <span class="fl">.2</span>, <span class="dt">height =</span> <span class="fl">.2</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_minimal</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">xlab</a></span>(<span class="st">'TRB'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">ylab</a></span>(<span class="st">'TRA'</span>)</a>
332 335
 <a class="sourceLine" id="cb21-2" data-line-number="2"></a>
333 336
 <a class="sourceLine" id="cb21-3" data-line-number="3">feature_tbl =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(<span class="kw"><a href="https://dplyr.tidyverse.org/reference/bind.html">bind_rows</a></span>(pairing_list<span class="op">$</span>idx1_tbl, pairing_list<span class="op">$</span>idx2_tbl), class_colors)</a>
... ...
@@ -352,18 +355,20 @@
352 355
 <div id="expanded-clones" class="section level2">
353 356
 <h2 class="hasAnchor">
354 357
 <a href="#expanded-clones" class="anchor"></a>Expanded clones</h2>
355
-<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb22-1" data-line-number="1">pairing_list =<span class="st"> </span><span class="kw"><a href="../reference/pairing_tables.html">pairing_tables</a></span>(oligo_clusters_all <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span>(representative, dataset, barcode, chain, umis, reads), <span class="dt">cluster_idx =</span> <span class="st">'representative'</span>, <span class="dt">cell_identifiers =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="st">'dataset'</span>, <span class="st">'barcode'</span>), <span class="dt">canonicalize_fun =</span> canonicalize_by_prevalence, <span class="dt">table_order =</span> <span class="dv">2</span>, <span class="dt">orphan_level =</span> <span class="dv">1</span>, <span class="dt">min_expansion =</span> <span class="dv">4</span>, <span class="dt">feature_tbl =</span> feature_tbl, <span class="dt">cell_tbl =</span> good_cells, <span class="dt">cluster_whitelist =</span> <span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(oligo_clusters, n_cluster<span class="op">&gt;</span><span class="dv">8</span>) <span class="op">%&gt;%</span><span class="st"> </span>dplyr<span class="op">::</span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span>(<span class="dt">cluster_idx.1 =</span> representative) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/unique">unique</a></span>())</a>
356
-<a class="sourceLine" id="cb22-2" data-line-number="2">pairs_plt =<span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(pairing_list<span class="op">$</span>cell_tbl, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> cluster_idx<span class="fl">.1</span>_fct, <span class="dt">y =</span> cluster_idx<span class="fl">.2</span>_fct, <span class="dt">color =</span> sample, <span class="dt">shape =</span> pop)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_jitter.html">geom_jitter</a></span>(<span class="dt">width =</span> <span class="fl">.2</span>, <span class="dt">height =</span> <span class="fl">.2</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_minimal</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">xlab</a></span>(<span class="st">'TRB'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">ylab</a></span>(<span class="st">'TRA'</span>)</a>
357
-<a class="sourceLine" id="cb22-3" data-line-number="3"></a>
358
-<a class="sourceLine" id="cb22-4" data-line-number="4">feature_tbl =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(<span class="kw"><a href="https://dplyr.tidyverse.org/reference/bind.html">bind_rows</a></span>(pairing_list<span class="op">$</span>idx1_tbl, pairing_list<span class="op">$</span>idx2_tbl), class_colors)</a>
359
-<a class="sourceLine" id="cb22-5" data-line-number="5"></a>
360
-<a class="sourceLine" id="cb22-6" data-line-number="6">ylab =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/reexports.html">data_frame</a></span>(<span class="dt">representative =</span>  <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot_build.html">ggplot_build</a></span>(pairs_plt)<span class="op">$</span>layout<span class="op">$</span>panel_params[[<span class="dv">1</span>]]<span class="op">$</span>y.label) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(feature_tbl) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate</a></span>(<span class="dt">class_color =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/ifelse">ifelse</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/NA">is.na</a></span>(class_color), <span class="st">'#E41A1C'</span>, class_color))</a>
361
-<a class="sourceLine" id="cb22-7" data-line-number="7"></a>
362
-<a class="sourceLine" id="cb22-8" data-line-number="8">xlab =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/reexports.html">data_frame</a></span>(<span class="dt">representative =</span>  <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot_build.html">ggplot_build</a></span>(pairs_plt)<span class="op">$</span>layout<span class="op">$</span>panel_params[[<span class="dv">1</span>]]<span class="op">$</span>x.label) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(feature_tbl) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate</a></span>(<span class="dt">class_color =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/ifelse">ifelse</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/NA">is.na</a></span>(class_color), <span class="st">'#E41A1C'</span>, class_color))</a>
363
-<a class="sourceLine" id="cb22-9" data-line-number="9"></a>
364
-<a class="sourceLine" id="cb22-10" data-line-number="10">pairs_plt =<span class="st"> </span>pairs_plt <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/theme.html">theme</a></span>(<span class="dt">axis.text.x =</span> <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/element.html">element_text</a></span>(<span class="dt">angle =</span> <span class="dv">90</span>, <span class="dt">color =</span> xlab<span class="op">$</span>class_color, <span class="dt">size =</span> <span class="dv">8</span>), <span class="dt">axis.text.y =</span> <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/element.html">element_text</a></span>(<span class="dt">color =</span> ylab<span class="op">$</span>class_color, <span class="dt">size =</span> <span class="dv">8</span>))</a>
365
-<a class="sourceLine" id="cb22-11" data-line-number="11"></a>
366
-<a class="sourceLine" id="cb22-12" data-line-number="12">pairs_plt</a></code></pre></div>
358
+<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb22-1" data-line-number="1">whitelist =<span class="st"> </span>oligo_clusters <span class="op">%&gt;%</span><span class="st"> </span>dplyr<span class="op">::</span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span>(<span class="dt">cluster_idx.1 =</span> representative) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/unique">unique</a></span>()</a>
359
+<a class="sourceLine" id="cb22-2" data-line-number="2">pairing_list =<span class="st"> </span><span class="kw"><a href="../reference/pairing_tables.html">pairing_tables</a></span>(aa80, <span class="dt">canonicalize_fun =</span> canonicalize_by_prevalence, <span class="dt">table_order =</span> <span class="dv">2</span>, <span class="dt">orphan_level =</span> <span class="dv">1</span>, <span class="dt">min_expansion =</span> <span class="ot">Inf</span>, <span class="dt">cluster_whitelist =</span> whitelist,  <span class="dt">cluster_keys =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="st">'cdr3'</span>, <span class="st">'representative'</span>, <span class="st">'chain'</span>, <span class="st">'v_gene'</span>, <span class="st">'j_gene'</span>, <span class="st">'avg_distance'</span>))</a>
360
+<a class="sourceLine" id="cb22-3" data-line-number="3">pairs_plt =<span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(pairing_list<span class="op">$</span>cell_tbl, <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> cluster_idx<span class="fl">.1</span>_fct, <span class="dt">y =</span> cluster_idx<span class="fl">.2</span>_fct, <span class="dt">color =</span> sample, <span class="dt">shape =</span> pop)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_jitter.html">geom_jitter</a></span>(<span class="dt">width =</span> <span class="fl">.2</span>, <span class="dt">height =</span> <span class="fl">.2</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_minimal</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">xlab</a></span>(<span class="st">'TRB'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">ylab</a></span>(<span class="st">'TRA'</span>)</a>
361
+<a class="sourceLine" id="cb22-4" data-line-number="4"></a>
362
+<a class="sourceLine" id="cb22-5" data-line-number="5">feature_tbl =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(<span class="kw"><a href="https://dplyr.tidyverse.org/reference/bind.html">bind_rows</a></span>(pairing_list<span class="op">$</span>idx1_tbl, pairing_list<span class="op">$</span>idx2_tbl), class_colors)</a>
363
+<a class="sourceLine" id="cb22-6" data-line-number="6"></a>
364
+<a class="sourceLine" id="cb22-7" data-line-number="7">ylab =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/reexports.html">data_frame</a></span>(<span class="dt">representative =</span>  <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot_build.html">ggplot_build</a></span>(pairs_plt)<span class="op">$</span>layout<span class="op">$</span>panel_params[[<span class="dv">1</span>]]<span class="op">$</span>y.label) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(feature_tbl) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate</a></span>(<span class="dt">class_color =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/ifelse">ifelse</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/NA">is.na</a></span>(class_color), <span class="st">'#E41A1C'</span>, class_color))</a>
365
+<a class="sourceLine" id="cb22-8" data-line-number="8"></a>
366
+<a class="sourceLine" id="cb22-9" data-line-number="9">xlab =<span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/reexports.html">data_frame</a></span>(<span class="dt">representative =</span>  <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot_build.html">ggplot_build</a></span>(pairs_plt)<span class="op">$</span>layout<span class="op">$</span>panel_params[[<span class="dv">1</span>]]<span class="op">$</span>x.label) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/join.html">left_join</a></span>(feature_tbl) <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate</a></span>(<span class="dt">class_color =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/ifelse">ifelse</a></span>(<span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/NA">is.na</a></span>(class_color), <span class="st">'#E41A1C'</span>, class_color))</a>
367
+<a class="sourceLine" id="cb22-10" data-line-number="10"></a>
368
+<a class="sourceLine" id="cb22-11" data-line-number="11">pairs_plt =<span class="st"> </span>pairs_plt <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/theme.html">theme</a></span>(<span class="dt">axis.text.x =</span> <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/element.html">element_text</a></span>(<span class="dt">angle =</span> <span class="dv">90</span>, <span class="dt">color =</span> xlab<span class="op">$</span>class_color, <span class="dt">size =</span> <span class="dv">8</span>), <span class="dt">axis.text.y =</span> <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/element.html">element_text</a></span>(<span class="dt">color =</span> ylab<span class="op">$</span>class_color, <span class="dt">size =</span> <span class="dv">8</span>))</a>
369
+<a class="sourceLine" id="cb22-12" data-line-number="12"></a>
370
+<a class="sourceLine" id="cb22-13" data-line-number="13">pairs_plt</a></code></pre></div>
371
+<p>By setting <code>min_expansion = Inf, cluster_whitelist = whitelist</code> we can examine any pairings for a set of cluster_idx, in this case the ones that were seen multiple times. Interestingly (and unlike some human samples) the expanded clusters are <span class="math inline">\(\beta\)</span>-chain, and their <span class="math inline">\(\alpha\)</span> chains are sprinkled quite evenly across clusters.</p>
367 372
 </div>
368 373
 </div>
369 374
 <div id="length-of-cdr3" class="section level1">
... ...
@@ -387,7 +392,7 @@
387 392
 <a class="sourceLine" id="cb24-12" data-line-number="12"><span class="co">#&gt; coercing into character vector</span></a>
388 393
 <a class="sourceLine" id="cb24-13" data-line-number="13"><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a></span>(cdr_len <span class="op">%&gt;%</span><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(group <span class="op">==</span><span class="st"> 'fixed'</span>, term <span class="op">!=</span><span class="st"> '(Intercept)'</span>), <span class="kw"><a href="https://ggplot2.tidyverse.org/reference/aes.html">aes</a></span>(<span class="dt">x =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/interaction">interaction</a></span>(chain, term), <span class="dt">y =</span> estimate, <span class="dt">ymin =</span> conf.low, <span class="dt">ymax =</span> conf.high)) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/geom_linerange.html">geom_pointrange</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/ggtheme.html">theme_minimal</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/coord_flip.html">coord_flip</a></span>() <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">ylab</a></span>(<span class="st">'Length(CDR3 Nt)'</span>) <span class="op">+</span><span class="st"> </span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">xlab</a></span>(<span class="st">'Term/Chain'</span>)</a></code></pre></div>
389 394
 <p><img src="cdr3_clustering_files/figure-html/cdr3_len-1.png" width="288"></p>
390
-<p>We end up with a convergence warning. This is not a suprise, because the <code>samples</code> aren’t actually replicates – they are just subsamples drawn for illustrative purposes. The Balbc mice have .5 fewer nucleotides per contig, on average, and this is not significant.</p>
395
+<p>We end up with a (harmless) convergence warning about a singular fit. This is expected, because the <code>samples</code> aren’t actually replicates – they are just subsamples drawn for illustrative purposes. The Balbc mice have .5 fewer nucleotides per contig, on average, and this is not significant.</p>
391 396
 </div>
392 397
   </div>
393 398
 
... ...
@@ -401,9 +406,12 @@
401 406
       <li><a href="#cluster-cdr3-protein-sequences">Cluster CDR3 protein sequences</a></li>
402 407
       <li><a href="#cluster-cdr3-dna-sequences">Cluster CDR3 DNA sequences</a></li>
403 408
       <li><a href="#cluster-by-v-j-identity">Cluster by V-J identity</a></li>
404
-      <li><a href="#some-simple-phylogenetic-relationship">Some simple phylogenetic relationship</a></li>
405
-      <li><a href="#oligo-clusters">Oligo clusters</a></li>
409
+      <li>
410
+<a href="#oligo-clusters">Oligo clusters</a><ul class="nav nav-pills nav-stacked">
411
+<li><a href="#some-simple-phylogenetic-relationships">Some simple phylogenetic relationships</a></li>
406 412
       <li><a href="#formal-testing-for-frequency-differences">Formal testing for frequency differences</a></li>
413
+      </ul>
414
+</li>
407 415
       <li>
408 416
 <a href="#clonal-pairs">Clonal pairs</a><ul class="nav nav-pills nav-stacked">
409 417
 <li><a href="#expanded-clones">Expanded clones</a></li>
410 418
Binary files a/docs/articles/cdr3_clustering_files/figure-html/plot_expanded-1.png and b/docs/articles/cdr3_clustering_files/figure-html/plot_expanded-1.png differ
411 419
new file mode 100644
412 420
Binary files /dev/null and b/docs/articles/cdr3_clustering_files/figure-html/unnamed-chunk-12-1.png differ
413 421
Binary files a/docs/articles/cdr3_clustering_files/figure-html/unnamed-chunk-13-1.png and b/docs/articles/cdr3_clustering_files/figure-html/unnamed-chunk-13-1.png differ
414 422
Binary files a/docs/articles/cdr3_clustering_files/figure-html/unnamed-chunk-14-1.png and b/docs/articles/cdr3_clustering_files/figure-html/unnamed-chunk-14-1.png differ
415 423
new file mode 100644
416 424
Binary files /dev/null and b/docs/articles/cdr3_clustering_files/figure-html/unnamed-chunk-14-2.png differ
417 425
new file mode 100644
418 426
Binary files /dev/null and b/docs/articles/cdr3_clustering_files/figure-html/unnamed-chunk-14-3.png differ
419 427
new file mode 100644
420 428
Binary files /dev/null and b/docs/articles/cdr3_clustering_files/figure-html/unnamed-chunk-14-4.png differ
... ...
@@ -60,7 +60,7 @@
60 60
       </button>
61 61
       <span class="navbar-brand">
62 62
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
63
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
63
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
64 64
       </span>
65 65
     </div>
66 66
 
... ...
@@ -30,7 +30,7 @@
30 30
       </button>
31 31
       <span class="navbar-brand">
32 32
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
33
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
33
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
34 34
       </span>
35 35
     </div>
36 36
 
37 37
Binary files a/docs/articles/mouse_tcell_qc_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/mouse_tcell_qc_files/figure-html/unnamed-chunk-5-1.png differ
38 38
Binary files a/docs/articles/mouse_tcell_qc_files/figure-html/unnamed-chunk-5-2.png and b/docs/articles/mouse_tcell_qc_files/figure-html/unnamed-chunk-5-2.png differ
... ...
@@ -60,7 +60,7 @@
60 60
       </button>
61 61
       <span class="navbar-brand">
62 62
         <a class="navbar-link" href="index.html">CellaRepertorium</a>
63
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
63
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
64 64
       </span>
65 65
     </div>
66 66
 
... ...
@@ -37,7 +37,7 @@
37 37
       </button>
38 38
       <span class="navbar-brand">
39 39
         <a class="navbar-link" href="index.html">CellaRepertorium</a>
40
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
40
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
41 41
       </span>
42 42
     </div>
43 43
 
... ...
@@ -104,28 +104,34 @@
104 104
 <div id="data-requirements-and-package-structure" class="section level2">
105 105
 <h2 class="hasAnchor">
106 106
 <a href="#data-requirements-and-package-structure" class="anchor"></a>Data requirements and package structure</h2>
107
-<p>The fundamental unit this package operates on is the <strong>contig</strong>, which is a section of contiguously stitched reads from a single <strong>cell</strong>. Each contig belongs to one (and only one) cell, however, cells generate multiple contigs.</p>
108
-<p><img src="../../../../Box%20Sync/research/scRNAseq/CellaRepertorium/vignettes/img/contig_schematic.png"><!-- --></p>
109
-<p>Contigs can also belong to a <strong>cluster</strong>. Because of these two many-to-one mappings, these data can be thought as a series of ragged arrays. The links between them mean they are relational data. A <code>ContigCellDB</code> object wraps each of these objects as a sequence of three <code>data.frames</code> (well, <code>tibbles</code>, actually). <code>ContigCellDB</code> also tracks columns (the primary keys) that unique identify each row in each of these tables. The <code>contig_tbl</code> is the <code>tibble</code> containing <strong>contigs</strong>, the <code>cell_tbl</code> contains the <strong>cells</strong>, and the <code>cluster_tbl</code> contains the <strong>clusters</strong>.</p>
110
-<p>The <code>contig_pk</code>, <code>cell_pk</code> and <code>cluster_pk</code> identify the columns that identify a contig, cell and cluster, respectively. These will serve as foreign keys that link the three tables together. The tables are kept in sync so that subsetting the contigs will subset the cells, and clusters, and vice-versa.</p>
111
-<p><img src="../../../../Box%20Sync/research/scRNAseq/CellaRepertorium/vignettes/img/table_schematic.png"><!-- --></p>
112
-<p>Of course, each of these tables can contain many other columns that will serve as covariates for various analyses, such as the CDR3 sequence, or the identity of the V, D and J regions. Various derived quantities that describe cells and clusters can also be calculated, and added to these tables, such as the medoid of a cluster – a contig that minimizes the average distance to all other clusters.</p>
107
+<p>The fundamental unit this package operates on is the <strong>contig</strong>, which is a section of contiguously stitched reads from a single <strong>cell</strong>. Each contig belongs to one (and only one) cell, however, cells generate multiple contigs.<br><img src="reference/figures/contig_schematic.png"></p>
108
+<p>Contigs can also belong to a <strong>cluster</strong>. Because of these two many-to-one mappings, these data can be thought as a series of ragged arrays. The links between them mean they are relational data. A <code><a href="reference/ContigCellDB-fun.html">ContigCellDB()</a></code> object wraps each of these objects as a sequence of three <code>data.frames</code> (well, <code><a href="https://dplyr.tidyverse.org/reference/reexports.html">dplyr::tibble()</a></code>, actually). <code><a href="reference/ContigCellDB-fun.html">ContigCellDB()</a></code> also tracks columns (the primary keys) that unique identify each row in each of these tables. The <code>contig_tbl</code> is the <code>tibble</code> containing <strong>contigs</strong>, the <code>cell_tbl</code> contains the <strong>cells</strong>, and the <code>cluster_tbl</code> contains the <strong>clusters</strong>.</p>
109
+<p>The <code>contig_pk</code>, <code>cell_pk</code> and <code>cluster_pk</code> specify the columns that identify a contig, cell and cluster, respectively. These will serve as foreign keys that link the three tables together. The tables are kept in sync so that subsetting the contigs will subset the cells, and clusters, and vice-versa.</p>
110
+<p><img src="reference/figures/table_schematic.png"></p>
111
+<p>Of course, each of these tables can contain many other columns that will serve as covariates for various analyses, such as the <strong>CDR3</strong> sequence, or the identity of the <strong>V</strong>, <strong>D</strong> and <strong>J</strong> regions. Various derived quantities that describe cells and clusters can also be calculated, and added to these tables, such as the <strong>medoid</strong> of a cluster – a contig that minimizes the average distance to all other clusters.</p>
113 112
 </div>
114
-<div id="functions" class="section level2">
113
+<div id="some-functions-of-interest" class="section level2">
115 114
 <h2 class="hasAnchor">
116
-<a href="#functions" class="anchor"></a>Functions</h2>
115
+<a href="#some-functions-of-interest" class="anchor"></a>Some functions of interest</h2>
117 116
 <p>[a screencap of something interesting?]</p>
118 117
 <ul>
119 118
 <li>
120
-<code>cdhit_ccdb</code>: An R interface to CDhit, which was originally ported by Thomas Lin Pedersen.</li>
119
+<code><a href="reference/cdhit_ccdb.html">cdhit_ccdb()</a></code>: An R interface to CDhit, which was originally ported by Thomas Lin Pedersen.</li>
121 120
 <li>
122
-<code>fine_clustering</code>: clustering CDR3 by edit distances (possibly using empirical amino acid substitution matrices)</li>
121
+<code><a href="reference/fine_clustering.html">fine_clustering()</a></code>: clustering CDR3 by edit distances (possibly using empirical amino acid substitution matrices)</li>
123 122
 <li>
124
-<code>cluster_permute_test</code>: permutation tests of cluster statistics</li>
123
+<code><a href="reference/canonicalize_cell.html">canonicalize_cell()</a></code>: Return a single contig for each cell, e.g., for combining VDJ information with 5’-based single cell expressoin</li>
125 124
 <li>
126
-<code>pairing_tables</code>: Generate pairings of contigs within each cell in a way that they can be plotted</li>
125
+<code><a href="reference/cluster_permute_test.html">cluster_permute_test()</a></code>: permutation tests of cluster statistics</li>
126
+<li>
127
+<code><a href="reference/pairing_tables.html">pairing_tables()</a></code>: Generate pairings of contigs within each cell in a way that they can be plotted</li>
127 128
 </ul>
128 129
 </div>
130
+<div id="inferfacing-related-packages" class="section level2">
131
+<h2 class="hasAnchor">
132
+<a href="#inferfacing-related-packages" class="anchor"></a>Inferfacing Related packages</h2>
133
+<p>To combine repertoire information with expression of endogenuous mRNAs have been measured, this package has been used with <code><a href="https://www.rdocumentation.org/packages/SingleCellExperiment/topics/SingleCellExperiment">SingleCellExperiment::SingleCellExperiment()</a></code> and (<code>Seurat</code>)[<a href="https://satijalab.org/seurat/" class="uri">https://satijalab.org/seurat/</a>] after generating various cell canonicalizations. Many tools from <a href="https://alakazam.readthedocs.io/en/version-0.2.11/"><code>Immcantation</code></a> work directly on <code><a href="reference/ContigCellDB-fun.html">ContigCellDB()</a></code> objects.</p>
134
+</div>
129 135
 </div>
130 136
   </div>
131 137
 
... ...
@@ -63,7 +63,7 @@
63 63
       </button>
64 64
       <span class="navbar-brand">
65 65
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
66
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
66
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
67 67
       </span>
68 68
     </div>
69 69
 
... ...
@@ -140,7 +140,7 @@
140 140
     </tr>
141 141
     <tr>
142 142
       <th>contig_pk</th>
143
-      <td><p>character vector naming fields in `contig_tbl` that uniquely identify a row/contig</p></td>
143
+      <td><p>character vector naming fields in <code>contig_tbl</code> that uniquely identify a row/contig</p></td>
144 144
     </tr>
145 145
     <tr>
146 146
       <th>cell_tbl</th>
... ...
@@ -148,7 +148,7 @@
148 148
     </tr>
149 149
     <tr>
150 150
       <th>cell_pk</th>
151
-      <td><p>character vector naming fields in `cell_tbl` that uniquely identify a cell barcode</p></td>
151
+      <td><p>character vector naming fields in <code>cell_tbl</code> that uniquely identify a cell barcode</p></td>
152 152
     </tr>
153 153
     <tr>
154 154
       <th>cluster_tbl</th>
... ...
@@ -156,7 +156,7 @@
156 156
     </tr>
157 157
     <tr>
158 158
       <th>cluster_pk</th>
159
-      <td><p>If `cluster_tbl` was provided, a character vector naming fields in `cluster_tbl` that uniquely identify a cluster</p></td>
159
+      <td><p>If <code>cluster_tbl</code> was provided, a character vector naming fields in <code>cluster_tbl</code> that uniquely identify a cluster</p></td>
160 160
     </tr>
161 161
     </table>
162 162
     
... ...
@@ -170,12 +170,69 @@
170 170
 <li><p><code>ContigCellDB_10XVDJ</code>: provide defaults that correspond to identifiers in 10X VDJ data</p></li>
171 171
 </ul>
172 172
     
173
+    <h2 class="hasAnchor" id="accessors-mutators"><a class="anchor" href="#accessors-mutators"></a>Accessors/mutators</h2>
174
+
175
+    
176
+    <p>See <code><a href='cash-ContigCellDB-method.html'>$,ContigCellDB-method</a></code> for more on how to access and mutate slots.
177
+At the moment, there is not a good way to combine objects without manually touching slots with <code>@</code>,
178
+but a <code>rbind</code> method is in the offing.</p>
179
+    
180
+    <h2 class="hasAnchor" id="see-also"><a class="anchor" href="#see-also"></a>See also</h2>
181
+
182
+    <div class='dont-index'><p><code><a href='cash-ContigCellDB-method.html'>$,ContigCellDB-method</a></code></p></div>
183
+    
173 184
 
174 185
     <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
175 186
     <pre class="examples"><div class='input'><span class='fu'><a href='https://www.rdocumentation.org/packages/utils/topics/data'>data</a></span>(<span class='no'>contigs_qc</span>)
176
-<span class='fu'>ContigCellDB</span>(<span class='no'>contigs_qc</span>, <span class='kw'>contig_pk</span> <span class='kw'>=</span> <span class='fu'><a href='https://www.rdocumentation.org/packages/base/topics/c'>c</a></span>(<span class='st'>'barcode'</span>, <span class='st'>'pop'</span>, <span class='st'>'sample'</span>, <span class='st'>'contig_id'</span>),
177
- <span class='kw'>cell_pk</span> <span class='kw'>=</span> <span class='fu'><a href='https://www.rdocumentation.org/packages/base/topics/c'>c</a></span>(<span class='st'>'barcode'</span>, <span class='st'>'pop'</span>, <span class='st'>'sample'</span>))</div><div class='output co'>#&gt; ContigCellDB of 1508 contigs; 832 cells; and 0 clusters.
178
-#&gt; Contigs keyed by barcode, pop, sample, contig_id; cells keyed by barcode, pop, sample.</div></pre>
187
+<span class='fu'><a href='https://www.rdocumentation.org/packages/utils/topics/head'>head</a></span>(<span class='no'>contigs_qc</span>)</div><div class='output co'>#&gt; <span style='color: #555555;'># A tibble: 6 x 22</span><span>
188
+#&gt;   anno_file pop   sample barcode is_cell contig_id high_confidence length chain
189
+#&gt;   </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>     </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span> </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>  </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>   </span><span style='color: #555555;font-style: italic;'>&lt;lgl&gt;</span><span>   </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>     </span><span style='color: #555555;font-style: italic;'>&lt;lgl&gt;</span><span>            </span><span style='color: #555555;font-style: italic;'>&lt;dbl&gt;</span><span> </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>
190
+#&gt; </span><span style='color: #555555;'>1</span><span> /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               611 TRB  
191
+#&gt; </span><span style='color: #555555;'>2</span><span> /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               609 TRB  
192
+#&gt; </span><span style='color: #555555;'>3</span><span> /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               538 TRA  
193
+#&gt; </span><span style='color: #555555;'>4</span><span> /Users/a… b6    4      AACCAT… TRUE    AACCATGC… TRUE               799 TRA  
194
+#&gt; </span><span style='color: #555555;'>5</span><span> /Users/a… b6    4      AACTGG… TRUE    AACTGGTG… TRUE               634 TRB  
195
+#&gt; </span><span style='color: #555555;'>6</span><span> /Users/a… b6    4      AACTGG… TRUE    AACTGGTG… TRUE               923 TRA  
196
+#&gt; </span><span style='color: #555555;'># … with 13 more variables: v_gene </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, d_gene </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, j_gene </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>,
197
+#&gt; #   c_gene </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, full_length </span><span style='color: #555555;font-style: italic;'>&lt;lgl&gt;</span><span style='color: #555555;'>, productive </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, cdr3 </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>,
198
+#&gt; #   cdr3_nt </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, reads </span><span style='color: #555555;font-style: italic;'>&lt;dbl&gt;</span><span style='color: #555555;'>, umis </span><span style='color: #555555;font-style: italic;'>&lt;dbl&gt;</span><span style='color: #555555;'>, raw_clonotype_id </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>,
199
+#&gt; #   raw_consensus_id </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, celltype </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span></div><div class='input'><span class='no'>cdb</span> <span class='kw'>=</span> <span class='fu'>ContigCellDB</span>(<span class='no'>contigs_qc</span>, <span class='kw'>contig_pk</span> <span class='kw'>=</span> <span class='fu'><a href='https://www.rdocumentation.org/packages/base/topics/c'>c</a></span>(<span class='st'>'barcode'</span>, <span class='st'>'pop'</span>, <span class='st'>'sample'</span>, <span class='st'>'contig_id'</span>),
200
+ <span class='kw'>cell_pk</span> <span class='kw'>=</span> <span class='fu'><a href='https://www.rdocumentation.org/packages/base/topics/c'>c</a></span>(<span class='st'>'barcode'</span>, <span class='st'>'pop'</span>, <span class='st'>'sample'</span>))
201
+ <span class='no'>cdb</span></div><div class='output co'>#&gt; ContigCellDB of 1508 contigs; 832 cells; and 0 clusters.
202
+#&gt; Contigs keyed by barcode, pop, sample, contig_id; cells keyed by barcode, pop, sample.</div><div class='input'> <span class='co'># everything that was in contigs_qc</span>
203
+ <span class='no'>cdb</span>$<span class='no'>contig_tbl</span></div><div class='output co'>#&gt; </span><span style='color: #555555;'># A tibble: 1,508 x 22</span><span>
204
+#&gt;    anno_file pop   sample barcode is_cell contig_id high_confidence length chain
205
+#&gt;    </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>     </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span> </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>  </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>   </span><span style='color: #555555;font-style: italic;'>&lt;lgl&gt;</span><span>   </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>     </span><span style='color: #555555;font-style: italic;'>&lt;lgl&gt;</span><span>            </span><span style='color: #555555;font-style: italic;'>&lt;dbl&gt;</span><span> </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>
206
+#&gt; </span><span style='color: #555555;'> 1</span><span> /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               611 TRB  
207
+#&gt; </span><span style='color: #555555;'> 2</span><span> /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               609 TRB  
208
+#&gt; </span><span style='color: #555555;'> 3</span><span> /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               538 TRA  
209
+#&gt; </span><span style='color: #555555;'> 4</span><span> /Users/a… b6    4      AACCAT… TRUE    AACCATGC… TRUE               799 TRA  
210
+#&gt; </span><span style='color: #555555;'> 5</span><span> /Users/a… b6    4      AACTGG… TRUE    AACTGGTG… TRUE               634 TRB  
211
+#&gt; </span><span style='color: #555555;'> 6</span><span> /Users/a… b6    4      AACTGG… TRUE    AACTGGTG… TRUE               923 TRA  
212
+#&gt; </span><span style='color: #555555;'> 7</span><span> /Users/a… b6    4      AAGCCG… TRUE    AAGCCGCA… TRUE               693 TRB  
213
+#&gt; </span><span style='color: #555555;'> 8</span><span> /Users/a… b6    4      AAGTCT… TRUE    AAGTCTGG… TRUE               658 TRB  
214
+#&gt; </span><span style='color: #555555;'> 9</span><span> /Users/a… b6    4      AAGTCT… TRUE    AAGTCTGG… TRUE               558 TRA  
215
+#&gt; </span><span style='color: #555555;'>10</span><span> /Users/a… b6    4      ACACCA… TRUE    ACACCAAA… TRUE               614 TRB  
216
+#&gt; </span><span style='color: #555555;'># … with 1,498 more rows, and 13 more variables: v_gene </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, d_gene </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>,
217
+#&gt; #   j_gene </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, c_gene </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, full_length </span><span style='color: #555555;font-style: italic;'>&lt;lgl&gt;</span><span style='color: #555555;'>, productive </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>,
218
+#&gt; #   cdr3 </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, cdr3_nt </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, reads </span><span style='color: #555555;font-style: italic;'>&lt;dbl&gt;</span><span style='color: #555555;'>, umis </span><span style='color: #555555;font-style: italic;'>&lt;dbl&gt;</span><span style='color: #555555;'>, raw_clonotype_id </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>,
219
+#&gt; #   raw_consensus_id </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span style='color: #555555;'>, celltype </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span></div><div class='input'> <span class='co'># Only the cell_pk are included by default (until clustering/canonicalization)</span>
220
+ <span class='no'>cdb</span>$<span class='no'>cell_tbl</span></div><div class='output co'>#&gt; </span><span style='color: #555555;'># A tibble: 832 x 3</span><span>
221
+#&gt;    barcode            pop   sample
222
+#&gt;    </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span>              </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span> </span><span style='color: #555555;font-style: italic;'>&lt;chr&gt;</span><span> 
223
+#&gt; </span><span style='color: #555555;'> 1</span><span> AAAGTAGTCGCGCCAA-1 b6    4     
224
+#&gt; </span><span style='color: #555555;'> 2</span><span> AACCATGCATTTGCCC-1 b6    4     
225
+#&gt; </span><span style='color: #555555;'> 3</span><span> AACTGGTGTCTGATCA-1 b6    4     
226
+#&gt; </span><span style='color: #555555;'> 4</span><span> AAGCCGCAGTAAGTAC-1 b6    4     
227
+#&gt; </span><span style='color: #555555;'> 5</span><span> AAGTCTGGTTCAACCA-1 b6    4     
228
+#&gt; </span><span style='color: #555555;'> 6</span><span> ACACCAAAGTCCAGGA-1 b6    4     
229
+#&gt; </span><span style='color: #555555;'> 7</span><span> ACATGGTAGTGTTTGC-1 b6    4     
230
+#&gt; </span><span style='color: #555555;'> 8</span><span> ACCCACTTCCACGACG-1 b6    4     
231
+#&gt; </span><span style='color: #555555;'> 9</span><span> ACGCCAGGTCCGAATT-1 b6    4     
232
+#&gt; </span><span style='color: #555555;'>10</span><span> ACGCCAGTCCAATGGT-1 b6    4     
233
+#&gt; </span><span style='color: #555555;'># … with 822 more rows</span><span></div><div class='input'> <span class='co'># Empty, since no cluster_pk was specified</span>
234
+ <span class='no'>cdb</span>$<span class='no'>cluster_tbl</span></div><div class='output co'>#&gt; </span><span style='color: #555555;'># A tibble: 0 x 0</span><span></div><div class='input'> <span class='co'># Keys</span>
235
+ <span class='no'>cdb</span>$<span class='no'>contig_pk</span></div><div class='output co'>#&gt; [1] "barcode"   "pop"       "sample"    "contig_id"</div><div class='input'> <span class='no'>cdb</span>$<span class='no'>cell_pk</span></div><div class='output co'>#&gt; [1] "barcode" "pop"     "sample" </div><div class='input'> <span class='no'>cdb</span>$<span class='no'>cluster_pk</span></div><div class='output co'>#&gt; character(0)</div></span></pre>
179 236
   </div>
180 237
   <div class="col-md-3 hidden-xs hidden-sm" id="sidebar">
181 238
     <h2>Contents</h2>
... ...
@@ -185,6 +242,10 @@
185 242
       <li><a href="#value">Value</a></li>
186 243
 
187 244
       <li><a href="#functions">Functions</a></li>
245
+
246
+      <li><a href="#accessors-mutators">Accessors/mutators</a></li>
247
+
248
+      <li><a href="#see-also">See also</a></li>
188 249
       
189 250
       <li><a href="#examples">Examples</a></li>
190 251
     </ul>
... ...
@@ -33,8 +33,8 @@
33 33
 <meta property="og:title" content="For each cell, return a single, canonical chain-cluster — canonicalize_by_prevalence" />
34 34
 
35 35
 <meta property="og:description" content="In single cell data, multiple chains (heavy-light or alpha-beta) are expected.  In some cases, there could be more than two (eg multiple alpha alleles for T cells).
36
-This picks a cluster id for each cell based on the overall prevalence of cluster ids over all cells in `tbl`.
37
-If order = 1 then the canonical chain-cluster will be the most prevalent, and if order = 2, it will be the 2nd most prevalent, and so on.  Ties are broken arbitrarily (possibly by lexicographic order of `cluster_idx`)." />
36
+This picks a cluster id for each cell based on the overall prevalence of cluster ids over all cells in tbl.
37
+If order = 1 then the canonical chain-cluster will be the most prevalent, and if order = 2, it will be the 2nd most prevalent, and so on.  Ties are broken arbitrarily (possibly by lexicographic order of cluster_idx)." />
38 38
 <meta name="twitter:card" content="summary" />
39 39
 
40 40
 
... ...
@@ -65,7 +65,7 @@ If order = 1 then the canonical chain-cluster will be the most prevalent, and if
65 65
       </button>
66 66
       <span class="navbar-brand">
67 67
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
68
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
68
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
69 69
       </span>
70 70
     </div>
71 71
 
... ...
@@ -124,8 +124,8 @@ If order = 1 then the canonical chain-cluster will be the most prevalent, and if
124 124
     <div class="ref-description">
125 125
     
126 126
     <p>In single cell data, multiple chains (heavy-light or alpha-beta) are expected.  In some cases, there could be more than two (eg multiple alpha alleles for T cells).
127
-This picks a cluster id for each cell based on the overall prevalence of cluster ids over all cells in `tbl`.
128
-If order = 1 then the canonical chain-cluster will be the most prevalent, and if order = 2, it will be the 2nd most prevalent, and so on.  Ties are broken arbitrarily (possibly by lexicographic order of `cluster_idx`).</p>
127
+This picks a cluster id for each cell based on the overall prevalence of cluster ids over all cells in <code>tbl</code>.
128
+If order = 1 then the canonical chain-cluster will be the most prevalent, and if order = 2, it will be the 2nd most prevalent, and so on.  Ties are broken arbitrarily (possibly by lexicographic order of <code>cluster_idx</code>).</p>
129 129
     
130 130
     </div>
131 131
 
... ...
@@ -142,15 +142,15 @@ If order = 1 then the canonical chain-cluster will be the most prevalent, and if
142 142
     <colgroup><col class="name" /><col class="desc" /></colgroup>
143 143
     <tr>
144 144
       <th>tbl</th>
145
-      <td><p>`data.frame` containing columns specified in `cell_identifiers`, `cluster_idx` and optionally `chain_identifiers`</p></td>
145
+      <td><p><code>data.frame</code> containing columns specified in <code>cell_identifiers</code>, <code>cluster_idx</code> and optionally <code>chain_identifiers</code></p></td>
146 146
     </tr>
147 147
     <tr>
148 148
       <th>cell_identifiers</th>
149
-      <td><p>`character` vector specifying columns in `tbl` that identify a cell</p></td>
149
+      <td><p><code>character</code> vector specifying columns in <code>tbl</code> that identify a cell</p></td>
150 150
     </tr>
151 151
     <tr>
152 152
       <th>cluster_idx</th>
153
-      <td><p>`character` specifying the column in `tbl` that identifies a clsuter</p></td>
153
+      <td><p><code>character</code> specifying the column in <code>tbl</code> that identifies a cluster</p></td>
154 154
     </tr>
155 155
     <tr>
156 156
       <th>order</th>
... ...
@@ -158,17 +158,17 @@ If order = 1 then the canonical chain-cluster will be the most prevalent, and if
158 158
     </tr>
159 159
     <tr>
160 160
       <th>sort_factors</th>
161
-      <td><p>`character` vector naming columns in `tbl` to sorted on, within  `cell_identifier`. Sorted by first element first, then ties broken by subsequent elements.  Sorted in decreasing order for each element.</p></td>
161
+      <td><p><code>character</code> vector naming columns in <code>tbl</code> to sorted on, within  <code>cell_identifier</code>. Sorted by first element first, then ties broken by subsequent elements.  Sorted in decreasing order for each element.</p></td>
162 162
     </tr>
163 163
     <tr>
164 164
       <th>chain_levels</th>
165
-      <td><p>an optional `character` vector providing the sort order of the `chain` column in `tbl`.  Set to length zero to disable.</p></td>
165
+      <td><p>an optional <code>character</code> vector providing the sort order of the <code>chain</code> column in <code>tbl</code>.  Set to length zero to disable.</p></td>
166 166
     </tr>
167 167
     </table>
168 168
     
169 169
     <h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
170 170
 
171
-    <p>`data.frame` with columns from `cell_identifiers` and a single `cluster_idx` for each cell</p>
171
+    <p><code>data.frame</code> with columns from <code>cell_identifiers</code> and a single <code>cluster_idx</code> for each cell</p>
172 172
     
173 173
     <h2 class="hasAnchor" id="functions"><a class="anchor" href="#functions"></a>Functions</h2>
174 174
 
... ...
@@ -32,9 +32,9 @@
32 32
 
33 33
 <meta property="og:title" content="Find a canonical contig to represent a cell — canonicalize_cell" />
34 34
 
35
-<meta property="og:description" content="Using filtering in `...` and sorting in `tie_break_keys` and `order` find a
35
+<meta property="og:description" content="Using filtering in contig_filter_args and sorting in tie_break_keys and order find a
36 36
 single, canonical contig to represent each cell
37
-Fields in `contig_fields` will be copied over to the `cell_tbl`." />
37
+Fields in contig_fields will be copied over to the cell_tbl." />
38 38
 <meta name="twitter:card" content="summary" />
39 39
 
40 40
 
... ...
@@ -65,7 +65,7 @@ Fields in `contig_fields` will be copied over to the `cell_tbl`." />
65 65
       </button>
66 66
       <span class="navbar-brand">
67 67
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
68
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
68
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
69 69
       </span>
70 70
     </div>
71 71
 
... ...
@@ -123,9 +123,9 @@ Fields in `contig_fields` will be copied over to the `cell_tbl`." />
123 123
 
124 124
     <div class="ref-description">
125 125
     
126
-    <p>Using filtering in `...` and sorting in `tie_break_keys` and `order` find a
126
+    <p>Using filtering in <code>contig_filter_args</code> and sorting in <code>tie_break_keys</code> and <code>order</code> find a
127 127
 single, canonical contig to represent each cell
128
-Fields in `contig_fields` will be copied over to the `cell_tbl`.</p>
128
+Fields in <code>contig_fields</code> will be copied over to the <code>cell_tbl</code>.</p>
129 129
     
130 130
     </div>
131 131
 
... ...
@@ -137,39 +137,41 @@ Fields in `contig_fields` will be copied over to the `cell_tbl`.</p>
137 137
     <colgroup><col class="name" /><col class="desc" /></colgroup>
138 138
     <tr>
139 139
       <th>ccdb</th>
140
-      <td><p>`ContigCellDB`</p></td>
140
+      <td><p><code><a href='ContigCellDB-fun.html'>ContigCellDB()</a></code></p></td>
141 141
     </tr>
142 142
     <tr>
143 143
       <th>contig_filter_args</th>
144
-      <td><p>an expression passed to dplyr::filter.  Unlike `filter`, multiple criteria must be `&amp;` together, rather than using commas to separate.
145
-that act on `ccdb$contig_tbl``</p></td>
144
+      <td><p>an expression passed to <code><a href='https://dplyr.tidyverse.org/reference/filter.html'>dplyr::filter()</a></code>.
145
+Unlike <code>filter</code>, multiple criteria must be <code>&amp;</code> together, rather than using
146
+commas to separate. These act on <code>ccdb$contig_tbl</code></p></td>
146 147
     </tr>
147 148
     <tr>
148 149
       <th>tie_break_keys</th>
149
-      <td><p>(optional) `character` naming fields in `contig_tbl`
150
+      <td><p>(optional) <code>character</code> naming fields in <code>contig_tbl</code>
150 151
 that are used sort the contig table in descending order.
151
-Used to break ties if `contig_filter_args` does not return a unique contig
152
+Used to break ties if <code>contig_filter_args</code> does not return a unique contig
152 153
 for each cluster</p></td>
153 154
     </tr>
154 155
     <tr>
155 156
       <th>contig_fields</th>
156
-      <td><p>Optional fields from `contig_tbl` that will be copied into
157
-the `cluster_tbl` from the canonical contig.</p></td>
157
+      <td><p>Optional fields from <code>contig_tbl</code> that will be copied into
158
+the <code>cluster_tbl</code> from the canonical contig.</p></td>
158 159
     </tr>
159 160
     <tr>
160 161
       <th>order</th>
161
-      <td><p>The rank order of the contig, based on `tie_break_keys`
162
-to return</p></td>
162
+      <td><p>The rank order of the contig, based on <code>tie_break_keys</code>
163
+to return.  If <code>tie_break_keys</code> included an ordered factor (such as chain)
164
+this could be used to return the second chain.</p></td>
163 165
     </tr>
164 166
     </table>
165 167
     
166 168
     <h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
167 169
 
168
-    <p>`ContigCellDB` with additional fields in `cell_tbl`</p>
170
+    <p><code>ContigCellDB</code> with additional fields in <code>cell_tbl</code></p>
169 171
     
170 172
     <h2 class="hasAnchor" id="see-also"><a class="anchor" href="#see-also"></a>See also</h2>
171 173
 
172
-    <div class='dont-index'><p>canonicalize_cluster</p></div>
174
+    <div class='dont-index'><p><code><a href='canonicalize_cluster.html'>canonicalize_cluster()</a></code></p></div>
173 175
     
174 176
 
175 177
     <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
... ...
@@ -63,7 +63,7 @@
63 63
       </button>
64 64
       <span class="navbar-brand">
65 65
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
66
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
66
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
67 67
       </span>
68 68
     </div>
69 69
 
... ...
@@ -135,44 +135,46 @@
135 135
     <colgroup><col class="name" /><col class="desc" /></colgroup>
136 136
     <tr>
137 137
       <th>ccdb</th>
138
-      <td><p>`ContigCellDB`</p></td>
138
+      <td><p><code><a href='ContigCellDB-fun.html'>ContigCellDB()</a></code></p></td>
139 139
     </tr>
140 140
     <tr>
141 141
       <th>contig_filter_args</th>
142
-      <td><p>an expression passed to dplyr::filter.  Unlike `filter`, multiple criteria must be `&amp;` together, rather than using commas to separate.
143
-that act on `ccdb$contig_tbl``</p></td>
142
+      <td><p>an expression passed to <code><a href='https://dplyr.tidyverse.org/reference/filter.html'>dplyr::filter()</a></code>.
143
+Unlike <code>filter</code>, multiple criteria must be <code>&amp;</code> together, rather than using
144
+commas to separate. These act on <code>ccdb$contig_tbl</code></p></td>
144 145
     </tr>
145 146
     <tr>
146 147
       <th>tie_break_keys</th>
147
-      <td><p>(optional) `character` naming fields in `contig_tbl`
148
+      <td><p>(optional) <code>character</code> naming fields in <code>contig_tbl</code>
148 149
 that are used sort the contig table in descending order.
149
-Used to break ties if `contig_filter_args` does not return a unique contig
150
+Used to break ties if <code>contig_filter_args</code> does not return a unique contig
150 151
 for each cluster</p></td>
151 152
     </tr>
152 153
     <tr>
153 154
       <th>order</th>
154
-      <td><p>The rank order of the contig, based on `tie_break_keys`
155
-to return</p></td>
155
+      <td><p>The rank order of the contig, based on <code>tie_break_keys</code>
156
+to return.  If <code>tie_break_keys</code> included an ordered factor (such as chain)
157
+this could be used to return the second chain.</p></td>
156 158
     </tr>
157 159
     <tr>
158 160
       <th>representative</th>
159
-      <td><p>an optional field from `contig_tbl` that will be made
160
-unique. Serve as a surrogate `cluster_pk`.</p></td>
161
+      <td><p>an optional field from <code>contig_tbl</code> that will be made
162
+unique. Serve as a surrogate <code>cluster_pk</code>.</p></td>
161 163
     </tr>
162 164
     <tr>
163 165
       <th>contig_fields</th>
164
-      <td><p>Optional fields from `contig_tbl` that will be copied into
165
-the `cluster_tbl` from the canonical contig.</p></td>
166
+      <td><p>Optional fields from <code>contig_tbl</code> that will be copied into
167
+the <code>cluster_tbl</code> from the canonical contig.</p></td>
166 168
     </tr>
167 169
     </table>
168 170
     
169 171
     <h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
170 172
 
171
-    <p>`ContigCellDB`</p>
173
+    <p><code><a href='ContigCellDB-fun.html'>ContigCellDB()</a></code></p>
172 174
     
173 175
     <h2 class="hasAnchor" id="see-also"><a class="anchor" href="#see-also"></a>See also</h2>
174 176
 
175
-    <div class='dont-index'><p>canonicalize_cell</p></div>
177
+    <div class='dont-index'><p><code><a href='canonicalize_cell.html'>canonicalize_cell()</a></code></p></div>
176 178
     
177 179
 
178 180
     <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
... ...
@@ -185,7 +187,7 @@ the `cluster_tbl` from the canonical contig.</p></td>
185 187
 #&gt; <span class='message'></span>
186 188
 #&gt; <span class='message'>    intersect, setdiff, setequal, union</span></div><div class='input'><span class='no'>ccdb_ex_small</span> <span class='kw'>=</span> <span class='no'>ccdb_ex</span>
187 189
 <span class='no'>ccdb_ex_small</span>$<span class='no'>cell_tbl</span> <span class='kw'>=</span> <span class='no'>ccdb_ex_small</span>$<span class='no'>cell_tbl</span>[<span class='fl'>1</span>:<span class='fl'>200</span>,]
188
-<span class='no'>ccdb_ex_small</span> <span class='kw'>=</span> <span class='fu'><a href='cdhit.html'>cdhit_ccdb</a></span>(<span class='no'>ccdb_ex_small</span>,
190
+<span class='no'>ccdb_ex_small</span> <span class='kw'>=</span> <span class='fu'><a href='cdhit_ccdb.html'>cdhit_ccdb</a></span>(<span class='no'>ccdb_ex_small</span>,
189 191
 <span class='kw'>sequence_key</span> <span class='kw'>=</span> <span class='st'>'cdr3_nt'</span>, <span class='kw'>type</span> <span class='kw'>=</span> <span class='st'>'DNA'</span>, <span class='kw'>cluster_name</span> <span class='kw'>=</span> <span class='st'>'DNA97'</span>,
190 192
 <span class='kw'>identity</span> <span class='kw'>=</span> <span class='fl'>.965</span>, <span class='kw'>min_length</span> <span class='kw'>=</span> <span class='fl'>12</span>, <span class='kw'>G</span> <span class='kw'>=</span> <span class='fl'>1</span>)
191 193
 <span class='no'>ccdb_ex_small</span> <span class='kw'>=</span> <span class='fu'><a href='fine_clustering.html'>fine_clustering</a></span>(<span class='no'>ccdb_ex_small</span>, <span class='kw'>sequence_key</span> <span class='kw'>=</span> <span class='st'>'cdr3_nt'</span>, <span class='kw'>type</span> <span class='kw'>=</span> <span class='st'>'DNA'</span>)</div><div class='output co'>#&gt; <span class='message'>Calculating intradistances on 329 clusters.</span></div><div class='output co'>#&gt; <span class='message'>Summarizing</span></div><div class='input'>
... ...
@@ -63,7 +63,7 @@
63 63
       </button>
64 64
       <span class="navbar-brand">
65 65
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
66
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
66
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
67 67
       </span>
68 68
     </div>
69 69
 
... ...
@@ -126,7 +126,10 @@
126 126
     </div>
127 127
 
128 128
     <pre class="usage"># S4 method for ContigCellDB
129
-$(x, name)</pre>
129
+$(x, name)
130
+
131
+# S4 method for ContigCellDB
132
+$(x, name) <- value</pre>
130 133
     
131 134
     <h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
132 135
     <table class="ref-arguments">
... ...
@@ -137,13 +140,17 @@ $(x, name)</pre>
137 140
     </tr>
138 141
     <tr>
139 142
       <th>name</th>
140
-      <td><p>a slot of a ContigCellDB object (one of  `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)</p></td>
143
+      <td><p>a slot of a ContigCellDB object (one of  <code><a href='https://www.rdocumentation.org/packages/base/topics/c'>c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')</a></code>)</p></td>
144
+    </tr>
145
+    <tr>
146
+      <th>value</th>
147
+      <td><p>The value assigned to a slot of ContigCellDB object</p></td>
141 148
     </tr>
142 149
     </table>
143 150
     
144 151
     <h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
145 152
 
146
-    <p>Slot of ContigCellDB</p>
153
+    <p>Update or return a slot of <code><a href='ContigCellDB-fun.html'>ContigCellDB()</a></code></p>
147 154
     
148 155
 
149 156
     <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
... ...
@@ -176,7 +183,12 @@ $(x, name)</pre>
176 183
 #&gt; </span><span style='color: #555555;'> 8</span><span> b6    4      ACCCACTTCCACGACG-1
177 184
 #&gt; </span><span style='color: #555555;'> 9</span><span> b6    4      ACGCCAGGTCCGAATT-1
178 185
 #&gt; </span><span style='color: #555555;'>10</span><span> b6    4      ACGCCAGTCCAATGGT-1
179
-#&gt; </span><span style='color: #555555;'># … with 822 more rows</span><span></div><div class='input'><span class='no'>ccdb_ex</span>$<span class='no'>cluster_tbl</span></div><div class='output co'>#&gt; </span><span style='color: #555555;'># A tibble: 0 x 0</span><span></div></span></pre>
186
+#&gt; </span><span style='color: #555555;'># … with 822 more rows</span><span></div><div class='input'><span class='no'>ccdb_ex</span>$<span class='no'>cluster_tbl</span></div><div class='output co'>#&gt; </span><span style='color: #555555;'># A tibble: 0 x 0</span><span></div><div class='input'><span class='no'>ccdb_ex</span>$<span class='no'>contig_pk</span> <span class='kw'>=</span> <span class='fu'><a href='https://www.rdocumentation.org/packages/base/topics/c'>c</a></span>(<span class='st'>"sample"</span>,<span class='st'>"barcode"</span>,<span class='st'>"contig_id"</span>) <span class='co'># 'pop' is technically redundant with 'sample'</span>
187
+<span class='co'># Take a subset of ccdb_ex</span>
188
+<span class='no'>ccdb_ex</span></div><div class='output co'>#&gt; ContigCellDB of 1508 contigs; 832 cells; and 0 clusters.
189
+#&gt; Contigs keyed by sample, barcode, contig_id; cells keyed by pop, sample, barcode.</div><div class='input'><span class='no'>ccdb_ex</span>$<span class='no'>contig_tbl</span> <span class='kw'>=</span> <span class='kw pkg'>dplyr</span><span class='kw ns'>::</span><span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span>(<span class='no'>ccdb_ex</span>$<span class='no'>contig_tbl</span>, <span class='no'>pop</span> <span class='kw'>==</span> <span class='st'>'b6'</span>)
190
+<span class='no'>ccdb_ex</span></div><div class='output co'>#&gt; ContigCellDB of 767 contigs; 426 cells; and 0 clusters.
191
+#&gt; Contigs keyed by sample, barcode, contig_id; cells keyed by pop, sample, barcode.</div></span></pre>
180 192
   </div>
181 193
   <div class="col-md-3 hidden-xs hidden-sm" id="sidebar">
182 194
     <h2>Contents</h2>
... ...
@@ -63,7 +63,7 @@
63 63
       </button>
64 64
       <span class="navbar-brand">
65 65
         <a class="navbar-link" href="../index.html">CellaRepertorium</a>
66
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.1</span>
66
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.2</span>
67 67
       </span>
68 68
     </div>
69 69
 
... ...
@@ -137,7 +137,7 @@ $(x, name) <- value</pre>
137 137
     </tr>
138 138
     <tr>
139 139
       <th>name</th>
140
-      <td><p>Name of a slot for a ContigCellDB object (one of  `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)</p></td>
140
+      <td><p>Name of a slot for a ContigCellDB object (one of  <code><a href='https://www.rdocumentation.org/packages/base/topics/c'>c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')</a></code>)</p></td>
141 141
     </tr>
142 142
     <tr>
143 143
       <th>value</th>
... ...
@@ -6,7 +6,7 @@
6 6
 <meta http-equiv="X-UA-Compatible" content="IE=edge">
7 7
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
8 8
 
9
-<title>A preconstructed `ContigClusterDB` from the `contigs_qc` data — ccdb_ex • CellaRepertorium</title>
9
+<title>A preconstructed <code>ContigClusterDB</code> from the <code>contigs_qc</code> data — ccdb_ex • CellaRepertorium</title>
10 10
 
11 11
 <!-- jquery -->
12 12
 <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js" integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script>
... ...
@@ -30,7 +30,7 @@
30 30
 
31 31
 
32 32
 
33
-<meta property="og:title" content="A preconstructed `ContigClusterDB` from the `contigs_qc` data — ccdb_ex" />
33