... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: CellaRepertorium |
2 | 2 |
Type: Package |
3 | 3 |
Title: Methods for clustering and analyzing high-throughput single cell immune cell repertoires (RepSeq) |
4 |
-Version: 0.2.1 |
|
4 |
+Version: 0.2.2 |
|
5 | 5 |
Author: Andrew McDavid |
6 | 6 |
Maintainer: Andrew McDavid <Andrew_McDavid@urmc.rochester.edu> |
7 | 7 |
Description: Methods to cluster and analyze high-throughput single cell immune cell repertoires, |
... | ... |
@@ -11,6 +11,7 @@ Description: Methods to cluster and analyze high-throughput single cell immune c |
11 | 11 |
License: GPL-3 |
12 | 12 |
Encoding: UTF-8 |
13 | 13 |
LazyData: true |
14 |
+Depends: R (>= 3.5.0) |
|
14 | 15 |
Imports: |
15 | 16 |
dplyr, |
16 | 17 |
tibble, |
... | ... |
@@ -21,10 +22,17 @@ Imports: |
21 | 22 |
methods, |
22 | 23 |
rlang, |
23 | 24 |
purrr, |
24 |
- Matrix |
|
25 |
+ Matrix, |
|
26 |
+ S4Vectors |
|
25 | 27 |
Suggests: |
26 |
- testthat, |
|
27 |
- readr |
|
28 |
-RoxygenNote: 6.1.0 |
|
28 |
+ testthat, |
|
29 |
+ readr, |
|
30 |
+ knitr, |
|
31 |
+ rmarkdown, |
|
32 |
+ ggplot2, |
|
33 |
+ BiocStyle, |
|
34 |
+ tidyr |
|
35 |
+RoxygenNote: 6.1.1 |
|
29 | 36 |
LinkingTo: Rcpp |
30 | 37 |
NeedsCompilation: yes |
38 |
+VignetteBuilder: knitr |
... | ... |
@@ -1,5 +1,7 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 |
+export(ContigCellDB) |
|
4 |
+export(ContigCellDB_10XVDJ) |
|
3 | 5 |
export(canonicalize_by_chain) |
4 | 6 |
export(canonicalize_by_prevalence) |
5 | 7 |
export(cdhit) |
... | ... |
@@ -12,6 +14,8 @@ export(modal_category) |
12 | 14 |
export(np) |
13 | 15 |
export(pairing_tables) |
14 | 16 |
import(Biostrings) |
17 |
+importFrom(S4Vectors,List) |
|
18 |
+importFrom(S4Vectors,SimpleList) |
|
15 | 19 |
importFrom(dplyr,"%>%") |
16 | 20 |
importFrom(dplyr,anti_join) |
17 | 21 |
importFrom(dplyr,bind_rows) |
18 | 22 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,41 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/AllClasses.R |
|
3 |
+\name{ContigCellDB} |
|
4 |
+\alias{ContigCellDB} |
|
5 |
+\alias{ContigCellDB_10XVDJ} |
|
6 |
+\title{Construct a ContigCellDB} |
|
7 |
+\usage{ |
|
8 |
+ContigCellDB(contig_tbl, contig_pk, cell_tbl, cell_pk, |
|
9 |
+ cluster_tbls = List(), cluster_pk = List()) |
|
10 |
+ |
|
11 |
+ContigCellDB_10XVDJ(contig_tbl, contig_pk = c("barcode", "contig_id"), |
|
12 |
+ cell_pk = "barcode") |
|
13 |
+} |
|
14 |
+\arguments{ |
|
15 |
+\item{contig_tbl}{a data frame of contigs, and additional fields describing their properties} |
|
16 |
+ |
|
17 |
+\item{contig_pk}{character vector naming fields in `contig_tbl` that uniquely identify a row/contig} |
|
18 |
+ |
|
19 |
+\item{cell_tbl}{a data frame of cell barcodes, and (optional) additional fields describing their properties} |
|
20 |
+ |
|
21 |
+\item{cell_pk}{character vector naming fields in `cell_tbl` that uniquely identify a cell barcode} |
|
22 |
+ |
|
23 |
+\item{cluster_tbls}{An optional list of data frames that provide cluster assignments for each contig} |
|
24 |
+ |
|
25 |
+\item{cluster_pk}{If `cluster_tbls` was provided, a list of character vectors naming fields in `cluster_tbls` that uniquely identify a cluster} |
|
26 |
+} |
|
27 |
+\value{ |
|
28 |
+\code{ContigCellDB} |
|
29 |
+} |
|
30 |
+\description{ |
|
31 |
+Construct a ContigCellDB |
|
32 |
+} |
|
33 |
+\section{Functions}{ |
|
34 |
+\itemize{ |
|
35 |
+\item \code{ContigCellDB_10XVDJ}: provide defaults that correspond to identifiers in 10X VDJ data |
|
36 |
+}} |
|
37 |
+ |
|
38 |
+\examples{ |
|
39 |
+data(contigs_qc) |
|
40 |
+ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), cell_pk = c('barcode', 'pop', 'sample')) |
|
41 |
+} |
0 | 42 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,23 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/data.R |
|
3 |
+\docType{data} |
|
4 |
+\name{contigs_qc} |
|
5 |
+\alias{contigs_qc} |
|
6 |
+\title{Filtered and annotated contigs of TCR from mice} |
|
7 |
+\format{A data frame of 3399 contigs and 22 fields, |
|
8 |
+ all except 4 are originally defined in https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/output/annotation#contig |
|
9 |
+ The following fields were defined ex post facto |
|
10 |
+ \describe{ |
|
11 |
+ \item{anno_file}{Path to original csv file} |
|
12 |
+ \item{pop}{Mouse strain} |
|
13 |
+ \item{sample}{An artificial "replicate" from the original data defined by subsampling with replacement} |
|
14 |
+ \item{celltype}{The putative cell type of the contig} |
|
15 |
+ }} |
|
16 |
+\usage{ |
|
17 |
+contigs_qc |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+The details of how these are generated are shown in the vignette mouse_tcell_qc |
|
21 |
+and are serialized to serve as examples for other vignettes and documentation. |
|
22 |
+} |
|
23 |
+\keyword{datasets} |
... | ... |
@@ -4,12 +4,12 @@ |
4 | 4 |
\alias{fancy_name_contigs} |
5 | 5 |
\title{Generate a legible name for a series of contigs} |
6 | 6 |
\usage{ |
7 |
-fancy_name_contigs(contig_frame, prefix) |
|
7 |
+fancy_name_contigs(contig_tbl, prefix) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 |
-\item{contig_frame}{An `all_contig_annotations.csv` file, output from VDJ Cell ranger. Importantly, this should contain columns `chain`, `v_gene`, `d_gene`, `j_gene`} |
|
11 |
- |
|
12 | 10 |
\item{prefix}{an optional prefix added to each contig, eg, possibly a sample id.} |
11 |
+ |
|
12 |
+\item{contig_tbl}{An `all_contig_annotations.csv` file, output from VDJ Cell ranger. Importantly, this should contain columns `chain`, `v_gene`, `d_gene`, `j_gene`} |
|
13 | 13 |
} |
14 | 14 |
\value{ |
15 | 15 |
\code{character} |
... | ... |
@@ -19,9 +19,10 @@ Generate a legible name for a series of contigs |
19 | 19 |
} |
20 | 20 |
\examples{ |
21 | 21 |
library(dplyr) |
22 |
-contig_anno_path = system.file('extdata', 'cellranger_contig_annotation.csv', package = 'CellaRepertorium') |
|
22 |
+contig_anno_path = system.file('extdata', 'all_contig_annotations_balbc_1.csv.xz', |
|
23 |
+ package = 'CellaRepertorium') |
|
23 | 24 |
contig_anno = readr::read_csv(contig_anno_path) |
24 | 25 |
contig_anno = contig_anno \%>\% mutate(fancy_name = |
25 |
- fancy_name_contigs(., prefix = paste(sample, pop, sep = '_'))) |
|
26 |
+ fancy_name_contigs(., prefix = 'b6_1')) |
|
26 | 27 |
stopifnot(!any(duplicated(contig_anno$fancy_name))) |
27 | 28 |
} |
... | ... |
@@ -6,7 +6,7 @@ |
6 | 6 |
\usage{ |
7 | 7 |
fine_cluster(seqs, type = "AA", big_memory_brute = FALSE, |
8 | 8 |
method = "levenshtein", substitution_matrix = "BLOSUM100", |
9 |
- cluster = "hclust", cluster_method = "complete") |
|
9 |
+ cluster_fun = "hclust", cluster_method = "complete") |
|
10 | 10 |
} |
11 | 11 |
\arguments{ |
12 | 12 |
\item{seqs}{character vector, DNAStringSet or AAStringSet} |
... | ... |
@@ -19,6 +19,8 @@ fine_cluster(seqs, type = "AA", big_memory_brute = FALSE, |
19 | 19 |
|
20 | 20 |
\item{substitution_matrix}{a character vector naming a substitution matrix available in Biostrings, or a substitution matrix itself} |
21 | 21 |
|
22 |
+\item{cluster_fun}{`character`, one of "hclust" or "none", determining if distance matrices should also be clustered with `hclust`} |
|
23 |
+ |
|
22 | 24 |
\item{cluster_method}{character passed to `hclust`} |
23 | 25 |
} |
24 | 26 |
\value{ |
... | ... |
@@ -65,7 +65,8 @@ cluster_tbl2 = bind_rows(cluster_tbl, cluster_tbl \%>\% mutate(cell_idx = rep(4: |
65 | 65 |
#all pairs found twice |
66 | 66 |
pt3 = pairing_tables(cluster_tbl2, 'cell_idx', 'clust_idx', canonicalize_by_prevalence, min_expansion = 1) |
67 | 67 |
pt3$cell_tbl |
68 |
-# canonicalize_by_chain by default expects fields umis, reads to break ties, could wrap the function to change this |
|
68 |
+# `canonicalize_by_chain` expects fields `umis`, `reads` |
|
69 |
+# to break ties; wrap the function to change this |
|
69 | 70 |
cluster_tbl3 = cluster_tbl2 \%>\% |
70 | 71 |
mutate(umis = 1, reads = 1, chain = rep(c('TRA', 'TRB'), times = 6)) |
71 | 72 |
pt4 = pairing_tables(cluster_tbl3, 'cell_idx', 'clust_idx', |