Browse code

Update docs Bump version Require 3.5 (as data was serialized in v3)

Andrew McDavid authored on 02/06/2019 04:33:47
Showing 9 changed files

... ...
@@ -1,3 +1,5 @@
1 1
 ^.*\.Rproj$
2 2
 ^\.Rproj\.user$
3 3
 extdata/.*\.json$
4
+^data-raw$
5
+extdata/refdata-cellranger-vdj-GRCh38-alts-ensembl-2.0.0/
... ...
@@ -3,3 +3,6 @@
3 3
 .DS_Store
4 4
 *.so
5 5
 .Rproj.user
6
+data-raw/*.csv
7
+data-raw/*.json
8
+inst/doc
... ...
@@ -1,7 +1,7 @@
1 1
 Package: CellaRepertorium
2 2
 Type: Package
3 3
 Title: Methods for clustering and analyzing high-throughput single cell immune cell repertoires (RepSeq)
4
-Version: 0.2.1
4
+Version: 0.2.2
5 5
 Author: Andrew McDavid
6 6
 Maintainer: Andrew McDavid <Andrew_McDavid@urmc.rochester.edu>
7 7
 Description: Methods to cluster and analyze high-throughput single cell immune cell repertoires,
... ...
@@ -11,6 +11,7 @@ Description: Methods to cluster and analyze high-throughput single cell immune c
11 11
 License: GPL-3
12 12
 Encoding: UTF-8
13 13
 LazyData: true
14
+Depends: R (>= 3.5.0)
14 15
 Imports:
15 16
    dplyr,
16 17
    tibble,
... ...
@@ -21,10 +22,17 @@ Imports:
21 22
    methods,
22 23
    rlang,
23 24
    purrr,
24
-   Matrix
25
+   Matrix,
26
+   S4Vectors
25 27
 Suggests: 
26
-   testthat,
27
-   readr
28
-RoxygenNote: 6.1.0
28
+    testthat,
29
+    readr,
30
+    knitr,
31
+    rmarkdown,
32
+    ggplot2,
33
+    BiocStyle,
34
+    tidyr
35
+RoxygenNote: 6.1.1
29 36
 LinkingTo: Rcpp
30 37
 NeedsCompilation: yes
38
+VignetteBuilder: knitr
... ...
@@ -1,5 +1,7 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
+export(ContigCellDB)
4
+export(ContigCellDB_10XVDJ)
3 5
 export(canonicalize_by_chain)
4 6
 export(canonicalize_by_prevalence)
5 7
 export(cdhit)
... ...
@@ -12,6 +14,8 @@ export(modal_category)
12 14
 export(np)
13 15
 export(pairing_tables)
14 16
 import(Biostrings)
17
+importFrom(S4Vectors,List)
18
+importFrom(S4Vectors,SimpleList)
15 19
 importFrom(dplyr,"%>%")
16 20
 importFrom(dplyr,anti_join)
17 21
 importFrom(dplyr,bind_rows)
18 22
new file mode 100644
... ...
@@ -0,0 +1,41 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/AllClasses.R
3
+\name{ContigCellDB}
4
+\alias{ContigCellDB}
5
+\alias{ContigCellDB_10XVDJ}
6
+\title{Construct a ContigCellDB}
7
+\usage{
8
+ContigCellDB(contig_tbl, contig_pk, cell_tbl, cell_pk,
9
+  cluster_tbls = List(), cluster_pk = List())
10
+
11
+ContigCellDB_10XVDJ(contig_tbl, contig_pk = c("barcode", "contig_id"),
12
+  cell_pk = "barcode")
13
+}
14
+\arguments{
15
+\item{contig_tbl}{a data frame of contigs, and additional fields describing their properties}
16
+
17
+\item{contig_pk}{character vector naming fields in `contig_tbl` that uniquely identify a row/contig}
18
+
19
+\item{cell_tbl}{a data frame of cell barcodes, and (optional) additional fields describing their properties}
20
+
21
+\item{cell_pk}{character vector naming fields in `cell_tbl` that uniquely identify a cell barcode}
22
+
23
+\item{cluster_tbls}{An optional list of data frames that provide cluster assignments for each contig}
24
+
25
+\item{cluster_pk}{If `cluster_tbls` was provided, a list of character vectors naming fields in `cluster_tbls` that uniquely identify a cluster}
26
+}
27
+\value{
28
+\code{ContigCellDB}
29
+}
30
+\description{
31
+Construct a ContigCellDB
32
+}
33
+\section{Functions}{
34
+\itemize{
35
+\item \code{ContigCellDB_10XVDJ}: provide defaults that correspond to identifiers in 10X VDJ data
36
+}}
37
+
38
+\examples{
39
+data(contigs_qc)
40
+ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), cell_pk = c('barcode', 'pop', 'sample'))
41
+}
0 42
new file mode 100644
... ...
@@ -0,0 +1,23 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/data.R
3
+\docType{data}
4
+\name{contigs_qc}
5
+\alias{contigs_qc}
6
+\title{Filtered and annotated contigs of TCR from mice}
7
+\format{A data frame of 3399 contigs and 22 fields,
8
+ all except 4 are originally defined in https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/output/annotation#contig
9
+ The following fields were defined ex post facto
10
+ \describe{
11
+   \item{anno_file}{Path to original csv file}
12
+   \item{pop}{Mouse strain}
13
+   \item{sample}{An artificial "replicate" from the original data defined by subsampling with replacement}
14
+   \item{celltype}{The putative cell type of the contig}
15
+ }}
16
+\usage{
17
+contigs_qc
18
+}
19
+\description{
20
+The details of how these are generated are shown in the vignette mouse_tcell_qc
21
+and are serialized to serve as examples for other vignettes and documentation.
22
+}
23
+\keyword{datasets}
... ...
@@ -4,12 +4,12 @@
4 4
 \alias{fancy_name_contigs}
5 5
 \title{Generate a legible name for a series of contigs}
6 6
 \usage{
7
-fancy_name_contigs(contig_frame, prefix)
7
+fancy_name_contigs(contig_tbl, prefix)
8 8
 }
9 9
 \arguments{
10
-\item{contig_frame}{An `all_contig_annotations.csv` file, output from VDJ Cell ranger.  Importantly, this should contain columns `chain`, `v_gene`, `d_gene`, `j_gene`}
11
-
12 10
 \item{prefix}{an optional prefix added to each contig, eg, possibly a sample id.}
11
+
12
+\item{contig_tbl}{An `all_contig_annotations.csv` file, output from VDJ Cell ranger.  Importantly, this should contain columns `chain`, `v_gene`, `d_gene`, `j_gene`}
13 13
 }
14 14
 \value{
15 15
 \code{character}
... ...
@@ -19,9 +19,10 @@ Generate a legible name for a series of contigs
19 19
 }
20 20
 \examples{
21 21
 library(dplyr)
22
-contig_anno_path = system.file('extdata', 'cellranger_contig_annotation.csv', package = 'CellaRepertorium')
22
+contig_anno_path = system.file('extdata', 'all_contig_annotations_balbc_1.csv.xz',
23
+    package = 'CellaRepertorium')
23 24
 contig_anno = readr::read_csv(contig_anno_path)
24 25
 contig_anno = contig_anno \%>\% mutate(fancy_name =
25
-    fancy_name_contigs(., prefix = paste(sample, pop, sep = '_')))
26
+    fancy_name_contigs(., prefix = 'b6_1'))
26 27
 stopifnot(!any(duplicated(contig_anno$fancy_name)))
27 28
 }
... ...
@@ -6,7 +6,7 @@
6 6
 \usage{
7 7
 fine_cluster(seqs, type = "AA", big_memory_brute = FALSE,
8 8
   method = "levenshtein", substitution_matrix = "BLOSUM100",
9
-  cluster = "hclust", cluster_method = "complete")
9
+  cluster_fun = "hclust", cluster_method = "complete")
10 10
 }
11 11
 \arguments{
12 12
 \item{seqs}{character vector, DNAStringSet or AAStringSet}
... ...
@@ -19,6 +19,8 @@ fine_cluster(seqs, type = "AA", big_memory_brute = FALSE,
19 19
 
20 20
 \item{substitution_matrix}{a character vector naming a substitution matrix available in Biostrings, or a substitution matrix itself}
21 21
 
22
+\item{cluster_fun}{`character`, one of "hclust" or "none", determining if distance matrices should also be clustered with `hclust`}
23
+
22 24
 \item{cluster_method}{character passed to `hclust`}
23 25
 }
24 26
 \value{
... ...
@@ -65,7 +65,8 @@ cluster_tbl2 = bind_rows(cluster_tbl, cluster_tbl \%>\% mutate(cell_idx = rep(4:
65 65
 #all pairs found twice
66 66
 pt3 = pairing_tables(cluster_tbl2, 'cell_idx', 'clust_idx', canonicalize_by_prevalence, min_expansion = 1)
67 67
 pt3$cell_tbl
68
-# canonicalize_by_chain by default expects fields umis, reads to break ties, could wrap the function to change this
68
+# `canonicalize_by_chain` expects fields `umis`, `reads`
69
+# to break ties,  wrap the function to change this
69 70
 cluster_tbl3 = cluster_tbl2 \%>\%
70 71
     mutate(umis = 1, reads = 1, chain = rep(c('TRA', 'TRB'), times = 6))
71 72
 pt4 = pairing_tables(cluster_tbl3, 'cell_idx', 'clust_idx',