... | ... |
@@ -1,31 +1,41 @@ |
1 |
-Package: CellaRepertorium |
|
2 | 1 |
Type: Package |
3 |
-Title: Methods for clustering and analyzing high-throughput single cell immune cell repertoires (RepSeq) |
|
4 |
-Version: 0.3.0 |
|
5 |
-Author: Andrew McDavid |
|
2 |
+Package: CellaRepertorium |
|
3 |
+Title: Methods for clustering and analyzing high-throughput |
|
4 |
+ single cell immune cell repertoires (RepSeq) |
|
5 |
+Version: 0.3.1 |
|
6 |
+Authors@R: |
|
7 |
+ c(person(given = "Andrew", |
|
8 |
+ family = "McDavid", |
|
9 |
+ role = c('aut', 'cre'), |
|
10 |
+ email = "Andrew_McDavid@urmc.rochester.edu"), |
|
11 |
+ person(given = "Yu", |
|
12 |
+ family = "Gu", |
|
13 |
+ role = "aut", |
|
14 |
+ email = "Yu_Gu@urmc.rochester.edu")) |
|
6 | 15 |
Maintainer: Andrew McDavid <Andrew_McDavid@urmc.rochester.edu> |
7 |
-Description: Methods to cluster and analyze high-throughput single cell immune cell repertoires, |
|
8 |
- especially from the 10X Genomics VDJ solution. |
|
9 |
- Contains an R interface to CD-HIT (Li and Godzik 2006). |
|
10 |
- Tests for specific expansion, as well as omnibus oligoclonality under hypergeometric models. |
|
16 |
+Description: Methods to cluster and analyze high-throughput |
|
17 |
+ single cell immune cell repertoires, especially from the 10X Genomics |
|
18 |
+ VDJ solution. Contains an R interface to CD-HIT (Li and Godzik 2006). |
|
19 |
+ Methods to visualize and analyze paired heavy-light chain data. |
|
20 |
+ Tests for specific expansion, as well as omnibus oligoclonality under |
|
21 |
+ hypergeometric models. |
|
11 | 22 |
License: GPL-3 |
12 |
-Encoding: UTF-8 |
|
13 |
-LazyData: true |
|
14 |
-Depends: R (>= 3.5.0) |
|
23 |
+Depends: |
|
24 |
+ R (>= 3.5.0) |
|
15 | 25 |
Imports: |
16 |
- dplyr, |
|
17 |
- tibble, |
|
18 |
- stringr, |
|
19 |
- Biostrings, |
|
20 |
- Rcpp, |
|
21 |
- reshape2, |
|
22 |
- methods, |
|
23 |
- rlang, |
|
24 |
- purrr, |
|
25 |
- Matrix, |
|
26 |
- S4Vectors, |
|
27 |
- tidyr, |
|
28 |
- forcats |
|
26 |
+ dplyr, |
|
27 |
+ tibble, |
|
28 |
+ stringr, |
|
29 |
+ Biostrings, |
|
30 |
+ Rcpp, |
|
31 |
+ reshape2, |
|
32 |
+ methods, |
|
33 |
+ rlang, |
|
34 |
+ purrr, |
|
35 |
+ Matrix, |
|
36 |
+ S4Vectors, |
|
37 |
+ tidyr, |
|
38 |
+ forcats |
|
29 | 39 |
Suggests: |
30 | 40 |
testthat, |
31 | 41 |
readr, |
... | ... |
@@ -33,8 +43,15 @@ Suggests: |
33 | 43 |
rmarkdown, |
34 | 44 |
ggplot2, |
35 | 45 |
BiocStyle, |
36 |
- ggdendro |
|
37 |
-RoxygenNote: 6.1.1 |
|
38 |
-LinkingTo: Rcpp |
|
46 |
+ ggdendro, |
|
47 |
+ broom, |
|
48 |
+ lme4, |
|
49 |
+ RColorBrewer |
|
50 |
+LinkingTo: |
|
51 |
+ Rcpp |
|
52 |
+VignetteBuilder: |
|
53 |
+ knitr |
|
54 |
+Encoding: UTF-8 |
|
55 |
+LazyData: true |
|
39 | 56 |
NeedsCompilation: yes |
40 |
-VignetteBuilder: knitr |
|
57 |
+RoxygenNote: 6.1.1 |
... | ... |
@@ -4,6 +4,7 @@ export(ContigCellDB) |
4 | 4 |
export(ContigCellDB_10XVDJ) |
5 | 5 |
export(canonicalize_by_chain) |
6 | 6 |
export(canonicalize_by_prevalence) |
7 |
+export(canonicalize_by_subset) |
|
7 | 8 |
export(cdhit) |
8 | 9 |
export(cdhit_ccdb) |
9 | 10 |
export(cluster_permute_test) |
... | ... |
@@ -12,7 +13,6 @@ export(enumerate_pairing) |
12 | 13 |
export(fancy_name_contigs) |
13 | 14 |
export(fine_cluster_seqs) |
14 | 15 |
export(fine_clustering) |
15 |
-export(get_canonical_representative) |
|
16 | 16 |
export(ig_chain_recode) |
17 | 17 |
export(modal_category) |
18 | 18 |
export(np) |
... | ... |
@@ -28,14 +28,17 @@ importFrom(dplyr,anti_join) |
28 | 28 |
importFrom(dplyr,bind_cols) |
29 | 29 |
importFrom(dplyr,bind_rows) |
30 | 30 |
importFrom(dplyr,case_when) |
31 |
+importFrom(dplyr,do) |
|
31 | 32 |
importFrom(dplyr,filter) |
32 | 33 |
importFrom(dplyr,group_by) |
33 | 34 |
importFrom(dplyr,left_join) |
34 | 35 |
importFrom(dplyr,mutate) |
35 | 36 |
importFrom(dplyr,right_join) |
37 |
+importFrom(dplyr,rowwise) |
|
36 | 38 |
importFrom(dplyr,select) |
37 | 39 |
importFrom(dplyr,semi_join) |
38 | 40 |
importFrom(dplyr,summarize) |
41 |
+importFrom(dplyr,transmute) |
|
39 | 42 |
importFrom(dplyr,ungroup) |
40 | 43 |
importFrom(methods,"slot<-") |
41 | 44 |
importFrom(methods,as) |
... | ... |
@@ -28,7 +28,7 @@ valid_KeyedTbl = function(tbl, keys){ |
28 | 28 |
#' @param cell_tbl a data frame of cell barcodes, and (optional) additional fields describing their properties |
29 | 29 |
#' @param cell_pk character vector naming fields in `cell_tbl` that uniquely identify a cell barcode |
30 | 30 |
#' @param cluster_tbl A data frame that provide cluster assignments for each contig |
31 |
-#' @param cluster_pk If `cluster_tbl` was provided, a list of character vector naming fields in `cluster_tbl` that uniquely identify a cluster |
|
31 |
+#' @param cluster_pk If `cluster_tbl` was provided, a character vector naming fields in `cluster_tbl` that uniquely identify a cluster |
|
32 | 32 |
#' |
33 | 33 |
#' @return \code{ContigCellDB} |
34 | 34 |
#' @export |
... | ... |
@@ -39,7 +39,8 @@ valid_KeyedTbl = function(tbl, keys){ |
39 | 39 |
#' |
40 | 40 |
#' @examples |
41 | 41 |
#' data(contigs_qc) |
42 |
-#' ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), cell_pk = c('barcode', 'pop', 'sample')) |
|
42 |
+#' ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), |
|
43 |
+#' cell_pk = c('barcode', 'pop', 'sample')) |
|
43 | 44 |
ContigCellDB = function(contig_tbl, contig_pk, cell_tbl, cell_pk, cluster_tbl, cluster_pk = character()){ |
44 | 45 |
valid_KeyedTbl(contig_tbl, contig_pk) |
45 | 46 |
equalized = FALSE |
... | ... |
@@ -68,12 +69,14 @@ ContigCellDB_10XVDJ = function(contig_tbl, contig_pk = c('barcode', 'contig_id') |
68 | 69 |
#' Creat a method for ContigCellDB object to access its slots |
69 | 70 |
#' |
70 | 71 |
#' @param x A ContigCellDB object |
71 |
-#' @param name Name of a slot for a ContigCellDB object |
|
72 |
+#' @param name a slot of a ContigCellDB object (one of `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`) |
|
72 | 73 |
#' |
73 | 74 |
#' @return Slots of ContigCellDB |
74 | 75 |
#' @export |
75 | 76 |
#' |
76 | 77 |
#' @examples |
78 |
+#' ccdb_ex$contig_tbl |
|
79 |
+#' ccdb_ex$cell_tbl |
|
77 | 80 |
#' ccdb_ex$cluster_tbl |
78 | 81 |
setMethod("$", signature = c(x = 'ContigCellDB'), function(x, name){ |
79 | 82 |
if(name %in% c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')){ |
... | ... |
@@ -83,17 +86,21 @@ setMethod("$", signature = c(x = 'ContigCellDB'), function(x, name){ |
83 | 86 |
} |
84 | 87 |
}) |
85 | 88 |
|
86 |
-#' Creat a function of ContigCellDB object to replace values of its slots |
|
89 |
+#' Create a function of ContigCellDB object to replace values of its slots |
|
87 | 90 |
#' |
88 | 91 |
#' @param x A ContigCellDB object |
89 |
-#' @param name Name of a slot for a ContigCellDB object |
|
92 |
+#' @param name Name of a slot for a ContigCellDB object (one of `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`) |
|
90 | 93 |
#' @param value The value assigned to a slot of ContigCellDB object |
91 | 94 |
#' |
92 | 95 |
#' @return A ContigCellDB object |
93 | 96 |
#' @export |
94 | 97 |
#' |
95 | 98 |
#' @examples |
96 |
-#' ccdb_ex$contig_pk <- c("pop","barcode","contig_id") |
|
99 |
+#' ccdb_ex$contig_pk = c("sample","barcode","contig_id") # 'pop' is technically redundant with 'sample' |
|
100 |
+#' # Take a subset of ccdb_ex |
|
101 |
+#' ccdb_ex |
|
102 |
+#' ccdb_ex$contig_tbl = dplyr::filter(ccdb_ex$contig_tbl, pop == 'b6') |
|
103 |
+#' ccdb_ex |
|
97 | 104 |
setReplaceMethod("$", signature = c(x = 'ContigCellDB'), function(x, name, value){ |
98 | 105 |
if(name %in% c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')){ |
99 | 106 |
slot(x, name) <- value |
... | ... |
@@ -82,8 +82,14 @@ cdhit = function(seqs, identity = NULL, kmerSize = NULL, min_length = 6, s = 1, |
82 | 82 |
##' @param object An object of class `ClusterContigDB` |
83 | 83 |
##' @param sequence_key `character` naming the column in the `contig_tbl` containing the sequence to be clustered |
84 | 84 |
##' @param type one of 'DNA' or 'AA' |
85 |
-##' @param cluster_name What index should the clustering be stored in? By default, a new, unnamed cluster is added. |
|
85 |
+##' @param cluster_name `character` specifying key, and name for the clustering. |
|
86 | 86 |
##' @export |
87 |
+##' @examples |
|
88 |
+##' res = CellaRepertorium:::cdhit_ccdb(ccdb_ex, 'cdr3_nt', type = 'DNA', |
|
89 |
+##' cluster_name = 'DNA97', identity = .965, min_length = 12, G = 1) |
|
90 |
+##' res$cluster_tbl |
|
91 |
+##' res$contig_tbl |
|
92 |
+##' res$cluster_pk |
|
87 | 93 |
cdhit_ccdb = function(object, sequence_key, type = c('DNA', 'AA'), cluster_name = 'cluster_idx', ...){ |
88 | 94 |
seqs = object$contig_tbl[[sequence_key]] |
89 | 95 |
if(length(seqs) < 1) stop("No sequences were provided") |
... | ... |
@@ -7,6 +7,7 @@ cluster_germline = function(ccdb, segment_keys = c('v_gene', 'j_gene', 'chain'), |
7 | 7 |
replace_cluster_tbl(ccdb, cluster_tbl, cl_con_tbl, cluster_pk = cluster_name) |
8 | 8 |
} |
9 | 9 |
|
10 |
+globalVariables(c('fc', 'd(medoid)', 'is_medoid', 'n_cluster')) |
|
10 | 11 |
|
11 | 12 |
# Also canonicalize.. |
12 | 13 |
#' Perform additional clustering of sequences within groups |
... | ... |
@@ -146,8 +147,8 @@ fine_cluster_seqs = function(seqs, type = 'AA', big_memory_brute = FALSE, method |
146 | 147 |
} |
147 | 148 |
|
148 | 149 |
medoid = which.min(colMeans(sd)) |
149 |
- homology = sd[medoid,] |
|
150 |
- list(cluster = hc, distance = sd, homology = homology, medoid = medoid, max_dist = max(sd)) |
|
150 |
+ distance = sd[medoid,] |
|
151 |
+ list(cluster = hc, distance_mat = sd, distance = distance, medoid = medoid, max_dist = max(sd)) |
|
151 | 152 |
} |
152 | 153 |
|
153 | 154 |
|
... | ... |
@@ -1,3 +1,4 @@ |
1 |
+globalVariables(c('prev')) |
|
1 | 2 |
#' For each cell, return a single, canonical chain-cluster |
2 | 3 |
#' |
3 | 4 |
#' In single cell data, multiple chains (heavy-light or alpha-beta) are expected. In some cases, there could be more than two (eg multiple alpha alleles for T cells). |
... | ... |
@@ -19,7 +20,7 @@ canonicalize_by_prevalence = function(tbl, cell_identifiers = 'barcode', cluster |
19 | 20 |
|
20 | 21 |
|
21 | 22 |
|
22 |
- |
|
23 |
+# I believe this function is obsoleted by `canonicalize_by_subset`` |
|
23 | 24 |
#' @param sort_factors `character` vector naming columns in `tbl` to sorted on, within `cell_identifier`. Sorted by first element first, then ties broken by subsequent elements. Sorted in decreasing order for each element. |
24 | 25 |
#' @param chain_levels an optional `character` vector providing the sort order of the `chain` column in `tbl`. Set to length zero to disable. |
25 | 26 |
#' @export |
... | ... |
@@ -35,12 +36,40 @@ canonicalize_by_chain = function(tbl, cell_identifiers = 'barcode', sort_factor |
35 | 36 |
|
36 | 37 |
} |
37 | 38 |
|
38 |
-canonicalize_by_subset = function(ccdb, tie_break_keys = c('umis', 'reads'), contig_fields = tie_break_keys, ...){ |
|
39 |
+#' Return single contig for each cell based on filtering and sorting |
|
40 |
+#' |
|
41 |
+#' @param ccdb `ContigCellDB` |
|
42 |
+#' @param ... unquoted expressions passed to `dplyr::filter` that will be applied to the `contig_tbl` |
|
43 |
+#' @param tie_break_keys columns used to sort the `contig_tbl` in **decreasing** order |
|
44 |
+#' @param order `integer` specifying which entry (ordinal) will be selected from the sorted contig_tbl, within each cell |
|
45 |
+#' @param contig_fields columns in the `contig_tbl` that will be copied over to the `cell_tbl` |
|
46 |
+#' |
|
47 |
+#' @return `ContigCellDB` with additional fields in `cell_tbl` |
|
48 |
+#' @export |
|
49 |
+#' |
|
50 |
+#' @examples |
|
51 |
+#' # Report beta chain with highest umi-count, breaking ties with reads |
|
52 |
+#' beta = canonicalize_by_subset(ccdb_ex, chain == 'TRB', |
|
53 |
+#' tie_break_keys = c('umis', 'reads'), |
|
54 |
+#' contig_fields = c('umis', 'reads', 'chain', 'v_gene', 'd_gene', 'j_gene')) |
|
55 |
+#' head(beta$cell_tbl) |
|
56 |
+#' # Only adds fields to `cell_tbl` |
|
57 |
+#' stopifnot(all.equal(beta$cell_tbl[ccdb_ex$cell_pk], |
|
58 |
+#' ccdb_ex$cell_tbl[ccdb_ex$cell_pk])) |
|
59 |
+#' #Report cdr3 with highest UMI count, but only when > 5 UMIs support it |
|
60 |
+#' umi5 = canonicalize_by_subset(ccdb_ex, umis > 5, |
|
61 |
+#' tie_break_keys = c('umis', 'reads'), contig_fields = c('umis', 'cdr3')) |
|
62 |
+#' stopifnot(all(umi5$cell_tbl$umis > 5, na.rm = TRUE)) |
|
63 |
+canonicalize_by_subset = function(ccdb, ..., tie_break_keys = c('umis', 'reads'), contig_fields = tie_break_keys, order = 1){ |
|
39 | 64 |
tbl = ccdb$contig_tbl |
40 |
- ft = filter(tbl, quos(...)) |
|
65 |
+ # Filter with expressions in ... |
|
66 |
+ ft = filter(.data = tbl, !!!rlang::quos(...)) |
|
67 |
+ # setup quosures to arrange the data |
|
41 | 68 |
arranging = purrr::map(tie_break_keys, ~ rlang::quo(desc(!!sym(.x)))) |
42 |
- ft2 = ft %>% group_by(!!!syms(ccdb$cell_pk)) %>% dplyr::arrange(!!!arranging) |
|
69 |
+ # take first row of each cell |
|
70 |
+ ft2 = ft %>% group_by(!!!syms(ccdb$cell_pk)) %>% dplyr::arrange(!!!arranging) %>% dplyr::do(dplyr::slice(., order)) |
|
43 | 71 |
cell_tbl = ccdb$cell_tbl |
72 |
+ # join with cell tbl (so same number of cells) |
|
44 | 73 |
ccdb$cell_tbl = right_join_warn(ft2[unique(c(contig_fields, ccdb$cell_pk))], cell_tbl, by = ccdb$cell_pk) |
45 | 74 |
ccdb |
46 | 75 |
} |
... | ... |
@@ -52,6 +81,7 @@ canonicalize_by_subset = function(ccdb, tie_break_keys = c('umis', 'reads'), con |
52 | 81 |
|
53 | 82 |
#' Given a family of similar sequences, return a "representative" |
54 | 83 |
#' |
84 |
+#' This function can be deprecated. |
|
55 | 85 |
#' @param seqs character vector |
56 | 86 |
#' @param medoid_idx optional index into seqs |
57 | 87 |
#' @param warn_if_distinct Should a warning be emitted if there are distinct elements in seqs? |
... | ... |
@@ -60,10 +90,9 @@ canonicalize_by_subset = function(ccdb, tie_break_keys = c('umis', 'reads'), con |
60 | 90 |
#' sequence is returned |
61 | 91 |
#' |
62 | 92 |
#' @return character vector |
63 |
-#' @export |
|
64 | 93 |
#' |
65 | 94 |
#' @examples |
66 |
-#' get_canonical_representative(c('apple', 'manzana', 'pomme')) |
|
95 |
+#' CellaRepertorium:::get_canonical_representative(c('apple', 'manzana', 'pomme')) |
|
67 | 96 |
get_canonical_representative = function(seqs, medoid_idx, warn_if_distinct = FALSE){ |
68 | 97 |
if(!missing(medoid_idx)){ |
69 | 98 |
if(!is.integer(medoid_idx) || medoid_idx < 1 || medoid_idx > length(seqs)) stop("Illegal `medoid_idx`") |
... | ... |
@@ -79,6 +108,7 @@ get_canonical_representative = function(seqs, medoid_idx, warn_if_distinct = FAL |
79 | 108 |
return(rep) |
80 | 109 |
} |
81 | 110 |
|
111 |
+# This should return a ccdb or a subclass? |
|
82 | 112 |
#' Generate a list of tables representing clusters paired in cells |
83 | 113 |
#' |
84 | 114 |
#' A contingency table of every combination of `cluster_idx` up to `table_order` is generated. |
... | ... |
@@ -115,31 +145,37 @@ get_canonical_representative = function(seqs, medoid_idx, warn_if_distinct = FAL |
115 | 145 |
#' @importFrom rlang sym syms := |
116 | 146 |
#' @examples |
117 | 147 |
#' library(dplyr) |
118 |
-#' cluster_tbl = data_frame(clust_idx = gl(3, 2), cell_idx = rep(1:3, times = 2)) |
|
148 |
+#' tbl = tibble(clust_idx = gl(3, 2), cell_idx = rep(1:3, times = 2), contig_idx = 1:6) |
|
149 |
+#' ccdb = ContigCellDB(tbl, contig_pk = c('cell_idx', 'contig_idx'), |
|
150 |
+#' cell_pk = 'cell_idx', cluster_pk = 'clust_idx') |
|
119 | 151 |
#' # no pairs found twice |
120 |
-#' pt1 = pairing_tables(cluster_tbl, 'cell_idx', 'clust_idx', canonicalize_by_prevalence) |
|
152 |
+#' pt1 = pairing_tables(ccdb, canonicalize_by_prevalence) |
|
121 | 153 |
#' # all pairs found, found once. |
122 |
-#' pt2 = pairing_tables(cluster_tbl, 'cell_idx', 'clust_idx', |
|
123 |
-#' canonicalize_by_prevalence, min_expansion = 1) |
|
154 |
+#' pt2 = pairing_tables(ccdb, canonicalize_by_prevalence, min_expansion = 1) |
|
124 | 155 |
#' pt2$cell_tbl |
125 |
-#' cluster_tbl2 = bind_rows(cluster_tbl, cluster_tbl %>% mutate(cell_idx = rep(4:6, times = 2))) |
|
156 |
+#' tbl2 = bind_rows(tbl, tbl %>% mutate(cell_idx = rep(4:6, times = 2))) |
|
157 |
+#' ccdb2 = ContigCellDB(tbl2, contig_pk = c('cell_idx', 'contig_idx'), cell_pk = 'cell_idx', |
|
158 |
+#' cluster_pk = 'clust_idx') |
|
126 | 159 |
#' #all pairs found twice |
127 |
-#' pt3 = pairing_tables(cluster_tbl2, 'cell_idx', 'clust_idx', canonicalize_by_prevalence, min_expansion = 1) |
|
160 |
+#' pt3 = pairing_tables(ccdb2, canonicalize_by_prevalence, min_expansion = 1) |
|
128 | 161 |
#' pt3$cell_tbl |
129 | 162 |
#' # `canonicalize_by_chain` expects fields `umis`, `reads` |
130 | 163 |
#' # to break ties, wrap the function to change this |
131 |
-#' cluster_tbl3 = cluster_tbl2 %>% |
|
164 |
+#' ccdb2$contig_tbl = ccdb2$contig_tbl %>% |
|
132 | 165 |
#' mutate(umis = 1, reads = 1, chain = rep(c('TRA', 'TRB'), times = 6)) |
133 |
-#' pt4 = pairing_tables(cluster_tbl3, 'cell_idx', 'clust_idx', |
|
134 |
-#' canonicalize_by_chain, min_expansion = 1, table_order = 2) |
|
135 |
-pairing_tables = function(cluster_tbl, cell_identifiers = 'barcode', cluster_idx = 'cluster_idx', canonicalize_fun = canonicalize_by_chain, table_order = 2, min_expansion = 2, orphan_level = 1, cluster_whitelist = NULL, cluster_blacklist = NULL, cell_tbl = NULL, feature_tbl = NULL ){ |
|
166 |
+#' pt4 = pairing_tables(ccdb2, canonicalize_by_chain, min_expansion = 1, table_order = 2) |
|
167 |
+pairing_tables = function(ccdb, canonicalize_fun = canonicalize_by_chain, table_order = 2, min_expansion = 2, orphan_level = 1, cluster_keys = character(), cluster_whitelist = NULL, cluster_blacklist = NULL){ |
|
136 | 168 |
|
137 | 169 |
if(orphan_level > table_order) stop('`ophan_level` must be less than or equal to `table_order`') |
138 | 170 |
if(table_order < 1) stop('Table order must be at least 1') |
139 | 171 |
|
140 | 172 |
# get `table_order` most common clusters for each cell |
141 | 173 |
# forcibly rename cluster_idx -> "cluster_idx" |
142 |
- bar_chain_tbls = purrr::map(seq_len(table_order), function(i) canonicalize_fun(cluster_tbl, cell_identifiers = cell_identifiers, cluster_idx = cluster_idx, order = i) %>% dplyr::select(!!!c(syms(cell_identifiers), rlang::quo(cluster_idx))) %>% dplyr::rename( !!paste0('cluster_idx.', i) := !!cluster_idx)) |
|
174 |
+ contig_tbl = ccdb$contig_tbl |
|
175 |
+ cell_identifiers = ccdb$cell_pk |
|
176 |
+ cluster_idx = ccdb$cluster_pk |
|
177 |
+ cell_tbl = ccdb$cell_tbl |
|
178 |
+ bar_chain_tbls = purrr::map(seq_len(table_order), function(i) canonicalize_fun(contig_tbl, cell_identifiers = cell_identifiers, cluster_idx = cluster_idx, order = i) %>% dplyr::select(!!!c(syms(cell_identifiers), rlang::quo(cluster_idx))) %>% dplyr::rename( !!paste0('cluster_idx.', i) := !!cluster_idx)) |
|
143 | 179 |
# for each cell, what clusters are present |
144 | 180 |
oligo_cluster_pairs = purrr::reduce(bar_chain_tbls, left_join, by = cell_identifiers) |
145 | 181 |
|
... | ... |
@@ -52,7 +52,7 @@ get_gaps = function(ca){ |
52 | 52 |
data_frame(vd_gap, dj_gap, vj_gap) |
53 | 53 |
} |
54 | 54 |
|
55 |
-read_contig_json = function(file, seq_cols = c('quals', 'aa_sequence', '')){ |
|
55 |
+read_contig_json = function(anno_file, seq_cols = c('quals', 'aa_sequence', '')){ |
|
56 | 56 |
jsn = fromJSON(file(anno_file), flatten = TRUE) |
57 | 57 |
# contig sequences and gaps |
58 | 58 |
} |
... | ... |
@@ -22,7 +22,7 @@ ContigCellDB_10XVDJ(contig_tbl, contig_pk = c("barcode", "contig_id"), |
22 | 22 |
|
23 | 23 |
\item{cluster_tbl}{A data frame that provide cluster assignments for each contig} |
24 | 24 |
|
25 |
-\item{cluster_pk}{If `cluster_tbl` was provided, a list of character vector naming fields in `cluster_tbl` that uniquely identify a cluster} |
|
25 |
+\item{cluster_pk}{If `cluster_tbl` was provided, a character vector naming fields in `cluster_tbl` that uniquely identify a cluster} |
|
26 | 26 |
} |
27 | 27 |
\value{ |
28 | 28 |
\code{ContigCellDB} |
... | ... |
@@ -37,5 +37,6 @@ Construct a ContigCellDB |
37 | 37 |
|
38 | 38 |
\examples{ |
39 | 39 |
data(contigs_qc) |
40 |
-ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), cell_pk = c('barcode', 'pop', 'sample')) |
|
40 |
+ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), |
|
41 |
+ cell_pk = c('barcode', 'pop', 'sample')) |
|
41 | 42 |
} |
42 | 43 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,40 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/pairing-methods.R |
|
3 |
+\name{canonicalize_by_subset} |
|
4 |
+\alias{canonicalize_by_subset} |
|
5 |
+\title{Return single contig for each cell based on filtering and sorting} |
|
6 |
+\usage{ |
|
7 |
+canonicalize_by_subset(ccdb, ..., tie_break_keys = c("umis", "reads"), |
|
8 |
+ contig_fields = tie_break_keys, order = 1) |
|
9 |
+} |
|
10 |
+\arguments{ |
|
11 |
+\item{ccdb}{`ContigCellDB`} |
|
12 |
+ |
|
13 |
+\item{...}{unquoted expressions passed to `dplyr::filter` that will be applied to the `contig_tbl`} |
|
14 |
+ |
|
15 |
+\item{tie_break_keys}{columns used to sort the `contig_tbl` in **decreasing** order} |
|
16 |
+ |
|
17 |
+\item{contig_fields}{columns in the `contig_tbl` that will be copied over to the `cell_tbl`} |
|
18 |
+ |
|
19 |
+\item{order}{`integer` specifying which entry (ordinal) will be selected from the sorted contig_tbl, within each cell} |
|
20 |
+} |
|
21 |
+\value{ |
|
22 |
+`ContigCellDB` with additional fields in `cell_tbl` |
|
23 |
+} |
|
24 |
+\description{ |
|
25 |
+Return single contig for each cell based on filtering and sorting |
|
26 |
+} |
|
27 |
+\examples{ |
|
28 |
+# Report beta chain with highest umi-count, breaking ties with reads |
|
29 |
+beta = canonicalize_by_subset(ccdb_ex, chain == 'TRB', |
|
30 |
+tie_break_keys = c('umis', 'reads'), |
|
31 |
+contig_fields = c('umis', 'reads', 'chain', 'v_gene', 'd_gene', 'j_gene')) |
|
32 |
+head(beta$cell_tbl) |
|
33 |
+# Only adds fields to `cell_tbl` |
|
34 |
+stopifnot(all.equal(beta$cell_tbl[ccdb_ex$cell_pk], |
|
35 |
+ccdb_ex$cell_tbl[ccdb_ex$cell_pk])) |
|
36 |
+#Report cdr3 with highest UMI count, but only when > 5 UMIs support it |
|
37 |
+umi5 = canonicalize_by_subset(ccdb_ex, umis > 5, |
|
38 |
+tie_break_keys = c('umis', 'reads'), contig_fields = c('umis', 'cdr3')) |
|
39 |
+stopifnot(all(umi5$cell_tbl$umis > 5, na.rm = TRUE)) |
|
40 |
+} |
... | ... |
@@ -10,7 +10,7 @@ |
10 | 10 |
\arguments{ |
11 | 11 |
\item{x}{A ContigCellDB object} |
12 | 12 |
|
13 |
-\item{name}{Name of a slot for a ContigCellDB object} |
|
13 |
+\item{name}{a slot of a ContigCellDB object (one of `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)} |
|
14 | 14 |
} |
15 | 15 |
\value{ |
16 | 16 |
Slots of ContigCellDB |
... | ... |
@@ -19,5 +19,7 @@ Slots of ContigCellDB |
19 | 19 |
Creat a method for ContigCellDB object to access its slots |
20 | 20 |
} |
21 | 21 |
\examples{ |
22 |
+ccdb_ex$contig_tbl |
|
23 |
+ccdb_ex$cell_tbl |
|
22 | 24 |
ccdb_ex$cluster_tbl |
23 | 25 |
} |
... | ... |
@@ -3,14 +3,14 @@ |
3 | 3 |
\docType{methods} |
4 | 4 |
\name{$<-,ContigCellDB-method} |
5 | 5 |
\alias{$<-,ContigCellDB-method} |
6 |
-\title{Creat a function of ContigCellDB object to replace values of its slots} |
|
6 |
+\title{Create a function of ContigCellDB object to replace values of its slots} |
|
7 | 7 |
\usage{ |
8 | 8 |
\S4method{$}{ContigCellDB}(x, name) <- value |
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{x}{A ContigCellDB object} |
12 | 12 |
|
13 |
-\item{name}{Name of a slot for a ContigCellDB object} |
|
13 |
+\item{name}{Name of a slot for a ContigCellDB object (one of `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)} |
|
14 | 14 |
|
15 | 15 |
\item{value}{The value assigned to a slot of ContigCellDB object} |
16 | 16 |
} |
... | ... |
@@ -18,8 +18,12 @@ |
18 | 18 |
A ContigCellDB object |
19 | 19 |
} |
20 | 20 |
\description{ |
21 |
-Creat a function of ContigCellDB object to replace values of its slots |
|
21 |
+Create a function of ContigCellDB object to replace values of its slots |
|
22 | 22 |
} |
23 | 23 |
\examples{ |
24 |
-ccdb_ex$contig_pk <- c("pop","barcode","contig_id") |
|
24 |
+ccdb_ex$contig_pk = c("sample","barcode","contig_id") # 'pop' is technically redundant with 'sample' |
|
25 |
+# Take a subset of ccdb_ex |
|
26 |
+ccdb_ex |
|
27 |
+ccdb_ex$contig_tbl = dplyr::filter(ccdb_ex$contig_tbl, pop == 'b6') |
|
28 |
+ccdb_ex |
|
25 | 29 |
} |
... | ... |
@@ -38,7 +38,7 @@ You may need to lower it below 5 for AAseq with identity less than .7.} |
38 | 38 |
|
39 | 39 |
\item{type}{one of 'DNA' or 'AA'} |
40 | 40 |
|
41 |
-\item{cluster_name}{What index should the clustering be stored in? By default, a new, unnamed cluster is added.} |
|
41 |
+\item{cluster_name}{`character` specifying key, and name for the clustering.} |
|
42 | 42 |
} |
43 | 43 |
\value{ |
44 | 44 |
vector of \code{integer} of length \code{seqs} providing the cluster ID for each sequence, or a `tibble`. See details. |
... | ... |
@@ -73,4 +73,9 @@ cdhit(aaseq,identity = 1, G = 0, aL = 1, aS = 1, only_index = TRUE)[1:10] |
73 | 73 |
cdhit(aaseq,identity = 1, G = 0, aL = .9, aS = .9, only_index = TRUE)[1:10] |
74 | 74 |
# a tibble |
75 | 75 |
tbl = cdhit(aaseq, identity = 1, G = 0, aL = .9, aS = .9, only_index = FALSE) |
76 |
+res = CellaRepertorium:::cdhit_ccdb(ccdb_ex, 'cdr3_nt', type = 'DNA', |
|
77 |
+cluster_name = 'DNA97', identity = .965, min_length = 12, G = 1) |
|
78 |
+res$cluster_tbl |
|
79 |
+res$contig_tbl |
|
80 |
+res$cluster_pk |
|
76 | 81 |
} |
... | ... |
@@ -27,6 +27,10 @@ fine_clustering(ccdb, sequence_key, type, max_affinity = NULL, |
27 | 27 |
Perform additional clustering of sequences within groups |
28 | 28 |
} |
29 | 29 |
\examples{ |
30 |
-ccdb_ex = CellaRepertorium:::cdhit_ccdb(ccdb_ex, 'cdr3_nt', type = 'DNA', cluster_name = 'DNA97', identity = .965, min_length = 12, G = 1) |
|
31 |
-ccdb_ex = fine_clustering(ccdb_ex, sequence_key = 'cdr3_nt', type = 'DNA') |
|
30 |
+ccdb_ex_small = ccdb_ex |
|
31 |
+ccdb_ex_small$cell_tbl = ccdb_ex_small$cell_tbl[1:200,] |
|
32 |
+ccdb_ex_small = CellaRepertorium:::cdhit_ccdb(ccdb_ex_small, |
|
33 |
+sequence_key = 'cdr3_nt', type = 'DNA', cluster_name = 'DNA97', |
|
34 |
+identity = .965, min_length = 12, G = 1) |
|
35 |
+ccdb_ex_small = fine_clustering(ccdb_ex_small, sequence_key = 'cdr3_nt', type = 'DNA') |
|
32 | 36 |
} |
... | ... |
@@ -20,8 +20,8 @@ sequence is returned} |
20 | 20 |
character vector |
21 | 21 |
} |
22 | 22 |
\description{ |
23 |
-Given a family of similar sequences, return a "representative" |
|
23 |
+This function can be deprecated. |
|
24 | 24 |
} |
25 | 25 |
\examples{ |
26 |
-get_canonical_representative(c('apple', 'manzana', 'pomme')) |
|
26 |
+CellaRepertorium:::get_canonical_representative(c('apple', 'manzana', 'pomme')) |
|
27 | 27 |
} |
... | ... |
@@ -4,20 +4,15 @@ |
4 | 4 |
\alias{pairing_tables} |
5 | 5 |
\title{Generate a list of tables representing clusters paired in cells} |
6 | 6 |
\usage{ |
7 |
-pairing_tables(cluster_tbl, cell_identifiers = "barcode", |
|
8 |
- cluster_idx = "cluster_idx", |
|
9 |
- canonicalize_fun = canonicalize_by_chain, table_order = 2, |
|
10 |
- min_expansion = 2, orphan_level = 1, cluster_whitelist = NULL, |
|
11 |
- cluster_blacklist = NULL, cell_tbl = NULL, feature_tbl = NULL) |
|
7 |
+pairing_tables(ccdb, canonicalize_fun = canonicalize_by_chain, |
|
8 |
+ table_order = 2, min_expansion = 2, orphan_level = 1, |
|
9 |
+ cluster_keys = character(), cluster_whitelist = NULL, |
|
10 |
+ cluster_blacklist = NULL) |
|
12 | 11 |
} |
13 | 12 |
\arguments{ |
14 |
-\item{cluster_tbl}{a table with all combinations of clusters in all cells} |
|
13 |
+\item{ccdb}{`ContigCellDB`} |
|
15 | 14 |
|
16 |
-\item{cell_identifiers}{character vector naming fields that key a cell} |
|
17 |
- |
|
18 |
-\item{cluster_idx}{character naming a single field IDing the clusters} |
|
19 |
- |
|
20 |
-\item{canonicalize_fun}{a function with signature `canonicalize_fun(cluster_tbl, cell_identifiers, cluster_idx, order = i)` that for each `cell_identifier` returns a single contig that depends on the `order`. For instance \link{canonicalize_by_prevalence} or \link{canonicalize_by_chain}.} |
|
15 |
+\item{canonicalize_fun}{a function with signature `canonicalize_fun(ContigCellDB, order = i)` that for each cell, returns a single contig that depends on the `order`. For instance \link{canonicalize_by_prevalence} or \link{canonicalize_by_chain}.} |
|
21 | 16 |
|
22 | 17 |
\item{table_order}{Integer larger than 1. What order of cluster_idx will be paired, eg, order = 2 means that the most common and second most common cluster_idx will be sought for each cell} |
23 | 18 |
|
... | ... |
@@ -25,13 +20,11 @@ pairing_tables(cluster_tbl, cell_identifiers = "barcode", |
25 | 20 |
|
26 | 21 |
\item{orphan_level}{Integer larger than 0 and less than or equal to `table_order`. Given that at least `min_expansion` cells are found that have `table_order` chains identical, how many `cluster_idx` pairs will we match on to select other cells. Example: `ophan_level=1` means that cells that share just a single chain with the} |
27 | 22 |
|
28 |
-\item{cluster_whitelist}{a table of "cluster_idx" that should always be reported. In contrast to the `cluster_tbl`, here the clusters must be named "cluster_idx.1", "cluster_idx.2" (if order-2 pairs are being selected).} |
|
29 |
- |
|
30 |
-\item{cluster_blacklist}{a table of "cluster_idx" that will never be reported. Must be named as per `cluster_whitelist`.} |
|
23 |
+\item{cluster_keys}{optional `character` naming additional columns in `ccdb$cluster_tbl` to be reported in the pairing} |
|
31 | 24 |
|
32 |
-\item{cell_tbl}{optional, ancillary table with additional cell features. Must also be keyed by `cell_identifiers`} |
|
25 |
+\item{cluster_whitelist}{a table of pairings or clusters that should always be reported. Here the clusters must be named "cluster_idx.1", "cluster_idx.2" (if order-2 pairs are being selected) rather than with `ccdb$cluster_pk``} |
|
33 | 26 |
|
34 |
-\item{feature_tbl}{optional, ancillary table with additional cluster features. Must also be keyed by `cluster_idx`} |
|
27 |
+\item{cluster_blacklist}{a table of pairings or clusters that will never be reported. Must be named as per `cluster_whitelist`.} |
|
35 | 28 |
} |
36 | 29 |
\value{ |
37 | 30 |
list of tables. The `cell_tbl` is keyed by the `cell_identifiers`, with fields "cluster_idx.1", "cluster_idx.2", etc, IDing the contigs present in each cell. "cluster_idx.1_fct" and "cluster_idx.2_fct" cast these fields to factors and are reordered to maximize the number of pairs along the diagonal. The `idx1_tbl` and `idx2_tbl` report information (passed in about the `cluster_idx` by `feature_tbl`.) The `cluster_pair_tbl` reports all pairings found of contigs, and the number of times observed. |
... | ... |
@@ -54,23 +47,25 @@ This facilitates plotting. |
54 | 47 |
|
55 | 48 |
\examples{ |
56 | 49 |
library(dplyr) |
57 |
-cluster_tbl = data_frame(clust_idx = gl(3, 2), cell_idx = rep(1:3, times = 2)) |
|
50 |
+tbl = tibble(clust_idx = gl(3, 2), cell_idx = rep(1:3, times = 2), contig_idx = 1:6) |
|
51 |
+ccdb = ContigCellDB(tbl, contig_pk = c('cell_idx', 'contig_idx'), |
|
52 |
+cell_pk = 'cell_idx', cluster_pk = 'clust_idx') |
|
58 | 53 |
# no pairs found twice |
59 |
-pt1 = pairing_tables(cluster_tbl, 'cell_idx', 'clust_idx', canonicalize_by_prevalence) |
|
54 |
+pt1 = pairing_tables(ccdb, canonicalize_by_prevalence) |
|
60 | 55 |
# all pairs found, found once. |
61 |
-pt2 = pairing_tables(cluster_tbl, 'cell_idx', 'clust_idx', |
|
62 |
- canonicalize_by_prevalence, min_expansion = 1) |
|
56 |
+pt2 = pairing_tables(ccdb, canonicalize_by_prevalence, min_expansion = 1) |
|
63 | 57 |
pt2$cell_tbl |
64 |
-cluster_tbl2 = bind_rows(cluster_tbl, cluster_tbl \%>\% mutate(cell_idx = rep(4:6, times = 2))) |
|
58 |
+tbl2 = bind_rows(tbl, tbl \%>\% mutate(cell_idx = rep(4:6, times = 2))) |
|
59 |
+ccdb2 = ContigCellDB(tbl2, contig_pk = c('cell_idx', 'contig_idx'), cell_pk = 'cell_idx', |
|
60 |
+cluster_pk = 'clust_idx') |
|
65 | 61 |
#all pairs found twice |
66 |
-pt3 = pairing_tables(cluster_tbl2, 'cell_idx', 'clust_idx', canonicalize_by_prevalence, min_expansion = 1) |
|
62 |
+pt3 = pairing_tables(ccdb2, canonicalize_by_prevalence, min_expansion = 1) |
|
67 | 63 |
pt3$cell_tbl |
68 | 64 |
# `canonicalize_by_chain` expects fields `umis`, `reads` |
69 | 65 |
# to break ties, wrap the function to change this |
70 |
-cluster_tbl3 = cluster_tbl2 \%>\% |
|
66 |
+ccdb2$contig_tbl = ccdb2$contig_tbl \%>\% |
|
71 | 67 |
mutate(umis = 1, reads = 1, chain = rep(c('TRA', 'TRB'), times = 6)) |
72 |
-pt4 = pairing_tables(cluster_tbl3, 'cell_idx', 'clust_idx', |
|
73 |
- canonicalize_by_chain, min_expansion = 1, table_order = 2) |
|
68 |
+pt4 = pairing_tables(ccdb2, canonicalize_by_chain, min_expansion = 1, table_order = 2) |
|
74 | 69 |
} |
75 | 70 |
\seealso{ |
76 | 71 |
canonicalize_by_prevalence, canonicalize_by_chain |
79 | 74 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,339 @@ |
1 |
+ GNU GENERAL PUBLIC LICENSE |
|
2 |
+ Version 2, June 1991 |
|
3 |
+ |
|
4 |
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc., |
|
5 |
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
6 |
+ Everyone is permitted to copy and distribute verbatim copies |
|
7 |
+ of this license document, but changing it is not allowed. |
|
8 |
+ |
|
9 |
+ Preamble |
|
10 |
+ |
|
11 |
+ The licenses for most software are designed to take away your |
|
12 |
+freedom to share and change it. By contrast, the GNU General Public |
|
13 |
+License is intended to guarantee your freedom to share and change free |
|
14 |
+software--to make sure the software is free for all its users. This |
|
15 |
+General Public License applies to most of the Free Software |
|
16 |
+Foundation's software and to any other program whose authors commit to |
|
17 |
+using it. (Some other Free Software Foundation software is covered by |
|
18 |
+the GNU Lesser General Public License instead.) You can apply it to |
|
19 |
+your programs, too. |
|
20 |
+ |
|
21 |
+ When we speak of free software, we are referring to freedom, not |
|
22 |
+price. Our General Public Licenses are designed to make sure that you |
|
23 |
+have the freedom to distribute copies of free software (and charge for |
|
24 |
+this service if you wish), that you receive source code or can get it |
|
25 |
+if you want it, that you can change the software or use pieces of it |
|
26 |
+in new free programs; and that you know you can do these things. |
|
27 |
+ |
|
28 |
+ To protect your rights, we need to make restrictions that forbid |
|
29 |
+anyone to deny you these rights or to ask you to surrender the rights. |
|
30 |
+These restrictions translate to certain responsibilities for you if you |
|
31 |
+distribute copies of the software, or if you modify it. |
|
32 |
+ |
|
33 |
+ For example, if you distribute copies of such a program, whether |
|
34 |
+gratis or for a fee, you must give the recipients all the rights that |
|
35 |
+you have. You must make sure that they, too, receive or can get the |
|
36 |
+source code. And you must show them these terms so they know their |
|
37 |
+rights. |
|
38 |
+ |
|
39 |
+ We protect your rights with two steps: (1) copyright the software, and |
|
40 |
+(2) offer you this license which gives you legal permission to copy, |
|
41 |
+distribute and/or modify the software. |
|
42 |
+ |
|
43 |
+ Also, for each author's protection and ours, we want to make certain |
|
44 |
+that everyone understands that there is no warranty for this free |
|
45 |
+software. If the software is modified by someone else and passed on, we |
|
46 |
+want its recipients to know that what they have is not the original, so |
|
47 |
+that any problems introduced by others will not reflect on the original |
|
48 |
+authors' reputations. |
|
49 |
+ |
|
50 |
+ Finally, any free program is threatened constantly by software |
|
51 |
+patents. We wish to avoid the danger that redistributors of a free |
|
52 |
+program will individually obtain patent licenses, in effect making the |
|
53 |
+program proprietary. To prevent this, we have made it clear that any |
|
54 |
+patent must be licensed for everyone's free use or not licensed at all. |
|
55 |
+ |
|
56 |
+ The precise terms and conditions for copying, distribution and |
|
57 |
+modification follow. |
|
58 |
+ |
|
59 |
+ GNU GENERAL PUBLIC LICENSE |
|
60 |
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION |
|
61 |
+ |
|
62 |
+ 0. This License applies to any program or other work which contains |
|
63 |
+a notice placed by the copyright holder saying it may be distributed |
|
64 |
+under the terms of this General Public License. The "Program", below, |
|
65 |
+refers to any such program or work, and a "work based on the Program" |
|
66 |
+means either the Program or any derivative work under copyright law: |
|
67 |
+that is to say, a work containing the Program or a portion of it, |
|
68 |
+either verbatim or with modifications and/or translated into another |
|
69 |
+language. (Hereinafter, translation is included without limitation in |
|
70 |
+the term "modification".) Each licensee is addressed as "you". |
|
71 |
+ |
|
72 |
+Activities other than copying, distribution and modification are not |
|
73 |
+covered by this License; they are outside its scope. The act of |
|
74 |
+running the Program is not restricted, and the output from the Program |
|
75 |
+is covered only if its contents constitute a work based on the |
|
76 |
+Program (independent of having been made by running the Program). |
|
77 |
+Whether that is true depends on what the Program does. |
|
78 |
+ |
|
79 |
+ 1. You may copy and distribute verbatim copies of the Program's |
|
80 |
+source code as you receive it, in any medium, provided that you |
|
81 |
+conspicuously and appropriately publish on each copy an appropriate |
|
82 |
+copyright notice and disclaimer of warranty; keep intact all the |
|
83 |
+notices that refer to this License and to the absence of any warranty; |
|
84 |
+and give any other recipients of the Program a copy of this License |
|
85 |
+along with the Program. |
|
86 |
+ |
|
87 |
+You may charge a fee for the physical act of transferring a copy, and |
|
88 |
+you may at your option offer warranty protection in exchange for a fee. |
|
89 |
+ |
|
90 |
+ 2. You may modify your copy or copies of the Program or any portion |
|
91 |
+of it, thus forming a work based on the Program, and copy and |
|
92 |
+distribute such modifications or work under the terms of Section 1 |
|
93 |
+above, provided that you also meet all of these conditions: |
|
94 |
+ |
|
95 |
+ a) You must cause the modified files to carry prominent notices |
|
96 |
+ stating that you changed the files and the date of any change. |
|
97 |
+ |
|
98 |
+ b) You must cause any work that you distribute or publish, that in |
|
99 |
+ whole or in part contains or is derived from the Program or any |
|
100 |
+ part thereof, to be licensed as a whole at no charge to all third |
|
101 |
+ parties under the terms of this License. |
|
102 |
+ |
|
103 |
+ c) If the modified program normally reads commands interactively |
|
104 |
+ when run, you must cause it, when started running for such |
|
105 |
+ interactive use in the most ordinary way, to print or display an |
|
106 |
+ announcement including an appropriate copyright notice and a |
|
107 |
+ notice that there is no warranty (or else, saying that you provide |
|
108 |
+ a warranty) and that users may redistribute the program under |
|
109 |
+ these conditions, and telling the user how to view a copy of this |
|
110 |
+ License. (Exception: if the Program itself is interactive but |
|
111 |
+ does not normally print such an announcement, your work based on |
|
112 |
+ the Program is not required to print an announcement.) |
|
113 |
+ |
|
114 |
+These requirements apply to the modified work as a whole. If |
|
115 |
+identifiable sections of that work are not derived from the Program, |
|
116 |
+and can be reasonably considered independent and separate works in |
|
117 |
+themselves, then this License, and its terms, do not apply to those |
|
118 |
+sections when you distribute them as separate works. But when you |
|
119 |
+distribute the same sections as part of a whole which is a work based |
|
120 |
+on the Program, the distribution of the whole must be on the terms of |
|
121 |
+this License, whose permissions for other licensees extend to the |
|
122 |
+entire whole, and thus to each and every part regardless of who wrote it. |
|
123 |
+ |
|
124 |
+Thus, it is not the intent of this section to claim rights or contest |
|
125 |
+your rights to work written entirely by you; rather, the intent is to |
|
126 |
+exercise the right to control the distribution of derivative or |
|
127 |
+collective works based on the Program. |
|
128 |
+ |
|
129 |
+In addition, mere aggregation of another work not based on the Program |
|
130 |
+with the Program (or with a work based on the Program) on a volume of |
|
131 |
+a storage or distribution medium does not bring the other work under |
|
132 |
+the scope of this License. |
|
133 |
+ |
|
134 |
+ 3. You may copy and distribute the Program (or a work based on it, |
|
135 |
+under Section 2) in object code or executable form under the terms of |
|
136 |
+Sections 1 and 2 above provided that you also do one of the following: |
|
137 |
+ |
|
138 |
+ a) Accompany it with the complete corresponding machine-readable |
|
139 |
+ source code, which must be distributed under the terms of Sections |
|
140 |
+ 1 and 2 above on a medium customarily used for software interchange; or, |
|
141 |
+ |
|
142 |
+ b) Accompany it with a written offer, valid for at least three |
|
143 |
+ years, to give any third party, for a charge no more than your |
|
144 |
+ cost of physically performing source distribution, a complete |
|
145 |
+ machine-readable copy of the corresponding source code, to be |
|
146 |
+ distributed under the terms of Sections 1 and 2 above on a medium |
|
147 |
+ customarily used for software interchange; or, |
|
148 |
+ |
|
149 |
+ c) Accompany it with the information you received as to the offer |
|
150 |
+ to distribute corresponding source code. (This alternative is |
|
151 |
+ allowed only for noncommercial distribution and only if you |
|
152 |
+ received the program in object code or executable form with such |
|
153 |
+ an offer, in accord with Subsection b above.) |
|
154 |
+ |
|
155 |
+The source code for a work means the preferred form of the work for |
|
156 |
+making modifications to it. For an executable work, complete source |
|
157 |
+code means all the source code for all modules it contains, plus any |
|
158 |
+associated interface definition files, plus the scripts used to |
|
159 |
+control compilation and installation of the executable. However, as a |
|
160 |
+special exception, the source code distributed need not include |
|
161 |
+anything that is normally distributed (in either source or binary |
|
162 |
+form) with the major components (compiler, kernel, and so on) of the |
|
163 |
+operating system on which the executable runs, unless that component |
|
164 |
+itself accompanies the executable. |
|
165 |
+ |
|
166 |
+If distribution of executable or object code is made by offering |
|
167 |
+access to copy from a designated place, then offering equivalent |
|
168 |
+access to copy the source code from the same place counts as |
|
169 |
+distribution of the source code, even though third parties are not |
|
170 |
+compelled to copy the source along with the object code. |
|
171 |
+ |
|
172 |
+ 4. You may not copy, modify, sublicense, or distribute the Program |
|
173 |
+except as expressly provided under this License. Any attempt |
|
174 |
+otherwise to copy, modify, sublicense or distribute the Program is |
|
175 |
+void, and will automatically terminate your rights under this License. |
|
176 |
+However, parties who have received copies, or rights, from you under |
|
177 |
+this License will not have their licenses terminated so long as such |
|
178 |
+parties remain in full compliance. |
|
179 |
+ |
|
180 |
+ 5. You are not required to accept this License, since you have not |
|
181 |
+signed it. However, nothing else grants you permission to modify or |
|
182 |
+distribute the Program or its derivative works. These actions are |
|
183 |
+prohibited by law if you do not accept this License. Therefore, by |
|
184 |
+modifying or distributing the Program (or any work based on the |
|
185 |
+Program), you indicate your acceptance of this License to do so, and |
|
186 |
+all its terms and conditions for copying, distributing or modifying |
|
187 |
+the Program or works based on it. |
|
188 |
+ |
|
189 |
+ 6. Each time you redistribute the Program (or any work based on the |
|
190 |
+Program), the recipient automatically receives a license from the |
|
191 |
+original licensor to copy, distribute or modify the Program subject to |
|
192 |
+these terms and conditions. You may not impose any further |
|
193 |
+restrictions on the recipients' exercise of the rights granted herein. |
|
194 |
+You are not responsible for enforcing compliance by third parties to |
|
195 |
+this License. |
|
196 |
+ |
|
197 |
+ 7. If, as a consequence of a court judgment or allegation of patent |
|
198 |
+infringement or for any other reason (not limited to patent issues), |
|
199 |
+conditions are imposed on you (whether by court order, agreement or |
|
200 |
+otherwise) that contradict the conditions of this License, they do not |
|
201 |
+excuse you from the conditions of this License. If you cannot |
|
202 |
+distribute so as to satisfy simultaneously your obligations under this |
|
203 |
+License and any other pertinent obligations, then as a consequence you |
|
204 |
+may not distribute the Program at all. For example, if a patent |
|
205 |
+license would not permit royalty-free redistribution of the Program by |
|
206 |
+all those who receive copies directly or indirectly through you, then |
|
207 |
+the only way you could satisfy both it and this License would be to |
|
208 |
+refrain entirely from distribution of the Program. |
|
209 |
+ |
|
210 |
+If any portion of this section is held invalid or unenforceable under |
|
211 |
+any particular circumstance, the balance of the section is intended to |
|
212 |
+apply and the section as a whole is intended to apply in other |
|
213 |
+circumstances. |
|
214 |
+ |
|
215 |
+It is not the purpose of this section to induce you to infringe any |
|
216 |
+patents or other property right claims or to contest validity of any |
|
217 |
+such claims; this section has the sole purpose of protecting the |
|
218 |
+integrity of the free software distribution system, which is |
|
219 |
+implemented by public license practices. Many people have made |
|
220 |
+generous contributions to the wide range of software distributed |
|
221 |
+through that system in reliance on consistent application of that |
|
222 |
+system; it is up to the author/donor to decide if he or she is willing |
|
223 |
+to distribute software through any other system and a licensee cannot |
|
224 |
+impose that choice. |
|
225 |
+ |
|
226 |
+This section is intended to make thoroughly clear what is believed to |
|
227 |
+be a consequence of the rest of this License. |
|
228 |
+ |
|
229 |
+ 8. If the distribution and/or use of the Program is restricted in |
|
230 |
+certain countries either by patents or by copyrighted interfaces, the |
|
231 |
+original copyright holder who places the Program under this License |
|
232 |
+may add an explicit geographical distribution limitation excluding |
|
233 |
+those countries, so that distribution is permitted only in or among |
|
234 |
+countries not thus excluded. In such case, this License incorporates |
|
235 |
+the limitation as if written in the body of this License. |
|
236 |
+ |
|
237 |
+ 9. The Free Software Foundation may publish revised and/or new versions |
|
238 |
+of the General Public License from time to time. Such new versions will |
|
239 |
+be similar in spirit to the present version, but may differ in detail to |
|
240 |
+address new problems or concerns. |
|
241 |
+ |
|
242 |
+Each version is given a distinguishing version number. If the Program |
|
243 |
+specifies a version number of this License which applies to it and "any |
|
244 |
+later version", you have the option of following the terms and conditions |
|
245 |
+either of that version or of any later version published by the Free |
|
246 |
+Software Foundation. If the Program does not specify a version number of |
|
247 |
+this License, you may choose any version ever published by the Free Software |
|
248 |
+Foundation. |
|
249 |
+ |
|
250 |
+ 10. If you wish to incorporate parts of the Program into other free |
|
251 |
+programs whose distribution conditions are different, write to the author |
|
252 |
+to ask for permission. For software which is copyrighted by the Free |
|
253 |
+Software Foundation, write to the Free Software Foundation; we sometimes |
|
254 |
+make exceptions for this. Our decision will be guided by the two goals |
|
255 |
+of preserving the free status of all derivatives of our free software and |
|
256 |
+of promoting the sharing and reuse of software generally. |
|
257 |
+ |
|
258 |
+ NO WARRANTY |
|
259 |
+ |
|
260 |
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY |
|
261 |
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN |
|
262 |
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES |
|
263 |
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED |
|
264 |
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
|
265 |
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS |
|
266 |
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE |
|
267 |
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, |
|
268 |
+REPAIR OR CORRECTION. |
|
269 |
+ |
|
270 |
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING |
|
271 |
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR |
|
272 |
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, |
|
273 |
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING |
|
274 |
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED |
|
275 |
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY |
|
276 |
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER |
|
277 |
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE |
|
278 |
+POSSIBILITY OF SUCH DAMAGES. |
|
279 |
+ |
|
280 |
+ END OF TERMS AND CONDITIONS |
|
281 |
+ |
|
282 |
+ How to Apply These Terms to Your New Programs |
|
283 |
+ |
|
284 |
+ If you develop a new program, and you want it to be of the greatest |
|
285 |
+possible use to the public, the best way to achieve this is to make it |
|
286 |
+free software which everyone can redistribute and change under these terms. |
|
287 |
+ |
|
288 |
+ To do so, attach the following notices to the program. It is safest |
|
289 |
+to attach them to the start of each source file to most effectively |
|
290 |
+convey the exclusion of warranty; and each file should have at least |
|
291 |
+the "copyright" line and a pointer to where the full notice is found. |
|
292 |
+ |
|
293 |
+ <one line to give the program's name and a brief idea of what it does.> |
|
294 |
+ Copyright (C) <year> <name of author> |
|
295 |
+ |
|
296 |
+ This program is free software; you can redistribute it and/or modify |
|
297 |
+ it under the terms of the GNU General Public License as published by |
|
298 |
+ the Free Software Foundation; either version 2 of the License, or |
|
299 |
+ (at your option) any later version. |
|
300 |
+ |
|
301 |
+ This program is distributed in the hope that it will be useful, |
|
302 |
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
303 |
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
304 |
+ GNU General Public License for more details. |
|
305 |
+ |
|
306 |
+ You should have received a copy of the GNU General Public License along |
|
307 |
+ with this program; if not, write to the Free Software Foundation, Inc., |
|
308 |
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
309 |
+ |
|
310 |
+Also add information on how to contact you by electronic and paper mail. |
|
311 |
+ |
|
312 |
+If the program is interactive, make it output a short notice like this |
|
313 |
+when it starts in an interactive mode: |
|
314 |
+ |
|
315 |
+ Gnomovision version 69, Copyright (C) year name of author |
|
316 |
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. |
|
317 |
+ This is free software, and you are welcome to redistribute it |
|
318 |
+ under certain conditions; type `show c' for details. |
|
319 |
+ |
|
320 |
+The hypothetical commands `show w' and `show c' should show the appropriate |
|
321 |
+parts of the General Public License. Of course, the commands you use may |
|
322 |
+be called something other than `show w' and `show c'; they could even be |
|
323 |
+mouse-clicks or menu items--whatever suits your program. |
|
324 |
+ |
|
325 |
+You should also get your employer (if you work as a programmer) or your |
|
326 |
+school, if any, to sign a "copyright disclaimer" for the program, if |
|
327 |
+necessary. Here is a sample; alter the names: |
|
328 |
+ |
|
329 |
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program |
|
330 |
+ `Gnomovision' (which makes passes at compilers) written by James Hacker. |
|
331 |
+ |
|
332 |
+ <signature of Ty Coon>, 1 April 1989 |
|
333 |
+ Ty Coon, President of Vice |
|
334 |
+ |
|
335 |
+This General Public License does not permit incorporating your program into |
|
336 |
+proprietary programs. If your program is a subroutine library, you may |
|
337 |
+consider it more useful to permit linking proprietary applications with the |
|
338 |
+library. If this is what you want to do, use the GNU Lesser General |
|
339 |
+Public License instead of this License. |