Browse code

Doc/example updates

Andrew McDavid authored on 19/06/2019 06:04:46
Showing 19 changed files

... ...
@@ -5,3 +5,4 @@ extdata/.*\.json$
5 5
 extdata/refdata-cellranger-vdj-GRCh38-alts-ensembl-2.0.0/
6 6
 ^doc$
7 7
 ^Meta$
8
+manuscript/
... ...
@@ -8,3 +8,4 @@ data-raw/*.json
8 8
 inst/doc
9 9
 doc
10 10
 Meta
11
+*.dll
... ...
@@ -1,31 +1,41 @@
1
-Package: CellaRepertorium
2 1
 Type: Package
3
-Title: Methods for clustering and analyzing high-throughput single cell immune cell repertoires (RepSeq)
4
-Version: 0.3.0
5
-Author: Andrew McDavid
2
+Package: CellaRepertorium
3
+Title: Methods for clustering and analyzing high-throughput
4
+    single cell immune cell repertoires (RepSeq)
5
+Version: 0.3.1
6
+Authors@R: 
7
+    c(person(given = "Andrew",
8
+             family = "McDavid",
9
+             role = c('aut', 'cre'),
10
+             email = "Andrew_McDavid@urmc.rochester.edu"),
11
+      person(given = "Yu",
12
+             family = "Gu",
13
+             role = "aut",
14
+             email = "Yu_Gu@urmc.rochester.edu"))
6 15
 Maintainer: Andrew McDavid <Andrew_McDavid@urmc.rochester.edu>
7
-Description: Methods to cluster and analyze high-throughput single cell immune cell repertoires,
8
-    especially from the 10X Genomics VDJ solution. 
9
-    Contains an R interface to CD-HIT (Li and Godzik 2006).
10
-    Tests for specific expansion, as well as omnibus oligoclonality under hypergeometric models.
16
+Description: Methods to cluster and analyze high-throughput
17
+    single cell immune cell repertoires, especially from the 10X Genomics
18
+    VDJ solution. Contains an R interface to CD-HIT (Li and Godzik 2006).   
19
+    Methods to visualize and analyze paired heavy-light chain data.
20
+    Tests for specific expansion, as well as omnibus oligoclonality under
21
+    hypergeometric models.
11 22
 License: GPL-3
12
-Encoding: UTF-8
13
-LazyData: true
14
-Depends: R (>= 3.5.0)
23
+Depends: 
24
+    R (>= 3.5.0)
15 25
 Imports:
16
-   dplyr,
17
-   tibble,
18
-   stringr,
19
-   Biostrings,
20
-   Rcpp,
21
-   reshape2,
22
-   methods,
23
-   rlang,
24
-   purrr,
25
-   Matrix,
26
-   S4Vectors,
27
-   tidyr,
28
-   forcats
26
+    dplyr,
27
+    tibble,
28
+    stringr,
29
+    Biostrings,
30
+    Rcpp,
31
+    reshape2,
32
+    methods,
33
+    rlang,
34
+    purrr,
35
+    Matrix,
36
+    S4Vectors,
37
+    tidyr,
38
+    forcats
29 39
 Suggests: 
30 40
     testthat,
31 41
     readr,
... ...
@@ -33,8 +43,15 @@ Suggests:
33 43
     rmarkdown,
34 44
     ggplot2,
35 45
     BiocStyle,
36
-    ggdendro
37
-RoxygenNote: 6.1.1
38
-LinkingTo: Rcpp
46
+    ggdendro,
47
+    broom,
48
+    lme4,
49
+    RColorBrewer
50
+LinkingTo: 
51
+    Rcpp
52
+VignetteBuilder: 
53
+    knitr
54
+Encoding: UTF-8
55
+LazyData: true
39 56
 NeedsCompilation: yes
40
-VignetteBuilder: knitr
57
+RoxygenNote: 6.1.1
... ...
@@ -4,6 +4,7 @@ export(ContigCellDB)
4 4
 export(ContigCellDB_10XVDJ)
5 5
 export(canonicalize_by_chain)
6 6
 export(canonicalize_by_prevalence)
7
+export(canonicalize_by_subset)
7 8
 export(cdhit)
8 9
 export(cdhit_ccdb)
9 10
 export(cluster_permute_test)
... ...
@@ -12,7 +13,6 @@ export(enumerate_pairing)
12 13
 export(fancy_name_contigs)
13 14
 export(fine_cluster_seqs)
14 15
 export(fine_clustering)
15
-export(get_canonical_representative)
16 16
 export(ig_chain_recode)
17 17
 export(modal_category)
18 18
 export(np)
... ...
@@ -28,14 +28,17 @@ importFrom(dplyr,anti_join)
28 28
 importFrom(dplyr,bind_cols)
29 29
 importFrom(dplyr,bind_rows)
30 30
 importFrom(dplyr,case_when)
31
+importFrom(dplyr,do)
31 32
 importFrom(dplyr,filter)
32 33
 importFrom(dplyr,group_by)
33 34
 importFrom(dplyr,left_join)
34 35
 importFrom(dplyr,mutate)
35 36
 importFrom(dplyr,right_join)
37
+importFrom(dplyr,rowwise)
36 38
 importFrom(dplyr,select)
37 39
 importFrom(dplyr,semi_join)
38 40
 importFrom(dplyr,summarize)
41
+importFrom(dplyr,transmute)
39 42
 importFrom(dplyr,ungroup)
40 43
 importFrom(methods,"slot<-")
41 44
 importFrom(methods,as)
... ...
@@ -28,7 +28,7 @@ valid_KeyedTbl = function(tbl, keys){
28 28
 #' @param cell_tbl a data frame of cell barcodes, and (optional) additional fields describing their properties
29 29
 #' @param cell_pk character vector naming fields in `cell_tbl` that uniquely identify a cell barcode
30 30
 #' @param cluster_tbl A data frame that provide cluster assignments for each contig
31
-#' @param cluster_pk If `cluster_tbl` was provided, a list of character vector naming fields in `cluster_tbl` that uniquely identify a cluster
31
+#' @param cluster_pk If `cluster_tbl` was provided, a character vector naming fields in `cluster_tbl` that uniquely identify a cluster
32 32
 #'
33 33
 #' @return \code{ContigCellDB}
34 34
 #' @export
... ...
@@ -39,7 +39,8 @@ valid_KeyedTbl = function(tbl, keys){
39 39
 #'
40 40
 #' @examples
41 41
 #' data(contigs_qc)
42
-#' ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), cell_pk = c('barcode', 'pop', 'sample'))
42
+#' ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'),
43
+#'  cell_pk = c('barcode', 'pop', 'sample'))
43 44
 ContigCellDB = function(contig_tbl, contig_pk, cell_tbl, cell_pk, cluster_tbl, cluster_pk = character()){
44 45
     valid_KeyedTbl(contig_tbl, contig_pk)
45 46
     equalized = FALSE
... ...
@@ -68,12 +69,14 @@ ContigCellDB_10XVDJ = function(contig_tbl, contig_pk = c('barcode', 'contig_id')
68 69
 #' Creat a method for ContigCellDB object to access its slots
69 70
 #'
70 71
 #' @param x A ContigCellDB object
71
-#' @param name Name of a slot for a ContigCellDB object
72
+#' @param name a slot of a ContigCellDB object (one of  `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)
72 73
 #'
73 74
 #' @return Slots of ContigCellDB
74 75
 #' @export
75 76
 #'
76 77
 #' @examples
78
+#' ccdb_ex$contig_tbl
79
+#' ccdb_ex$cell_tbl
77 80
 #' ccdb_ex$cluster_tbl
78 81
 setMethod("$", signature = c(x = 'ContigCellDB'), function(x, name){
79 82
     if(name %in% c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')){
... ...
@@ -83,17 +86,21 @@ setMethod("$", signature = c(x = 'ContigCellDB'), function(x, name){
83 86
     }
84 87
 })
85 88
 
86
-#' Creat a function of ContigCellDB object to replace values of its slots
89
+#' Create a function of ContigCellDB object to replace values of its slots
87 90
 #'
88 91
 #' @param x A ContigCellDB object
89
-#' @param name Name of a slot for a ContigCellDB object
92
+#' @param name Name of a slot for a ContigCellDB object (one of  `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)
90 93
 #' @param value The value assigned to a slot of ContigCellDB object
91 94
 #'
92 95
 #' @return A ContigCellDB object
93 96
 #' @export
94 97
 #'
95 98
 #' @examples
96
-#' ccdb_ex$contig_pk <- c("pop","barcode","contig_id")
99
+#' ccdb_ex$contig_pk = c("sample","barcode","contig_id") # 'pop' is technically redundant with 'sample'
100
+#' # Take a subset of ccdb_ex
101
+#' ccdb_ex
102
+#' ccdb_ex$contig_tbl = dplyr::filter(ccdb_ex$contig_tbl, pop == 'b6')
103
+#' ccdb_ex
97 104
 setReplaceMethod("$", signature = c(x = 'ContigCellDB'), function(x, name, value){
98 105
     if(name %in% c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')){
99 106
         slot(x, name) <- value
... ...
@@ -82,8 +82,14 @@ cdhit = function(seqs, identity = NULL, kmerSize = NULL, min_length = 6, s = 1,
82 82
 ##' @param object An object of class `ClusterContigDB`
83 83
 ##' @param sequence_key `character` naming the column in the `contig_tbl` containing the sequence to be clustered
84 84
 ##' @param type one of 'DNA' or 'AA'
85
-##' @param cluster_name What index should the clustering be stored in?  By default, a new, unnamed cluster is added.
85
+##' @param cluster_name `character` specifying key, and name for the clustering.
86 86
 ##' @export
87
+##' @examples
88
+##' res = CellaRepertorium:::cdhit_ccdb(ccdb_ex, 'cdr3_nt', type = 'DNA',
89
+##' cluster_name = 'DNA97', identity = .965, min_length = 12, G = 1)
90
+##' res$cluster_tbl
91
+##' res$contig_tbl
92
+##' res$cluster_pk
87 93
 cdhit_ccdb = function(object, sequence_key, type = c('DNA', 'AA'), cluster_name = 'cluster_idx', ...){
88 94
     seqs = object$contig_tbl[[sequence_key]]
89 95
     if(length(seqs) < 1) stop("No sequences were provided")
... ...
@@ -7,6 +7,7 @@ cluster_germline = function(ccdb, segment_keys = c('v_gene', 'j_gene', 'chain'),
7 7
     replace_cluster_tbl(ccdb, cluster_tbl, cl_con_tbl, cluster_pk = cluster_name)
8 8
 }
9 9
 
10
+globalVariables(c('fc', 'd(medoid)', 'is_medoid', 'n_cluster'))
10 11
 
11 12
 # Also canonicalize..
12 13
 #' Perform additional clustering of sequences within groups
... ...
@@ -146,8 +147,8 @@ fine_cluster_seqs = function(seqs, type = 'AA', big_memory_brute = FALSE, method
146 147
     }
147 148
 
148 149
     medoid = which.min(colMeans(sd))
149
-    homology = sd[medoid,]
150
-    list(cluster = hc, distance = sd, homology = homology, medoid = medoid, max_dist = max(sd))
150
+    distance = sd[medoid,]
151
+    list(cluster = hc, distance_mat = sd, distance = distance, medoid = medoid, max_dist = max(sd))
151 152
 }
152 153
 
153 154
 
... ...
@@ -1,3 +1,4 @@
1
+globalVariables(c('prev'))
1 2
 #' For each cell, return a single, canonical chain-cluster
2 3
 #'
3 4
 #' In single cell data, multiple chains (heavy-light or alpha-beta) are expected.  In some cases, there could be more than two (eg multiple alpha alleles for T cells).
... ...
@@ -19,7 +20,7 @@ canonicalize_by_prevalence = function(tbl, cell_identifiers = 'barcode', cluster
19 20
 
20 21
 
21 22
 
22
-
23
+# I believe this function is obsoleted by `canonicalize_by_subset``
23 24
 #' @param sort_factors `character` vector naming columns in `tbl` to sorted on, within  `cell_identifier`. Sorted by first element first, then ties broken by subsequent elements.  Sorted in decreasing order for each element.
24 25
 #' @param chain_levels an optional `character` vector providing the sort order of the `chain` column in `tbl`.  Set to length zero to disable.
25 26
 #' @export
... ...
@@ -35,12 +36,40 @@ canonicalize_by_chain = function(tbl,  cell_identifiers = 'barcode', sort_factor
35 36
 
36 37
 }
37 38
 
38
-canonicalize_by_subset = function(ccdb, tie_break_keys = c('umis', 'reads'), contig_fields = tie_break_keys, ...){
39
+#' Return single contig for each cell based on filtering and sorting
40
+#'
41
+#' @param ccdb `ContigCellDB`
42
+#' @param ... unquoted expressions passed to `dplyr::filter` that will be applied to the `contig_tbl`
43
+#' @param tie_break_keys columns used to sort the `contig_tbl` in **decreasing** order
44
+#' @param order `integer` specifying which entry (ordinal) will be selected from the sorted contig_tbl, within each cell
45
+#' @param contig_fields columns in the `contig_tbl` that will be copied over to the `cell_tbl`
46
+#'
47
+#' @return `ContigCellDB` with additional fields in `cell_tbl`
48
+#' @export
49
+#'
50
+#' @examples
51
+#' # Report beta chain with highest umi-count, breaking ties with reads
52
+#' beta = canonicalize_by_subset(ccdb_ex, chain == 'TRB',
53
+#' tie_break_keys = c('umis', 'reads'),
54
+#' contig_fields = c('umis', 'reads', 'chain', 'v_gene', 'd_gene', 'j_gene'))
55
+#' head(beta$cell_tbl)
56
+#' # Only adds fields to `cell_tbl`
57
+#' stopifnot(all.equal(beta$cell_tbl[ccdb_ex$cell_pk],
58
+#' ccdb_ex$cell_tbl[ccdb_ex$cell_pk]))
59
+#' #Report cdr3 with highest UMI count, but only when > 5 UMIs support it
60
+#' umi5 = canonicalize_by_subset(ccdb_ex, umis > 5,
61
+#' tie_break_keys = c('umis', 'reads'), contig_fields = c('umis', 'cdr3'))
62
+#' stopifnot(all(umi5$cell_tbl$umis > 5, na.rm = TRUE))
63
+canonicalize_by_subset = function(ccdb, ...,  tie_break_keys = c('umis', 'reads'), contig_fields = tie_break_keys, order = 1){
39 64
     tbl = ccdb$contig_tbl
40
-    ft = filter(tbl, quos(...))
65
+    # Filter with expressions in ...
66
+    ft = filter(.data = tbl, !!!rlang::quos(...))
67
+    # setup quosures to arrange the data
41 68
     arranging = purrr::map(tie_break_keys, ~ rlang::quo(desc(!!sym(.x))))
42
-    ft2 = ft %>% group_by(!!!syms(ccdb$cell_pk)) %>% dplyr::arrange(!!!arranging)
69
+    # take first row of each cell
70
+    ft2 = ft %>% group_by(!!!syms(ccdb$cell_pk)) %>% dplyr::arrange(!!!arranging) %>% dplyr::do(dplyr::slice(., order))
43 71
     cell_tbl = ccdb$cell_tbl
72
+    # join with cell tbl (so same number of cells)
44 73
     ccdb$cell_tbl = right_join_warn(ft2[unique(c(contig_fields, ccdb$cell_pk))], cell_tbl, by = ccdb$cell_pk)
45 74
     ccdb
46 75
 }
... ...
@@ -52,6 +81,7 @@ canonicalize_by_subset = function(ccdb, tie_break_keys = c('umis', 'reads'), con
52 81
 
53 82
 #' Given a family of similar sequences, return a "representative"
54 83
 #'
84
+#' This function can be deprecated.
55 85
 #' @param seqs character vector
56 86
 #' @param medoid_idx optional index into seqs
57 87
 #' @param warn_if_distinct Should a warning be emitted if there are distinct elements in seqs?
... ...
@@ -60,10 +90,9 @@ canonicalize_by_subset = function(ccdb, tie_break_keys = c('umis', 'reads'), con
60 90
 #' sequence is returned
61 91
 #'
62 92
 #' @return character vector
63
-#' @export
64 93
 #'
65 94
 #' @examples
66
-#' get_canonical_representative(c('apple', 'manzana', 'pomme'))
95
+#' CellaRepertorium:::get_canonical_representative(c('apple', 'manzana', 'pomme'))
67 96
 get_canonical_representative = function(seqs, medoid_idx, warn_if_distinct = FALSE){
68 97
     if(!missing(medoid_idx)){
69 98
         if(!is.integer(medoid_idx) || medoid_idx < 1 || medoid_idx > length(seqs)) stop("Illegal `medoid_idx`")
... ...
@@ -79,6 +108,7 @@ get_canonical_representative = function(seqs, medoid_idx, warn_if_distinct = FAL
79 108
     return(rep)
80 109
 }
81 110
 
111
+# This should return a ccdb or a subclass?
82 112
 #' Generate a list of tables representing clusters paired in cells
83 113
 #'
84 114
 #' A contingency table of every combination of `cluster_idx` up to `table_order` is generated.
... ...
@@ -115,31 +145,37 @@ get_canonical_representative = function(seqs, medoid_idx, warn_if_distinct = FAL
115 145
 #' @importFrom rlang sym syms :=
116 146
 #' @examples
117 147
 #' library(dplyr)
118
-#' cluster_tbl = data_frame(clust_idx = gl(3, 2), cell_idx = rep(1:3, times = 2))
148
+#' tbl = tibble(clust_idx = gl(3, 2), cell_idx = rep(1:3, times = 2), contig_idx = 1:6)
149
+#' ccdb = ContigCellDB(tbl, contig_pk = c('cell_idx', 'contig_idx'),
150
+#' cell_pk = 'cell_idx', cluster_pk = 'clust_idx')
119 151
 #' # no pairs found twice
120
-#' pt1 = pairing_tables(cluster_tbl, 'cell_idx', 'clust_idx', canonicalize_by_prevalence)
152
+#' pt1 = pairing_tables(ccdb, canonicalize_by_prevalence)
121 153
 #' # all pairs found, found once.
122
-#' pt2 = pairing_tables(cluster_tbl, 'cell_idx', 'clust_idx',
123
-#'     canonicalize_by_prevalence, min_expansion = 1)
154
+#' pt2 = pairing_tables(ccdb, canonicalize_by_prevalence, min_expansion = 1)
124 155
 #' pt2$cell_tbl
125
-#' cluster_tbl2 = bind_rows(cluster_tbl, cluster_tbl %>% mutate(cell_idx = rep(4:6, times = 2)))
156
+#' tbl2 = bind_rows(tbl, tbl %>% mutate(cell_idx = rep(4:6, times = 2)))
157
+#' ccdb2 = ContigCellDB(tbl2, contig_pk = c('cell_idx', 'contig_idx'), cell_pk = 'cell_idx',
158
+#' cluster_pk = 'clust_idx')
126 159
 #' #all pairs found twice
127
-#' pt3 = pairing_tables(cluster_tbl2, 'cell_idx', 'clust_idx', canonicalize_by_prevalence, min_expansion = 1)
160
+#' pt3 = pairing_tables(ccdb2, canonicalize_by_prevalence, min_expansion = 1)
128 161
 #' pt3$cell_tbl
129 162
 #' # `canonicalize_by_chain` expects fields `umis`, `reads`
130 163
 #' # to break ties,  wrap the function to change this
131
-#' cluster_tbl3 = cluster_tbl2 %>%
164
+#' ccdb2$contig_tbl = ccdb2$contig_tbl %>%
132 165
 #'     mutate(umis = 1, reads = 1, chain = rep(c('TRA', 'TRB'), times = 6))
133
-#' pt4 = pairing_tables(cluster_tbl3, 'cell_idx', 'clust_idx',
134
-#'     canonicalize_by_chain, min_expansion = 1, table_order = 2)
135
-pairing_tables = function(cluster_tbl, cell_identifiers = 'barcode', cluster_idx = 'cluster_idx', canonicalize_fun = canonicalize_by_chain, table_order = 2, min_expansion = 2,  orphan_level = 1, cluster_whitelist = NULL, cluster_blacklist = NULL, cell_tbl = NULL, feature_tbl = NULL ){
166
+#' pt4 = pairing_tables(ccdb2, canonicalize_by_chain, min_expansion = 1, table_order = 2)
167
+pairing_tables = function(ccdb,  canonicalize_fun = canonicalize_by_chain, table_order = 2, min_expansion = 2,  orphan_level = 1, cluster_keys = character(), cluster_whitelist = NULL, cluster_blacklist = NULL){
136 168
 
137 169
     if(orphan_level > table_order) stop('`ophan_level` must be less than or equal to `table_order`')
138 170
     if(table_order < 1) stop('Table order must be at least 1')
139 171
 
140 172
     # get `table_order` most common clusters for each cell
141 173
     # forcibly rename cluster_idx -> "cluster_idx"
142
-    bar_chain_tbls = purrr::map(seq_len(table_order), function(i) canonicalize_fun(cluster_tbl, cell_identifiers = cell_identifiers, cluster_idx = cluster_idx, order = i) %>% dplyr::select(!!!c(syms(cell_identifiers), rlang::quo(cluster_idx))) %>% dplyr::rename( !!paste0('cluster_idx.', i) := !!cluster_idx))
174
+    contig_tbl = ccdb$contig_tbl
175
+    cell_identifiers = ccdb$cell_pk
176
+    cluster_idx = ccdb$cluster_pk
177
+    cell_tbl = ccdb$cell_tbl
178
+    bar_chain_tbls = purrr::map(seq_len(table_order), function(i) canonicalize_fun(contig_tbl, cell_identifiers = cell_identifiers, cluster_idx = cluster_idx, order = i) %>% dplyr::select(!!!c(syms(cell_identifiers), rlang::quo(cluster_idx))) %>% dplyr::rename( !!paste0('cluster_idx.', i) := !!cluster_idx))
143 179
     # for each cell, what clusters are present
144 180
     oligo_cluster_pairs = purrr::reduce(bar_chain_tbls, left_join, by = cell_identifiers)
145 181
 
... ...
@@ -52,7 +52,7 @@ get_gaps = function(ca){
52 52
     data_frame(vd_gap, dj_gap, vj_gap)
53 53
 }
54 54
 
55
-read_contig_json = function(file, seq_cols = c('quals', 'aa_sequence', '')){
55
+read_contig_json = function(anno_file, seq_cols = c('quals', 'aa_sequence', '')){
56 56
     jsn = fromJSON(file(anno_file), flatten = TRUE)
57 57
     # contig sequences and gaps
58 58
 }
... ...
@@ -22,7 +22,7 @@ ContigCellDB_10XVDJ(contig_tbl, contig_pk = c("barcode", "contig_id"),
22 22
 
23 23
 \item{cluster_tbl}{A data frame that provide cluster assignments for each contig}
24 24
 
25
-\item{cluster_pk}{If `cluster_tbl` was provided, a list of character vector naming fields in `cluster_tbl` that uniquely identify a cluster}
25
+\item{cluster_pk}{If `cluster_tbl` was provided, a character vector naming fields in `cluster_tbl` that uniquely identify a cluster}
26 26
 }
27 27
 \value{
28 28
 \code{ContigCellDB}
... ...
@@ -37,5 +37,6 @@ Construct a ContigCellDB
37 37
 
38 38
 \examples{
39 39
 data(contigs_qc)
40
-ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), cell_pk = c('barcode', 'pop', 'sample'))
40
+ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'),
41
+ cell_pk = c('barcode', 'pop', 'sample'))
41 42
 }
42 43
new file mode 100644
... ...
@@ -0,0 +1,40 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/pairing-methods.R
3
+\name{canonicalize_by_subset}
4
+\alias{canonicalize_by_subset}
5
+\title{Return single contig for each cell based on filtering and sorting}
6
+\usage{
7
+canonicalize_by_subset(ccdb, ..., tie_break_keys = c("umis", "reads"),
8
+  contig_fields = tie_break_keys, order = 1)
9
+}
10
+\arguments{
11
+\item{ccdb}{`ContigCellDB`}
12
+
13
+\item{...}{unquoted expressions passed to `dplyr::filter` that will be applied to the `contig_tbl`}
14
+
15
+\item{tie_break_keys}{columns used to sort the `contig_tbl` in **decreasing** order}
16
+
17
+\item{contig_fields}{columns in the `contig_tbl` that will be copied over to the `cell_tbl`}
18
+
19
+\item{order}{`integer` specifying which entry (ordinal) will be selected from the sorted contig_tbl, within each cell}
20
+}
21
+\value{
22
+`ContigCellDB` with additional fields in `cell_tbl`
23
+}
24
+\description{
25
+Return single contig for each cell based on filtering and sorting
26
+}
27
+\examples{
28
+# Report beta chain with highest umi-count, breaking ties with reads
29
+beta = canonicalize_by_subset(ccdb_ex, chain == 'TRB',
30
+tie_break_keys = c('umis', 'reads'),
31
+contig_fields = c('umis', 'reads', 'chain', 'v_gene', 'd_gene', 'j_gene'))
32
+head(beta$cell_tbl)
33
+# Only adds fields to `cell_tbl`
34
+stopifnot(all.equal(beta$cell_tbl[ccdb_ex$cell_pk],
35
+ccdb_ex$cell_tbl[ccdb_ex$cell_pk]))
36
+#Report cdr3 with highest UMI count, but only when > 5 UMIs support it
37
+umi5 = canonicalize_by_subset(ccdb_ex, umis > 5,
38
+tie_break_keys = c('umis', 'reads'), contig_fields = c('umis', 'cdr3'))
39
+stopifnot(all(umi5$cell_tbl$umis > 5, na.rm = TRUE))
40
+}
... ...
@@ -10,7 +10,7 @@
10 10
 \arguments{
11 11
 \item{x}{A ContigCellDB object}
12 12
 
13
-\item{name}{Name of a slot for a ContigCellDB object}
13
+\item{name}{a slot of a ContigCellDB object (one of  `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)}
14 14
 }
15 15
 \value{
16 16
 Slots of ContigCellDB
... ...
@@ -19,5 +19,7 @@ Slots of ContigCellDB
19 19
 Creat a method for ContigCellDB object to access its slots
20 20
 }
21 21
 \examples{
22
+ccdb_ex$contig_tbl
23
+ccdb_ex$cell_tbl
22 24
 ccdb_ex$cluster_tbl
23 25
 }
... ...
@@ -3,14 +3,14 @@
3 3
 \docType{methods}
4 4
 \name{$<-,ContigCellDB-method}
5 5
 \alias{$<-,ContigCellDB-method}
6
-\title{Creat a function of ContigCellDB object to replace values of its slots}
6
+\title{Create a function of ContigCellDB object to replace values of its slots}
7 7
 \usage{
8 8
 \S4method{$}{ContigCellDB}(x, name) <- value
9 9
 }
10 10
 \arguments{
11 11
 \item{x}{A ContigCellDB object}
12 12
 
13
-\item{name}{Name of a slot for a ContigCellDB object}
13
+\item{name}{Name of a slot for a ContigCellDB object (one of  `c('contig_tbl', 'cell_tbl', 'contig_pk', 'cell_pk', 'cluster_tbl', 'cluster_pk')`)}
14 14
 
15 15
 \item{value}{The value assigned to a slot of ContigCellDB object}
16 16
 }
... ...
@@ -18,8 +18,12 @@
18 18
 A ContigCellDB object
19 19
 }
20 20
 \description{
21
-Creat a function of ContigCellDB object to replace values of its slots
21
+Create a function of ContigCellDB object to replace values of its slots
22 22
 }
23 23
 \examples{
24
-ccdb_ex$contig_pk <- c("pop","barcode","contig_id")
24
+ccdb_ex$contig_pk = c("sample","barcode","contig_id") # 'pop' is technically redundant with 'sample'
25
+# Take a subset of ccdb_ex
26
+ccdb_ex
27
+ccdb_ex$contig_tbl = dplyr::filter(ccdb_ex$contig_tbl, pop == 'b6')
28
+ccdb_ex
25 29
 }
... ...
@@ -38,7 +38,7 @@ You may need to lower it below 5 for AAseq with identity less than .7.}
38 38
 
39 39
 \item{type}{one of 'DNA' or 'AA'}
40 40
 
41
-\item{cluster_name}{What index should the clustering be stored in?  By default, a new, unnamed cluster is added.}
41
+\item{cluster_name}{`character` specifying key, and name for the clustering.}
42 42
 }
43 43
 \value{
44 44
 vector of \code{integer} of length \code{seqs} providing the cluster ID for each sequence, or a `tibble`.  See details.
... ...
@@ -73,4 +73,9 @@ cdhit(aaseq,identity = 1, G = 0, aL = 1, aS = 1,  only_index = TRUE)[1:10]
73 73
 cdhit(aaseq,identity = 1, G = 0, aL = .9, aS = .9,  only_index = TRUE)[1:10]
74 74
 # a tibble
75 75
 tbl = cdhit(aaseq, identity = 1, G = 0, aL = .9, aS = .9, only_index = FALSE)
76
+res = CellaRepertorium:::cdhit_ccdb(ccdb_ex, 'cdr3_nt', type = 'DNA',
77
+cluster_name = 'DNA97', identity = .965, min_length = 12, G = 1)
78
+res$cluster_tbl
79
+res$contig_tbl
80
+res$cluster_pk
76 81
 }
... ...
@@ -27,6 +27,10 @@ fine_clustering(ccdb, sequence_key, type, max_affinity = NULL,
27 27
 Perform additional clustering of sequences within groups
28 28
 }
29 29
 \examples{
30
-ccdb_ex = CellaRepertorium:::cdhit_ccdb(ccdb_ex, 'cdr3_nt', type = 'DNA', cluster_name = 'DNA97', identity = .965, min_length = 12, G = 1)
31
-ccdb_ex = fine_clustering(ccdb_ex, sequence_key = 'cdr3_nt', type = 'DNA') 
30
+ccdb_ex_small = ccdb_ex
31
+ccdb_ex_small$cell_tbl = ccdb_ex_small$cell_tbl[1:200,]
32
+ccdb_ex_small = CellaRepertorium:::cdhit_ccdb(ccdb_ex_small,
33
+sequence_key = 'cdr3_nt', type = 'DNA', cluster_name = 'DNA97',
34
+identity = .965, min_length = 12, G = 1)
35
+ccdb_ex_small = fine_clustering(ccdb_ex_small, sequence_key = 'cdr3_nt', type = 'DNA')
32 36
 }
... ...
@@ -20,8 +20,8 @@ sequence is returned}
20 20
 character vector
21 21
 }
22 22
 \description{
23
-Given a family of similar sequences, return a "representative"
23
+This function can be deprecated.
24 24
 }
25 25
 \examples{
26
-get_canonical_representative(c('apple', 'manzana', 'pomme'))
26
+CellaRepertorium:::get_canonical_representative(c('apple', 'manzana', 'pomme'))
27 27
 }
... ...
@@ -4,20 +4,15 @@
4 4
 \alias{pairing_tables}
5 5
 \title{Generate a list of tables representing clusters paired in cells}
6 6
 \usage{
7
-pairing_tables(cluster_tbl, cell_identifiers = "barcode",
8
-  cluster_idx = "cluster_idx",
9
-  canonicalize_fun = canonicalize_by_chain, table_order = 2,
10
-  min_expansion = 2, orphan_level = 1, cluster_whitelist = NULL,
11
-  cluster_blacklist = NULL, cell_tbl = NULL, feature_tbl = NULL)
7
+pairing_tables(ccdb, canonicalize_fun = canonicalize_by_chain,
8
+  table_order = 2, min_expansion = 2, orphan_level = 1,
9
+  cluster_keys = character(), cluster_whitelist = NULL,
10
+  cluster_blacklist = NULL)
12 11
 }
13 12
 \arguments{
14
-\item{cluster_tbl}{a table with all combinations of clusters in all cells}
13
+\item{ccdb}{`ContigCellDB`}
15 14
 
16
-\item{cell_identifiers}{character vector naming fields that key a cell}
17
-
18
-\item{cluster_idx}{character naming a single field IDing the clusters}
19
-
20
-\item{canonicalize_fun}{a function with signature `canonicalize_fun(cluster_tbl, cell_identifiers, cluster_idx, order = i)` that for each `cell_identifier` returns a single contig that depends on the `order`.  For instance \link{canonicalize_by_prevalence} or \link{canonicalize_by_chain}.}
15
+\item{canonicalize_fun}{a function with signature `canonicalize_fun(ContigCellDB, order = i)` that for each cell, returns a single contig that depends on the `order`.  For instance \link{canonicalize_by_prevalence} or \link{canonicalize_by_chain}.}
21 16
 
22 17
 \item{table_order}{Integer larger than 1. What order of cluster_idx will be paired, eg, order = 2 means that the most common and second most common cluster_idx will be sought for each cell}
23 18
 
... ...
@@ -25,13 +20,11 @@ pairing_tables(cluster_tbl, cell_identifiers = "barcode",
25 20
 
26 21
 \item{orphan_level}{Integer larger than 0 and less than or equal to `table_order`.  Given that at least `min_expansion` cells are found that have `table_order` chains identical, how many `cluster_idx` pairs will we match on to select other cells.  Example: `ophan_level=1` means that cells that share just a single chain with the}
27 22
 
28
-\item{cluster_whitelist}{a table of "cluster_idx" that should always be reported.  In contrast to the `cluster_tbl`, here the clusters must be named "cluster_idx.1", "cluster_idx.2" (if order-2 pairs are being selected).}
29
-
30
-\item{cluster_blacklist}{a table of "cluster_idx" that will never be reported.  Must be named as per `cluster_whitelist`.}
23
+\item{cluster_keys}{optional `character` naming additional columns in `ccdb$cluster_tbl` to be reported in the pairing}
31 24
 
32
-\item{cell_tbl}{optional, ancillary table with additional cell features.  Must also be keyed by `cell_identifiers`}
25
+\item{cluster_whitelist}{a table of pairings or clusters that should always be reported.  Here the clusters must be named "cluster_idx.1", "cluster_idx.2" (if order-2 pairs are being selected) rather than with `ccdb$cluster_pk``}
33 26
 
34
-\item{feature_tbl}{optional, ancillary table with additional cluster features.  Must also be keyed by `cluster_idx`}
27
+\item{cluster_blacklist}{a table of pairings or clusters that will never be reported.  Must be named as per `cluster_whitelist`.}
35 28
 }
36 29
 \value{
37 30
 list of tables.  The `cell_tbl` is keyed by the `cell_identifiers`, with fields "cluster_idx.1", "cluster_idx.2", etc, IDing the contigs present in each cell. "cluster_idx.1_fct" and "cluster_idx.2_fct" cast these fields to factors and are reordered to maximize the number of pairs along the diagonal. The `idx1_tbl` and `idx2_tbl` report information (passed in about the `cluster_idx` by `feature_tbl`.)  The `cluster_pair_tbl` reports all pairings found of contigs, and the number of times observed.
... ...
@@ -54,23 +47,25 @@ This facilitates plotting.
54 47
 
55 48
 \examples{
56 49
 library(dplyr)
57
-cluster_tbl = data_frame(clust_idx = gl(3, 2), cell_idx = rep(1:3, times = 2))
50
+tbl = tibble(clust_idx = gl(3, 2), cell_idx = rep(1:3, times = 2), contig_idx = 1:6)
51
+ccdb = ContigCellDB(tbl, contig_pk = c('cell_idx', 'contig_idx'),
52
+cell_pk = 'cell_idx', cluster_pk = 'clust_idx')
58 53
 # no pairs found twice
59
-pt1 = pairing_tables(cluster_tbl, 'cell_idx', 'clust_idx', canonicalize_by_prevalence)
54
+pt1 = pairing_tables(ccdb, canonicalize_by_prevalence)
60 55
 # all pairs found, found once.
61
-pt2 = pairing_tables(cluster_tbl, 'cell_idx', 'clust_idx',
62
-    canonicalize_by_prevalence, min_expansion = 1)
56
+pt2 = pairing_tables(ccdb, canonicalize_by_prevalence, min_expansion = 1)
63 57
 pt2$cell_tbl
64
-cluster_tbl2 = bind_rows(cluster_tbl, cluster_tbl \%>\% mutate(cell_idx = rep(4:6, times = 2)))
58
+tbl2 = bind_rows(tbl, tbl \%>\% mutate(cell_idx = rep(4:6, times = 2)))
59
+ccdb2 = ContigCellDB(tbl2, contig_pk = c('cell_idx', 'contig_idx'), cell_pk = 'cell_idx',
60
+cluster_pk = 'clust_idx')
65 61
 #all pairs found twice
66
-pt3 = pairing_tables(cluster_tbl2, 'cell_idx', 'clust_idx', canonicalize_by_prevalence, min_expansion = 1)
62
+pt3 = pairing_tables(ccdb2, canonicalize_by_prevalence, min_expansion = 1)
67 63
 pt3$cell_tbl
68 64
 # `canonicalize_by_chain` expects fields `umis`, `reads`
69 65
 # to break ties,  wrap the function to change this
70
-cluster_tbl3 = cluster_tbl2 \%>\%
66
+ccdb2$contig_tbl = ccdb2$contig_tbl \%>\%
71 67
     mutate(umis = 1, reads = 1, chain = rep(c('TRA', 'TRB'), times = 6))
72
-pt4 = pairing_tables(cluster_tbl3, 'cell_idx', 'clust_idx',
73
-    canonicalize_by_chain, min_expansion = 1, table_order = 2)
68
+pt4 = pairing_tables(ccdb2, canonicalize_by_chain, min_expansion = 1, table_order = 2)
74 69
 }
75 70
 \seealso{
76 71
 canonicalize_by_prevalence, canonicalize_by_chain
77 72
deleted file mode 100644
78 73
Binary files a/src/CellaRepertorium.dll and /dev/null differ
79 74
new file mode 100644
... ...
@@ -0,0 +1,339 @@
1
+                    GNU GENERAL PUBLIC LICENSE
2
+                       Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+                            Preamble
10
+
11
+  The licenses for most software are designed to take away your
12
+freedom to share and change it.  By contrast, the GNU General Public
13
+License is intended to guarantee your freedom to share and change free
14
+software--to make sure the software is free for all its users.  This
15
+General Public License applies to most of the Free Software
16
+Foundation's software and to any other program whose authors commit to
17
+using it.  (Some other Free Software Foundation software is covered by
18
+the GNU Lesser General Public License instead.)  You can apply it to
19
+your programs, too.
20
+
21
+  When we speak of free software, we are referring to freedom, not
22
+price.  Our General Public Licenses are designed to make sure that you
23
+have the freedom to distribute copies of free software (and charge for
24
+this service if you wish), that you receive source code or can get it
25
+if you want it, that you can change the software or use pieces of it
26
+in new free programs; and that you know you can do these things.
27
+
28
+  To protect your rights, we need to make restrictions that forbid
29
+anyone to deny you these rights or to ask you to surrender the rights.
30
+These restrictions translate to certain responsibilities for you if you
31
+distribute copies of the software, or if you modify it.
32
+
33
+  For example, if you distribute copies of such a program, whether
34
+gratis or for a fee, you must give the recipients all the rights that
35
+you have.  You must make sure that they, too, receive or can get the
36
+source code.  And you must show them these terms so they know their
37
+rights.
38
+
39
+  We protect your rights with two steps: (1) copyright the software, and
40
+(2) offer you this license which gives you legal permission to copy,
41
+distribute and/or modify the software.
42
+
43
+  Also, for each author's protection and ours, we want to make certain
44
+that everyone understands that there is no warranty for this free
45
+software.  If the software is modified by someone else and passed on, we
46
+want its recipients to know that what they have is not the original, so
47
+that any problems introduced by others will not reflect on the original
48
+authors' reputations.
49
+
50
+  Finally, any free program is threatened constantly by software
51
+patents.  We wish to avoid the danger that redistributors of a free
52
+program will individually obtain patent licenses, in effect making the
53
+program proprietary.  To prevent this, we have made it clear that any
54
+patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+  The precise terms and conditions for copying, distribution and
57
+modification follow.
58
+
59
+                    GNU GENERAL PUBLIC LICENSE
60
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+  0. This License applies to any program or other work which contains
63
+a notice placed by the copyright holder saying it may be distributed
64
+under the terms of this General Public License.  The "Program", below,
65
+refers to any such program or work, and a "work based on the Program"
66
+means either the Program or any derivative work under copyright law:
67
+that is to say, a work containing the Program or a portion of it,
68
+either verbatim or with modifications and/or translated into another
69
+language.  (Hereinafter, translation is included without limitation in
70
+the term "modification".)  Each licensee is addressed as "you".
71
+
72
+Activities other than copying, distribution and modification are not
73
+covered by this License; they are outside its scope.  The act of
74
+running the Program is not restricted, and the output from the Program
75
+is covered only if its contents constitute a work based on the
76
+Program (independent of having been made by running the Program).
77
+Whether that is true depends on what the Program does.
78
+
79
+  1. You may copy and distribute verbatim copies of the Program's
80
+source code as you receive it, in any medium, provided that you
81
+conspicuously and appropriately publish on each copy an appropriate
82
+copyright notice and disclaimer of warranty; keep intact all the
83
+notices that refer to this License and to the absence of any warranty;
84
+and give any other recipients of the Program a copy of this License
85
+along with the Program.
86
+
87
+You may charge a fee for the physical act of transferring a copy, and
88
+you may at your option offer warranty protection in exchange for a fee.
89
+
90
+  2. You may modify your copy or copies of the Program or any portion
91
+of it, thus forming a work based on the Program, and copy and
92
+distribute such modifications or work under the terms of Section 1
93
+above, provided that you also meet all of these conditions:
94
+
95
+    a) You must cause the modified files to carry prominent notices
96
+    stating that you changed the files and the date of any change.
97
+
98
+    b) You must cause any work that you distribute or publish, that in
99
+    whole or in part contains or is derived from the Program or any
100
+    part thereof, to be licensed as a whole at no charge to all third
101
+    parties under the terms of this License.
102
+
103
+    c) If the modified program normally reads commands interactively
104
+    when run, you must cause it, when started running for such
105
+    interactive use in the most ordinary way, to print or display an
106
+    announcement including an appropriate copyright notice and a
107
+    notice that there is no warranty (or else, saying that you provide
108
+    a warranty) and that users may redistribute the program under
109
+    these conditions, and telling the user how to view a copy of this
110
+    License.  (Exception: if the Program itself is interactive but
111
+    does not normally print such an announcement, your work based on
112
+    the Program is not required to print an announcement.)
113
+
114
+These requirements apply to the modified work as a whole.  If
115
+identifiable sections of that work are not derived from the Program,
116
+and can be reasonably considered independent and separate works in
117
+themselves, then this License, and its terms, do not apply to those
118
+sections when you distribute them as separate works.  But when you
119
+distribute the same sections as part of a whole which is a work based
120
+on the Program, the distribution of the whole must be on the terms of
121
+this License, whose permissions for other licensees extend to the
122
+entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+Thus, it is not the intent of this section to claim rights or contest
125
+your rights to work written entirely by you; rather, the intent is to
126
+exercise the right to control the distribution of derivative or
127
+collective works based on the Program.
128
+
129
+In addition, mere aggregation of another work not based on the Program
130
+with the Program (or with a work based on the Program) on a volume of
131
+a storage or distribution medium does not bring the other work under
132
+the scope of this License.
133
+
134
+  3. You may copy and distribute the Program (or a work based on it,
135
+under Section 2) in object code or executable form under the terms of
136
+Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+    a) Accompany it with the complete corresponding machine-readable
139
+    source code, which must be distributed under the terms of Sections
140
+    1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+    b) Accompany it with a written offer, valid for at least three
143
+    years, to give any third party, for a charge no more than your
144
+    cost of physically performing source distribution, a complete
145
+    machine-readable copy of the corresponding source code, to be
146
+    distributed under the terms of Sections 1 and 2 above on a medium
147
+    customarily used for software interchange; or,
148
+
149
+    c) Accompany it with the information you received as to the offer
150
+    to distribute corresponding source code.  (This alternative is
151
+    allowed only for noncommercial distribution and only if you
152
+    received the program in object code or executable form with such
153
+    an offer, in accord with Subsection b above.)
154
+
155
+The source code for a work means the preferred form of the work for
156
+making modifications to it.  For an executable work, complete source
157
+code means all the source code for all modules it contains, plus any
158
+associated interface definition files, plus the scripts used to
159
+control compilation and installation of the executable.  However, as a
160
+special exception, the source code distributed need not include
161
+anything that is normally distributed (in either source or binary
162
+form) with the major components (compiler, kernel, and so on) of the
163
+operating system on which the executable runs, unless that component
164
+itself accompanies the executable.
165
+
166
+If distribution of executable or object code is made by offering
167
+access to copy from a designated place, then offering equivalent
168
+access to copy the source code from the same place counts as
169
+distribution of the source code, even though third parties are not
170
+compelled to copy the source along with the object code.
171
+
172
+  4. You may not copy, modify, sublicense, or distribute the Program
173
+except as expressly provided under this License.  Any attempt
174
+otherwise to copy, modify, sublicense or distribute the Program is
175
+void, and will automatically terminate your rights under this License.
176
+However, parties who have received copies, or rights, from you under
177
+this License will not have their licenses terminated so long as such
178
+parties remain in full compliance.
179
+
180
+  5. You are not required to accept this License, since you have not
181
+signed it.  However, nothing else grants you permission to modify or
182
+distribute the Program or its derivative works.  These actions are
183
+prohibited by law if you do not accept this License.  Therefore, by
184
+modifying or distributing the Program (or any work based on the
185
+Program), you indicate your acceptance of this License to do so, and
186
+all its terms and conditions for copying, distributing or modifying
187
+the Program or works based on it.
188
+
189
+  6. Each time you redistribute the Program (or any work based on the
190
+Program), the recipient automatically receives a license from the
191
+original licensor to copy, distribute or modify the Program subject to
192
+these terms and conditions.  You may not impose any further
193
+restrictions on the recipients' exercise of the rights granted herein.
194
+You are not responsible for enforcing compliance by third parties to
195
+this License.
196
+
197
+  7. If, as a consequence of a court judgment or allegation of patent
198
+infringement or for any other reason (not limited to patent issues),
199
+conditions are imposed on you (whether by court order, agreement or
200
+otherwise) that contradict the conditions of this License, they do not
201
+excuse you from the conditions of this License.  If you cannot
202
+distribute so as to satisfy simultaneously your obligations under this
203
+License and any other pertinent obligations, then as a consequence you
204
+may not distribute the Program at all.  For example, if a patent
205
+license would not permit royalty-free redistribution of the Program by
206
+all those who receive copies directly or indirectly through you, then
207
+the only way you could satisfy both it and this License would be to
208
+refrain entirely from distribution of the Program.
209
+
210
+If any portion of this section is held invalid or unenforceable under
211
+any particular circumstance, the balance of the section is intended to
212
+apply and the section as a whole is intended to apply in other
213
+circumstances.
214
+
215
+It is not the purpose of this section to induce you to infringe any
216
+patents or other property right claims or to contest validity of any
217
+such claims; this section has the sole purpose of protecting the
218
+integrity of the free software distribution system, which is
219
+implemented by public license practices.  Many people have made
220
+generous contributions to the wide range of software distributed
221
+through that system in reliance on consistent application of that
222
+system; it is up to the author/donor to decide if he or she is willing
223
+to distribute software through any other system and a licensee cannot
224
+impose that choice.
225
+
226
+This section is intended to make thoroughly clear what is believed to
227
+be a consequence of the rest of this License.
228
+
229
+  8. If the distribution and/or use of the Program is restricted in
230
+certain countries either by patents or by copyrighted interfaces, the
231
+original copyright holder who places the Program under this License
232
+may add an explicit geographical distribution limitation excluding
233
+those countries, so that distribution is permitted only in or among
234
+countries not thus excluded.  In such case, this License incorporates
235
+the limitation as if written in the body of this License.
236
+
237
+  9. The Free Software Foundation may publish revised and/or new versions
238
+of the General Public License from time to time.  Such new versions will
239
+be similar in spirit to the present version, but may differ in detail to
240
+address new problems or concerns.
241
+
242
+Each version is given a distinguishing version number.  If the Program
243
+specifies a version number of this License which applies to it and "any
244
+later version", you have the option of following the terms and conditions
245
+either of that version or of any later version published by the Free
246
+Software Foundation.  If the Program does not specify a version number of
247
+this License, you may choose any version ever published by the Free Software
248
+Foundation.
249
+
250
+  10. If you wish to incorporate parts of the Program into other free
251
+programs whose distribution conditions are different, write to the author
252
+to ask for permission.  For software which is copyrighted by the Free
253
+Software Foundation, write to the Free Software Foundation; we sometimes
254
+make exceptions for this.  Our decision will be guided by the two goals
255
+of preserving the free status of all derivatives of our free software and
256
+of promoting the sharing and reuse of software generally.
257
+
258
+                            NO WARRANTY
259
+
260
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+REPAIR OR CORRECTION.
269
+
270
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+POSSIBILITY OF SUCH DAMAGES.
279
+
280
+                     END OF TERMS AND CONDITIONS
281
+
282
+            How to Apply These Terms to Your New Programs
283
+
284
+  If you develop a new program, and you want it to be of the greatest
285
+possible use to the public, the best way to achieve this is to make it
286
+free software which everyone can redistribute and change under these terms.
287
+
288
+  To do so, attach the following notices to the program.  It is safest
289
+to attach them to the start of each source file to most effectively
290
+convey the exclusion of warranty; and each file should have at least
291
+the "copyright" line and a pointer to where the full notice is found.
292
+
293
+    <one line to give the program's name and a brief idea of what it does.>
294
+    Copyright (C) <year>  <name of author>
295
+
296
+    This program is free software; you can redistribute it and/or modify
297
+    it under the terms of the GNU General Public License as published by
298
+    the Free Software Foundation; either version 2 of the License, or
299
+    (at your option) any later version.
300
+
301
+    This program is distributed in the hope that it will be useful,
302
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
303
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304
+    GNU General Public License for more details.
305
+
306
+    You should have received a copy of the GNU General Public License along
307
+    with this program; if not, write to the Free Software Foundation, Inc.,
308
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
+
310
+Also add information on how to contact you by electronic and paper mail.
311
+
312
+If the program is interactive, make it output a short notice like this
313
+when it starts in an interactive mode:
314
+
315
+    Gnomovision version 69, Copyright (C) year name of author
316
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
+    This is free software, and you are welcome to redistribute it
318
+    under certain conditions; type `show c' for details.
319
+
320
+The hypothetical commands `show w' and `show c' should show the appropriate
321
+parts of the General Public License.  Of course, the commands you use may
322
+be called something other than `show w' and `show c'; they could even be
323
+mouse-clicks or menu items--whatever suits your program.
324
+
325
+You should also get your employer (if you work as a programmer) or your
326
+school, if any, to sign a "copyright disclaimer" for the program, if
327
+necessary.  Here is a sample; alter the names:
328
+
329
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
+
332
+  <signature of Ty Coon>, 1 April 1989
333
+  Ty Coon, President of Vice
334
+
335
+This General Public License does not permit incorporating your program into
336
+proprietary programs.  If your program is a subroutine library, you may
337
+consider it more useful to permit linking proprietary applications with the
338
+library.  If this is what you want to do, use the GNU Lesser General
339
+Public License instead of this License.