Browse code

Fixed build errors

From: davidcoffey <dcoffey@fredhutch.org>

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/LymphoSeq@128435 bc3139a8-67e5-0310-9ffc-ced21a209358

David Coffey authored on 10/04/2017 01:10:26
Showing 1 changed files

1 1
deleted file mode 100644
... ...
@@ -1,74 +0,0 @@
1
-#' Clonality
2
-#' 
3
-#' Creates a data frame giving the total number of sequences, number of unique 
4
-#' productive sequences, number of genomes, entropy, clonality, Gini 
5
-#' coefficient, and the frequency (\%) of the top productive sequences in a list 
6
-#' of sample data frames.
7
-#' 
8
-#' @param file.list A list of data frames consisting of antigen receptor 
9
-#' sequencing imported by the LymphoSeq function readImmunoSeq. "aminoAcid", "count", 
10
-#' and "frequencyCount" are required columns.  "estimatedNumberGenomes" is optional.  
11
-#' Note that clonality is usually calculated from productive nucleotide sequences.  
12
-#' Therefore, it is not recommended to run this function using a productive sequence
13
-#' list aggregated by amino acids.
14
-#' @return Returns a data frame giving the total number of sequences, number of 
15
-#' unique productive sequences, number of genomes, clonality, Gini coefficient, 
16
-#' and the frequency (\%) of the top productive sequence in each sample.
17
-#' @details Clonality is derived from the Shannon entropy, which is calculated 
18
-#' from the frequencies of all productive sequences divided by the logarithm of 
19
-#' the total number of unique productive sequences.  This normalized entropy 
20
-#' value is then inverted (1 - normalized entropy) to produce the clonality 
21
-#' metric.  
22
-#' 
23
-#' The Gini coefficient is an alternative metric used to calculate repertoire 
24
-#' diversity and is derived from the Lorenz curve.  The Lorenz curve is drawn 
25
-#' such that x-axis represents the cumulative percentage of unique sequences and 
26
-#' the y-axis represents the cumulative percentage of reads.  A line passing 
27
-#' through the origin with a slope of 1 reflects equal frequencies of all clones.  
28
-#' The Gini coefficient is the ratio of the area between the line of equality 
29
-#' and the observed Lorenz curve over the total area under the line of equality.  
30
-#' Both Gini coefficient and clonality are reported on a scale from 0 to 1 where 
31
-#' 0 indicates all sequences have the same frequency and 1 indicates the 
32
-#' repertoire is dominated by a single sequence.
33
-#' @examples
34
-#' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq")
35
-#' 
36
-#' file.list <- readImmunoSeq(path = file.path)
37
-#' 
38
-#' clonality(file.list = file.list)
39
-#' @seealso \code{\link{lorenzCurve}}
40
-#' @export
41
-#' @importFrom ineq Gini
42
-clonality <- function(file.list) {
43
-    table <- data.frame(samples = names(file.list))
44
-    i <- 1
45
-    for (i in 1:length(file.list)) {
46
-        file <- file.list[[i]]
47
-        total.reads <- nrow(file)
48
-        file$estimatedNumberGenomes <- suppressWarnings(as.integer(file$estimatedNumberGenomes))
49
-        total.genomes <- sum(file$estimatedNumberGenomes)
50
-        total.count <- sum(file$count)
51
-        productive <- file[!grepl("\\*", file$aminoAcid) & file$aminoAcid != "", ]
52
-        frequency <- productive$count/sum(productive$count)
53
-        entropy <- -sum(frequency * log2(frequency), na.rm = TRUE)
54
-        unique.productive <- nrow(productive)
55
-        clonality <- 1 - round(entropy/log2(unique.productive), digits = 6)
56
-        table$totalSequences[i] <- total.reads
57
-        table$uniqueProductiveSequences[i] <- unique.productive
58
-        table$totalGenomes[i] <- total.genomes
59
-        table$totalCount[i] <- total.count
60
-        table$clonality[i] <- clonality
61
-        table$giniCoefficient[i] <- ineq::Gini(frequency)
62
-        table$topProductiveSequence[i] <- max(frequency) * 100
63
-        if (any(grepl("estimatedNumberGenomes", colnames(file)))) {
64
-            file$estimatedNumberGenomes <- suppressWarnings(as.integer(file$estimatedNumberGenomes))
65
-            total.genomes <- sum(file$estimatedNumberGenomes)
66
-            table$totalGenomes[i] <- ifelse(total.genomes == 0, NA, total.genomes)
67
-        } else {
68
-            table$totalGenomes[i] <- NA
69
-        }
70
-    }
71
-    table <- table[order(table$topProductiveSequence, decreasing = TRUE), ]
72
-    rownames(table) <- NULL
73
-    return(table)
74
-} 
75 0
\ No newline at end of file