#' Lorenz curve
#'
#' Plots a Lorenz curve derived from the frequency of the amino acid sequences.
#'
#' @param samples A character vector of sample names in list.  Every name must
#' be present in \code{names(list)}; at least one name is required.
#' @param list A list of data frames generated using the LymphoSeq function
#' readImmunoSeq or productiveSeq.  "frequencyCount" is a required column.
#' @return Returns a Lorenz curve as a ggplot object, with one line per sample
#' and a dashed grey line of equality.
#' @details The Gini coefficient is an alternative metric used to calculate
#' repertoire diversity and is derived from the Lorenz curve.  The Lorenz curve
#' is drawn such that x-axis represents the cumulative percentage of unique
#' sequences and the y-axis represents the cumulative percentage of reads.  A
#' line passing through the origin with a slope of 1 reflects equal frequencies
#' of all sequences.  The Gini coefficient is the ratio of the area between the
#' line of equality and the observed Lorenz curve over the total area under the
#' line of equality.
#'
#' The plot is made using the package ggplot2 and can be reformatted
#' using ggplot2 functions.  See examples below.
#'
#' An error is raised when \code{samples} is empty, when a sample name is not
#' found in \code{list}, or when a selected data frame has no
#' "frequencyCount" column.
#' @seealso An excellent resource for examples on how to reformat a ggplot can
#' be found in the R Graphics Cookbook online (\url{http://www.cookbook-r.com/Graphs/}).
#' @examples
#' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq")
#'
#' file.list <- readImmunoSeq(path = file.path)
#'
#' lorenzCurve(samples = names(file.list), list = file.list)
#'
#' productive.aa <- productiveSeq(file.list = file.list, aggregate = "aminoAcid")
#'
#' lorenzCurve(samples = names(productive.aa), list = productive.aa)
#'
#' # Change the legend labels, line colors, and add a title
#' samples <- c("TRB_Unsorted_0", "TRB_Unsorted_32",
#' "TRB_Unsorted_83", "TRB_Unsorted_949", "TRB_Unsorted_1320")
#'
#' lorenz.curve <- lorenzCurve(samples = samples, list = productive.aa)
#'
#' labels <- c("Day 0", "Day 32", "Day 83", "Day 949", "Day 1320")
#'
#' colors <- c("navyblue", "red", "darkgreen", "orange", "purple")
#'
#' lorenz.curve + ggplot2::scale_color_manual(name = "Samples", breaks = samples,
#' labels = labels, values = colors) + ggplot2::ggtitle("Figure Title")
#' @export
#' @import ggplot2
#' @importFrom RColorBrewer brewer.pal
#' @importFrom ineq Lc
lorenzCurve <- function(samples, list) {
  # Factors would otherwise index `list` by their integer codes.
  samples <- as.character(samples)

  # Validate everything up front so that bad input fails with a clear message
  # before any curve is computed.
  if (length(samples) == 0L) {
    stop("`samples` must contain at least one sample name.", call. = FALSE)
  }
  unknown <- setdiff(samples, names(list))
  if (length(unknown) > 0L) {
    stop(
      "`samples` not found in `list`: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  has_freq <- vapply(
    samples,
    function(s) !is.null(list[[s]]$frequencyCount),
    logical(1)
  )
  if (!all(has_freq)) {
    stop(
      "Required column \"frequencyCount\" is missing for: ",
      paste(unique(samples[!has_freq]), collapse = ", "),
      call. = FALSE
    )
  }

  # One data frame of Lorenz curve coordinates per sample.
  curves <- lapply(unname(samples), function(s) {
    lc <- ineq::Lc(list[[s]]$frequencyCount)
    data.frame(L = lc$L, p = lc$p, sample = s, stringsAsFactors = FALSE)
  })

  # Bind once instead of growing a data frame inside a loop.  Later samples
  # come first so the row order of the plot data matches earlier versions.
  lorenz <- do.call(rbind, rev(curves))

  # Interpolate the 9-colour "Set1" palette to the number of colours requested.
  get_palette <- grDevices::colorRampPalette(
    RColorBrewer::brewer.pal(9, "Set1")
  )

  ggplot2::ggplot(
    lorenz,
    ggplot2::aes_string(x = "p", y = "L", color = "sample")
  ) +
    ggplot2::geom_line(size = 1) +
    ggplot2::theme_minimal() +
    ggplot2::scale_color_manual(values = get_palette(length(samples) + 1)) +
    ggplot2::scale_y_continuous(expand = c(0, 0)) +
    ggplot2::scale_x_continuous(expand = c(0, 0)) +
    # Dashed reference line: equal frequencies of all sequences.
    ggplot2::geom_abline(
      intercept = 0, slope = 1, color = "grey", linetype = 2
    ) +
    ggplot2::coord_fixed() +
    ggplot2::labs(
      x = "Cumulative percentage of unique sequences",
      y = "Cumulative percentage of reads",
      color = ""
    )
}