#' Create phylogenetic tree #' #' Create a phylogenetic tree using neighbor joining tree estimation for amino acid or #' nucleotide CDR3 sequences in a list of data frames. #' #' @param list A list data frames of unproductive nucleotide sequences or productive #' nucleotide sequences generated by the LymphoSeq function productiveSeq. #' vFamilyName, dFamilyName, jFamilyName, and count are required columns. #' @param sample A character vector indicating the name of the sample in the productive #' sequence list. #' @param type A character vector indicating whether "aminoAcid" or "nucleotide" sequences #' should be compared. #' @param layout A character vector indicating the tree layout. Options include #' "rectangular", "slanted", "fan", "circular", "radial" and "unrooted". #' @param label A Boolean indicating if the sequencing count should be shown next to the leaves. #' @return Returns a phylogenetic tree where each leaf represents a sequence color coded by the #' V, D, and J gene usage. The number next to each leaf refers to the sequence count. A triangle #' shaped leaf indicates the dominant sequence. Refer to the ggtree Bioconductor package #' documentation for details on how to manipulate the tree. #' @examples #' file.path <- system.file("extdata", "IGH_sequencing", package = "LymphoSeq") #' #' file.list <- readImmunoSeq(path = file.path) #' #' productive.nt <- productiveSeq(file.list = file.list, aggregate = "nucleotide") #' #' phyloTree(list = productive.nt, sample = "IGH_MVQ92552A_BL", type = "nucleotide", #' layout = "rectangular") #' #' phyloTree(list = productive.nt, sample = "IGH_MVQ92552A_BL", type = "aminoAcid", #' layout = "circular") #' #' # Add scale and title to figure #' library(ggtree) #' library(ggplot2) #' phyloTree(list = productive.nt, sample = "IGH_MVQ92552A_BL", type = "aminoAcid", #' layout = "rectangular") + #' ggtree::theme_tree2() + #' ggplot2::theme(legend.position = "right", legend.key = element_rect(colour = "white")) + #' ggplot2::ggtitle("Title") #' #' # Hide legend and leaf labels #' phyloTree(list = productive.nt, sample = "IGH_MVQ92552A_BL", type = "nucleotide", #' layout = "rectangular", label = FALSE) + #' ggplot2::theme(legend.position="none") #' #' @export #' @import ggtree #' @import ggplot2 #' @importFrom phangorn NJ phyloTree <- function(list, sample, type = "nucleotide", layout = "rectangular", label = TRUE) { file <- list[[sample]] if(length(grep(pattern = "vFamilyName|dFamilyName|jFamilyName|count", names(file))) != 4){ stop("vFamilyName, dFamilyName, jFamilyName, nucleotide, and count are required columns. Try aggregating the sequences by 'nucleotide' usng the function productiveSeq.", call. = FALSE) } if(nrow(file) < 3){ stop("Cannot draw phlogenetic tree with less than 3 sequences.", call. = FALSE) } file[file == ""] <- "unresolved" if(type == "nucleotide") { file <- file[nchar(file$nucleotide) > 9, ] distance <- adist(file$nucleotide, file$nucleotide) colnames(distance) <- rownames(distance) <- file$nucleotide names <- file$nucleotide } if(type == "aminoAcid") { file <- file[nchar(file$aminoAcid) > 3, ] distance <- adist(file$aminoAcid, file$aminoAcid) colnames(distance) <- rownames(distance) <- file$aminoAcid names <- file$aminoAcid } tree <- phangorn::NJ(distance) geneFamilies <- paste(file$vFamilyName, file$dFamilyName, file$jFamilyName) geneFamilies <- gsub(geneFamilies, pattern = "IGH|IGL|IGK|TCRB|TCRA", replacement = "") geneFamilies <- gsub(geneFamilies, pattern = "unresolved", replacement = "UNR") tree.annotation <- data.frame(names = names, count = file$count, geneFamilies = geneFamilies, aminoAcid = file$aminoAcid, frequencyCount = round(file$frequencyCount, digits = 2), dominant = c("Yes", rep("No", nrow(file) - 1))) getPalette <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(9, "Set1")) plot <- ggtree::ggtree(tree, layout = layout) %<+% tree.annotation + ggtree::geom_tippoint(aes_string(color = "geneFamilies", shape = "dominant"), size = 3) + ggplot2::scale_color_manual(values = getPalette(length(unique(geneFamilies)))) + ggplot2::guides(shape = FALSE) + ggplot2::theme(legend.position = "bottom", legend.key = element_rect(colour = "white")) + ggplot2::labs(color = "") if(label == TRUE){ plot <- plot + ggplot2::geom_text(aes_string(label="count"), nudge_x = 0.5, hjust = 0, size = 2.5, na.rm = TRUE, check_overlap = TRUE) } return(plot) }