#' Common sequences plot #' #' Creates a scatter plot of just the sequences in common between two samples. #' #' @param sample1 A name of a sample in a list of data frames generated by the #' LymphoSeq function productiveSeq. #' @param sample2 A name of a sample in a list of data frames generated by the #' LymphoSeq function productiveSeq. #' @param productive.aa A list of data frames of productive amino acid sequences #' produced by the LymphoSeq function productiveSeq containing the #' samples to be compared. #' @return Returns a frequency scatter plot of two samples showing only the #' shared sequences. #' @details The plot is made using the package ggplot2 and can be reformatted #' using ggplot2 functions. See examples below. #' @seealso An excellent resource for examples on how to reformat a ggplot can #' be found in the R Graphics Cookbook online (\url{http://www.cookbook-r.com/Graphs/}). #' @examples #' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq") #' #' file.list <- readImmunoSeq(path = file.path) #' #' productive.aa <- productiveSeq(file.list = file.list, aggregate = "aminoAcid") #' #' commonSeqsPlot("TCRB_Day32_Unsorted", "TCRB_Day83_Unsorted", #' productive.aa = productive.aa) #' #' # Change the X and Y axises to log-10 scale #' commonSeqsPlot("TCRB_Day32_Unsorted", "TCRB_Day83_Unsorted", #' productive.aa = productive.aa) + #' ggplot2::scale_x_log10() + #' ggplot2::scale_y_log10() + #' ggplot2::annotation_logticks(sides = "bl") #' @export #' @import ggplot2 commonSeqsPlot <- function(sample1, sample2, productive.aa) { if(any(unlist(lapply(productive.aa, function(x) x[, "aminoAcid"] == "" | grepl("\\*", x[, "aminoAcid"]) | duplicated(x[, "aminoAcid"]))))){ stop("Your list contains unproductive sequences or has not been aggreated for productive amino acid sequences. Remove unproductive sequences first using the function productiveSeq with the aggregate parameter set to 'aminoAcid'.", call. = FALSE) } a <- productive.aa[[sample1]] b <- productive.aa[[sample2]] common <- intersect(a$aminoAcid, b$aminoAcid) common.seq <- data.frame(aminoAcid = common) common.a <- a[a$aminoAcid %in% common, ] common.b <- b[b$aminoAcid %in% common, ] common.seq$freq.a <- common.a$freq common.seq$freq.b <- common.b$freq common.seq$difference <- common.a$freq - common.b$freq common.seq$log.fold.change <- log(common.a$freq/common.b$freq) common.seq <- common.seq[order(common.seq$log.fold.change, decreasing = TRUE), ] plot <- ggplot2::ggplot(data = common.seq, aes_string("freq.a", "freq.b")) + geom_point(size = 3) + theme_minimal() + labs(x = paste(sample1, "frequency (%)"), y = paste(sample2, "frequency (%)")) return(plot) }