#' Search for T cell receptor beta CDR3 amino acid sequences with known antigen #' specificity #' #' Search for published T cell receptor beta CDR3 amino acid sequences with #' known antigen specificity in a list of data frames. #' #' @param list A list of data frames generated by the LymphoSeq functions #' readImmunoSeq or productiveSeq. "aminoAcid", "frequencyCount", and "count" #' are required columns. #' @return Returns a data frame of each sample name and instance in the sample #' that the published TCR sequence appeared along with additional #' information including antigen specificity, epitope, HLA type, and PubMed ID #' (PMID) for the reference where the sequence was characterized. The #' publishedTRB database is located in a separate package called LymphoSeqDB #' that should be loaded automatically. #' @examples #' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq") #' #' file.list <- readImmunoSeq(path = file.path) #' #' productive.aa <- productiveSeq(file.list = file.list, aggregate = "aminoAcid") #' #' searchPublished(list = productive.aa) #' @seealso Refer to the LymphoSeqDB package for details regarding the #' publishedTRB database. #' @export #' @importFrom plyr llply #' @import LymphoSeqDB searchPublished <- function(list) { search <- plyr::llply(list, function(x) x[which(x[, "aminoAcid"] %in% LymphoSeqDB::publishedTRB$aminoAcid), ]) found <- NULL l <- 1 for (l in 1:length(search)) { if (nrow(search[[l]]) != 0) { search[[l]] <- cbind(rep(names(search)[l], nrow(search[[l]])), search[[l]]) colnames(search[[l]])[1] <- "sample" found <- rbind(found, search[[l]]) } } if (is.null(found)) { message("No sequences found.") } else { found$prevalence = NULL found <- merge(found, LymphoSeqDB::publishedTRB, by = "aminoAcid") found <- found[c("sample", setdiff(names(found), "sample"))] return(found) } }