##' Consolidates all pieces from parallelized gsnap output into
##' appropriately merged files
##'
##' Files in \code{sam_file_dir} named
##' \code{gsnap_out.<piece number>.<mapping class>} are grouped by
##' mapping class, and every group is handed to
##' \code{consolidateSAMFiles}, which writes
##' \code{gsnap.merged.<mapping class>.bam}. If gsnap was run in
##' single-end mode, this will consolidate all parallelized pieces into
##' the 3 output files. If gsnap was run in paired-end mode, it
##' consolidates to the 7 output files.
##'
##' @title Consolidate gsnap's parallelized output files
##' @param sam_file_dir directory where gsnap's sam file output is stored
##' @param remove_merged remove the individual pieces after they are merged
##' @param parallelized if \code{TRUE}, merge the mapping classes
##'   concurrently with \code{mclapply}; otherwise (or on Windows, where
##'   forking is not available) merge them one after another with
##'   \code{lapply}
##' @return list, named by mapping class, holding whatever
##'   \code{consolidateSAMFiles} returns for each merged file; an empty
##'   list if the directory contains no gsnap pieces
##' @author Cory Barr
##' @export
consolidateGsnapFiles <- function(sam_file_dir,
                                  remove_merged=FALSE,
                                  parallelized=TRUE) {
  if (!file.exists(sam_file_dir)) {
    stop(paste("Could not find the directory", sam_file_dir))
  }
  sam_file_dir <- file_path_as_absolute(sam_file_dir)

  dir_files <- dir(sam_file_dir, full.names = TRUE)
  base_names <- basename(dir_files)

  ## One pattern is used both to pick out the pieces and to strip
  ## their prefix. Detecting with a looser pattern than the one used
  ## for stripping lets stray files such as "gsnap_out.log" through as
  ## bogus mapping classes that have no pieces to merge.
  piece_prefix <- "^gsnap_out\\.[0-9]+\\."
  is_piece <- grepl(piece_prefix, base_names)
  piece_files <- dir_files[is_piece]
  piece_classes <- sub(piece_prefix, "", base_names[is_piece])

  ## A name that is nothing but the prefix carries no mapping class.
  has_class <- nzchar(piece_classes)
  piece_files <- piece_files[has_class]
  piece_classes <- piece_classes[has_class]

  mapping_classes <- unique(piece_classes)

  ## Pieces are matched to their class by exact string comparison, so
  ## regex metacharacters in a class name cannot cause mismatches.
  .consolidate <- function(mapping_class) {
    consolidateSAMFiles(
      sam_files = piece_files[piece_classes == mapping_class],
      outfile = paste("gsnap.merged", mapping_class, "bam", sep = "."),
      remove_merged = remove_merged
    )
  }

  ## Honor the `parallelized` flag. mclapply relies on forking, which
  ## is not supported on Windows, so fall back to lapply there.
  use_fork <- isTRUE(parallelized) && .Platform$OS.type != "windows"
  apply_func <- if (use_fork) mclapply else lapply

  merged_files <- apply_func(mapping_classes, .consolidate)
  names(merged_files) <- mapping_classes
  merged_files
}