R/gsnap-command.R
b22faf0d
 ### =========================================================================
 ### gsnap command
 ### -------------------------------------------------------------------------
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### High-level interface
 ###
 
 setGeneric("gsnap", function(input_a, input_b = NULL, params, ...)
            standardGeneric("gsnap"))
 
e1b17d35
 setMethod("gsnap", c("character", "character_OR_NULL", "GsnapParam"),
b22faf0d
           function(input_a, input_b, params,
563f6d6b
                    output = file.path(getwd(),
                      file_path_sans_ext(basename(input_a), TRUE)),
8600ee5b
                    consolidate = TRUE, ...)
b22faf0d
           {
ebafcdf1
             if (!is.null(input_b) && length(input_a) != length(input_b))
               stop("If 'input_b' is non-NULL, it must have the same length",
                    " as 'input_a'")
6d010e6b
             if (anyNA(input_a) || anyNA(input_b))
563f6d6b
               stop("'input_a' and 'input_b' must not contain NA's")
ebafcdf1
             if (length(input_a) > 1L) {
38549873
                 args <- list(params, consolidate, ...)
                 return(GsnapOutputList(mapply(gsnap, input_a, input_b,
                                               output=output,
                                               MoreArgs = args)))
ebafcdf1
             }
             
ea3f9b5f
             output_dir <- dirname(output)
             if (!file.exists(output_dir))
               dir.create(output_dir, recursive = TRUE)
 
b22faf0d
             params <- initialize(params, ...)
             params_list <- as.list(params)
             if (gsnap_split_output(params)) {
563f6d6b
               params_list$split_output <- output
ea3f9b5f
               output_path <- output_dir
b22faf0d
             } else {
835aba91
               output_path <- paste0(output, ".sam")
               params_list$.redirect <- paste(">", output_path)
             }
 
             if (is.null(params_list$gunzip)) {
               if (file_ext(input_a) == "gz" && file_ext(input_b) == "gz") {
                 params_list$gunzip <- TRUE
               }
b22faf0d
             }
             
1116df95
             res <- do.call(.gsnap,
                          c(list(.input_a = input_a, .input_b = input_b,
                                 format = "sam"),
                            params_list))
297de88f
             gsnap_output <- GsnapOutput(path = output_path,
                                         version = gsnapVersion(),
                                         param = params)
97bd8ae2
             gsnap_output <- asBam(gsnap_output)
297de88f
             if (consolidate)
               consolidate(gsnap_output)
             return(gsnap_output)
b22faf0d
           })
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Low-level interface
 ###
 
273e823f
 .gsnap <- function(db = NULL, dir = NULL, part = NULL,
                    input_buffer_size = 1000L,
00075ec0
                    barcode_length = 0L,
                    orientation = c("FR", "RF", "FF"),
                    fastq_id_start = NULL, fastq_id_end = NULL,
                    filter_chastity = c("off", "either", "both"),
                    gunzip = FALSE,
b22faf0d
                    batch = c("2", "0", "1", "3", "4"), max_mismatches = NULL,
                    query_unk_mismatch = 0L, genome_unk_mismatch = 1L,
00075ec0
                    terminal_threshold = 2L,
b22faf0d
                    indel_penalty = 2L,
                    indel_endlength = 4L, max_middle_insertions = 9L,
                    max_middle_deletions = 30L, max_end_insertions = 3L,
                    max_end_deletions = 6L, suboptimal_levels = 0L,
                    adapter_string = NULL,
00075ec0
                    trim_mismatch_score = -3L, trim_indel_score = -4L,
                    snpsdir = NULL, use_snps = NULL,
b22faf0d
                    cmetdir = NULL, atoidir = NULL,
00075ec0
                    mode = c("standard", "cmet-stranded", "cmet-nonstranded",
                      "atoi-stranded", "atoi-nonstranded"),
                    tallydir = NULL, use_tally = NULL,
835aba91
                    use_runlength = NULL, nthreads = 1L,
00075ec0
                    gmap_mode = "pairsearch,terminal,improve",
                    trigger_score_for_gmap = 5L, max_gmap_pairsearch = 3L,
                    max_gmap_improvement = 3L, microexon_spliceprob = 0.90,
7f5a9624
                    novelsplicing = 0L, splicingdir = NULL,
00075ec0
                    use_splicing = NULL, ambig_splice_noclip = FALSE,
                    localsplicedist = 200000L,
b22faf0d
                    local_splice_penalty = 0L, distant_splice_penalty = 3L,
                    distant_splice_endlength = 16L,
                    shortend_splice_endlength = 2L,
00075ec0
                    distant_splice_identity = 0.95,
                    antistranded_penalty = 0L, merge_distant_samechr = FALSE,
b22faf0d
                    pairmax_dna = 1000L,
00075ec0
                    pairmax_rna = 200000L, pairexpect = 200L, pairdev = 25L,
b22faf0d
                    quality_protocol = c("sanger", "illumina"),
                    quality_zero_score = 33L, quality_print_shift = 0L,
                    mapq_unique_score = NULL, npaths = 100L,
                    quiet_if_excessive = FALSE, ordered = FALSE,
                    show_rediff = FALSE, clip_overlap = FALSE,
                    print_snps = FALSE, failsonly = FALSE,
                    nofails = FALSE, fails_as_input = FALSE,
00075ec0
                    format = NULL, split_output = NULL,
                    output_buffer_size = 1000L,
                    no_sam_headers = FALSE,
b22faf0d
                    sam_headers_batch = NULL, read_group_id = NULL,
00075ec0
                    read_group_name = NULL, read_group_library = NULL,
                    read_group_platform = NULL, version = FALSE,
b22faf0d
                    .input_a = NULL, .input_b = NULL,
ea3f9b5f
                    .redirect = NULL ## e.g., > gsnap.sam
                    )
b22faf0d
 {
   formals <- formals(sys.function())
   problems <- do.call(c, lapply(names(formals), .valid_gmap_parameter, formals))
   if (!is.null(problems))
     stop("validation failed:\n  ", paste(problems, collapse = "\n  "))  
 
   orientation <- match.arg(orientation)
   batch <- match.arg(batch)
   mode <- match.arg(mode)
   quality_protocol <- match.arg(quality_protocol)
15fa56ac
   filter_chastity <- match.arg(filter_chastity)
ead94b30
 
   if (version) {
       .redirect <- ">/dev/null"
   }
b22faf0d
   
 ### TODO: if input_a is NULL, or split_output and .redirect are NULL:
 ###       return a pipe()
273e823f
   .system_gsnap(commandLine("gsnap"))
 }
 
 ..gsnap <- function(args, path = NULL) {
   .system_gsnap(.commandLine("gsnap", args, path))
 }
 
 .system_gsnap <- function(command) {
298febc1
   stderr <- tempfile("gsnap-stderr", fileext = "log")
   command <- paste(command, "2>", stderr)
   .system(command)
   output <- readLines(stderr)
273e823f
   if (!gsnapSucceeded(command, output))
     stop("Execution of gsnap failed, command-line: '", command,
          "'; last output line: '", tail(output, 1), "'")
   else output
 }
 
 gsnapSucceeded <- function(command, output) {
   if (!grepl("--version", command))
     any(grepl("^Processed \\d+ queries", output))
   else TRUE
b22faf0d
 }
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Validity Checking
 ###
 
 .valid_GsnapParam_part <- function(x) {
   ## validate the part string (i/n)
 }
 .valid_GsnapParam_batch <- function(x) {
   ## one of 0, 1, 2, 3, 4
 }
 .valid_GsnapParam_max_mismatches <- function(x) {
   ## non-negative number or NULL
 }
 .valid_GsnapParam_suboptimal_levels <- function(x) {
   ## non-negative number or NULL
 }
 .valid_GsnapParam_use_snps <- function(x) {
   ## a file that exists (not sure)
 }
 .valid_GsnapParam_mode <- function(x) {
   ## one of standard, cmet, atoi
 }
 .valid_GsnapParam_nthreads <- function(x) {
   ## positive number
 }
 .valid_GsnapParam_novelsplicing <- function(x) {
   ## TRUE or FALSE
 }
 .valid_GsnapParam_splicing <- function(x) {
   ## a file that exists (not sure) or NULL
 }
 .valid_GsnapParam_npaths <- function(x) {
   ## positive number
 }
 .valid_GsnapParam_quiet_if_excessive <- function(x) {
   ## TRUE or FALSE
 }
 .valid_GsnapParam_nofails <- function(x) {
   ## TRUE or FALSE
 }
 .valid_GsnapParam_format <- function(x) {
   ## sam or NULL (this is a low-level check!)
 }
 .valid_GsnapParam_split_output <- function(x) {
   ## single string or NULL
 }
 .valid_GsnapParam_read_group_id <- function(x) {
   ## single string (any character constraints?) or NULL
 }
 .valid_GsnapParam_read_group_name  <- function(x) {
   ## single string or NULL
 }
 
 .valid_gmap_parameter <- function(name, params) {
   res <- TRUE ##if we're not validating the param, assume it is good
 
   validatorName <- paste(".valid_GsnapParam_", name, sep = "")
   if (exists(validatorName)) {
     validator <- get(validatorName)
     res <- validator(params)
   } else {
     res <- NULL
   }
 
   return(res)
 }
 
 .valid_GsnapParam <- function(object) {
   x <- as.list(object) # converts to low-level parameter list
   do.call(c, lapply(names(x), .valid_gmap_parameter, x))
 }
 
 setValidity("GsnapParam", .valid_GsnapParam)
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Utilities
 ###
 
 gsnapVersion <- function() {
273e823f
   output <- .gsnap(version = TRUE)
   version_text <- sub("GSNAP version (.*?) .*", "\\1", output[1])
b22faf0d
   parseGsnapVersion(version_text)
 }
 
 parseGsnapVersion <- function(x) {
273e823f
   as.POSIXlt(x, format = "%Y-%m-%d")
b22faf0d
 }