Browse code

add ability to call GMAP

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110840 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 23/11/2015 23:25:34
Showing8 changed files

... ...
@@ -9,10 +9,10 @@ Description: GSNAP and GMAP are a pair of tools to align short-read
9 9
         methods to work with GMAP and GSNAP from within R. In addition,
10 10
         it provides methods to tally alignment results on a
11 11
         per-nucleotide basis using the bam_tally tool.
12
-Version: 1.13.3
12
+Version: 1.13.4
13 13
 Depends: R (>= 2.15.0), methods, GenomeInfoDb (>= 1.1.3),
14 14
         GenomicRanges (>= 1.17.12)
15
-Imports: S4Vectors, IRanges, Rsamtools (>= 1.17.8), rtracklayer (>= 1.25.5),
15
+Imports: S4Vectors, IRanges, Rsamtools (>= 1.17.8), rtracklayer (>= 1.31.2),
16 16
         GenomicFeatures (>= 1.17.13), Biostrings, VariantAnnotation (>= 1.11.4),
17 17
         tools, Biobase, BSgenome, GenomicAlignments (>= 1.1.9), BiocParallel
18 18
 Suggests: RUnit, BSgenome.Dmelanogaster.UCSC.dm3,
... ...
@@ -21,7 +21,9 @@ Suggests: RUnit, BSgenome.Dmelanogaster.UCSC.dm3,
21 21
         LungCancerLines
22 22
 Collate: GmapBamReader-class.R GmapGenomeDirectory-class.R
23 23
         GmapGenome-class.R GmapSnpDirectory-class.R GmapSnps-class.R
24
-        GsnapParam-class.R GsnapOutput-class.R atoiindex-command.R
24
+        GmapParam-class.R GsnapParam-class.R
25
+        GsnapOutput-class.R GmapOutput-class.R
26
+        atoiindex-command.R
25 27
         BamTallyParam-class.R bam_tally-command.R cmetindex-command.R
26 28
         get-genome-command.R gmap-command.R gmap_build-command.R
27 29
         gsnap-command.R iit-format.R iit_store-command.R info.R
... ...
@@ -18,7 +18,7 @@ importMethodsFrom(GenomicRanges, seqnames, strand)
18 18
 importMethodsFrom(Rsamtools, asBam)
19 19
 importClassesFrom(GenomicFeatures, TxDb)
20 20
 importFrom(GenomicFeatures, transcripts, exons)
21
-importClassesFrom(rtracklayer, RTLFile, FastaFile)
21
+importClassesFrom(rtracklayer, RTLFile, FastaFile, RTLFileList)
22 22
 importFrom(rtracklayer, "referenceSequence<-", import, export, FastaFile)
23 23
 importMethodsFrom(rtracklayer, export)
24 24
 importFrom(VariantAnnotation, readVcf, ScanVcfParam, fixed, VRanges, ref, alt,
... ...
@@ -32,10 +32,14 @@ importMethodsFrom(GenomicAlignments, qwidth)
32 32
 
33 33
 export(bam_tally, GmapGenome, GmapGenomeDirectory, GsnapParam,
34 34
        directory, BamTallyParam, makeGmapGenomePackage, GsnapOutput,
35
-       GmapSnps, GmapSnpDirectory, TP53Genome, TP53Which, variantSummary)
35
+       GmapSnps, GmapSnpDirectory, TP53Genome, TP53Which, variantSummary,
36
+       GmapParam)
36 37
 
37 38
 exportClasses(GmapGenome, GmapGenomeDirectory, GmapSnpDirectory,
38
-              GsnapOutput, GmapSnps, BamTallyParam, GsnapParam)
39
+              GsnapOutput, GmapSnps, BamTallyParam, GsnapParam, GmapParam,
40
+              GmapAlignerParam)
39 41
 
40 42
 exportMethods(bamPaths, path, genome, seqinfo, gsnap, "snps<-",
41
-              "spliceSites<-", getSeq)
43
+              "spliceSites<-", getSeq, gmap)
44
+
45
+S3method(as.list, GmapAlignerParam)
42 46
new file mode 100644
... ...
@@ -0,0 +1,98 @@
1
+### =========================================================================
2
+### GmapOutput class
3
+### -------------------------------------------------------------------------
4
+###
5
+### Stores a path to a GMAP output directory. This is based on the
6
+### assumption that the alignments from a sample will land in a
7
+### unique directory.
8
+###
9
+
10
+setClassUnion("GmapParamORNULL", c("GmapParam", "NULL"))
11
+setIs("GmapParamORNULL", "GmapAlignerParamORNULL")
12
+
13
+.valid_GmapOutput <- function(object) {
14
+    if (length(paths(object)) == 0L)
15
+        paste0("No GSNAP output at '", object@path, "'")
16
+}
17
+
18
+setClass("GmapOutput",
19
+         representation(path = "character",
20
+                        param = "GmapParamORNULL",
21
+                        version = "POSIXltORNULL"),
22
+         contains="GmapAlignerOutput",
23
+         validity = .valid_GmapOutput)
24
+
25
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
26
+### Accessors
27
+###
28
+
29
+setMethod("alignmentCategories", "GmapOutput", function(x) {
30
+              c("uniq", "mult", "chimera", "nomapping")
31
+          })
32
+
33
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
34
+### Constructor
35
+###
36
+
37
+GmapOutput <- function(path, param = NULL, version = NULL) {
38
+    newGmapAlignerOutput("GmapOutput", path, param, version)
39
+}
40
+
41
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
42
+### Coercion
43
+###
44
+
45
+formatToExt <- function(x) {
46
+    switch(x,
47
+           gff3_gene=, gff3_match_cdna=, gff3_match_est="gff3",
48
+           sampe=, samse="sam",
49
+           psl="psl", splicesites="splicesites", introns="introns",
50
+           map_exons="iit", map_ranges="iit", coords="coords")
51
+}
52
+
53
+setAs("GmapOutput", "RTLFileList", function(from) {
54
+          as(lapply(paths(from), FileForFormat, formatToExt(from@param@format)),
55
+             "List")
56
+      })
57
+
58
+setAs("GmapOutput", "RTLFile", function(from) {
59
+          p <- paths(from)
60
+          if (length(p) == 0L) {
61
+              stop("no output files at: ", path(from))
62
+          }
63
+          FileForFormat(p[1L], formatToExt(from@param@format))
64
+      })
65
+
66
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
67
+### Utilities
68
+###
69
+
70
+setMethod("import", c("GmapOutput", "missing", "missing"),
71
+          function(con, format, text, ...) {
72
+              if (con@param@format %in% c("samse", "sampe")) {
73
+                  f <- as(con, "BamFile")
74
+              } else {
75
+                  f <- as(con, "RTLFile")
76
+              }
77
+              ans <- import(f, ...)
78
+              seqinfo(ans) <- seqinfo(con@param@genome)
79
+              ans
80
+          })
81
+
82
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
83
+### List class
84
+###
85
+
86
+setClass("GmapOutputList",
87
+         prototype = prototype(elementType = "GmapOutput"),
88
+         contains = "GmapAlignerOutputList")
89
+
90
+setClass("SimpleGmapOutputList",
91
+         contains = c("GmapOutputList", "SimpleList"))
92
+
93
+GmapOutputList <- function(...) {
94
+    args <- list(...)
95
+    if (length(args) == 1 && is.list(args[[1]])) 
96
+        args <- args[[1]]
97
+    S4Vectors:::new_SimpleList_from_list("SimpleGmapOutputList", args)
98
+}
0 99
new file mode 100644
... ...
@@ -0,0 +1,60 @@
1
+### =========================================================================
2
+### GmapParam class
3
+### -------------------------------------------------------------------------
4
+###
5
+### High-level interface to gmap.
6
+###
7
+
8
+setClassUnion("GmapSnpsORNULL", c("GmapSnps", "NULL"))
9
+
10
+setClass("GmapAlignerParam",
11
+         representation(genome = "GmapGenome",
12
+                        part = "characterORNULL",
13
+                        batch = "character",
14
+                        snps = "GmapSnpsORNULL",
15
+                        mode = "character",
16
+                        nthreads = "integer",
17
+                        npaths = "integer",
18
+                        quiet_if_excessive = "logical",
19
+                        nofails = "logical", 
20
+                        split_output = "logical",
21
+                        extra = "list"))
22
+
23
+setClass("GmapParam",
24
+         representation(suboptimal_score = "integer",
25
+                        splicing = "logical",
26
+                        format = "character"),
27
+         contains="GmapAlignerParam")
28
+
29
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
30
+### Constructor
31
+###
32
+
33
+GmapParam <- function(genome, unique_only = FALSE,
34
+                      suboptimal_score = 0L, mode = "standard",
35
+                      snps = NULL,
36
+                      npaths = if (unique_only) 1L else 100L,
37
+                      quiet_if_excessive = unique_only, nofails = unique_only,
38
+                      split_output = !unique_only,
39
+                      splicing = TRUE, 
40
+                      nthreads = 1L, part = NULL, batch = "2",
41
+                      format = c("gff3_gene", "gff3_match_cdna",
42
+                          "gff3_match_est", "sampe", "samse", "psl",
43
+                          "splicesites", "introns",
44
+                          "map_exons", "map_ranges", "coords"),
45
+                      ...)
46
+{
47
+    format <- match.arg(format)
48
+    newGmapAlignerParam("GmapParam", genome, snps)
49
+}
50
+
51
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
52
+### Coercion
53
+###
54
+
55
+setAs("GmapParam", "list", function(from) {
56
+          to <- GmapAlignerParam_asList(from)
57
+          to$nosplicing <- !to$splicing
58
+          to$splicing <- NULL
59
+          to
60
+      })
... ...
@@ -9,35 +9,46 @@
9 9
 
10 10
 setClassUnion("POSIXltORNULL", c("POSIXlt", "NULL"))
11 11
 
12
-setClassUnion("GsnapParamORNULL", c("GsnapParam", "NULL"))
13
-
14 12
 .valid_GsnapOutput <- function(object) {
15 13
   if (length(samPaths(object)) == 0L && length(bamPaths(object)) == 0L)
16 14
     paste0("No GSNAP output at '", object@path, "'")
17 15
 }
18 16
 
19
-setClass("GsnapOutput",
17
+setClassUnion("GmapAlignerParamORNULL", c("GmapAlignerParam", "NULL"))
18
+
19
+setClassUnion("GsnapParamORNULL", c("GsnapParam", "NULL"))
20
+setIs("GsnapParamORNULL", "GmapAlignerParamORNULL")
21
+
22
+setClass("GmapAlignerOutput",
20 23
          representation(path = "character",
21
-                        param = "GsnapParamORNULL",
22
-                        version = "POSIXltORNULL"),
24
+                        param = "GmapAlignerParamORNULL",
25
+                        version = "POSIXltORNULL"))
26
+         
27
+setClass("GsnapOutput",
28
+         representation(param = "GsnapParamORNULL"),
29
+         contains="GmapAlignerOutput",
23 30
          validity = .valid_GsnapOutput)
24 31
 
25 32
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
26 33
 ### Accessors
27 34
 ###
28 35
 
29
-setMethod("path", "GsnapOutput", function(object) object@path)
36
+setMethod("path", "GmapAlignerOutput", function(object) object@path)
37
+
38
+setGeneric("alignmentCategories",
39
+           function(x) standardGeneric("alignmentCategories"))
30 40
 
31
-## should contain any category ever known to be used by gsnap
32
-.ALIGNMENT_CATEGORIES <- c("concordant_uniq", "concordant_mult",
33
-                           "concordant_transloc", "paired_mult",
34
-                           "paired_uniq_inv", "paired_uniq_long",
35
-                           "paired_uniq_scr", "unpaired_uniq", "unpaired_mult",
36
-                           "unpaired_transloc",  "halfmapping_uniq",
37
-                           "halfmapping_mult", "halfmapping_transloc",
38
-                           "nomapping", "concordant_circular",
39
-                           "halfmapping_circular", "paired_uniq_circular",
40
-                           "unpaired_circular")
41
+setMethod("alignmentCategories", "GsnapOutput", function(x) {
42
+              c("concordant_uniq", "concordant_mult",
43
+                "concordant_transloc", "paired_mult",
44
+                "paired_uniq_inv", "paired_uniq_long",
45
+                "paired_uniq_scr", "unpaired_uniq", "unpaired_mult",
46
+                "unpaired_transloc",  "halfmapping_uniq",
47
+                "halfmapping_mult", "halfmapping_transloc",
48
+                "nomapping", "concordant_circular",
49
+                "halfmapping_circular", "paired_uniq_circular",
50
+                "unpaired_circular")
51
+          })
41 52
 
42 53
 ### Q: How to handle the single BAM output case? We would still want a
43 54
 ### formal object, with a version, parameters, etc. If 'path' is a
... ...
@@ -53,31 +64,38 @@ is_dir <- function(x) {
53 64
 
54 65
 setGeneric("bamPath", function(x) standardGeneric("bamPath"))
55 66
 
56
-setMethod("bamPath", "GsnapOutput", function(x) {
57
-  paths <- bamPaths(x)
58
-  if (length(paths) > 1) # maybe this should be the analysis BAM if it exists?
59
-    paths <- paths["concordant_uniq"]
60
-  paths
61
-})
67
+setMethod("bamPath", "GmapAlignerOutput", function(x) {
68
+              paths <- bamPaths(x)
69
+              if (length(paths) > 1L)
70
+                  paths <- paths[alignmentCategories(x)[1L]]
71
+              paths
72
+          })
62 73
 
63 74
 ## file_ext does not allow '_'
64 75
 file_ext2 <- function(x) {
65 76
   gsub(".*\\.", "", x)
66 77
 }
67 78
 
79
+paths <- function(x) {
80
+    if (!is_dir(x)) {
81
+        return(path(x))
82
+    }
83
+    paths <- list_files_with_exts(path(x), alignmentCategories(x),
84
+                                  full.names = TRUE)
85
+    names(paths) <- file_ext2(paths)
86
+    paths[alignmentCategories(x)]
87
+}
88
+
68 89
 samPaths <- function(x) {
69 90
   if (!is_dir(x)) {
70 91
     if (file_ext(path(x)) == "sam")
71 92
       return(path(x))
72 93
     return(character())
73 94
   }
74
-  paths <- list_files_with_exts(path(x), .ALIGNMENT_CATEGORIES,
75
-                                full.names = TRUE)
76
-  names(paths) <- file_ext2(paths)
77
-  paths
95
+  paths(x)
78 96
 }
79 97
 
80
-setMethod("bamPaths", "GsnapOutput", function(x) {
98
+setMethod("bamPaths", "GmapAlignerOutput", function(x) {
81 99
   if (!is_dir(x)) {
82 100
     if (file_ext(path(x)) == "bam")
83 101
       return(path(x))
... ...
@@ -85,7 +103,7 @@ setMethod("bamPaths", "GsnapOutput", function(x) {
85 103
   }
86 104
   paths <- list_files_with_exts(path(x), "bam", full.names = TRUE)
87 105
   names(paths) <- file_ext2(file_path_sans_ext(paths))
88
-  paths <- paths[names(paths) %in% .ALIGNMENT_CATEGORIES]
106
+  paths <- paths[names(paths) %in% alignmentCategories(x)]
89 107
   paths
90 108
 })
91 109
 
... ...
@@ -93,29 +111,31 @@ setMethod("bamPaths", "GsnapOutput", function(x) {
93 111
 ### Constructor
94 112
 ###
95 113
 
114
+newGmapAlignerOutput <- function(Class, path, param = NULL, version = NULL) {
115
+    if (!is.character(path) || any(is.na(path)))
116
+        stop("'path' must be a character vector without any NA's")
117
+    if (!is(version, "POSIXltORNULL")) {
118
+        if (is.character(version))
119
+            version <- parseGsnapVersion(version)
120
+        else stop("'version' must be a version string, POSIXlt object or NULL")
121
+    }
122
+    path <- file_path_as_absolute(path)
123
+    new(Class, path = path, version = version, param = param)
124
+}
125
+
96 126
 GsnapOutput <- function(path, param = NULL, version = NULL) {
97
-  if (!is.character(path) || any(is.na(path)))
98
-    stop("'path' must be a character vector without any NA's")
99
-  if (!is(version, "POSIXltORNULL")) {
100
-    if (is.character(version))
101
-      version <- parseGsnapVersion(version)
102
-    else stop("'version' must be a version string, POSIXlt object or NULL")
103
-  }
104
-  if (!is(param, "GsnapParamORNULL"))
105
-    stop("'param' must be a GsnapParam object or NULL")
106
-  path <- file_path_as_absolute(path)
107
-  new("GsnapOutput", path = path, version = version, param = param)
127
+  newGmapAlignerOutput("GsnapOutput", path, param, version)
108 128
 }
109 129
 
110 130
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
111 131
 ### Coercion
112 132
 ###
113 133
 
114
-setAs("GsnapOutput", "BamFileList", function(from) {
134
+setAs("GmapAlignerOutput", "BamFileList", function(from) {
115 135
   BamFileList(bamPaths(from))
116 136
 })
117 137
 
118
-setAs("GsnapOutput", "BamFile", function(from) {
138
+setAs("GmapAlignerOutput", "BamFile", function(from) {
119 139
   BamFile(bamPath(from))
120 140
 })
121 141
 
... ...
@@ -123,19 +143,19 @@ setAs("GsnapOutput", "BamFile", function(from) {
123 143
 ### Utilities
124 144
 ###
125 145
 
126
-setMethod("merge", c("GsnapOutput", "GsnapOutput"), function(x, y) {
146
+setMethod("merge", c("GmapAlignerOutput", "GmapAlignerOutput"), function(x, y) {
127 147
 ### TODO: get Cory's lane merging code for BAMs in here
128 148
 })
129 149
 
130 150
 setGeneric("consolidate", function(x, ...) standardGeneric("consolidate"))
131 151
 
132
-setMethod("consolidate", "GsnapOutput", function(x) {
152
+setMethod("consolidate", "GmapAlignerOutput", function(x) {
133 153
 ### TODO: this should produce the pipeline's analyzed BAM
134 154
   x
135 155
 })
136 156
 
137 157
 ##converts all gsnap SAM files to BAM files and creates the .bai index files
138
-setMethod("asBam", "GsnapOutput",
158
+setMethod("asBam", "GmapAlignerOutput",
139 159
           function(file) {
140 160
             ## the input is not a file. It is a GsnapOutput,
141 161
             ## but param name is enforced
... ...
@@ -155,6 +175,11 @@ setMethod("asBam", "GsnapOutput",
155 175
             file
156 176
 })
157 177
 
178
+setMethod("import", c("GsnapOutput", "missing", "missing"),
179
+          function(con, format, text, ...) {
180
+              import(as(con, "BamFile"), ...)
181
+          })
182
+
158 183
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159 184
 ### List class
160 185
 ###
... ...
@@ -167,9 +192,13 @@ setMethod("asBam", "GsnapOutput",
167 192
 ### character vectors. Would we vectorize the version or not? It makes
168 193
 ### sense to have an object that represents a single run of gsnap.
169 194
 
195
+setClass("GmapAlignerOutputList",
196
+         prototype = prototype(elementType = "GmapAlignerOutput"),
197
+         contains = "List")
198
+
170 199
 setClass("GsnapOutputList",
171 200
          prototype = prototype(elementType = "GsnapOutput"),
172
-         contains = "List")
201
+         contains = "GmapAlignerOutputList")
173 202
 
174 203
 setClass("SimpleGsnapOutputList",
175 204
          contains = c("GsnapOutputList", "SimpleList"))
... ...
@@ -181,7 +210,7 @@ GsnapOutputList <- function(...) {
181 210
   S4Vectors:::new_SimpleList_from_list("SimpleGsnapOutputList", args)
182 211
 }
183 212
 
184
-setAs("GsnapOutputList", "BamFileList", function(from) {
213
+setAs("GmapAlignerOutputList", "BamFileList", function(from) {
185 214
   BamFileList(lapply(from, as, "BamFile"))
186 215
 })
187 216
 
... ...
@@ -190,6 +219,6 @@ setAs("GsnapOutputList", "BamFileList", function(from) {
190 219
 ### Show
191 220
 ###
192 221
 
193
-setMethod("show", "GsnapOutput", function(object) {
194
-  cat("A GsnapOutput Object\n", "path: ", path(object), "\n", sep = "")
222
+setMethod("show", "GmapAlignerOutput", function(object) {
223
+  cat("A ", class(object), " Object\n", "path: ", path(object), "\n", sep = "")
195 224
 })
... ...
@@ -9,27 +9,16 @@
9 9
 ###
10 10
 
11 11
 setClassUnion("integerORNULL", c("integer", "NULL"))
12
-setClassUnion("GmapSnpsORNULL", c("GmapSnps", "NULL"))
13 12
 
14 13
 setClass("GsnapParam",
15
-         representation(genome = "GmapGenome",
16
-                        part = "characterORNULL", # used by parallelized_gsnap
17
-                        batch = "character", # weird "0", "1", ... 
18
-                        max_mismatches = "integerORNULL",
14
+         representation(max_mismatches = "integerORNULL",
19 15
                         suboptimal_levels = "integer",
20
-                        snps = "GmapSnpsORNULL",
21
-                        mode = "character",
22
-                        nthreads = "integer",
23 16
                         novelsplicing = "logical",
24 17
                         splicing = "characterORNULL",
25
-                        npaths = "integer",
26
-                        quiet_if_excessive = "logical",
27
-                        nofails = "logical", 
28
-                        split_output = "logical",
29 18
                         terminal_threshold = "integer",
30 19
                         gmap_mode = "character",
31
-                        clip_overlap = "logical",
32
-                        extra = "list"))
20
+                        clip_overlap = "logical"),
21
+         contains="GmapAlignerParam")
33 22
 
34 23
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
35 24
 ### Accessors
... ...
@@ -148,6 +137,27 @@ gsnap_extra <- function(x) {
148 137
 ### Constructor
149 138
 ###
150 139
 
140
+newGmapAlignerParam <- function(Class, genome, snps) {
141
+    if (missing(genome))
142
+        stop("The 'genome' must be specified (and coercible to GmapGenome)")
143
+    args <- formals(sys.function(sys.parent(1L)))
144
+    params <- mget(names(args), parent.frame())
145
+    params$unique_only <- NULL
146
+    paramClasses <- getSlots(Class)
147
+    paramClasses <- paramClasses[setdiff(names(paramClasses),
148
+                                         c("extra", "snps"))]
149
+    params <- mapply(as, params[names(paramClasses)], paramClasses,
150
+                     SIMPLIFY = FALSE)
151
+    if (!is.null(snps)) {
152
+        if (!is(snps, "GmapSnps")) {
153
+            snps <- GmapSnps(snps, genome)
154
+        }
155
+        params$snps <- snps
156
+    }
157
+    params$extra <- evalq(list(...), parent.frame())
158
+    do.call(new, c(Class, params))
159
+}
160
+
151 161
 GsnapParam <- function(genome, unique_only = FALSE, molecule = c("RNA", "DNA"),
152 162
                        max_mismatches = NULL,
153 163
                        suboptimal_levels = 0L, mode = "standard",
... ...
@@ -157,54 +167,43 @@ GsnapParam <- function(genome, unique_only = FALSE, molecule = c("RNA", "DNA"),
157 167
                        split_output = !unique_only,
158 168
                        novelsplicing = FALSE, splicing = NULL, 
159 169
                        nthreads = 1L, part = NULL, batch = "2",
160
-                       terminal_threshold = if (molecule == "DNA") 1000L else 2L,
161
-                       gmap_mode = if (molecule == "DNA") "none" else
162
-                       "pairsearch,terminal,improve",
170
+                       terminal_threshold =
171
+                           if (molecule == "DNA") 1000L else 2L,
172
+                       gmap_mode = if (molecule == "DNA") "none"
173
+                                   else "pairsearch,terminal,improve",
163 174
                        clip_overlap = FALSE, ...)
164 175
 {
165
-  if (missing(genome))
166
-    stop("The 'genome' must be specified (should be coercible to GmapGenome)")
167 176
   molecule <- match.arg(molecule)
168
-  args <- formals(sys.function())
169
-  params <- mget(names(args), environment())
170
-  params$unique_only <- NULL
171
-  paramClasses <- getSlots("GsnapParam")
172
-  paramClasses <- paramClasses[setdiff(names(paramClasses), c("extra", "snps"))]
173
-  params <- mapply(as, params[names(paramClasses)], paramClasses,
174
-                   SIMPLIFY = FALSE)
175
-  if (!is.null(snps)) {
176
-    if (!is(snps, "GmapSnps")) {
177
-      snps <- GmapSnps(snps, genome)
178
-    }
179
-    params$snps <- snps
180
-  }
181
-  params$extra <- list(...)
182
-  do.call(new, c("GsnapParam", params))
177
+  newGmapAlignerParam("GsnapParam", genome, snps)
183 178
 }
184 179
 
185 180
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
186 181
 ### Coercion
187 182
 ###
188 183
 
189
-setAs("GsnapParam", "list", function(from) {
184
+GmapAlignerParam_asList <- function(from) {
190 185
   to <- lapply(slotNames(from), slot, object = from)
191 186
   names(to) <- slotNames(from)
192
-  to$split_output <- if (to$split_output) "gsnap" else NULL
187
+  to$split_output <- if (to$split_output) tolower(sub("Param", "", class(from)))
193 188
   to$db <- genome(to$genome)
194 189
   to$dir <- path(directory(to$genome))
195 190
   to$genome <- NULL
196 191
   to$use_snps <- name(to$snps)
197 192
   to$snpsdir <- path(directory(to$snps))
198
-  to$novelsplicing <- as.integer(to$novelsplicing)
199
-  to <- rename(to, splicing = "use_splicing")
200 193
   extras <- to$extra
201 194
   to <- c(to, extras)
202
-  to$extra <- NULL
203
-  
195
+  to$extra <- NULL  
204 196
   to
205
-})
197
+}
198
+
199
+setAs("GsnapParam", "list", function(from) {
200
+          to <- GmapAlignerParam_asList(from)
201
+          to$novelsplicing <- as.integer(to$novelsplicing)
202
+          to <- rename(to, splicing = "use_splicing")
203
+          to
204
+      })
206 205
 
207
-setMethod("as.list", "GsnapParam", function(x) as(x, "list"))
206
+as.list.GmapAlignerParam <- function(x) as(x, "list")
208 207
 
209 208
 setAs("ANY", "characterORNULL", function(from) {
210 209
   if (is.null(from))
... ...
@@ -222,7 +221,7 @@ setAs("ANY", "integerORNULL", function(from) {
222 221
 ### Show
223 222
 ###
224 223
 
225
-setMethod("show", "GsnapParam", function(object) {
224
+setMethod("show", "GmapAlignerParam", function(object) {
226 225
   slots <- lapply(slotNames(object), slot, object = object)
227 226
   names(slots) <- slotNames(object)
228 227
   slots$genome <- paste0(slots$genome@name,
... ...
@@ -231,6 +230,6 @@ setMethod("show", "GsnapParam", function(object) {
231 230
     slots$snps <- paste0(name(slots$snps),
232 231
                          " (", path(directory(slots$snps)), ")")
233 232
   }
234
-  cat("A GsnapParams object\n",
233
+  cat("A ", class(object), " object\n",
235 234
       paste0(names(slots), ": ", slots, collapse = "\n"), "\n", sep = "")
236 235
 })
... ...
@@ -6,6 +6,133 @@
6 6
 ### High-level interface
7 7
 ###
8 8
 
9
+setGeneric("gmap", function(input, params, ...) standardGeneric("gmap"))
10
+
11
+setMethod("gmap", c("ANY", "GmapParam"),
12
+          function(input, params, ...) {
13
+              tmpfile <- tempfile()
14
+              export(input, tmpfile, format="fasta")
15
+              input <- tmpfile
16
+              callGeneric()
17
+          })
18
+
19
+setMethod("gmap", c("FastaFile", "GmapParam"),
20
+          function(input, params, ...) {
21
+              input <- path(input)
22
+              callGeneric()
23
+          })
24
+
25
+setMethod("gmap", c("character", "GmapParam"),
26
+          function(input, params,
27
+                   output = file.path(getwd(),
28
+                       file_path_sans_ext(basename(input), TRUE)), ...)
29
+              {
30
+                  if (any(is.na(input)))
31
+                      stop("'input' must not contain NA's")
32
+                  if (length(input) > 1L) {
33
+                      return(GmapOutputList(mapply(gmap, input,
34
+                                                   MoreArgs =
35
+                                                       list(params, output,
36
+                                                            ...))))
37
+                  }
38
+                  
39
+                  output_dir <- dirname(output)
40
+                  if (!file.exists(output_dir))
41
+                      dir.create(output_dir, recursive = TRUE)
42
+
43
+                  params <- initialize(params, ...)
44
+                  params_list <- as.list(params)
45
+                  if (gsnap_split_output(params)) {
46
+                      params_list$split_output <- output
47
+                      output_path <- output_dir
48
+                  } else {
49
+                      output_path <- paste0(output, ".",
50
+                                            formatToExt(params$format))
51
+                      params_list$.redirect <- paste(">", output_path)
52
+                  }
53
+
54
+                  res <- do.call(.gmap,
55
+                                 c(list(.input = input), params_list))
56
+                  gmap_output<- GmapOutput(path = output_path,
57
+                                           version = gmapVersion(),
58
+                                           param = params)
59
+
60
+                  if (params_list$format %in% c("samse", "sampe")) {
61
+                      gmap_output <- asBam(gmap_output)
62
+                  }
63
+                  gmap_output
64
+              })
65
+
9 66
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
10 67
 ### Low-level interface
11 68
 ###
69
+
70
+.gmap <- function(db = NULL, dir = NULL,
71
+                  kmer = NULL, basesize = NULL, sampling = NULL,
72
+                  genomefull = FALSE, gseg = NULL,
73
+                  selfalign = FALSE, pairalign = FALSE,
74
+                  part = NULL, input_buffer_size = 1000L,
75
+                  batch = c("0", "1", "2", "3", "4", "5"),
76
+                  expand_offsets = FALSE, nosplicing = FALSE,
77
+                  min_intronlength = 9L, intronlength = 1000000L,
78
+                  localsplicedist = 2000000L, totallength = 2400000L,
79
+                  chimera_margin = 40L, no_chimeras = FALSE,
80
+                  nthreads = NULL, chrsubsetfile = NULL,
81
+                  direction = c("auto", "sense_force", "antisense_force",
82
+                      "sense_filter", "antisense_filter"),
83
+                  trimendexons = 12L, canonical_mode = c("1", "0", "2"),
84
+                  cross_species = FALSE, allow_close_indels = c("1", "0", "2"),
85
+                  microexon_spliceprob = 0.90, cmetdir = NULL, atoidir = NULL,
86
+                  mode = c("standard", "cmet-stranded", "cmet-nonstranded",
87
+                      "atoi-stranded", "atoi-nonstranded"),
88
+                  prunelevel = c("0", "1", "2", "3"),
89
+                  format = NULL, npaths = 5L, quiet_if_excessive = FALSE,
90
+                  suboptimal_score = NULL, ordered = FALSE, md5 = FALSE,
91
+                  chimera_overlap = FALSE, failsonly = FALSE, nofails = FALSE,
92
+                  fails_as_input = FALSE, snpsdir = NULL, use_snps = NULL,
93
+                  split_output = NULL, append_output = FALSE,
94
+                  output_buffer_size = 1000L, fulllength = FALSE,
95
+                  cdsstart = NULL, truncate = FALSE, tolerant = FALSE,
96
+                  no_sam_headers = FALSE, sam_use_0M = FALSE,
97
+                  force_xs_dir = FALSE, md_lowercase_snp = FALSE,
98
+                  read_group_id = NULL, read_group_name = NULL,
99
+                  read_group_library = NULL, read_group_platform = NULL,
100
+                  quality_protocol = c("sanger", "illumina"),
101
+                  quality_print_shift = 0L, mapdir = NULL, map = NULL,
102
+                  mapexons = FALSE, mapboth = FALSE, version = FALSE,
103
+                  .input = NULL, .redirect = NULL)
104
+{
105
+    formals <- formals(sys.function())
106
+
107
+    expand_offsets <- as.integer(expand_offsets)
108
+    batch <- match.arg(batch)
109
+    direction <- match.arg(direction)
110
+    canonical_mode <- match.arg(canonical_mode)
111
+    allow_close_indels <- match.arg(allow_close_indels)
112
+    mode <- match.arg(mode)
113
+    prunelevel <- match.arg(prunelevel)
114
+    quality_protocol <- match.arg(quality_protocol)
115
+    
116
+    if (version) {
117
+        .redirect <- ">/dev/null"
118
+    }
119
+
120
+### TODO: if input_a is NULL, or split_output and .redirect are NULL:
121
+###       return a pipe()
122
+    .system_gsnap(commandLine("gmap"))
123
+}
124
+
125
+..gmap <- function(args, path = NULL) {
126
+    .system_gsnap(.commandLine("gmap", args, path))
127
+}
128
+
129
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
130
+### Utilities
131
+###
132
+
133
+gmapVersion <- function() {
134
+    output <- .gmap(version = TRUE)
135
+    version_text <- sub("GMAP version (.*?) .*", "\\1", output[1])
136
+    parseGsnapVersion(version_text)
137
+}
138
+
... ...
@@ -125,6 +125,10 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"),
125 125
   mode <- match.arg(mode)
126 126
   quality_protocol <- match.arg(quality_protocol)
127 127
   filter_chastity <- match.arg(filter_chastity)
128
+
129
+  if (version) {
130
+      .redirect <- ">/dev/null"
131
+  }
128 132
   
129 133
 ### TODO: if input_a is NULL, or split_output and .redirect are NULL:
130 134
 ###       return a pipe()