git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110840 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -9,10 +9,10 @@ Description: GSNAP and GMAP are a pair of tools to align short-read |
9 | 9 |
methods to work with GMAP and GSNAP from within R. In addition, |
10 | 10 |
it provides methods to tally alignment results on a |
11 | 11 |
per-nucleotide basis using the bam_tally tool. |
12 |
-Version: 1.13.3 |
|
12 |
+Version: 1.13.4 |
|
13 | 13 |
Depends: R (>= 2.15.0), methods, GenomeInfoDb (>= 1.1.3), |
14 | 14 |
GenomicRanges (>= 1.17.12) |
15 |
-Imports: S4Vectors, IRanges, Rsamtools (>= 1.17.8), rtracklayer (>= 1.25.5), |
|
15 |
+Imports: S4Vectors, IRanges, Rsamtools (>= 1.17.8), rtracklayer (>= 1.31.2), |
|
16 | 16 |
GenomicFeatures (>= 1.17.13), Biostrings, VariantAnnotation (>= 1.11.4), |
17 | 17 |
tools, Biobase, BSgenome, GenomicAlignments (>= 1.1.9), BiocParallel |
18 | 18 |
Suggests: RUnit, BSgenome.Dmelanogaster.UCSC.dm3, |
... | ... |
@@ -21,7 +21,9 @@ Suggests: RUnit, BSgenome.Dmelanogaster.UCSC.dm3, |
21 | 21 |
LungCancerLines |
22 | 22 |
Collate: GmapBamReader-class.R GmapGenomeDirectory-class.R |
23 | 23 |
GmapGenome-class.R GmapSnpDirectory-class.R GmapSnps-class.R |
24 |
- GsnapParam-class.R GsnapOutput-class.R atoiindex-command.R |
|
24 |
+ GmapParam-class.R GsnapParam-class.R |
|
25 |
+ GsnapOutput-class.R GmapOutput-class.R |
|
26 |
+ atoiindex-command.R |
|
25 | 27 |
BamTallyParam-class.R bam_tally-command.R cmetindex-command.R |
26 | 28 |
get-genome-command.R gmap-command.R gmap_build-command.R |
27 | 29 |
gsnap-command.R iit-format.R iit_store-command.R info.R |
... | ... |
@@ -18,7 +18,7 @@ importMethodsFrom(GenomicRanges, seqnames, strand) |
18 | 18 |
importMethodsFrom(Rsamtools, asBam) |
19 | 19 |
importClassesFrom(GenomicFeatures, TxDb) |
20 | 20 |
importFrom(GenomicFeatures, transcripts, exons) |
21 |
-importClassesFrom(rtracklayer, RTLFile, FastaFile) |
|
21 |
+importClassesFrom(rtracklayer, RTLFile, FastaFile, RTLFileList) |
|
22 | 22 |
importFrom(rtracklayer, "referenceSequence<-", import, export, FastaFile) |
23 | 23 |
importMethodsFrom(rtracklayer, export) |
24 | 24 |
importFrom(VariantAnnotation, readVcf, ScanVcfParam, fixed, VRanges, ref, alt, |
... | ... |
@@ -32,10 +32,14 @@ importMethodsFrom(GenomicAlignments, qwidth) |
32 | 32 |
|
33 | 33 |
export(bam_tally, GmapGenome, GmapGenomeDirectory, GsnapParam, |
34 | 34 |
directory, BamTallyParam, makeGmapGenomePackage, GsnapOutput, |
35 |
- GmapSnps, GmapSnpDirectory, TP53Genome, TP53Which, variantSummary) |
|
35 |
+ GmapSnps, GmapSnpDirectory, TP53Genome, TP53Which, variantSummary, |
|
36 |
+ GmapParam) |
|
36 | 37 |
|
37 | 38 |
exportClasses(GmapGenome, GmapGenomeDirectory, GmapSnpDirectory, |
38 |
- GsnapOutput, GmapSnps, BamTallyParam, GsnapParam) |
|
39 |
+ GsnapOutput, GmapSnps, BamTallyParam, GsnapParam, GmapParam, |
|
40 |
+ GmapAlignerParam) |
|
39 | 41 |
|
40 | 42 |
exportMethods(bamPaths, path, genome, seqinfo, gsnap, "snps<-", |
41 |
- "spliceSites<-", getSeq) |
|
43 |
+ "spliceSites<-", getSeq, gmap) |
|
44 |
+ |
|
45 |
+S3method(as.list, GmapAlignerParam) |
42 | 46 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,98 @@ |
1 |
+### ========================================================================= |
|
2 |
+### GmapOutput class |
|
3 |
+### ------------------------------------------------------------------------- |
|
4 |
+### |
|
5 |
+### Stores a path to a GMAP output directory. This is based on the |
|
6 |
+### assumption that the alignments from a sample will land in a |
|
7 |
+### unique directory. |
|
8 |
+### |
|
9 |
+ |
|
10 |
+setClassUnion("GmapParamORNULL", c("GmapParam", "NULL")) |
|
11 |
+setIs("GmapParamORNULL", "GmapAlignerParamORNULL") |
|
12 |
+ |
|
13 |
+.valid_GmapOutput <- function(object) { |
|
14 |
+ if (length(paths(object)) == 0L) |
|
15 |
+ paste0("No GSNAP output at '", object@path, "'") |
|
16 |
+} |
|
17 |
+ |
|
18 |
+setClass("GmapOutput", |
|
19 |
+ representation(path = "character", |
|
20 |
+ param = "GmapParamORNULL", |
|
21 |
+ version = "POSIXltORNULL"), |
|
22 |
+ contains="GmapAlignerOutput", |
|
23 |
+ validity = .valid_GmapOutput) |
|
24 |
+ |
|
25 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
26 |
+### Accessors |
|
27 |
+### |
|
28 |
+ |
|
29 |
+setMethod("alignmentCategories", "GmapOutput", function(x) { |
|
30 |
+ c("uniq", "mult", "chimera", "nomapping") |
|
31 |
+ }) |
|
32 |
+ |
|
33 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
34 |
+### Constructor |
|
35 |
+### |
|
36 |
+ |
|
37 |
+GmapOutput <- function(path, param = NULL, version = NULL) { |
|
38 |
+ newGmapAlignerOutput("GmapOutput", path, param, version) |
|
39 |
+} |
|
40 |
+ |
|
41 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
42 |
+### Coercion |
|
43 |
+### |
|
44 |
+ |
|
45 |
+formatToExt <- function(x) { |
|
46 |
+ switch(x, |
|
47 |
+ gff3_gene=, gff3_match_cdna=, gff3_match_est="gff3", |
|
48 |
+ sampe=, samse="sam", |
|
49 |
+ psl="psl", splicesites="splicesites", introns="introns", |
|
50 |
+ map_exons="iit", map_ranges="iit", coords="coords") |
|
51 |
+} |
|
52 |
+ |
|
53 |
+setAs("GmapOutput", "RTLFileList", function(from) { |
|
54 |
+ as(lapply(paths(from), FileForFormat, formatToExt(from@param@format)), |
|
55 |
+ "List") |
|
56 |
+ }) |
|
57 |
+ |
|
58 |
+setAs("GmapOutput", "RTLFile", function(from) { |
|
59 |
+ p <- paths(from) |
|
60 |
+ if (length(p) == 0L) { |
|
61 |
+ stop("no output files at: ", path(from)) |
|
62 |
+ } |
|
63 |
+ FileForFormat(p[1L], formatToExt(from@param@format)) |
|
64 |
+ }) |
|
65 |
+ |
|
66 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
67 |
+### Utilities |
|
68 |
+### |
|
69 |
+ |
|
70 |
+setMethod("import", c("GmapOutput", "missing", "missing"), |
|
71 |
+ function(con, format, text, ...) { |
|
72 |
+ if (con@param@format %in% c("samse", "sampe")) { |
|
73 |
+ f <- as(con, "BamFile") |
|
74 |
+ } else { |
|
75 |
+ f <- as(con, "RTLFile") |
|
76 |
+ } |
|
77 |
+ ans <- import(f, ...) |
|
78 |
+ seqinfo(ans) <- seqinfo(con@param@genome) |
|
79 |
+ ans |
|
80 |
+ }) |
|
81 |
+ |
|
82 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
83 |
+### List class |
|
84 |
+### |
|
85 |
+ |
|
86 |
+setClass("GmapOutputList", |
|
87 |
+ prototype = prototype(elementType = "GmapOutput"), |
|
88 |
+ contains = "GmapAlignerOutputList") |
|
89 |
+ |
|
90 |
+setClass("SimpleGmapOutputList", |
|
91 |
+ contains = c("GmapOutputList", "SimpleList")) |
|
92 |
+ |
|
93 |
+GmapOutputList <- function(...) { |
|
94 |
+ args <- list(...) |
|
95 |
+ if (length(args) == 1 && is.list(args[[1]])) |
|
96 |
+ args <- args[[1]] |
|
97 |
+ S4Vectors:::new_SimpleList_from_list("SimpleGmapOutputList", args) |
|
98 |
+} |
0 | 99 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,60 @@ |
1 |
+### ========================================================================= |
|
2 |
+### GmapParam class |
|
3 |
+### ------------------------------------------------------------------------- |
|
4 |
+### |
|
5 |
+### High-level interface to gmap. |
|
6 |
+### |
|
7 |
+ |
|
8 |
+setClassUnion("GmapSnpsORNULL", c("GmapSnps", "NULL")) |
|
9 |
+ |
|
10 |
+setClass("GmapAlignerParam", |
|
11 |
+ representation(genome = "GmapGenome", |
|
12 |
+ part = "characterORNULL", |
|
13 |
+ batch = "character", |
|
14 |
+ snps = "GmapSnpsORNULL", |
|
15 |
+ mode = "character", |
|
16 |
+ nthreads = "integer", |
|
17 |
+ npaths = "integer", |
|
18 |
+ quiet_if_excessive = "logical", |
|
19 |
+ nofails = "logical", |
|
20 |
+ split_output = "logical", |
|
21 |
+ extra = "list")) |
|
22 |
+ |
|
23 |
+setClass("GmapParam", |
|
24 |
+ representation(suboptimal_score = "integer", |
|
25 |
+ splicing = "logical", |
|
26 |
+ format = "character"), |
|
27 |
+ contains="GmapAlignerParam") |
|
28 |
+ |
|
29 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
30 |
+### Constructor |
|
31 |
+### |
|
32 |
+ |
|
33 |
+GmapParam <- function(genome, unique_only = FALSE, |
|
34 |
+ suboptimal_score = 0L, mode = "standard", |
|
35 |
+ snps = NULL, |
|
36 |
+ npaths = if (unique_only) 1L else 100L, |
|
37 |
+ quiet_if_excessive = unique_only, nofails = unique_only, |
|
38 |
+ split_output = !unique_only, |
|
39 |
+ splicing = TRUE, |
|
40 |
+ nthreads = 1L, part = NULL, batch = "2", |
|
41 |
+ format = c("gff3_gene", "gff3_match_cdna", |
|
42 |
+ "gff3_match_est", "sampe", "samse", "psl", |
|
43 |
+ "splicesites", "introns", |
|
44 |
+ "map_exons", "map_ranges", "coords"), |
|
45 |
+ ...) |
|
46 |
+{ |
|
47 |
+ format <- match.arg(format) |
|
48 |
+ newGmapAlignerParam("GmapParam", genome, snps) |
|
49 |
+} |
|
50 |
+ |
|
51 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
52 |
+### Coercion |
|
53 |
+### |
|
54 |
+ |
|
55 |
+setAs("GmapParam", "list", function(from) { |
|
56 |
+ to <- GmapAlignerParam_asList(from) |
|
57 |
+ to$nosplicing <- !to$splicing |
|
58 |
+ to$splicing <- NULL |
|
59 |
+ to |
|
60 |
+ }) |
... | ... |
@@ -9,35 +9,46 @@ |
9 | 9 |
|
10 | 10 |
setClassUnion("POSIXltORNULL", c("POSIXlt", "NULL")) |
11 | 11 |
|
12 |
-setClassUnion("GsnapParamORNULL", c("GsnapParam", "NULL")) |
|
13 |
- |
|
14 | 12 |
.valid_GsnapOutput <- function(object) { |
15 | 13 |
if (length(samPaths(object)) == 0L && length(bamPaths(object)) == 0L) |
16 | 14 |
paste0("No GSNAP output at '", object@path, "'") |
17 | 15 |
} |
18 | 16 |
|
19 |
-setClass("GsnapOutput", |
|
17 |
+setClassUnion("GmapAlignerParamORNULL", c("GmapAlignerParam", "NULL")) |
|
18 |
+ |
|
19 |
+setClassUnion("GsnapParamORNULL", c("GsnapParam", "NULL")) |
|
20 |
+setIs("GsnapParamORNULL", "GmapAlignerParamORNULL") |
|
21 |
+ |
|
22 |
+setClass("GmapAlignerOutput", |
|
20 | 23 |
representation(path = "character", |
21 |
- param = "GsnapParamORNULL", |
|
22 |
- version = "POSIXltORNULL"), |
|
24 |
+ param = "GmapAlignerParamORNULL", |
|
25 |
+ version = "POSIXltORNULL")) |
|
26 |
+ |
|
27 |
+setClass("GsnapOutput", |
|
28 |
+ representation(param = "GsnapParamORNULL"), |
|
29 |
+ contains="GmapAlignerOutput", |
|
23 | 30 |
validity = .valid_GsnapOutput) |
24 | 31 |
|
25 | 32 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
26 | 33 |
### Accessors |
27 | 34 |
### |
28 | 35 |
|
29 |
-setMethod("path", "GsnapOutput", function(object) object@path) |
|
36 |
+setMethod("path", "GmapAlignerOutput", function(object) object@path) |
|
37 |
+ |
|
38 |
+setGeneric("alignmentCategories", |
|
39 |
+ function(x) standardGeneric("alignmentCategories")) |
|
30 | 40 |
|
31 |
-## should contain any category ever known to be used by gsnap |
|
32 |
-.ALIGNMENT_CATEGORIES <- c("concordant_uniq", "concordant_mult", |
|
33 |
- "concordant_transloc", "paired_mult", |
|
34 |
- "paired_uniq_inv", "paired_uniq_long", |
|
35 |
- "paired_uniq_scr", "unpaired_uniq", "unpaired_mult", |
|
36 |
- "unpaired_transloc", "halfmapping_uniq", |
|
37 |
- "halfmapping_mult", "halfmapping_transloc", |
|
38 |
- "nomapping", "concordant_circular", |
|
39 |
- "halfmapping_circular", "paired_uniq_circular", |
|
40 |
- "unpaired_circular") |
|
41 |
+setMethod("alignmentCategories", "GsnapOutput", function(x) { |
|
42 |
+ c("concordant_uniq", "concordant_mult", |
|
43 |
+ "concordant_transloc", "paired_mult", |
|
44 |
+ "paired_uniq_inv", "paired_uniq_long", |
|
45 |
+ "paired_uniq_scr", "unpaired_uniq", "unpaired_mult", |
|
46 |
+ "unpaired_transloc", "halfmapping_uniq", |
|
47 |
+ "halfmapping_mult", "halfmapping_transloc", |
|
48 |
+ "nomapping", "concordant_circular", |
|
49 |
+ "halfmapping_circular", "paired_uniq_circular", |
|
50 |
+ "unpaired_circular") |
|
51 |
+ }) |
|
41 | 52 |
|
42 | 53 |
### Q: How to handle the single BAM output case? We would still want a |
43 | 54 |
### formal object, with a version, parameters, etc. If 'path' is a |
... | ... |
@@ -53,31 +64,38 @@ is_dir <- function(x) { |
53 | 64 |
|
54 | 65 |
setGeneric("bamPath", function(x) standardGeneric("bamPath")) |
55 | 66 |
|
56 |
-setMethod("bamPath", "GsnapOutput", function(x) { |
|
57 |
- paths <- bamPaths(x) |
|
58 |
- if (length(paths) > 1) # maybe this should be the analysis BAM if it exists? |
|
59 |
- paths <- paths["concordant_uniq"] |
|
60 |
- paths |
|
61 |
-}) |
|
67 |
+setMethod("bamPath", "GmapAlignerOutput", function(x) { |
|
68 |
+ paths <- bamPaths(x) |
|
69 |
+ if (length(paths) > 1L) |
|
70 |
+ paths <- paths[alignmentCategories(x)[1L]] |
|
71 |
+ paths |
|
72 |
+ }) |
|
62 | 73 |
|
63 | 74 |
## file_ext does not allow '_' |
64 | 75 |
file_ext2 <- function(x) { |
65 | 76 |
gsub(".*\\.", "", x) |
66 | 77 |
} |
67 | 78 |
|
79 |
+paths <- function(x) { |
|
80 |
+ if (!is_dir(x)) { |
|
81 |
+ return(path(x)) |
|
82 |
+ } |
|
83 |
+ paths <- list_files_with_exts(path(x), alignmentCategories(x), |
|
84 |
+ full.names = TRUE) |
|
85 |
+ names(paths) <- file_ext2(paths) |
|
86 |
+ paths[alignmentCategories(x)] |
|
87 |
+} |
|
88 |
+ |
|
68 | 89 |
samPaths <- function(x) { |
69 | 90 |
if (!is_dir(x)) { |
70 | 91 |
if (file_ext(path(x)) == "sam") |
71 | 92 |
return(path(x)) |
72 | 93 |
return(character()) |
73 | 94 |
} |
74 |
- paths <- list_files_with_exts(path(x), .ALIGNMENT_CATEGORIES, |
|
75 |
- full.names = TRUE) |
|
76 |
- names(paths) <- file_ext2(paths) |
|
77 |
- paths |
|
95 |
+ paths(x) |
|
78 | 96 |
} |
79 | 97 |
|
80 |
-setMethod("bamPaths", "GsnapOutput", function(x) { |
|
98 |
+setMethod("bamPaths", "GmapAlignerOutput", function(x) { |
|
81 | 99 |
if (!is_dir(x)) { |
82 | 100 |
if (file_ext(path(x)) == "bam") |
83 | 101 |
return(path(x)) |
... | ... |
@@ -85,7 +103,7 @@ setMethod("bamPaths", "GsnapOutput", function(x) { |
85 | 103 |
} |
86 | 104 |
paths <- list_files_with_exts(path(x), "bam", full.names = TRUE) |
87 | 105 |
names(paths) <- file_ext2(file_path_sans_ext(paths)) |
88 |
- paths <- paths[names(paths) %in% .ALIGNMENT_CATEGORIES] |
|
106 |
+ paths <- paths[names(paths) %in% alignmentCategories(x)] |
|
89 | 107 |
paths |
90 | 108 |
}) |
91 | 109 |
|
... | ... |
@@ -93,29 +111,31 @@ setMethod("bamPaths", "GsnapOutput", function(x) { |
93 | 111 |
### Constructor |
94 | 112 |
### |
95 | 113 |
|
114 |
+newGmapAlignerOutput <- function(Class, path, param = NULL, version = NULL) { |
|
115 |
+ if (!is.character(path) || any(is.na(path))) |
|
116 |
+ stop("'path' must be a character vector without any NA's") |
|
117 |
+ if (!is(version, "POSIXltORNULL")) { |
|
118 |
+ if (is.character(version)) |
|
119 |
+ version <- parseGsnapVersion(version) |
|
120 |
+ else stop("'version' must be a version string, POSIXlt object or NULL") |
|
121 |
+ } |
|
122 |
+ path <- file_path_as_absolute(path) |
|
123 |
+ new(Class, path = path, version = version, param = param) |
|
124 |
+} |
|
125 |
+ |
|
96 | 126 |
GsnapOutput <- function(path, param = NULL, version = NULL) { |
97 |
- if (!is.character(path) || any(is.na(path))) |
|
98 |
- stop("'path' must be a character vector without any NA's") |
|
99 |
- if (!is(version, "POSIXltORNULL")) { |
|
100 |
- if (is.character(version)) |
|
101 |
- version <- parseGsnapVersion(version) |
|
102 |
- else stop("'version' must be a version string, POSIXlt object or NULL") |
|
103 |
- } |
|
104 |
- if (!is(param, "GsnapParamORNULL")) |
|
105 |
- stop("'param' must be a GsnapParam object or NULL") |
|
106 |
- path <- file_path_as_absolute(path) |
|
107 |
- new("GsnapOutput", path = path, version = version, param = param) |
|
127 |
+ newGmapAlignerOutput("GsnapOutput", path, param, version) |
|
108 | 128 |
} |
109 | 129 |
|
110 | 130 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
111 | 131 |
### Coercion |
112 | 132 |
### |
113 | 133 |
|
114 |
-setAs("GsnapOutput", "BamFileList", function(from) { |
|
134 |
+setAs("GmapAlignerOutput", "BamFileList", function(from) { |
|
115 | 135 |
BamFileList(bamPaths(from)) |
116 | 136 |
}) |
117 | 137 |
|
118 |
-setAs("GsnapOutput", "BamFile", function(from) { |
|
138 |
+setAs("GmapAlignerOutput", "BamFile", function(from) { |
|
119 | 139 |
BamFile(bamPath(from)) |
120 | 140 |
}) |
121 | 141 |
|
... | ... |
@@ -123,19 +143,19 @@ setAs("GsnapOutput", "BamFile", function(from) { |
123 | 143 |
### Utilities |
124 | 144 |
### |
125 | 145 |
|
126 |
-setMethod("merge", c("GsnapOutput", "GsnapOutput"), function(x, y) { |
|
146 |
+setMethod("merge", c("GmapAlignerOutput", "GmapAlignerOutput"), function(x, y) { |
|
127 | 147 |
### TODO: get Cory's lane merging code for BAMs in here |
128 | 148 |
}) |
129 | 149 |
|
130 | 150 |
setGeneric("consolidate", function(x, ...) standardGeneric("consolidate")) |
131 | 151 |
|
132 |
-setMethod("consolidate", "GsnapOutput", function(x) { |
|
152 |
+setMethod("consolidate", "GmapAlignerOutput", function(x) { |
|
133 | 153 |
### TODO: this should produce the pipeline's analyzed BAM |
134 | 154 |
x |
135 | 155 |
}) |
136 | 156 |
|
137 | 157 |
##converts all gsnap SAM files to BAM files and creates the .bai index files |
138 |
-setMethod("asBam", "GsnapOutput", |
|
158 |
+setMethod("asBam", "GmapAlignerOutput", |
|
139 | 159 |
function(file) { |
140 | 160 |
## the input is not a file. It is a GsnapOutput, |
141 | 161 |
## but param name is enforced |
... | ... |
@@ -155,6 +175,11 @@ setMethod("asBam", "GsnapOutput", |
155 | 175 |
file |
156 | 176 |
}) |
157 | 177 |
|
178 |
+setMethod("import", c("GsnapOutput", "missing", "missing"), |
|
179 |
+ function(con, format, text, ...) { |
|
180 |
+ import(as(con, "BamFile"), ...) |
|
181 |
+ }) |
|
182 |
+ |
|
158 | 183 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
159 | 184 |
### List class |
160 | 185 |
### |
... | ... |
@@ -167,9 +192,13 @@ setMethod("asBam", "GsnapOutput", |
167 | 192 |
### character vectors. Would we vectorize the version or not? It makes |
168 | 193 |
### sense to have an object that represents a single run of gsnap. |
169 | 194 |
|
195 |
+setClass("GmapAlignerOutputList", |
|
196 |
+ prototype = prototype(elementType = "GmapAlignerOutput"), |
|
197 |
+ contains = "List") |
|
198 |
+ |
|
170 | 199 |
setClass("GsnapOutputList", |
171 | 200 |
prototype = prototype(elementType = "GsnapOutput"), |
172 |
- contains = "List") |
|
201 |
+ contains = "GmapAlignerOutputList") |
|
173 | 202 |
|
174 | 203 |
setClass("SimpleGsnapOutputList", |
175 | 204 |
contains = c("GsnapOutputList", "SimpleList")) |
... | ... |
@@ -181,7 +210,7 @@ GsnapOutputList <- function(...) { |
181 | 210 |
S4Vectors:::new_SimpleList_from_list("SimpleGsnapOutputList", args) |
182 | 211 |
} |
183 | 212 |
|
184 |
-setAs("GsnapOutputList", "BamFileList", function(from) { |
|
213 |
+setAs("GmapAlignerOutputList", "BamFileList", function(from) { |
|
185 | 214 |
BamFileList(lapply(from, as, "BamFile")) |
186 | 215 |
}) |
187 | 216 |
|
... | ... |
@@ -190,6 +219,6 @@ setAs("GsnapOutputList", "BamFileList", function(from) { |
190 | 219 |
### Show |
191 | 220 |
### |
192 | 221 |
|
193 |
-setMethod("show", "GsnapOutput", function(object) { |
|
194 |
- cat("A GsnapOutput Object\n", "path: ", path(object), "\n", sep = "") |
|
222 |
+setMethod("show", "GmapAlignerOutput", function(object) { |
|
223 |
+ cat("A ", class(object), " Object\n", "path: ", path(object), "\n", sep = "") |
|
195 | 224 |
}) |
... | ... |
@@ -9,27 +9,16 @@ |
9 | 9 |
### |
10 | 10 |
|
11 | 11 |
setClassUnion("integerORNULL", c("integer", "NULL")) |
12 |
-setClassUnion("GmapSnpsORNULL", c("GmapSnps", "NULL")) |
|
13 | 12 |
|
14 | 13 |
setClass("GsnapParam", |
15 |
- representation(genome = "GmapGenome", |
|
16 |
- part = "characterORNULL", # used by parallelized_gsnap |
|
17 |
- batch = "character", # weird "0", "1", ... |
|
18 |
- max_mismatches = "integerORNULL", |
|
14 |
+ representation(max_mismatches = "integerORNULL", |
|
19 | 15 |
suboptimal_levels = "integer", |
20 |
- snps = "GmapSnpsORNULL", |
|
21 |
- mode = "character", |
|
22 |
- nthreads = "integer", |
|
23 | 16 |
novelsplicing = "logical", |
24 | 17 |
splicing = "characterORNULL", |
25 |
- npaths = "integer", |
|
26 |
- quiet_if_excessive = "logical", |
|
27 |
- nofails = "logical", |
|
28 |
- split_output = "logical", |
|
29 | 18 |
terminal_threshold = "integer", |
30 | 19 |
gmap_mode = "character", |
31 |
- clip_overlap = "logical", |
|
32 |
- extra = "list")) |
|
20 |
+ clip_overlap = "logical"), |
|
21 |
+ contains="GmapAlignerParam") |
|
33 | 22 |
|
34 | 23 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
35 | 24 |
### Accessors |
... | ... |
@@ -148,6 +137,27 @@ gsnap_extra <- function(x) { |
148 | 137 |
### Constructor |
149 | 138 |
### |
150 | 139 |
|
140 |
+newGmapAlignerParam <- function(Class, genome, snps) { |
|
141 |
+ if (missing(genome)) |
|
142 |
+ stop("The 'genome' must be specified (and coercible to GmapGenome)") |
|
143 |
+ args <- formals(sys.function(sys.parent(1L))) |
|
144 |
+ params <- mget(names(args), parent.frame()) |
|
145 |
+ params$unique_only <- NULL |
|
146 |
+ paramClasses <- getSlots(Class) |
|
147 |
+ paramClasses <- paramClasses[setdiff(names(paramClasses), |
|
148 |
+ c("extra", "snps"))] |
|
149 |
+ params <- mapply(as, params[names(paramClasses)], paramClasses, |
|
150 |
+ SIMPLIFY = FALSE) |
|
151 |
+ if (!is.null(snps)) { |
|
152 |
+ if (!is(snps, "GmapSnps")) { |
|
153 |
+ snps <- GmapSnps(snps, genome) |
|
154 |
+ } |
|
155 |
+ params$snps <- snps |
|
156 |
+ } |
|
157 |
+ params$extra <- evalq(list(...), parent.frame()) |
|
158 |
+ do.call(new, c(Class, params)) |
|
159 |
+} |
|
160 |
+ |
|
151 | 161 |
GsnapParam <- function(genome, unique_only = FALSE, molecule = c("RNA", "DNA"), |
152 | 162 |
max_mismatches = NULL, |
153 | 163 |
suboptimal_levels = 0L, mode = "standard", |
... | ... |
@@ -157,54 +167,43 @@ GsnapParam <- function(genome, unique_only = FALSE, molecule = c("RNA", "DNA"), |
157 | 167 |
split_output = !unique_only, |
158 | 168 |
novelsplicing = FALSE, splicing = NULL, |
159 | 169 |
nthreads = 1L, part = NULL, batch = "2", |
160 |
- terminal_threshold = if (molecule == "DNA") 1000L else 2L, |
|
161 |
- gmap_mode = if (molecule == "DNA") "none" else |
|
162 |
- "pairsearch,terminal,improve", |
|
170 |
+ terminal_threshold = |
|
171 |
+ if (molecule == "DNA") 1000L else 2L, |
|
172 |
+ gmap_mode = if (molecule == "DNA") "none" |
|
173 |
+ else "pairsearch,terminal,improve", |
|
163 | 174 |
clip_overlap = FALSE, ...) |
164 | 175 |
{ |
165 |
- if (missing(genome)) |
|
166 |
- stop("The 'genome' must be specified (should be coercible to GmapGenome)") |
|
167 | 176 |
molecule <- match.arg(molecule) |
168 |
- args <- formals(sys.function()) |
|
169 |
- params <- mget(names(args), environment()) |
|
170 |
- params$unique_only <- NULL |
|
171 |
- paramClasses <- getSlots("GsnapParam") |
|
172 |
- paramClasses <- paramClasses[setdiff(names(paramClasses), c("extra", "snps"))] |
|
173 |
- params <- mapply(as, params[names(paramClasses)], paramClasses, |
|
174 |
- SIMPLIFY = FALSE) |
|
175 |
- if (!is.null(snps)) { |
|
176 |
- if (!is(snps, "GmapSnps")) { |
|
177 |
- snps <- GmapSnps(snps, genome) |
|
178 |
- } |
|
179 |
- params$snps <- snps |
|
180 |
- } |
|
181 |
- params$extra <- list(...) |
|
182 |
- do.call(new, c("GsnapParam", params)) |
|
177 |
+ newGmapAlignerParam("GsnapParam", genome, snps) |
|
183 | 178 |
} |
184 | 179 |
|
185 | 180 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
186 | 181 |
### Coercion |
187 | 182 |
### |
188 | 183 |
|
189 |
-setAs("GsnapParam", "list", function(from) { |
|
184 |
+GmapAlignerParam_asList <- function(from) { |
|
190 | 185 |
to <- lapply(slotNames(from), slot, object = from) |
191 | 186 |
names(to) <- slotNames(from) |
192 |
- to$split_output <- if (to$split_output) "gsnap" else NULL |
|
187 |
+ to$split_output <- if (to$split_output) tolower(sub("Param", "", class(from))) |
|
193 | 188 |
to$db <- genome(to$genome) |
194 | 189 |
to$dir <- path(directory(to$genome)) |
195 | 190 |
to$genome <- NULL |
196 | 191 |
to$use_snps <- name(to$snps) |
197 | 192 |
to$snpsdir <- path(directory(to$snps)) |
198 |
- to$novelsplicing <- as.integer(to$novelsplicing) |
|
199 |
- to <- rename(to, splicing = "use_splicing") |
|
200 | 193 |
extras <- to$extra |
201 | 194 |
to <- c(to, extras) |
202 |
- to$extra <- NULL |
|
203 |
- |
|
195 |
+ to$extra <- NULL |
|
204 | 196 |
to |
205 |
-}) |
|
197 |
+} |
|
198 |
+ |
|
199 |
+setAs("GsnapParam", "list", function(from) { |
|
200 |
+ to <- GmapAlignerParam_asList(from) |
|
201 |
+ to$novelsplicing <- as.integer(to$novelsplicing) |
|
202 |
+ to <- rename(to, splicing = "use_splicing") |
|
203 |
+ to |
|
204 |
+ }) |
|
206 | 205 |
|
207 |
-setMethod("as.list", "GsnapParam", function(x) as(x, "list")) |
|
206 |
+as.list.GmapAlignerParam <- function(x) as(x, "list") |
|
208 | 207 |
|
209 | 208 |
setAs("ANY", "characterORNULL", function(from) { |
210 | 209 |
if (is.null(from)) |
... | ... |
@@ -222,7 +221,7 @@ setAs("ANY", "integerORNULL", function(from) { |
222 | 221 |
### Show |
223 | 222 |
### |
224 | 223 |
|
225 |
-setMethod("show", "GsnapParam", function(object) { |
|
224 |
+setMethod("show", "GmapAlignerParam", function(object) { |
|
226 | 225 |
slots <- lapply(slotNames(object), slot, object = object) |
227 | 226 |
names(slots) <- slotNames(object) |
228 | 227 |
slots$genome <- paste0(slots$genome@name, |
... | ... |
@@ -231,6 +230,6 @@ setMethod("show", "GsnapParam", function(object) { |
231 | 230 |
slots$snps <- paste0(name(slots$snps), |
232 | 231 |
" (", path(directory(slots$snps)), ")") |
233 | 232 |
} |
234 |
- cat("A GsnapParams object\n", |
|
233 |
+ cat("A ", class(object), " object\n", |
|
235 | 234 |
paste0(names(slots), ": ", slots, collapse = "\n"), "\n", sep = "") |
236 | 235 |
}) |
... | ... |
@@ -6,6 +6,133 @@ |
6 | 6 |
### High-level interface |
7 | 7 |
### |
8 | 8 |
|
9 |
+setGeneric("gmap", function(input, params, ...) standardGeneric("gmap")) |
|
10 |
+ |
|
11 |
+setMethod("gmap", c("ANY", "GmapParam"), |
|
12 |
+ function(input, params, ...) { |
|
13 |
+ tmpfile <- tempfile() |
|
14 |
+ export(input, tmpfile, format="fasta") |
|
15 |
+ input <- tmpfile |
|
16 |
+ callGeneric() |
|
17 |
+ }) |
|
18 |
+ |
|
19 |
+setMethod("gmap", c("FastaFile", "GmapParam"), |
|
20 |
+ function(input, params, ...) { |
|
21 |
+ input <- path(input) |
|
22 |
+ callGeneric() |
|
23 |
+ }) |
|
24 |
+ |
|
25 |
+setMethod("gmap", c("character", "GmapParam"), |
|
26 |
+ function(input, params, |
|
27 |
+ output = file.path(getwd(), |
|
28 |
+ file_path_sans_ext(basename(input), TRUE)), ...) |
|
29 |
+ { |
|
30 |
+ if (any(is.na(input))) |
|
31 |
+ stop("'input' must not contain NA's") |
|
32 |
+ if (length(input) > 1L) { |
|
33 |
+ return(GmapOutputList(mapply(gmap, input, |
|
34 |
+ MoreArgs = |
|
35 |
+ list(params, output, |
|
36 |
+ ...)))) |
|
37 |
+ } |
|
38 |
+ |
|
39 |
+ output_dir <- dirname(output) |
|
40 |
+ if (!file.exists(output_dir)) |
|
41 |
+ dir.create(output_dir, recursive = TRUE) |
|
42 |
+ |
|
43 |
+ params <- initialize(params, ...) |
|
44 |
+ params_list <- as.list(params) |
|
45 |
+ if (gsnap_split_output(params)) { |
|
46 |
+ params_list$split_output <- output |
|
47 |
+ output_path <- output_dir |
|
48 |
+ } else { |
|
49 |
+ output_path <- paste0(output, ".", |
|
50 |
+ formatToExt(params$format)) |
|
51 |
+ params_list$.redirect <- paste(">", output_path) |
|
52 |
+ } |
|
53 |
+ |
|
54 |
+ res <- do.call(.gmap, |
|
55 |
+ c(list(.input = input), params_list)) |
|
56 |
+ gmap_output<- GmapOutput(path = output_path, |
|
57 |
+ version = gmapVersion(), |
|
58 |
+ param = params) |
|
59 |
+ |
|
60 |
+ if (params_list$format %in% c("samse", "sampe")) { |
|
61 |
+ gmap_output <- asBam(gmap_output) |
|
62 |
+ } |
|
63 |
+ gmap_output |
|
64 |
+ }) |
|
65 |
+ |
|
9 | 66 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
10 | 67 |
### Low-level interface |
11 | 68 |
### |
69 |
+ |
|
70 |
+.gmap <- function(db = NULL, dir = NULL, |
|
71 |
+ kmer = NULL, basesize = NULL, sampling = NULL, |
|
72 |
+ genomefull = FALSE, gseg = NULL, |
|
73 |
+ selfalign = FALSE, pairalign = FALSE, |
|
74 |
+ part = NULL, input_buffer_size = 1000L, |
|
75 |
+ batch = c("0", "1", "2", "3", "4", "5"), |
|
76 |
+ expand_offsets = FALSE, nosplicing = FALSE, |
|
77 |
+ min_intronlength = 9L, intronlength = 1000000L, |
|
78 |
+ localsplicedist = 2000000L, totallength = 2400000L, |
|
79 |
+ chimera_margin = 40L, no_chimeras = FALSE, |
|
80 |
+ nthreads = NULL, chrsubsetfile = NULL, |
|
81 |
+ direction = c("auto", "sense_force", "antisense_force", |
|
82 |
+ "sense_filter", "antisense_filter"), |
|
83 |
+ trimendexons = 12L, canonical_mode = c("1", "0", "2"), |
|
84 |
+ cross_species = FALSE, allow_close_indels = c("1", "0", "2"), |
|
85 |
+ microexon_spliceprob = 0.90, cmetdir = NULL, atoidir = NULL, |
|
86 |
+ mode = c("standard", "cmet-stranded", "cmet-nonstranded", |
|
87 |
+ "atoi-stranded", "atoi-nonstranded"), |
|
88 |
+ prunelevel = c("0", "1", "2", "3"), |
|
89 |
+ format = NULL, npaths = 5L, quiet_if_excessive = FALSE, |
|
90 |
+ suboptimal_score = NULL, ordered = FALSE, md5 = FALSE, |
|
91 |
+ chimera_overlap = FALSE, failsonly = FALSE, nofails = FALSE, |
|
92 |
+ fails_as_input = FALSE, snpsdir = NULL, use_snps = NULL, |
|
93 |
+ split_output = NULL, append_output = FALSE, |
|
94 |
+ output_buffer_size = 1000L, fulllength = FALSE, |
|
95 |
+ cdsstart = NULL, truncate = FALSE, tolerant = FALSE, |
|
96 |
+ no_sam_headers = FALSE, sam_use_0M = FALSE, |
|
97 |
+ force_xs_dir = FALSE, md_lowercase_snp = FALSE, |
|
98 |
+ read_group_id = NULL, read_group_name = NULL, |
|
99 |
+ read_group_library = NULL, read_group_platform = NULL, |
|
100 |
+ quality_protocol = c("sanger", "illumina"), |
|
101 |
+ quality_print_shift = 0L, mapdir = NULL, map = NULL, |
|
102 |
+ mapexons = FALSE, mapboth = FALSE, version = FALSE, |
|
103 |
+ .input = NULL, .redirect = NULL) |
|
104 |
+{ |
|
105 |
+ formals <- formals(sys.function()) |
|
106 |
+ |
|
107 |
+ expand_offsets <- as.integer(expand_offsets) |
|
108 |
+ batch <- match.arg(batch) |
|
109 |
+ direction <- match.arg(direction) |
|
110 |
+ canonical_mode <- match.arg(canonical_mode) |
|
111 |
+ allow_close_indels <- match.arg(allow_close_indels) |
|
112 |
+ mode <- match.arg(mode) |
|
113 |
+ prunelevel <- match.arg(prunelevel) |
|
114 |
+ quality_protocol <- match.arg(quality_protocol) |
|
115 |
+ |
|
116 |
+ if (version) { |
|
117 |
+ .redirect <- ">/dev/null" |
|
118 |
+ } |
|
119 |
+ |
|
120 |
+### TODO: if input_a is NULL, or split_output and .redirect are NULL: |
|
121 |
+### return a pipe() |
|
122 |
+ .system_gsnap(commandLine("gmap")) |
|
123 |
+} |
|
124 |
+ |
|
125 |
+..gmap <- function(args, path = NULL) { |
|
126 |
+ .system_gsnap(.commandLine("gmap", args, path)) |
|
127 |
+} |
|
128 |
+ |
|
129 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
130 |
+### Utilities |
|
131 |
+### |
|
132 |
+ |
|
133 |
+gmapVersion <- function() { |
|
134 |
+ output <- .gmap(version = TRUE) |
|
135 |
+ version_text <- sub("GMAP version (.*?) .*", "\\1", output[1]) |
|
136 |
+ parseGsnapVersion(version_text) |
|
137 |
+} |
|
138 |
+ |
... | ... |
@@ -125,6 +125,10 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
125 | 125 |
mode <- match.arg(mode) |
126 | 126 |
quality_protocol <- match.arg(quality_protocol) |
127 | 127 |
filter_chastity <- match.arg(filter_chastity) |
128 |
+ |
|
129 |
+ if (version) { |
|
130 |
+ .redirect <- ">/dev/null" |
|
131 |
+ } |
|
128 | 132 |
|
129 | 133 |
### TODO: if input_a is NULL, or split_output and .redirect are NULL: |
130 | 134 |
### return a pipe() |