... | ... |
@@ -18,7 +18,7 @@ setMethod("gsnap", c("character", "character_OR_NULL", "GsnapParam"), |
18 | 18 |
if (!is.null(input_b) && length(input_a) != length(input_b)) |
19 | 19 |
stop("If 'input_b' is non-NULL, it must have the same length", |
20 | 20 |
" as 'input_a'") |
21 |
- if (any(is.na(input_a)) || any(is.na(input_b))) |
|
21 |
+ if (anyNA(input_a) || anyNA(input_b)) |
|
22 | 22 |
stop("'input_a' and 'input_b' must not contain NA's") |
23 | 23 |
if (length(input_a) > 1L) { |
24 | 24 |
args <- list(params, consolidate, ...) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@129665 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -21,9 +21,10 @@ setMethod("gsnap", c("character", "character_OR_NULL", "GsnapParam"), |
21 | 21 |
if (any(is.na(input_a)) || any(is.na(input_b))) |
22 | 22 |
stop("'input_a' and 'input_b' must not contain NA's") |
23 | 23 |
if (length(input_a) > 1L) { |
24 |
- return(GsnapOutputList(mapply(gsnap, input_a, input_b, |
|
25 |
- MoreArgs = list(params, output, |
|
26 |
- consolidate, ...)))) |
|
24 |
+ args <- list(params, consolidate, ...) |
|
25 |
+ return(GsnapOutputList(mapply(gsnap, input_a, input_b, |
|
26 |
+ output=output, |
|
27 |
+ MoreArgs = args))) |
|
27 | 28 |
} |
28 | 29 |
|
29 | 30 |
output_dir <- dirname(output) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@126394 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -9,7 +9,7 @@ |
9 | 9 |
setGeneric("gsnap", function(input_a, input_b = NULL, params, ...) |
10 | 10 |
standardGeneric("gsnap")) |
11 | 11 |
|
12 |
-setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
|
12 |
+setMethod("gsnap", c("character", "character_OR_NULL", "GsnapParam"), |
|
13 | 13 |
function(input_a, input_b, params, |
14 | 14 |
output = file.path(getwd(), |
15 | 15 |
file_path_sans_ext(basename(input_a), TRUE)), |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110840 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -125,6 +125,10 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
125 | 125 |
mode <- match.arg(mode) |
126 | 126 |
quality_protocol <- match.arg(quality_protocol) |
127 | 127 |
filter_chastity <- match.arg(filter_chastity) |
128 |
+ |
|
129 |
+ if (version) { |
|
130 |
+ .redirect <- ">/dev/null" |
|
131 |
+ } |
|
128 | 132 |
|
129 | 133 |
### TODO: if input_a is NULL, or split_output and .redirect are NULL: |
130 | 134 |
### return a pipe() |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@85505 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -11,12 +11,15 @@ setGeneric("gsnap", function(input_a, input_b = NULL, params, ...) |
11 | 11 |
|
12 | 12 |
setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
13 | 13 |
function(input_a, input_b, params, |
14 |
- output = file_path_sans_ext(input_a, TRUE), |
|
14 |
+ output = file.path(getwd(), |
|
15 |
+ file_path_sans_ext(basename(input_a), TRUE)), |
|
15 | 16 |
consolidate = TRUE, ...) |
16 | 17 |
{ |
17 | 18 |
if (!is.null(input_b) && length(input_a) != length(input_b)) |
18 | 19 |
stop("If 'input_b' is non-NULL, it must have the same length", |
19 | 20 |
" as 'input_a'") |
21 |
+ if (any(is.na(input_a)) || any(is.na(input_b))) |
|
22 |
+ stop("'input_a' and 'input_b' must not contain NA's") |
|
20 | 23 |
if (length(input_a) > 1L) { |
21 | 24 |
return(GsnapOutputList(mapply(gsnap, input_a, input_b, |
22 | 25 |
MoreArgs = list(params, output, |
... | ... |
@@ -30,7 +33,7 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
30 | 33 |
params <- initialize(params, ...) |
31 | 34 |
params_list <- as.list(params) |
32 | 35 |
if (gsnap_split_output(params)) { |
33 |
- params_list$split_output <- basename(output) |
|
36 |
+ params_list$split_output <- output |
|
34 | 37 |
output_path <- output_dir |
35 | 38 |
} else { |
36 | 39 |
output_path <- paste0(output, ".sam") |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@79763 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -14,6 +14,15 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
14 | 14 |
output = file_path_sans_ext(input_a, TRUE), |
15 | 15 |
consolidate = TRUE, ...) |
16 | 16 |
{ |
17 |
+ if (!is.null(input_b) && length(input_a) != length(input_b)) |
|
18 |
+ stop("If 'input_b' is non-NULL, it must have the same length", |
|
19 |
+ " as 'input_a'") |
|
20 |
+ if (length(input_a) > 1L) { |
|
21 |
+ return(GsnapOutputList(mapply(gsnap, input_a, input_b, |
|
22 |
+ MoreArgs = list(params, output, |
|
23 |
+ consolidate, ...)))) |
|
24 |
+ } |
|
25 |
+ |
|
17 | 26 |
output_dir <- dirname(output) |
18 | 27 |
if (!file.exists(output_dir)) |
19 | 28 |
dir.create(output_dir, recursive = TRUE) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@78522 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -21,7 +21,7 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
21 | 21 |
params <- initialize(params, ...) |
22 | 22 |
params_list <- as.list(params) |
23 | 23 |
if (gsnap_split_output(params)) { |
24 |
- params_list$split_output <- output |
|
24 |
+ params_list$split_output <- basename(output) |
|
25 | 25 |
output_path <- output_dir |
26 | 26 |
} else { |
27 | 27 |
output_path <- paste0(output, ".sam") |
... | ... |
@@ -124,8 +124,10 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
124 | 124 |
} |
125 | 125 |
|
126 | 126 |
.system_gsnap <- function(command) { |
127 |
- command <- paste(command, "2>&1") |
|
128 |
- output <- .system(command, intern = TRUE) |
|
127 |
+ stderr <- tempfile("gsnap-stderr", fileext = "log") |
|
128 |
+ command <- paste(command, "2>", stderr) |
|
129 |
+ .system(command) |
|
130 |
+ output <- readLines(stderr) |
|
129 | 131 |
if (!gsnapSucceeded(command, output)) |
130 | 132 |
stop("Execution of gsnap failed, command-line: '", command, |
131 | 133 |
"'; last output line: '", tail(output, 1), "'") |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@73839 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -51,7 +51,8 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
51 | 51 |
### Low-level interface |
52 | 52 |
### |
53 | 53 |
|
54 |
-.gsnap <- function(db, dir = NULL, part = NULL, input_buffer_size = 1000L, |
|
54 |
+.gsnap <- function(db = NULL, dir = NULL, part = NULL, |
|
55 |
+ input_buffer_size = 1000L, |
|
55 | 56 |
barcode_length = 0L, |
56 | 57 |
orientation = c("FR", "RF", "FF"), |
57 | 58 |
fastq_id_start = NULL, fastq_id_end = NULL, |
... | ... |
@@ -107,7 +108,6 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
107 | 108 |
if (!is.null(problems)) |
108 | 109 |
stop("validation failed:\n ", paste(problems, collapse = "\n ")) |
109 | 110 |
|
110 |
- ##TODO: figure out how to do this programmatically |
|
111 | 111 |
orientation <- match.arg(orientation) |
112 | 112 |
batch <- match.arg(batch) |
113 | 113 |
mode <- match.arg(mode) |
... | ... |
@@ -116,7 +116,26 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
116 | 116 |
|
117 | 117 |
### TODO: if input_a is NULL, or split_output and .redirect are NULL: |
118 | 118 |
### return a pipe() |
119 |
- .system(commandLine("gsnap"), intern = version) |
|
119 |
+ .system_gsnap(commandLine("gsnap")) |
|
120 |
+} |
|
121 |
+ |
|
122 |
+..gsnap <- function(args, path = NULL) { |
|
123 |
+ .system_gsnap(.commandLine("gsnap", args, path)) |
|
124 |
+} |
|
125 |
+ |
|
126 |
+.system_gsnap <- function(command) { |
|
127 |
+ command <- paste(command, "2>&1") |
|
128 |
+ output <- .system(command, intern = TRUE) |
|
129 |
+ if (!gsnapSucceeded(command, output)) |
|
130 |
+ stop("Execution of gsnap failed, command-line: '", command, |
|
131 |
+ "'; last output line: '", tail(output, 1), "'") |
|
132 |
+ else output |
|
133 |
+} |
|
134 |
+ |
|
135 |
+gsnapSucceeded <- function(command, output) { |
|
136 |
+ if (!grepl("--version", command)) |
|
137 |
+ any(grepl("^Processed \\d+ queries", output)) |
|
138 |
+ else TRUE |
|
120 | 139 |
} |
121 | 140 |
|
122 | 141 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
... | ... |
@@ -198,10 +217,11 @@ setValidity("GsnapParam", .valid_GsnapParam) |
198 | 217 |
### |
199 | 218 |
|
200 | 219 |
gsnapVersion <- function() { |
201 |
- version_text <- sub("GSNAP version (.*?) ", "\\1", .gsnap(version = TRUE)) |
|
220 |
+ output <- .gsnap(version = TRUE) |
|
221 |
+ version_text <- sub("GSNAP version (.*?) .*", "\\1", output[1]) |
|
202 | 222 |
parseGsnapVersion(version_text) |
203 | 223 |
} |
204 | 224 |
|
205 | 225 |
parseGsnapVersion <- function(x) { |
206 |
- as.POSIXlt(x, format = "%m-%d-%Y") |
|
226 |
+ as.POSIXlt(x, format = "%Y-%m-%d") |
|
207 | 227 |
} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@71083 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -41,7 +41,7 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
41 | 41 |
gsnap_output <- GsnapOutput(path = output_path, |
42 | 42 |
version = gsnapVersion(), |
43 | 43 |
param = params) |
44 |
- asBam(gsnap_output) |
|
44 |
+ gsnap_output <- asBam(gsnap_output) |
|
45 | 45 |
if (consolidate) |
46 | 46 |
consolidate(gsnap_output) |
47 | 47 |
return(gsnap_output) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69547 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -12,7 +12,7 @@ setGeneric("gsnap", function(input_a, input_b = NULL, params, ...) |
12 | 12 |
setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
13 | 13 |
function(input_a, input_b, params, |
14 | 14 |
output = file_path_sans_ext(input_a, TRUE), |
15 |
- consolidate = TRUE, indexDestination=TRUE, ...) |
|
15 |
+ consolidate = TRUE, ...) |
|
16 | 16 |
{ |
17 | 17 |
output_dir <- dirname(output) |
18 | 18 |
if (!file.exists(output_dir)) |
... | ... |
@@ -41,7 +41,7 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
41 | 41 |
gsnap_output <- GsnapOutput(path = output_path, |
42 | 42 |
version = gsnapVersion(), |
43 | 43 |
param = params) |
44 |
- asBam(gsnap_output, indexDestination=indexDestination) |
|
44 |
+ asBam(gsnap_output) |
|
45 | 45 |
if (consolidate) |
46 | 46 |
consolidate(gsnap_output) |
47 | 47 |
return(gsnap_output) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69460 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -12,7 +12,7 @@ setGeneric("gsnap", function(input_a, input_b = NULL, params, ...) |
12 | 12 |
setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
13 | 13 |
function(input_a, input_b, params, |
14 | 14 |
output = file_path_sans_ext(input_a, TRUE), |
15 |
- consolidate = TRUE, ...) |
|
15 |
+ consolidate = TRUE, indexDestination=TRUE, ...) |
|
16 | 16 |
{ |
17 | 17 |
output_dir <- dirname(output) |
18 | 18 |
if (!file.exists(output_dir)) |
... | ... |
@@ -41,7 +41,7 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
41 | 41 |
gsnap_output <- GsnapOutput(path = output_path, |
42 | 42 |
version = gsnapVersion(), |
43 | 43 |
param = params) |
44 |
- asBam(gsnap_output) |
|
44 |
+ asBam(gsnap_output, indexDestination=indexDestination) |
|
45 | 45 |
if (consolidate) |
46 | 46 |
consolidate(gsnap_output) |
47 | 47 |
return(gsnap_output) |
*asSystemCall.R added
*previous code that calls .system had to account for the case of when
getOption("systemCallMode") is TRUE. Since exception is now being
thrown, this logic was removed from gsnap() function
*LungCancerLines no longer mentioned in NAMESPACE
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69338 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -38,20 +38,13 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
38 | 38 |
c(list(.input_a = input_a, .input_b = input_b, |
39 | 39 |
format = "sam"), |
40 | 40 |
params_list)) |
41 |
- ##users can provide a function to the "gmapRSysCall" |
|
42 |
- ##option. If this has happened, the return value of .gsnap |
|
43 |
- ##(and consequently .system) is returned instead of a |
|
44 |
- ##GsnapOutput object |
|
45 |
- if (is.null(getOption("gmapRSysCall"))) { |
|
46 |
- gsnap_output <- GsnapOutput(path = output_path, |
|
47 |
- version = gsnapVersion(), |
|
48 |
- param = params) |
|
49 |
- asBam(gsnap_output) |
|
50 |
- if (consolidate) |
|
51 |
- consolidate(gsnap_output) |
|
52 |
- res <- gsnap_output |
|
53 |
- } |
|
54 |
- return(res) |
|
41 |
+ gsnap_output <- GsnapOutput(path = output_path, |
|
42 |
+ version = gsnapVersion(), |
|
43 |
+ param = params) |
|
44 |
+ asBam(gsnap_output) |
|
45 |
+ if (consolidate) |
|
46 |
+ consolidate(gsnap_output) |
|
47 |
+ return(gsnap_output) |
|
55 | 48 |
}) |
56 | 49 |
|
57 | 50 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69328 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -82,7 +82,7 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
82 | 82 |
gmap_mode = "pairsearch,terminal,improve", |
83 | 83 |
trigger_score_for_gmap = 5L, max_gmap_pairsearch = 3L, |
84 | 84 |
max_gmap_improvement = 3L, microexon_spliceprob = 0.90, |
85 |
- novelsplicing = FALSE, splicingdir = NULL, |
|
85 |
+ novelsplicing = 0L, splicingdir = NULL, |
|
86 | 86 |
use_splicing = NULL, ambig_splice_noclip = FALSE, |
87 | 87 |
localsplicedist = 200000L, |
88 | 88 |
local_splice_penalty = 0L, distant_splice_penalty = 3L, |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69298 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -34,17 +34,24 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
34 | 34 |
} |
35 | 35 |
} |
36 | 36 |
|
37 |
- do.call(.gsnap, |
|
38 |
- c(list(.input_a = input_a, .input_b = input_b, |
|
39 |
- format = "sam"), |
|
40 |
- params_list)) |
|
41 |
- gsnap_output <- GsnapOutput(path = output_path, |
|
42 |
- version = gsnapVersion(), |
|
43 |
- param = params) |
|
44 |
- asBam(gsnap_output) |
|
45 |
- if (consolidate) |
|
46 |
- consolidate(gsnap_output) |
|
47 |
- gsnap_output |
|
37 |
+ res <- do.call(.gsnap, |
|
38 |
+ c(list(.input_a = input_a, .input_b = input_b, |
|
39 |
+ format = "sam"), |
|
40 |
+ params_list)) |
|
41 |
+ ##users can provide a function to the "gmapRSysCall" |
|
42 |
+ ##option. If this has happened, the return value of .gsnap |
|
43 |
+ ##(and consequently .system) is returned instead of a |
|
44 |
+ ##GsnapOutput object |
|
45 |
+ if (is.null(getOption("gmapRSysCall"))) { |
|
46 |
+ gsnap_output <- GsnapOutput(path = output_path, |
|
47 |
+ version = gsnapVersion(), |
|
48 |
+ param = params) |
|
49 |
+ asBam(gsnap_output) |
|
50 |
+ if (consolidate) |
|
51 |
+ consolidate(gsnap_output) |
|
52 |
+ res <- gsnap_output |
|
53 |
+ } |
|
54 |
+ return(res) |
|
48 | 55 |
}) |
49 | 56 |
|
50 | 57 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69221 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -24,8 +24,14 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
24 | 24 |
params_list$split_output <- output |
25 | 25 |
output_path <- output_dir |
26 | 26 |
} else { |
27 |
- output_path <- paste0("> ", output, ".sam") |
|
28 |
- params_list$.redirect <- output_path |
|
27 |
+ output_path <- paste0(output, ".sam") |
|
28 |
+ params_list$.redirect <- paste(">", output_path) |
|
29 |
+ } |
|
30 |
+ |
|
31 |
+ if (is.null(params_list$gunzip)) { |
|
32 |
+ if (file_ext(input_a) == "gz" && file_ext(input_b) == "gz") { |
|
33 |
+ params_list$gunzip <- TRUE |
|
34 |
+ } |
|
29 | 35 |
} |
30 | 36 |
|
31 | 37 |
do.call(.gsnap, |
... | ... |
@@ -65,7 +71,7 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
65 | 71 |
mode = c("standard", "cmet-stranded", "cmet-nonstranded", |
66 | 72 |
"atoi-stranded", "atoi-nonstranded"), |
67 | 73 |
tallydir = NULL, use_tally = NULL, |
68 |
- use_runlength = NULL, nthreads = NULL, |
|
74 |
+ use_runlength = NULL, nthreads = 1L, |
|
69 | 75 |
gmap_mode = "pairsearch,terminal,improve", |
70 | 76 |
trigger_score_for_gmap = 5L, max_gmap_pairsearch = 3L, |
71 | 77 |
max_gmap_improvement = 3L, microexon_spliceprob = 0.90, |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69207 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -28,8 +28,10 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
28 | 28 |
params_list$.redirect <- output_path |
29 | 29 |
} |
30 | 30 |
|
31 |
- do.call(.gsnap, c(.input_a = input_a, .input_b = input_b, |
|
32 |
- format = "sam", params_list)) |
|
31 |
+ do.call(.gsnap, |
|
32 |
+ c(list(.input_a = input_a, .input_b = input_b, |
|
33 |
+ format = "sam"), |
|
34 |
+ params_list)) |
|
33 | 35 |
gsnap_output <- GsnapOutput(path = output_path, |
34 | 36 |
version = gsnapVersion(), |
35 | 37 |
param = params) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69160 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -104,6 +104,7 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
104 | 104 |
batch <- match.arg(batch) |
105 | 105 |
mode <- match.arg(mode) |
106 | 106 |
quality_protocol <- match.arg(quality_protocol) |
107 |
+ filter_chastity <- match.arg(filter_chastity) |
|
107 | 108 |
|
108 | 109 |
### TODO: if input_a is NULL, or split_output and .redirect are NULL: |
109 | 110 |
### return a pipe() |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69141 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -44,26 +44,39 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
44 | 44 |
### |
45 | 45 |
|
46 | 46 |
.gsnap <- function(db, dir = NULL, part = NULL, input_buffer_size = 1000L, |
47 |
- barcode_length = 0L, pc_linefeeds = FALSE, |
|
48 |
- orientation = c("FR", "RF", "FF"), gunzip = FALSE, |
|
47 |
+ barcode_length = 0L, |
|
48 |
+ orientation = c("FR", "RF", "FF"), |
|
49 |
+ fastq_id_start = NULL, fastq_id_end = NULL, |
|
50 |
+ filter_chastity = c("off", "either", "both"), |
|
51 |
+ gunzip = FALSE, |
|
49 | 52 |
batch = c("2", "0", "1", "3", "4"), max_mismatches = NULL, |
50 | 53 |
query_unk_mismatch = 0L, genome_unk_mismatch = 1L, |
54 |
+ terminal_threshold = 2L, |
|
51 | 55 |
indel_penalty = 2L, |
52 | 56 |
indel_endlength = 4L, max_middle_insertions = 9L, |
53 | 57 |
max_middle_deletions = 30L, max_end_insertions = 3L, |
54 | 58 |
max_end_deletions = 6L, suboptimal_levels = 0L, |
55 | 59 |
adapter_string = NULL, |
56 |
- trim_mismatch_score = -3L, snpsdir = NULL, use_snps = NULL, |
|
60 |
+ trim_mismatch_score = -3L, trim_indel_score = -4L, |
|
61 |
+ snpsdir = NULL, use_snps = NULL, |
|
57 | 62 |
cmetdir = NULL, atoidir = NULL, |
58 |
- mode = c("standard", "cmet", "atoi"), |
|
59 |
- tallydir = NULL, use_tally = NULL, nthreads = NULL, |
|
60 |
- novelsplicing = FALSE, splicing = NULL, |
|
61 |
- novel_doublesplices = FALSE, localsplicedist = 200000L, |
|
63 |
+ mode = c("standard", "cmet-stranded", "cmet-nonstranded", |
|
64 |
+ "atoi-stranded", "atoi-nonstranded"), |
|
65 |
+ tallydir = NULL, use_tally = NULL, |
|
66 |
+ use_runlength = NULL, nthreads = NULL, |
|
67 |
+ gmap_mode = "pairsearch,terminal,improve", |
|
68 |
+ trigger_score_for_gmap = 5L, max_gmap_pairsearch = 3L, |
|
69 |
+ max_gmap_improvement = 3L, microexon_spliceprob = 0.90, |
|
70 |
+ novelsplicing = FALSE, splicingdir = NULL, |
|
71 |
+ use_splicing = NULL, ambig_splice_noclip = FALSE, |
|
72 |
+ localsplicedist = 200000L, |
|
62 | 73 |
local_splice_penalty = 0L, distant_splice_penalty = 3L, |
63 | 74 |
distant_splice_endlength = 16L, |
64 | 75 |
shortend_splice_endlength = 2L, |
76 |
+ distant_splice_identity = 0.95, |
|
77 |
+ antistranded_penalty = 0L, merge_distant_samechr = FALSE, |
|
65 | 78 |
pairmax_dna = 1000L, |
66 |
- pairmax_rna = 200000L, pairexpect = 200L, |
|
79 |
+ pairmax_rna = 200000L, pairexpect = 200L, pairdev = 25L, |
|
67 | 80 |
quality_protocol = c("sanger", "illumina"), |
68 | 81 |
quality_zero_score = 33L, quality_print_shift = 0L, |
69 | 82 |
mapq_unique_score = NULL, npaths = 100L, |
... | ... |
@@ -71,9 +84,12 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
71 | 84 |
show_rediff = FALSE, clip_overlap = FALSE, |
72 | 85 |
print_snps = FALSE, failsonly = FALSE, |
73 | 86 |
nofails = FALSE, fails_as_input = FALSE, |
74 |
- format = NULL, split_output = NULL, no_sam_headers = FALSE, |
|
87 |
+ format = NULL, split_output = NULL, |
|
88 |
+ output_buffer_size = 1000L, |
|
89 |
+ no_sam_headers = FALSE, |
|
75 | 90 |
sam_headers_batch = NULL, read_group_id = NULL, |
76 |
- read_group_name = NULL, version = FALSE, |
|
91 |
+ read_group_name = NULL, read_group_library = NULL, |
|
92 |
+ read_group_platform = NULL, version = FALSE, |
|
77 | 93 |
.input_a = NULL, .input_b = NULL, |
78 | 94 |
.redirect = NULL ## e.g., > gsnap.sam |
79 | 95 |
) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68555 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -14,20 +14,24 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
14 | 14 |
output = file_path_sans_ext(input_a, TRUE), |
15 | 15 |
consolidate = TRUE, ...) |
16 | 16 |
{ |
17 |
+ output_dir <- dirname(output) |
|
18 |
+ if (!file.exists(output_dir)) |
|
19 |
+ dir.create(output_dir, recursive = TRUE) |
|
20 |
+ |
|
17 | 21 |
params <- initialize(params, ...) |
18 | 22 |
params_list <- as.list(params) |
19 | 23 |
if (gsnap_split_output(params)) { |
20 | 24 |
params_list$split_output <- output |
25 |
+ output_path <- output_dir |
|
21 | 26 |
} else { |
22 |
- params_list$.redirect <- paste0("> ", output, ".sam") |
|
27 |
+ output_path <- paste0("> ", output, ".sam") |
|
28 |
+ params_list$.redirect <- output_path |
|
23 | 29 |
} |
24 |
- output_dir <- dirname(output) |
|
25 |
- if (!file.exists(output_dir)) |
|
26 |
- dir.create(output_dir, recursive = TRUE) |
|
27 | 30 |
|
28 | 31 |
do.call(.gsnap, c(.input_a = input_a, .input_b = input_b, |
29 | 32 |
format = "sam", params_list)) |
30 |
- gsnap_output <- GsnapOutput(path = output_dir, version = gsnapVersion(), |
|
33 |
+ gsnap_output <- GsnapOutput(path = output_path, |
|
34 |
+ version = gsnapVersion(), |
|
31 | 35 |
param = params) |
32 | 36 |
asBam(gsnap_output) |
33 | 37 |
if (consolidate) |
... | ... |
@@ -71,8 +75,8 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
71 | 75 |
sam_headers_batch = NULL, read_group_id = NULL, |
72 | 76 |
read_group_name = NULL, version = FALSE, |
73 | 77 |
.input_a = NULL, .input_b = NULL, |
74 |
- .redirect = NULL, ## e.g., > gsnap.sam |
|
75 |
- extra = NULL) |
|
78 |
+ .redirect = NULL ## e.g., > gsnap.sam |
|
79 |
+ ) |
|
76 | 80 |
{ |
77 | 81 |
formals <- formals(sys.function()) |
78 | 82 |
problems <- do.call(c, lapply(names(formals), .valid_gmap_parameter, formals)) |
... | ... |
@@ -84,8 +88,6 @@ setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
84 | 88 |
batch <- match.arg(batch) |
85 | 89 |
mode <- match.arg(mode) |
86 | 90 |
quality_protocol <- match.arg(quality_protocol) |
87 |
- |
|
88 |
- if (identical(extra, list())) extra <- NULL |
|
89 | 91 |
|
90 | 92 |
### TODO: if input_a is NULL, or split_output and .redirect are NULL: |
91 | 93 |
### return a pipe() |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68172 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,180 @@ |
1 |
+### ========================================================================= |
|
2 |
+### gsnap command |
|
3 |
+### ------------------------------------------------------------------------- |
|
4 |
+ |
|
5 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
6 |
+### High-level interface |
|
7 |
+### |
|
8 |
+ |
|
9 |
+setGeneric("gsnap", function(input_a, input_b = NULL, params, ...) |
|
10 |
+ standardGeneric("gsnap")) |
|
11 |
+ |
|
12 |
+setMethod("gsnap", c("character", "characterORNULL", "GsnapParam"), |
|
13 |
+ function(input_a, input_b, params, |
|
14 |
+ output = file_path_sans_ext(input_a, TRUE), |
|
15 |
+ consolidate = TRUE, ...) |
|
16 |
+ { |
|
17 |
+ params <- initialize(params, ...) |
|
18 |
+ params_list <- as.list(params) |
|
19 |
+ if (gsnap_split_output(params)) { |
|
20 |
+ params_list$split_output <- output |
|
21 |
+ } else { |
|
22 |
+ params_list$.redirect <- paste0("> ", output, ".sam") |
|
23 |
+ } |
|
24 |
+ output_dir <- dirname(output) |
|
25 |
+ if (!file.exists(output_dir)) |
|
26 |
+ dir.create(output_dir, recursive = TRUE) |
|
27 |
+ |
|
28 |
+ do.call(.gsnap, c(.input_a = input_a, .input_b = input_b, |
|
29 |
+ format = "sam", params_list)) |
|
30 |
+ gsnap_output <- GsnapOutput(path = output_dir, version = gsnapVersion(), |
|
31 |
+ param = params) |
|
32 |
+ asBam(gsnap_output) |
|
33 |
+ if (consolidate) |
|
34 |
+ consolidate(gsnap_output) |
|
35 |
+ gsnap_output |
|
36 |
+ }) |
|
37 |
+ |
|
38 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
39 |
+### Low-level interface |
|
40 |
+### |
|
41 |
+ |
|
42 |
+.gsnap <- function(db, dir = NULL, part = NULL, input_buffer_size = 1000L, |
|
43 |
+ barcode_length = 0L, pc_linefeeds = FALSE, |
|
44 |
+ orientation = c("FR", "RF", "FF"), gunzip = FALSE, |
|
45 |
+ batch = c("2", "0", "1", "3", "4"), max_mismatches = NULL, |
|
46 |
+ query_unk_mismatch = 0L, genome_unk_mismatch = 1L, |
|
47 |
+ indel_penalty = 2L, |
|
48 |
+ indel_endlength = 4L, max_middle_insertions = 9L, |
|
49 |
+ max_middle_deletions = 30L, max_end_insertions = 3L, |
|
50 |
+ max_end_deletions = 6L, suboptimal_levels = 0L, |
|
51 |
+ adapter_string = NULL, |
|
52 |
+ trim_mismatch_score = -3L, snpsdir = NULL, use_snps = NULL, |
|
53 |
+ cmetdir = NULL, atoidir = NULL, |
|
54 |
+ mode = c("standard", "cmet", "atoi"), |
|
55 |
+ tallydir = NULL, use_tally = NULL, nthreads = NULL, |
|
56 |
+ novelsplicing = FALSE, splicing = NULL, |
|
57 |
+ novel_doublesplices = FALSE, localsplicedist = 200000L, |
|
58 |
+ local_splice_penalty = 0L, distant_splice_penalty = 3L, |
|
59 |
+ distant_splice_endlength = 16L, |
|
60 |
+ shortend_splice_endlength = 2L, |
|
61 |
+ pairmax_dna = 1000L, |
|
62 |
+ pairmax_rna = 200000L, pairexpect = 200L, |
|
63 |
+ quality_protocol = c("sanger", "illumina"), |
|
64 |
+ quality_zero_score = 33L, quality_print_shift = 0L, |
|
65 |
+ mapq_unique_score = NULL, npaths = 100L, |
|
66 |
+ quiet_if_excessive = FALSE, ordered = FALSE, |
|
67 |
+ show_rediff = FALSE, clip_overlap = FALSE, |
|
68 |
+ print_snps = FALSE, failsonly = FALSE, |
|
69 |
+ nofails = FALSE, fails_as_input = FALSE, |
|
70 |
+ format = NULL, split_output = NULL, no_sam_headers = FALSE, |
|
71 |
+ sam_headers_batch = NULL, read_group_id = NULL, |
|
72 |
+ read_group_name = NULL, version = FALSE, |
|
73 |
+ .input_a = NULL, .input_b = NULL, |
|
74 |
+ .redirect = NULL, ## e.g., > gsnap.sam |
|
75 |
+ extra = NULL) |
|
76 |
+{ |
|
77 |
+ formals <- formals(sys.function()) |
|
78 |
+ problems <- do.call(c, lapply(names(formals), .valid_gmap_parameter, formals)) |
|
79 |
+ if (!is.null(problems)) |
|
80 |
+ stop("validation failed:\n ", paste(problems, collapse = "\n ")) |
|
81 |
+ |
|
82 |
+ ##TODO: figure out how to do this programmatically |
|
83 |
+ orientation <- match.arg(orientation) |
|
84 |
+ batch <- match.arg(batch) |
|
85 |
+ mode <- match.arg(mode) |
|
86 |
+ quality_protocol <- match.arg(quality_protocol) |
|
87 |
+ |
|
88 |
+ if (identical(extra, list())) extra <- NULL |
|
89 |
+ |
|
90 |
+### TODO: if input_a is NULL, or split_output and .redirect are NULL: |
|
91 |
+### return a pipe() |
|
92 |
+ .system(commandLine("gsnap"), intern = version) |
|
93 |
+} |
|
94 |
+ |
|
95 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
96 |
+### Validity Checking |
|
97 |
+### |
|
98 |
+ |
|
99 |
+.valid_GsnapParam_part <- function(x) { |
|
100 |
+ ## validate the part string (i/n) |
|
101 |
+} |
|
102 |
+.valid_GsnapParam_batch <- function(x) { |
|
103 |
+ ## one of 0, 1, 2, 3, 4 |
|
104 |
+} |
|
105 |
+.valid_GsnapParam_max_mismatches <- function(x) { |
|
106 |
+ ## non-negative number or NULL |
|
107 |
+} |
|
108 |
+.valid_GsnapParam_suboptimal_levels <- function(x) { |
|
109 |
+ ## non-negative number or NULL |
|
110 |
+} |
|
111 |
+.valid_GsnapParam_use_snps <- function(x) { |
|
112 |
+ ## a file that exists (not sure) |
|
113 |
+} |
|
114 |
+.valid_GsnapParam_mode <- function(x) { |
|
115 |
+ ## one of standard, cmet, atoi |
|
116 |
+} |
|
117 |
+.valid_GsnapParam_nthreads <- function(x) { |
|
118 |
+ ## positive number |
|
119 |
+} |
|
120 |
+.valid_GsnapParam_novelsplicing <- function(x) { |
|
121 |
+ ## TRUE or FALSE |
|
122 |
+} |
|
123 |
+.valid_GsnapParam_splicing <- function(x) { |
|
124 |
+ ## a file that exists (not sure) or NULL |
|
125 |
+} |
|
126 |
+.valid_GsnapParam_npaths <- function(x) { |
|
127 |
+ ## positive number |
|
128 |
+} |
|
129 |
+.valid_GsnapParam_quiet_if_excessive <- function(x) { |
|
130 |
+ ## TRUE or FALSE |
|
131 |
+} |
|
132 |
+.valid_GsnapParam_nofails <- function(x) { |
|
133 |
+ ## TRUE or FALSE |
|
134 |
+} |
|
135 |
+.valid_GsnapParam_format <- function(x) { |
|
136 |
+ ## sam or NULL (this is a low-level check!) |
|
137 |
+} |
|
138 |
+.valid_GsnapParam_split_output <- function(x) { |
|
139 |
+ ## single string or NULL |
|
140 |
+} |
|
141 |
+.valid_GsnapParam_read_group_id <- function(x) { |
|
142 |
+ ## single string (any character constraints?) or NULL |
|
143 |
+} |
|
144 |
+.valid_GsnapParam_read_group_name <- function(x) { |
|
145 |
+ ## single string or NULL |
|
146 |
+} |
|
147 |
+ |
|
148 |
+.valid_gmap_parameter <- function(name, params) { |
|
149 |
+ res <- TRUE ##if we're not validating the param, assume it is good |
|
150 |
+ |
|
151 |
+ validatorName <- paste(".valid_GsnapParam_", name, sep = "") |
|
152 |
+ if (exists(validatorName)) { |
|
153 |
+ validator <- get(validatorName) |
|
154 |
+ res <- validator(params) |
|
155 |
+ } else { |
|
156 |
+ res <- NULL |
|
157 |
+ } |
|
158 |
+ |
|
159 |
+ return(res) |
|
160 |
+} |
|
161 |
+ |
|
162 |
+.valid_GsnapParam <- function(object) { |
|
163 |
+ x <- as.list(object) # converts to low-level parameter list |
|
164 |
+ do.call(c, lapply(names(x), .valid_gmap_parameter, x)) |
|
165 |
+} |
|
166 |
+ |
|
167 |
+setValidity("GsnapParam", .valid_GsnapParam) |
|
168 |
+ |
|
169 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
170 |
+### Utilities |
|
171 |
+### |
|
172 |
+ |
|
173 |
+gsnapVersion <- function() { |
|
174 |
+ version_text <- sub("GSNAP version (.*?) ", "\\1", .gsnap(version = TRUE)) |
|
175 |
+ parseGsnapVersion(version_text) |
|
176 |
+} |
|
177 |
+ |
|
178 |
+parseGsnapVersion <- function(x) { |
|
179 |
+ as.POSIXlt(x, format = "%m-%d-%Y") |
|
180 |
+} |