git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68551 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -6,9 +6,10 @@ |
6 | 6 |
### |
7 | 7 |
|
8 | 8 |
setClass("BamTallyParam", |
9 |
- representation(which = "RangesList", |
|
9 |
+ representation(genome = "GmapGenome", |
|
10 |
+ which = "RangesList", |
|
10 | 11 |
cycle_breaks = "integerORNULL", |
11 |
- high_quality_cutoff = "integer", |
|
12 |
+ high_base_quality = "integer", |
|
12 | 13 |
minimum_mapq = "integer", |
13 | 14 |
concordant_only = "logical", |
14 | 15 |
unique_only = "logical", |
... | ... |
@@ -22,8 +23,9 @@ setClass("BamTallyParam", |
22 | 23 |
### Constructor |
23 | 24 |
### |
24 | 25 |
|
25 |
-BamTallyParam <- function(which = RangesList(), cycle_breaks = NULL, |
|
26 |
- high_quality_cutoff = 0L, |
|
26 |
+BamTallyParam <- function(genome, which = RangesList(), |
|
27 |
+ cycle_breaks = NULL, |
|
28 |
+ high_base_quality = 0L, |
|
27 | 29 |
minimum_mapq = 0L, |
28 | 30 |
concordant_only = FALSE, unique_only = FALSE, |
29 | 31 |
primary_only = FALSE, |
... | ... |
@@ -33,8 +35,9 @@ BamTallyParam <- function(which = RangesList(), cycle_breaks = NULL, |
33 | 35 |
{ |
34 | 36 |
args <- names(formals(sys.function())) |
35 | 37 |
params <- mget(args, environment()) |
38 |
+ params$genome <- as(genome, "GmapGenome") |
|
36 | 39 |
params$which <- as(which, "RangesList") |
37 |
- integer_params <- c("high_quality_cutoff", "minimum_mapq", "min_depth", |
|
40 |
+ integer_params <- c("high_base_quality", "minimum_mapq", "min_depth", |
|
38 | 41 |
"variant_strand") |
39 | 42 |
params[integer_params] <- lapply(params[integer_params], as.integer) |
40 | 43 |
do.call(new, c("BamTallyParam", params)) |
... | ... |
@@ -49,3 +52,17 @@ setAs("BamTallyParam", "list", function(from) { |
49 | 52 |
}) |
50 | 53 |
|
51 | 54 |
setMethod("as.list", "BamTallyParam", function(x) as(x, "list")) |
55 |
+ |
|
56 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
57 |
+### Utilities |
|
58 |
+### |
|
59 |
+ |
|
60 |
+flankingCycleBreaks <- function(read_length, width = 10L) { |
|
61 |
+ if (is.na(read_length)) |
|
62 |
+ return(NULL) |
|
63 |
+ if (read_length < 1) |
|
64 |
+ stop("'read_length' must be >= 1 or NA") |
|
65 |
+ if (width < 0) |
|
66 |
+ stop("'width' must be non-negative") |
|
67 |
+ c(0L, width, read_length - width, read_length) |
|
68 |
+} |
... | ... |
@@ -8,30 +8,31 @@ |
8 | 8 |
### |
9 | 9 |
|
10 | 10 |
setGeneric("bam_tally", |
11 |
- function(x, genome, param = BamTallyParam(), ...) |
|
11 |
+ function(x, param, ...) |
|
12 | 12 |
standardGeneric("bam_tally"), |
13 | 13 |
signature = "x") |
14 | 14 |
|
15 | 15 |
setMethod("bam_tally", "BamFile", |
16 |
- function(x, genome, param, ...) |
|
16 |
+ function(x, param, ...) |
|
17 | 17 |
{ |
18 | 18 |
x <- GmapBamReader(x) |
19 | 19 |
callGeneric() |
20 | 20 |
}) |
21 | 21 |
|
22 | 22 |
setMethod("bam_tally", "character", |
23 |
- function(x, genome, param, ...) |
|
23 |
+ function(x, param, ...) |
|
24 | 24 |
{ |
25 | 25 |
x <- BamFile(x) |
26 | 26 |
callGeneric() |
27 | 27 |
}) |
28 | 28 |
|
29 | 29 |
setMethod("bam_tally", "GmapBamReader", |
30 |
- function(x, genome, param, ...) |
|
30 |
+ function(x, param, ...) |
|
31 | 31 |
{ |
32 | 32 |
param_list <- as.list(param) |
33 | 33 |
args <- list(...) |
34 | 34 |
param_list[names(args)] <- args |
35 |
+ genome <- param_list$genome |
|
35 | 36 |
param_list$db <- genome(genome) |
36 | 37 |
param_list$genome_dir <- path(directory(genome)) |
37 | 38 |
tally <- do.call(.bam_tally_C, c(list(x), param_list)) |
... | ... |
@@ -85,7 +86,7 @@ normArgTRUEorFALSE <- function(x) { |
85 | 86 |
|
86 | 87 |
.bam_tally_C <- function(bamreader, genome_dir = NULL, db = NULL, |
87 | 88 |
which = NULL, cycle_breaks = NULL, |
88 |
- high_quality_cutoff = 0L, alloclength = 200000L, |
|
89 |
+ high_base_quality = 0L, alloclength = 200000L, |
|
89 | 90 |
minimum_mapq = 0L, good_unique_mapq = 35L, |
90 | 91 |
maximum_nhits = 1000000L, |
91 | 92 |
concordant_only = FALSE, unique_only = FALSE, |
... | ... |
@@ -120,7 +121,7 @@ normArgTRUEorFALSE <- function(x) { |
120 | 121 |
} |
121 | 122 |
.Call(R_Bamtally_iit, bamreader@.extptr, genome_dir, db, which, |
122 | 123 |
cycle_breaks, |
123 |
- normArgSingleInteger(high_quality_cutoff), |
|
124 |
+ normArgSingleInteger(high_base_quality), |
|
124 | 125 |
normArgSingleInteger(alloclength), |
125 | 126 |
normArgSingleInteger(minimum_mapq), |
126 | 127 |
normArgSingleInteger(good_unique_mapq), |
... | ... |
@@ -3,12 +3,10 @@ test_bam_tally <- function() { |
3 | 3 |
"test_data_aln.concordant_uniq.bam", package = "gmapR") |
4 | 4 |
bf <- BamFile(f) |
5 | 5 |
|
6 |
- bam_tally(bf, genome) |
|
6 |
+ bam_tally(bf, BamTallyParam(genome)) |
|
7 | 7 |
} |
8 | 8 |
|
9 | 9 |
test_bam_tally_C <- function() { |
10 |
-### FIXME: switch over to using Jeremiah's BAM here, once we get the |
|
11 |
-### human gmap index in a package. |
|
12 | 10 |
library(BSgenome.Dmelanogaster.UCSC.dm3) |
13 | 11 |
|
14 | 12 |
genome <- GmapGenome(Dmelanogaster, create = TRUE) |
... | ... |
@@ -18,11 +16,11 @@ test_bam_tally_C <- function() { |
18 | 16 |
|
19 | 17 |
bf <- Rsamtools::BamFile(bam) |
20 | 18 |
which <- RangesList(chr4 = IRanges(1e6, 2e6)) |
21 |
- gr <- bam_tally(bf, genome) |
|
22 |
- gr <- bam_tally(bf, genome, variant_strand = 1L) |
|
23 |
- gr <- bam_tally(bf, genome, which = which, variant_strand = 1L) |
|
19 |
+ gr <- bam_tally(bf, BamTallyParam(genome)) |
|
20 |
+ gr <- bam_tally(bf, BamTallyParam(genome, variant_strand = 1L)) |
|
21 |
+ gr <- bam_tally(bf, BamTallyParam(genome, which = which, variant_strand = 1L)) |
|
24 | 22 |
empty <- RangesList(chr2L = IRanges(1e6, 2e6)) |
25 |
- gr <- bam_tally(bf, genome, which = empty) |
|
23 |
+ gr <- bam_tally(bf, BamTallyParam(genome, which = empty)) |
|
26 | 24 |
|
27 |
- gr <- bam_tally(bf, genome, cycle_breaks = c(1, 15, 30, 40)) |
|
25 |
+ gr <- bam_tally(bf, BamTallyParam(genome, cycle_breaks = c(1, 15, 30, 40))) |
|
28 | 26 |
} |
... | ... |
@@ -14,8 +14,8 @@ |
14 | 14 |
} |
15 | 15 |
|
16 | 16 |
\usage{ |
17 |
- BamTallyParam(which = RangesList(), cycle_breaks = NULL, |
|
18 |
- high_quality_cutoff = 0L, |
|
17 |
+ BamTallyParam(genome, which = RangesList(), cycle_breaks = NULL, |
|
18 |
+ high_base_quality = 0L, |
|
19 | 19 |
minimum_mapq = 0L, |
20 | 20 |
concordant_only = FALSE, unique_only = FALSE, |
21 | 21 |
primary_only = FALSE, |
... | ... |
@@ -24,6 +24,7 @@ |
24 | 24 |
indels = FALSE) |
25 | 25 |
} |
26 | 26 |
\arguments{ |
27 |
+ \item{genome}{A \code{GmapGenome} object, or something coercible to one.} |
|
27 | 28 |
\item{which}{A \code{RangesList} or something coercible to |
28 | 29 |
one that limits the tally to that range or set of ranges. By |
29 | 30 |
default, the entire genome is processed. |
... | ... |
@@ -31,7 +32,7 @@ |
31 | 32 |
\item{cycle_breaks}{The breaks, like those passed to \code{\link{cut}} |
32 | 33 |
for aggregating the per-cycle counts. If \code{NULL}, no per-cycle |
33 | 34 |
counts are returned.} |
34 |
- \item{high_quality_cutoff}{The minimum mapping quality for a |
|
35 |
+ \item{high_base_quality}{The minimum mapping quality for a |
|
35 | 36 |
read to be counted as high quality.} |
36 | 37 |
\item{minimum_mapq}{Minimum mapping quality for a read to be counted |
37 | 38 |
at all.} |
... | ... |
@@ -6,8 +6,8 @@ |
6 | 6 |
\alias{bam_tally,GmapBamReader-method} |
7 | 7 |
\alias{bam_tally} |
8 | 8 |
|
9 |
-\title{Create a Summarization of the Read Sequences and Qualities for Each |
|
10 |
- Genomic Position} |
|
9 |
+\title{Per-position Alignment Summaries} |
|
10 |
+ |
|
11 | 11 |
\description{ |
12 | 12 |
Given a set of alignments, for each position in the genome output |
13 | 13 |
counts for the reference allele and all alternate alleles. Often used |
... | ... |
@@ -15,15 +15,13 @@ |
15 | 15 |
} |
16 | 16 |
|
17 | 17 |
\usage{ |
18 |
-\S4method{bam_tally}{BamFile}(x, genome, param = BamTallyParam(), ...) |
|
19 |
-\S4method{bam_tally}{character}(x, genome, param = BamTallyParam(), ...) |
|
18 |
+\S4method{bam_tally}{BamFile}(x, param, ...) |
|
19 |
+\S4method{bam_tally}{character}(x, param, ...) |
|
20 | 20 |
} |
21 | 21 |
|
22 | 22 |
\arguments{ |
23 | 23 |
\item{x}{a \code{BamFile} object or string path to a BAM file to read} |
24 |
- \item{genome}{the \code{GmapGenome} object corresponding to the |
|
25 |
- alignments in the BAM file} |
|
26 |
- \item{param}{The \code{\linkS4class{BamTallyParam}} object with extra |
|
24 |
+ \item{param}{The \code{\linkS4class{BamTallyParam}} object with |
|
27 | 25 |
parameters for the tally operation. } |
28 | 26 |
\item{...}{Arguments that override settings in \code{param}.} |
29 | 27 |
} |