git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88417 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -10,11 +10,11 @@ Description: GSNAP and GMAP are a pair of tools to align short-read |
10 | 10 |
methods to work with GMAP and GSNAP from within R. In addition, |
11 | 11 |
it provides methods to tally alignment results on a |
12 | 12 |
per-nucleotide basis using the bam_tally tool. |
13 |
-Version: 1.5.14 |
|
13 |
+Version: 1.5.15 |
|
14 | 14 |
Depends: R (>= 2.15.0), methods, GenomicRanges |
15 | 15 |
Imports: IRanges, Rsamtools (>= 1.7.4), rtracklayer (>= 1.17.15), |
16 | 16 |
GenomicFeatures, Biostrings, VariantAnnotation (>= 1.9.4), |
17 |
- tools, Biobase, BSgenome |
|
17 |
+ tools, Biobase, BSgenome, GenomicAlignments |
|
18 | 18 |
Suggests: RUnit, BSgenome.Dmelanogaster.UCSC.dm3, |
19 | 19 |
BSgenome.Scerevisiae.UCSC.sacCer3, org.Hs.eg.db, |
20 | 20 |
TxDb.Hsapiens.UCSC.hg19.knownGene, BSgenome.Hsapiens.UCSC.hg19, |
... | ... |
@@ -1,6 +1,7 @@ |
1 | 1 |
useDynLib(gmapR, .registration = TRUE) |
2 | 2 |
|
3 |
-importFrom(Rsamtools, path, bamPaths, "bamWhich<-", BamFile, BamFileList) |
|
3 |
+importFrom(Rsamtools, path, bamPaths, "bamWhich<-", BamFile, BamFileList, |
|
4 |
+ BamSampler) |
|
4 | 5 |
importFrom(tools, file_path_as_absolute, file_ext, file_path_sans_ext, |
5 | 6 |
list_files_with_exts) |
6 | 7 |
importFrom(Biobase, createPackage) |
... | ... |
@@ -21,6 +22,8 @@ importFrom(VariantAnnotation, readVcf, ScanVcfParam, fixed, VRanges, ref, alt, |
21 | 22 |
"ref<-", "alt<-", altDepth, refDepth) |
22 | 23 |
importClassesFrom(VariantAnnotation, "VCF", "VRanges") |
23 | 24 |
importFrom(BSgenome, getSeq, providerVersion) |
25 |
+importFrom(GenomicAlignments, readGAlignments) |
|
26 |
+importMethodsFrom(GenomicAlignments, qwidth) |
|
24 | 27 |
|
25 | 28 |
## public API |
26 | 29 |
|
... | ... |
@@ -85,8 +85,8 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
85 | 85 |
normArgSingleInteger(high_base_quality), |
86 | 86 |
NULL, read_length) |
87 | 87 |
|
88 |
- tally_names <- c("seqnames", "pos", "ref", "alt", "n.read.pos", |
|
89 |
- "n.read.pos.ref", "raw.count", "raw.count.ref", |
|
88 |
+ tally_names <- c("seqnames", "pos", "ref", "alt", |
|
89 |
+ "raw.count", "raw.count.ref", |
|
90 | 90 |
"raw.count.total", |
91 | 91 |
"high.quality", "high.quality.ref", |
92 | 92 |
"high.quality.total", "mean.quality", |
... | ... |
@@ -142,8 +142,8 @@ checkTallyConsistency <- function(x) { |
142 | 142 |
stopifnot(all(raw.count + raw.count.ref <= raw.count.total, na.rm=TRUE)) |
143 | 143 |
stopifnot(all(altDepth(x) <= raw.count, na.rm=TRUE)) |
144 | 144 |
stopifnot(all(refDepth(x) <= raw.count.ref, na.rm=TRUE)) |
145 |
- stopifnot(all(count.pos + count.neg == raw.count, na.rm=TRUE)) |
|
146 |
- stopifnot(all(count.pos.ref + count.neg.ref == raw.count.ref)) |
|
145 |
+ stopifnot(all(count.plus + count.minus == raw.count, na.rm=TRUE)) |
|
146 |
+ stopifnot(all(count.plus.ref + count.minus.ref == raw.count.ref)) |
|
147 | 147 |
}) |
148 | 148 |
} |
149 | 149 |
|
... | ... |
@@ -245,8 +245,6 @@ normArgTRUEorFALSE <- function(x) { |
245 | 245 |
|
246 | 246 |
variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
247 | 247 |
desc <- c( |
248 |
- n.read.pos = "Number of unique read positions for the ALT", |
|
249 |
- n.read.pos.ref = "Number of unique read positions for the REF", |
|
250 | 248 |
raw.count = "Raw ALT count", |
251 | 249 |
raw.count.ref = "Raw REF count", |
252 | 250 |
raw.count.total = "Raw total count", |
... | ... |
@@ -55,9 +55,10 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L, |
55 | 55 |
\item{raw.count.ref}{The number of reads with the reference allele.} |
56 | 56 |
\item{raw.count.total}{The total number of reads at that position, |
57 | 57 |
including reference and all alternates.} |
58 |
- \item{mean.quality}{The mean mapping quality for the alt allele.} |
|
59 |
- \item{mean.quality.ref}{The mean mapping quality for the reference |
|
60 |
- allele.} |
|
58 |
+ \item{mean.quality}{The mean base quality for the alt allele, |
|
59 |
+ truncated at \code{high_base_quality}.} |
|
60 |
+ \item{mean.quality.ref}{The mean base quality for the ref allele, |
|
61 |
+ truncated at \code{high_base_quality}.} |
|
61 | 62 |
\item{count.plus}{The number of positive strand reads for the alternate |
62 | 63 |
allele, \code{NA} for the reference allele row.} |
63 | 64 |
\item{count.plus.ref}{The number of positive strand reads for the reference |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
#include "iit.h" |
5 | 5 |
#include "bytestream.h" |
6 | 6 |
|
7 |
-enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, |
|
7 |
+enum { SEQNAMES, POS, REF, READ, COUNT, COUNT_REF, |
|
8 | 8 |
COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, |
9 | 9 |
MEAN_QUALITY, MEAN_QUALITY_REF, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, |
10 | 10 |
COUNT_MINUS_REF, READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, |
... | ... |
@@ -15,8 +15,6 @@ typedef struct TallyTable { |
15 | 15 |
int *pos; |
16 | 16 |
SEXP ref_R; |
17 | 17 |
SEXP read_R; |
18 |
- int *n_cycles; |
|
19 |
- int *n_cycles_ref; |
|
20 | 18 |
int *count; |
21 | 19 |
int *count_ref; |
22 | 20 |
int *count_total; |
... | ... |
@@ -54,8 +52,6 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) { |
54 | 52 |
SET_VECTOR_ELT(tally_R, POS, allocVector(INTSXP, n_rows)); |
55 | 53 |
SET_VECTOR_ELT(tally_R, REF, allocVector(STRSXP, n_rows)); |
56 | 54 |
SET_VECTOR_ELT(tally_R, READ, allocVector(STRSXP, n_rows)); |
57 |
- SET_VECTOR_ELT(tally_R, N_CYCLES, allocVector(INTSXP, n_rows)); |
|
58 |
- SET_VECTOR_ELT(tally_R, N_CYCLES_REF, allocVector(INTSXP, n_rows)); |
|
59 | 55 |
SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows)); |
60 | 56 |
SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows)); |
61 | 57 |
SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows)); |
... | ... |
@@ -92,8 +88,6 @@ static TallyTable *TallyTable_new(SEXP tally_R) { |
92 | 88 |
tally->pos = INTEGER(VECTOR_ELT(tally_R, POS)); |
93 | 89 |
tally->ref_R = VECTOR_ELT(tally_R, REF); |
94 | 90 |
tally->read_R = VECTOR_ELT(tally_R, READ); |
95 |
- tally->n_cycles = INTEGER(VECTOR_ELT(tally_R, N_CYCLES)); |
|
96 |
- tally->n_cycles_ref = INTEGER(VECTOR_ELT(tally_R, N_CYCLES_REF)); |
|
97 | 91 |
tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT)); |
98 | 92 |
tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF)); |
99 | 93 |
tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL)); |
... | ... |
@@ -129,7 +123,7 @@ read_total_counts(unsigned char **bytes, int row, int *count_total) { |
129 | 123 |
|
130 | 124 |
static void |
131 | 125 |
read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
132 |
- int *n_cycles, double *read_pos_mean, double *read_pos_var, |
|
126 |
+ double *read_pos_mean, double *read_pos_var, |
|
133 | 127 |
double *mdfne, int **cycle_bins) |
134 | 128 |
{ |
135 | 129 |
int n_cycle_breaks = param.n_cycle_bins + 1; |
... | ... |
@@ -139,9 +133,9 @@ read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
139 | 133 |
if (param.mdfne_buf != NULL) { |
140 | 134 |
memset(param.mdfne_buf, 0, sizeof(int) * midpoint); |
141 | 135 |
} |
142 |
- n_cycles[row] = read_int(bytes); |
|
136 |
+ int n_cycles = read_int(bytes); |
|
143 | 137 |
|
144 |
- for (int index = 0; index < n_cycles[row]; index++) { |
|
138 |
+ for (int index = 0; index < n_cycles; index++) { |
|
145 | 139 |
int cycle = abs(read_int(bytes)); |
146 | 140 |
int count = read_int(bytes); |
147 | 141 |
int bin = 0; |
... | ... |
@@ -217,7 +211,7 @@ parse_indels(unsigned char *bytes, int row, |
217 | 211 |
SET_STRING_ELT(tally->read_R, row, R_BlankString); |
218 | 212 |
SET_STRING_ELT(tally->ref_R, row, seq_R); |
219 | 213 |
} |
220 |
- read_cycle_counts(&bytes, row, param, tally->n_cycles, |
|
214 |
+ read_cycle_counts(&bytes, row, param, |
|
221 | 215 |
tally->read_pos_mean, tally->read_pos_var, |
222 | 216 |
tally->mdfne, tally->cycle_bins); |
223 | 217 |
/* quality is per-base and so not relevant for indels */ |
... | ... |
@@ -227,7 +221,6 @@ parse_indels(unsigned char *bytes, int row, |
227 | 221 |
tally->high_quality_ref[row] = NA_INTEGER; |
228 | 222 |
tally->high_quality_total[row] = NA_INTEGER; |
229 | 223 |
/* no position from which to tabulate the cycles */ |
230 |
- tally->n_cycles_ref[row] = NA_INTEGER; |
|
231 | 224 |
tally->read_pos_mean_ref[row] = NA_REAL; |
232 | 225 |
tally->read_pos_var_ref[row] = NA_REAL; |
233 | 226 |
tally->mdfne_ref[row] = NA_REAL; |
... | ... |
@@ -282,14 +275,13 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
282 | 275 |
tally->count_plus, tally->count_minus, |
283 | 276 |
tally->count); |
284 | 277 |
for (int allele = 0; allele < n_alleles; allele++, row++) { |
285 |
- tally->n_cycles[row] = 0; |
|
286 | 278 |
for (int b = 0; b < param.n_cycle_bins; b++) { |
287 | 279 |
tally->cycle_bins[b][row] = 0; |
288 | 280 |
} |
289 | 281 |
tally->high_quality[row] = 0; |
290 | 282 |
tally->mean_quality[row] = R_NaN; |
291 | 283 |
if (tally->count[row] > 0) { |
292 |
- read_cycle_counts(&bytes, row, param, tally->n_cycles, |
|
284 |
+ read_cycle_counts(&bytes, row, param, |
|
293 | 285 |
tally->read_pos_mean, tally->read_pos_var, |
294 | 286 |
tally->mdfne, tally->cycle_bins); |
295 | 287 |
read_quality_counts(&bytes, row, tally->high_quality, tally->mean_quality, |
... | ... |
@@ -305,7 +297,6 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
305 | 297 |
high_quality_total += tally->high_quality[r]; |
306 | 298 |
} |
307 | 299 |
for (int r = ref_row; r < row; r++) { |
308 |
- tally->n_cycles_ref[r] = tally->n_cycles[ref_row]; |
|
309 | 300 |
tally->count_total[r] = tally->count_total[ref_row]; |
310 | 301 |
tally->mean_quality_ref[r] = tally->mean_quality[ref_row]; |
311 | 302 |
tally->high_quality_ref[r] = tally->high_quality[ref_row]; |
... | ... |
@@ -322,7 +313,6 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
322 | 313 |
if (have_ref_row) { |
323 | 314 |
/* clear the 'alt' columns for the 'ref' row with NAs */ |
324 | 315 |
SET_STRING_ELT(tally->read_R, ref_row, NA_STRING); |
325 |
- tally->n_cycles[ref_row] = NA_INTEGER; |
|
326 | 316 |
tally->mean_quality[ref_row] = NA_REAL; |
327 | 317 |
tally->high_quality[ref_row] = NA_REAL; |
328 | 318 |
tally->count_plus[ref_row] = NA_INTEGER; |