git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@89842 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -10,7 +10,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read |
10 | 10 |
methods to work with GMAP and GSNAP from within R. In addition, |
11 | 11 |
it provides methods to tally alignment results on a |
12 | 12 |
per-nucleotide basis using the bam_tally tool. |
13 |
-Version: 1.7.2 |
|
13 |
+Version: 1.7.3 |
|
14 | 14 |
Depends: R (>= 2.15.0), methods, GenomicRanges |
15 | 15 |
Imports: S4Vectors, IRanges, Rsamtools (>= 1.7.4), rtracklayer (>= 1.17.15), |
16 | 16 |
GenomicFeatures, Biostrings, VariantAnnotation (>= 1.11.1), |
... | ... |
@@ -83,6 +83,7 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
83 | 83 |
NULL, read_length) |
84 | 84 |
|
85 | 85 |
tally_names <- c("seqnames", "pos", "ref", "alt", |
86 |
+ "n.read.pos", "n.read.pos.ref", |
|
86 | 87 |
"raw.count", "raw.count.ref", |
87 | 88 |
"raw.count.total", |
88 | 89 |
"high.quality", "high.quality.ref", |
... | ... |
@@ -243,6 +244,8 @@ normArgTRUEorFALSE <- function(x) { |
243 | 244 |
|
244 | 245 |
variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
245 | 246 |
desc <- c( |
247 |
+ n.read.pos = "Number of unique read positions for the ALT", |
|
248 |
+ n.read.pos.ref = "Number of unique read positions for the REF", |
|
246 | 249 |
raw.count = "Raw ALT count", |
247 | 250 |
raw.count.ref = "Raw REF count", |
248 | 251 |
raw.count.total = "Raw total count", |
... | ... |
@@ -53,6 +53,8 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L, |
53 | 53 |
after quality filtering (except for indels, for which there is no |
54 | 54 |
quality filtering). The following \code{elementMetadata} |
55 | 55 |
columns are also present: |
56 |
+ \item{n.read.pos}{The number of unique read positions for the alt allele.} |
|
57 |
+ \item{n.read.pos.ref}{The number of unique read positions for the ref allele.} |
|
56 | 58 |
\item{raw.count}{The number of reads with the alternate allele, |
57 | 59 |
\code{NA} for the reference allele row.} |
58 | 60 |
\item{raw.count.ref}{The number of reads with the reference allele.} |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
#include "iit.h" |
5 | 5 |
#include "bytestream.h" |
6 | 6 |
|
7 |
-enum { SEQNAMES, POS, REF, READ, COUNT, COUNT_REF, |
|
7 |
+enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, |
|
8 | 8 |
COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, |
9 | 9 |
MEAN_QUALITY, MEAN_QUALITY_REF, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, |
10 | 10 |
COUNT_MINUS_REF, READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, |
... | ... |
@@ -15,6 +15,8 @@ typedef struct TallyTable { |
15 | 15 |
int *pos; |
16 | 16 |
SEXP ref_R; |
17 | 17 |
SEXP read_R; |
18 |
+ int *n_cycles; |
|
19 |
+ int *n_cycles_ref; |
|
18 | 20 |
int *count; |
19 | 21 |
int *count_ref; |
20 | 22 |
int *count_total; |
... | ... |
@@ -52,6 +54,8 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) { |
52 | 54 |
SET_VECTOR_ELT(tally_R, POS, allocVector(INTSXP, n_rows)); |
53 | 55 |
SET_VECTOR_ELT(tally_R, REF, allocVector(STRSXP, n_rows)); |
54 | 56 |
SET_VECTOR_ELT(tally_R, READ, allocVector(STRSXP, n_rows)); |
57 |
+ SET_VECTOR_ELT(tally_R, N_CYCLES, allocVector(INTSXP, n_rows)); |
|
58 |
+ SET_VECTOR_ELT(tally_R, N_CYCLES_REF, allocVector(INTSXP, n_rows)); |
|
55 | 59 |
SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows)); |
56 | 60 |
SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows)); |
57 | 61 |
SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows)); |
... | ... |
@@ -88,6 +92,8 @@ static TallyTable *TallyTable_new(SEXP tally_R) { |
88 | 92 |
tally->pos = INTEGER(VECTOR_ELT(tally_R, POS)); |
89 | 93 |
tally->ref_R = VECTOR_ELT(tally_R, REF); |
90 | 94 |
tally->read_R = VECTOR_ELT(tally_R, READ); |
95 |
+ tally->n_cycles = INTEGER(VECTOR_ELT(tally_R, N_CYCLES)); |
|
96 |
+ tally->n_cycles_ref = INTEGER(VECTOR_ELT(tally_R, N_CYCLES_REF)); |
|
91 | 97 |
tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT)); |
92 | 98 |
tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF)); |
93 | 99 |
tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL)); |
... | ... |
@@ -123,7 +129,7 @@ read_total_counts(unsigned char **bytes, int row, int *count_total) { |
123 | 129 |
|
124 | 130 |
static void |
125 | 131 |
read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
126 |
- double *read_pos_mean, double *read_pos_var, |
|
132 |
+ int *n_cycles, double *read_pos_mean, double *read_pos_var, |
|
127 | 133 |
double *mdfne, int **cycle_bins) |
128 | 134 |
{ |
129 | 135 |
int n_cycle_breaks = param.n_cycle_bins + 1; |
... | ... |
@@ -133,9 +139,9 @@ read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
133 | 139 |
if (param.mdfne_buf != NULL) { |
134 | 140 |
memset(param.mdfne_buf, 0, sizeof(int) * midpoint); |
135 | 141 |
} |
136 |
- int n_cycles = read_int(bytes); |
|
142 |
+ n_cycles[row] = read_int(bytes); |
|
137 | 143 |
|
138 |
- for (int index = 0; index < n_cycles; index++) { |
|
144 |
+ for (int index = 0; index < n_cycles[row]; index++) { |
|
139 | 145 |
int cycle = abs(read_int(bytes)); |
140 | 146 |
int count = read_int(bytes); |
141 | 147 |
int bin = 0; |
... | ... |
@@ -145,7 +151,7 @@ read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
145 | 151 |
count_sum += count; |
146 | 152 |
weighted_sum += cycle * count; |
147 | 153 |
weighted_sum_sq += cycle * cycle * count; |
148 |
- if (param.cycle_breaks != NULL) { |
|
154 |
+ if (param.n_cycle_bins > 0) { |
|
149 | 155 |
while(n_cycle_breaks > bin && |
150 | 156 |
cycle > param.cycle_breaks[bin]) |
151 | 157 |
bin++; |
... | ... |
@@ -211,7 +217,7 @@ parse_indels(unsigned char *bytes, int row, |
211 | 217 |
SET_STRING_ELT(tally->read_R, row, R_BlankString); |
212 | 218 |
SET_STRING_ELT(tally->ref_R, row, seq_R); |
213 | 219 |
} |
214 |
- read_cycle_counts(&bytes, row, param, |
|
220 |
+ read_cycle_counts(&bytes, row, param, tally->n_cycles, |
|
215 | 221 |
tally->read_pos_mean, tally->read_pos_var, |
216 | 222 |
tally->mdfne, tally->cycle_bins); |
217 | 223 |
/* quality is per-base and so not relevant for indels */ |
... | ... |
@@ -221,6 +227,7 @@ parse_indels(unsigned char *bytes, int row, |
221 | 227 |
tally->high_quality_ref[row] = NA_INTEGER; |
222 | 228 |
tally->high_quality_total[row] = NA_INTEGER; |
223 | 229 |
/* no position from which to tabulate the cycles */ |
230 |
+ tally->n_cycles_ref[row] = NA_INTEGER; |
|
224 | 231 |
tally->read_pos_mean_ref[row] = NA_REAL; |
225 | 232 |
tally->read_pos_var_ref[row] = NA_REAL; |
226 | 233 |
tally->mdfne_ref[row] = NA_REAL; |
... | ... |
@@ -275,13 +282,14 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
275 | 282 |
tally->count_plus, tally->count_minus, |
276 | 283 |
tally->count); |
277 | 284 |
for (int allele = 0; allele < n_alleles; allele++, row++) { |
285 |
+ tally->n_cycles[row] = 0; |
|
278 | 286 |
for (int b = 0; b < param.n_cycle_bins; b++) { |
279 | 287 |
tally->cycle_bins[b][row] = 0; |
280 | 288 |
} |
281 | 289 |
tally->high_quality[row] = 0; |
282 | 290 |
tally->mean_quality[row] = R_NaN; |
283 | 291 |
if (tally->count[row] > 0) { |
284 |
- read_cycle_counts(&bytes, row, param, |
|
292 |
+ read_cycle_counts(&bytes, row, param, tally->n_cycles, |
|
285 | 293 |
tally->read_pos_mean, tally->read_pos_var, |
286 | 294 |
tally->mdfne, tally->cycle_bins); |
287 | 295 |
read_quality_counts(&bytes, row, tally->high_quality, tally->mean_quality, |
... | ... |
@@ -297,6 +305,7 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
297 | 305 |
high_quality_total += tally->high_quality[r]; |
298 | 306 |
} |
299 | 307 |
for (int r = ref_row; r < row; r++) { |
308 |
+ tally->n_cycles_ref[r] = tally->n_cycles[ref_row]; |
|
300 | 309 |
tally->count_total[r] = tally->count_total[ref_row]; |
301 | 310 |
tally->mean_quality_ref[r] = tally->mean_quality[ref_row]; |
302 | 311 |
tally->high_quality_ref[r] = tally->high_quality[ref_row]; |
... | ... |
@@ -313,6 +322,7 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
313 | 322 |
if (have_ref_row) { |
314 | 323 |
/* clear the 'alt' columns for the 'ref' row with NAs */ |
315 | 324 |
SET_STRING_ELT(tally->read_R, ref_row, NA_STRING); |
325 |
+ tally->n_cycles[ref_row] = NA_INTEGER; |
|
316 | 326 |
tally->mean_quality[ref_row] = NA_REAL; |
317 | 327 |
tally->high_quality[ref_row] = NA_REAL; |
318 | 328 |
tally->count_plus[ref_row] = NA_INTEGER; |