git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@86546 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -10,7 +10,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read |
10 | 10 |
to work with GMAP and GSNAP from within R. In addition, it provides |
11 | 11 |
methods to tally alignment results on a per-nucleotide basis using |
12 | 12 |
the bam_tally tool. |
13 |
-Version: 1.5.7 |
|
13 |
+Version: 1.5.8 |
|
14 | 14 |
Depends: R (>= 2.15.0), methods, GenomicRanges |
15 | 15 |
Imports: IRanges, Rsamtools (>= 1.7.4), rtracklayer (>= 1.17.15), |
16 | 16 |
GenomicFeatures, Biostrings, VariantAnnotation (>= 1.9.4), tools, |
... | ... |
@@ -76,16 +76,14 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
76 | 76 |
normArgSingleInteger(high_base_quality), |
77 | 77 |
NULL, read_length) |
78 | 78 |
|
79 |
- tally_names <- c("seqnames", "pos", "ref", "alt", "n.read.pos", |
|
80 |
- "n.read.pos.ref", "raw.count", "raw.count.ref", |
|
79 |
+ tally_names <- c("seqnames", "pos", "ref", "alt", |
|
80 |
+ "raw.count", "raw.count.ref", |
|
81 | 81 |
"raw.count.total", |
82 | 82 |
"high.quality", "high.quality.ref", |
83 | 83 |
"high.quality.total", "mean.quality", |
84 | 84 |
"mean.quality.ref", |
85 | 85 |
"count.pos", "count.pos.ref", |
86 | 86 |
"count.neg", "count.neg.ref", |
87 |
- "read.pos.mean", "read.pos.mean.ref", |
|
88 |
- "read.pos.var", "read.pos.var.ref", |
|
89 | 87 |
"mdfne", "mdfne.ref") |
90 | 88 |
break_names <- character() |
91 | 89 |
if (length(read_pos_breaks) > 0L) { |
... | ... |
@@ -127,8 +125,6 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
127 | 125 |
|
128 | 126 |
checkTallyConsistency <- function(x) { |
129 | 127 |
with(mcols(x), { |
130 |
- stopifnot(all(n.read.pos.ref <= raw.count.ref, na.rm=TRUE)) |
|
131 |
- stopifnot(all(n.read.pos <= raw.count, na.rm=TRUE)) |
|
132 | 128 |
stopifnot(all(raw.count + raw.count.ref <= raw.count.total)) |
133 | 129 |
stopifnot(all(altDepth(x) <= raw.count, na.rm=TRUE)) |
134 | 130 |
stopifnot(all(refDepth(x) <= raw.count.ref, na.rm=TRUE)) |
... | ... |
@@ -225,8 +221,6 @@ normArgTRUEorFALSE <- function(x) { |
225 | 221 |
|
226 | 222 |
variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
227 | 223 |
desc <- c( |
228 |
- n.read.pos = "Number of unique read positions for the ALT", |
|
229 |
- n.read.pos.ref = "Number of unique read positions for the REF", |
|
230 | 224 |
raw.count = "Raw ALT count", |
231 | 225 |
raw.count.ref = "Raw REF count", |
232 | 226 |
raw.count.total = "Raw total count", |
... | ... |
@@ -236,10 +230,6 @@ variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
236 | 230 |
count.pos.ref = "Raw positive strand REF count", |
237 | 231 |
count.neg = "Raw negative strand ALT count", |
238 | 232 |
count.neg.ref = "Raw negative strand REF count", |
239 |
- read.pos.mean = "Average read position for the ALT", |
|
240 |
- read.pos.mean.ref = "Average read position for the ALT", |
|
241 |
- read.pos.var = "Variance in read position for the ALT", |
|
242 |
- read.pos.var.ref = "Variance in read position for the REF", |
|
243 | 233 |
mdfne = "Median distance from nearest end of read for the ALT", |
244 | 234 |
mdfne.ref = "Median distance from nearest end of read for the REF") |
245 | 235 |
if (length(read_pos_breaks) > 0L) { |
... | ... |
@@ -4,19 +4,16 @@ |
4 | 4 |
#include "iit.h" |
5 | 5 |
#include "bytestream.h" |
6 | 6 |
|
7 |
-enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, |
|
7 |
+enum { SEQNAMES, POS, REF, READ, COUNT, COUNT_REF, |
|
8 | 8 |
COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, |
9 | 9 |
MEAN_QUALITY, MEAN_QUALITY_REF, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, |
10 |
- COUNT_MINUS_REF, READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, |
|
11 |
- READ_POS_VAR_REF, MDFNE, MDFNE_REF, N_BASE_COLS }; |
|
10 |
+ COUNT_MINUS_REF, MDFNE, MDFNE_REF, N_BASE_COLS }; |
|
12 | 11 |
|
13 | 12 |
typedef struct TallyTable { |
14 | 13 |
SEXP seqnames_R; |
15 | 14 |
int *pos; |
16 | 15 |
SEXP ref_R; |
17 | 16 |
SEXP read_R; |
18 |
- int *n_cycles; |
|
19 |
- int *n_cycles_ref; |
|
20 | 17 |
int *count; |
21 | 18 |
int *count_ref; |
22 | 19 |
int *count_total; |
... | ... |
@@ -29,10 +26,6 @@ typedef struct TallyTable { |
29 | 26 |
int *count_plus_ref; |
30 | 27 |
int *count_minus; |
31 | 28 |
int *count_minus_ref; |
32 |
- double *read_pos_mean; |
|
33 |
- double *read_pos_mean_ref; |
|
34 |
- double *read_pos_var; |
|
35 |
- double *read_pos_var_ref; |
|
36 | 29 |
double *mdfne; |
37 | 30 |
double *mdfne_ref; |
38 | 31 |
int **cycle_bins; |
... | ... |
@@ -54,8 +47,6 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) { |
54 | 47 |
SET_VECTOR_ELT(tally_R, POS, allocVector(INTSXP, n_rows)); |
55 | 48 |
SET_VECTOR_ELT(tally_R, REF, allocVector(STRSXP, n_rows)); |
56 | 49 |
SET_VECTOR_ELT(tally_R, READ, allocVector(STRSXP, n_rows)); |
57 |
- SET_VECTOR_ELT(tally_R, N_CYCLES, allocVector(INTSXP, n_rows)); |
|
58 |
- SET_VECTOR_ELT(tally_R, N_CYCLES_REF, allocVector(INTSXP, n_rows)); |
|
59 | 50 |
SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows)); |
60 | 51 |
SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows)); |
61 | 52 |
SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows)); |
... | ... |
@@ -68,10 +59,6 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) { |
68 | 59 |
SET_VECTOR_ELT(tally_R, COUNT_PLUS_REF, allocVector(INTSXP, n_rows)); |
69 | 60 |
SET_VECTOR_ELT(tally_R, COUNT_MINUS, allocVector(INTSXP, n_rows)); |
70 | 61 |
SET_VECTOR_ELT(tally_R, COUNT_MINUS_REF, allocVector(INTSXP, n_rows)); |
71 |
- SET_VECTOR_ELT(tally_R, READ_POS_MEAN, allocVector(REALSXP, n_rows)); |
|
72 |
- SET_VECTOR_ELT(tally_R, READ_POS_MEAN_REF, allocVector(REALSXP, n_rows)); |
|
73 |
- SET_VECTOR_ELT(tally_R, READ_POS_VAR, allocVector(REALSXP, n_rows)); |
|
74 |
- SET_VECTOR_ELT(tally_R, READ_POS_VAR_REF, allocVector(REALSXP, n_rows)); |
|
75 | 62 |
SET_VECTOR_ELT(tally_R, MDFNE, allocVector(REALSXP, n_rows)); |
76 | 63 |
SET_VECTOR_ELT(tally_R, MDFNE_REF, allocVector(REALSXP, n_rows)); |
77 | 64 |
|
... | ... |
@@ -92,8 +79,6 @@ static TallyTable *TallyTable_new(SEXP tally_R) { |
92 | 79 |
tally->pos = INTEGER(VECTOR_ELT(tally_R, POS)); |
93 | 80 |
tally->ref_R = VECTOR_ELT(tally_R, REF); |
94 | 81 |
tally->read_R = VECTOR_ELT(tally_R, READ); |
95 |
- tally->n_cycles = INTEGER(VECTOR_ELT(tally_R, N_CYCLES)); |
|
96 |
- tally->n_cycles_ref = INTEGER(VECTOR_ELT(tally_R, N_CYCLES_REF)); |
|
97 | 82 |
tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT)); |
98 | 83 |
tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF)); |
99 | 84 |
tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL)); |
... | ... |
@@ -106,10 +91,6 @@ static TallyTable *TallyTable_new(SEXP tally_R) { |
106 | 91 |
tally->count_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS_REF)); |
107 | 92 |
tally->count_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_MINUS)); |
108 | 93 |
tally->count_minus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_MINUS_REF)); |
109 |
- tally->read_pos_mean = REAL(VECTOR_ELT(tally_R, READ_POS_MEAN)); |
|
110 |
- tally->read_pos_mean_ref = REAL(VECTOR_ELT(tally_R, READ_POS_MEAN_REF)); |
|
111 |
- tally->read_pos_var = REAL(VECTOR_ELT(tally_R, READ_POS_VAR)); |
|
112 |
- tally->read_pos_var_ref = REAL(VECTOR_ELT(tally_R, READ_POS_VAR_REF)); |
|
113 | 94 |
tally->mdfne = REAL(VECTOR_ELT(tally_R, MDFNE)); |
114 | 95 |
tally->mdfne_ref = REAL(VECTOR_ELT(tally_R, MDFNE_REF)); |
115 | 96 |
tally->cycle_bins = (int **) R_alloc(sizeof(int*), n_cycle_bins); |
... | ... |
@@ -129,7 +110,6 @@ read_total_counts(unsigned char **bytes, int row, int *count_total) { |
129 | 110 |
|
130 | 111 |
static void |
131 | 112 |
read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
132 |
- int *n_cycles, double *read_pos_mean, double *read_pos_var, |
|
133 | 113 |
double *mdfne, int **cycle_bins) |
134 | 114 |
{ |
135 | 115 |
int n_cycle_breaks = param.n_cycle_bins + 1; |
... | ... |
@@ -139,9 +119,9 @@ read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
139 | 119 |
if (param.mdfne_buf != NULL) { |
140 | 120 |
memset(param.mdfne_buf, 0, sizeof(int) * midpoint); |
141 | 121 |
} |
142 |
- n_cycles[row] = read_int(bytes); |
|
122 |
+ int n_cycles = read_int(bytes); |
|
143 | 123 |
|
144 |
- for (int index = 0; index < n_cycles[row]; index++) { |
|
124 |
+ for (int index = 0; index < n_cycles; index++) { |
|
145 | 125 |
int cycle = abs(read_int(bytes)); |
146 | 126 |
int count = read_int(bytes); |
147 | 127 |
int bin = 0; |
... | ... |
@@ -160,14 +140,6 @@ read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
160 | 140 |
} |
161 | 141 |
} |
162 | 142 |
} |
163 |
- |
|
164 |
- read_pos_mean[row] = ((double)weighted_sum) / count_sum; |
|
165 |
- if (count_sum > 1) { |
|
166 |
- read_pos_var[row] = ((double)weighted_sum_sq) / (count_sum - 1) - |
|
167 |
- (count_sum / (count_sum - 1)) * read_pos_mean[row] * read_pos_mean[row]; |
|
168 |
- } else { |
|
169 |
- read_pos_var[row] = NA_REAL; |
|
170 |
- } |
|
171 | 143 |
|
172 | 144 |
mdfne[row] = NA_REAL; |
173 | 145 |
if (param.mdfne_buf != NULL) { |
... | ... |
@@ -217,8 +189,7 @@ parse_indels(unsigned char *bytes, int row, |
217 | 189 |
SET_STRING_ELT(tally->read_R, row, R_BlankString); |
218 | 190 |
SET_STRING_ELT(tally->ref_R, row, seq_R); |
219 | 191 |
} |
220 |
- read_cycle_counts(&bytes, row, param, tally->n_cycles, |
|
221 |
- tally->read_pos_mean, tally->read_pos_var, |
|
192 |
+ read_cycle_counts(&bytes, row, param, |
|
222 | 193 |
tally->mdfne, tally->cycle_bins); |
223 | 194 |
/* quality is per-base and so not relevant for indels */ |
224 | 195 |
tally->mean_quality[row] = NA_REAL; |
... | ... |
@@ -227,9 +198,6 @@ parse_indels(unsigned char *bytes, int row, |
227 | 198 |
tally->high_quality_ref[row] = NA_INTEGER; |
228 | 199 |
tally->high_quality_total[row] = NA_INTEGER; |
229 | 200 |
/* no position from which to tabulate the cycles */ |
230 |
- tally->n_cycles_ref[row] = NA_INTEGER; |
|
231 |
- tally->read_pos_mean_ref[row] = NA_REAL; |
|
232 |
- tally->read_pos_var_ref[row] = NA_REAL; |
|
233 | 201 |
tally->mdfne_ref[row] = NA_REAL; |
234 | 202 |
} |
235 | 203 |
return indel_count; |
... | ... |
@@ -282,21 +250,17 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
282 | 250 |
tally->count_plus, tally->count_minus, |
283 | 251 |
tally->count); |
284 | 252 |
for (int allele = 0; allele < n_alleles; allele++, row++) { |
285 |
- tally->n_cycles[row] = 0; |
|
286 | 253 |
for (int b = 0; b < param.n_cycle_bins; b++) { |
287 | 254 |
tally->cycle_bins[b][row] = 0; |
288 | 255 |
} |
289 | 256 |
tally->high_quality[row] = 0; |
290 | 257 |
tally->mean_quality[row] = R_NaN; |
291 | 258 |
if (tally->count[row] > 0) { |
292 |
- read_cycle_counts(&bytes, row, param, tally->n_cycles, |
|
293 |
- tally->read_pos_mean, tally->read_pos_var, |
|
259 |
+ read_cycle_counts(&bytes, row, param, |
|
294 | 260 |
tally->mdfne, tally->cycle_bins); |
295 | 261 |
read_quality_counts(&bytes, row, tally->high_quality, tally->mean_quality, |
296 | 262 |
param.high_base_quality); |
297 | 263 |
} else { |
298 |
- tally->read_pos_mean[row] = R_NaN; |
|
299 |
- tally->read_pos_var[row] = NA_REAL; |
|
300 | 264 |
tally->mdfne[row] = NA_REAL; |
301 | 265 |
} |
302 | 266 |
} |
... | ... |
@@ -305,7 +269,6 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
305 | 269 |
high_quality_total += tally->high_quality[r]; |
306 | 270 |
} |
307 | 271 |
for (int r = ref_row; r < row; r++) { |
308 |
- tally->n_cycles_ref[r] = tally->n_cycles[ref_row]; |
|
309 | 272 |
tally->count_total[r] = tally->count_total[ref_row]; |
310 | 273 |
tally->mean_quality_ref[r] = tally->mean_quality[ref_row]; |
311 | 274 |
tally->high_quality_ref[r] = tally->high_quality[ref_row]; |
... | ... |
@@ -313,8 +276,6 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
313 | 276 |
tally->count_plus_ref[r] = tally->count_plus[ref_row]; |
314 | 277 |
tally->count_minus_ref[r] = tally->count_minus[ref_row]; |
315 | 278 |
tally->count_ref[r] = tally->count_plus_ref[r] + tally->count_minus_ref[r]; |
316 |
- tally->read_pos_mean_ref[r] = tally->read_pos_mean[ref_row]; |
|
317 |
- tally->read_pos_var_ref[r] = tally->read_pos_var[ref_row]; |
|
318 | 279 |
tally->mdfne_ref[r] = tally->mdfne[ref_row]; |
319 | 280 |
SET_STRING_ELT(tally->ref_R, r, STRING_ELT(tally->read_R, ref_row)); |
320 | 281 |
} |
... | ... |
@@ -322,14 +283,11 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
322 | 283 |
if (have_ref_row) { |
323 | 284 |
/* clear the 'alt' columns for the 'ref' row with NAs */ |
324 | 285 |
SET_STRING_ELT(tally->read_R, ref_row, NA_STRING); |
325 |
- tally->n_cycles[ref_row] = NA_INTEGER; |
|
326 | 286 |
tally->mean_quality[ref_row] = NA_REAL; |
327 | 287 |
tally->high_quality[ref_row] = NA_REAL; |
328 | 288 |
tally->count_plus[ref_row] = NA_INTEGER; |
329 | 289 |
tally->count_minus[ref_row] = NA_INTEGER; |
330 | 290 |
tally->count[ref_row] = NA_INTEGER; |
331 |
- tally->read_pos_mean[ref_row] = NA_REAL; |
|
332 |
- tally->read_pos_var[ref_row] = NA_REAL; |
|
333 | 291 |
tally->mdfne[ref_row] = NA_REAL; |
334 | 292 |
} |
335 | 293 |
return n_alleles; |