git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@113152 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -195,19 +195,20 @@ read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus, |
195 | 195 |
int *count_xs_minus) |
196 | 196 |
{ |
197 | 197 |
int n_xs = read_int(bytes); |
198 |
- if (count_xs_plus == NULL) { |
|
199 |
- return; |
|
198 |
+ if (count_xs_plus != NULL) { |
|
199 |
+ count_xs_plus[row] = 0; |
|
200 |
+ count_xs_minus[row] = 0; |
|
200 | 201 |
} |
201 |
- count_xs_plus[row] = 0; |
|
202 |
- count_xs_minus[row] = 0; |
|
203 | 202 |
for (int index = 0; index < n_xs; index++) { |
204 | 203 |
int xs = read_int(bytes); |
205 | 204 |
int count = read_int(bytes); |
206 |
- if (xs == 1) { |
|
207 |
- count_xs_plus[row] = count; |
|
208 |
- } else if (xs == 2) { |
|
209 |
- count_xs_minus[row] = count; |
|
210 |
- } |
|
205 |
+ if (count_xs_plus != NULL) { |
|
206 |
+ if (xs == 1) { |
|
207 |
+ count_xs_plus[row] = count; |
|
208 |
+ } else if (xs == 2) { |
|
209 |
+ count_xs_minus[row] = count; |
|
210 |
+ } |
|
211 |
+ } |
|
211 | 212 |
} |
212 | 213 |
} |
213 | 214 |
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110891 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -6,9 +6,8 @@ |
6 | 6 |
#include "iit.h" |
7 | 7 |
#include "bytestream.h" |
8 | 8 |
|
9 |
-/*I changed the order of these, because the XS spots are optional...*/ |
|
10 | 9 |
enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, |
11 |
- COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, |
|
10 |
+ RAW_COUNT_TOTAL, COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, |
|
12 | 11 |
COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS, |
13 | 12 |
READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, |
14 | 13 |
READ_POS_VAR_REF, MDFNE, MDFNE_REF, CODON_STRAND, |
... | ... |
@@ -41,6 +40,7 @@ typedef struct TallyTable { |
41 | 40 |
int *n_cycles_ref; |
42 | 41 |
int *count; |
43 | 42 |
int *count_ref; |
43 |
+ int *raw_count_total; |
|
44 | 44 |
int *count_total; |
45 | 45 |
int *count_plus; |
46 | 46 |
int *count_plus_ref; |
... | ... |
@@ -84,6 +84,7 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) { |
84 | 84 |
SET_VECTOR_ELT(tally_R, N_CYCLES_REF, allocVector(INTSXP, n_rows)); |
85 | 85 |
SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows)); |
86 | 86 |
SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows)); |
87 |
+ SET_VECTOR_ELT(tally_R, RAW_COUNT_TOTAL, allocVector(INTSXP, n_rows)); |
|
87 | 88 |
SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows)); |
88 | 89 |
SET_VECTOR_ELT(tally_R, COUNT_PLUS, allocVector(INTSXP, n_rows)); |
89 | 90 |
SET_VECTOR_ELT(tally_R, COUNT_PLUS_REF, allocVector(INTSXP, n_rows)); |
... | ... |
@@ -128,6 +129,7 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) { |
128 | 129 |
tally->n_cycles_ref = INTEGER(VECTOR_ELT(tally_R, N_CYCLES_REF)); |
129 | 130 |
tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT)); |
130 | 131 |
tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF)); |
132 |
+ tally->raw_count_total = INTEGER(VECTOR_ELT(tally_R, RAW_COUNT_TOTAL)); |
|
131 | 133 |
tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL)); |
132 | 134 |
tally->count_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS)); |
133 | 135 |
tally->count_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS_REF)); |
... | ... |
@@ -163,7 +165,10 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) { |
163 | 165 |
} |
164 | 166 |
|
165 | 167 |
static void |
166 |
-read_total_counts(unsigned char **bytes, int row, int *count_total) { |
|
168 |
+read_total_counts(unsigned char **bytes, int row, int *raw_count_total, |
|
169 |
+ int *count_total) |
|
170 |
+{ |
|
171 |
+ raw_count_total[row] = read_int(bytes); |
|
167 | 172 |
int count_total_plus = read_int(bytes); |
168 | 173 |
int count_total_minus = read_int(bytes); |
169 | 174 |
count_total[row] = count_total_plus + count_total_minus; |
... | ... |
@@ -286,7 +291,8 @@ parse_indels(unsigned char *bytes, int row, |
286 | 291 |
tally->count_minus_ref[row] = read_int(&bytes); |
287 | 292 |
tally->count_ref[row] = tally->count_plus_ref[row] + |
288 | 293 |
tally->count_minus_ref[row]; |
289 |
- tally->count_total[row] = tally->count_ref[row] + tally->count[row]; |
|
294 |
+ tally->raw_count_total[row] = tally->count_ref[row] + tally->count[row]; |
|
295 |
+ tally->count_total[row] = tally->raw_count_total[row]; |
|
290 | 296 |
tally->strand[row] = STRAND_NONE; |
291 | 297 |
SEXP seq_R = mkChar(read_string(&bytes)); |
292 | 298 |
if (insertion) { |
... | ... |
@@ -354,8 +360,7 @@ static int |
354 | 360 |
parse_alleles(unsigned char *bytes, int row, int ref_row, |
355 | 361 |
TallyParam param, TallyTable *tally, int strand) |
356 | 362 |
{ |
357 |
- |
|
358 |
- read_total_counts(&bytes, row, tally->count_total); |
|
363 |
+ read_total_counts(&bytes, row, tally->raw_count_total, tally->count_total); |
|
359 | 364 |
int delcount_plus = 0, delcount_minus = 0; |
360 | 365 |
int n_alleles = read_allele_counts(&bytes, row, tally->read_R, |
361 | 366 |
tally->count_plus, tally->count_minus, |
... | ... |
@@ -383,6 +388,7 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
383 | 388 |
tally->read_pos_mean[row] = R_NaN; |
384 | 389 |
tally->read_pos_var[row] = NA_REAL; |
385 | 390 |
tally->mdfne[row] = NA_REAL; |
391 |
+ tally->count_high_nm[row] = 0; |
|
386 | 392 |
if (param.xs) { |
387 | 393 |
tally->count_xs_plus[row] = 0; |
388 | 394 |
tally->count_xs_minus[row] = 0; |
... | ... |
@@ -391,6 +397,7 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
391 | 397 |
} |
392 | 398 |
for (int r = ref_row; r < row; r++) { |
393 | 399 |
tally->n_cycles_ref[r] = tally->n_cycles[ref_row]; |
400 |
+ tally->raw_count_total[r] = tally->raw_count_total[ref_row]; |
|
394 | 401 |
tally->count_total[r] = tally->count_total[ref_row]; |
395 | 402 |
tally->count_plus_ref[r] = tally->count_plus[ref_row]; |
396 | 403 |
tally->count_minus_ref[r] = tally->count_minus[ref_row]; |
... | ... |
@@ -430,7 +437,7 @@ static int parse_indel_count(unsigned char *bytes) { |
430 | 437 |
|
431 | 438 |
static int parse_allele_count(unsigned char *bytes) { |
432 | 439 |
int n_alleles = 1; /* always have a reference */ |
433 |
- bytes += sizeof(int) * 4 + 1; /* skip total and reference */ |
|
440 |
+ bytes += sizeof(int) * 5 + 1; /* skip total and reference */ |
|
434 | 441 |
while(bytes[0] != '\0') { |
435 | 442 |
if (bytes[0] != '_') |
436 | 443 |
n_alleles++; |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110596 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -16,9 +16,9 @@ enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, |
16 | 16 |
COUNT_XS_MINUS, COUNT_XS_MINUS_REF, |
17 | 17 |
COUNT_HIGH_NM, COUNT_HIGH_NM_REF, N_BASE_COLS }; |
18 | 18 |
|
19 |
-enum { CODON_PLUS = 1, |
|
20 |
- CODON_MINUS, |
|
21 |
- NON_CODON }; |
|
19 |
+enum { STRAND_PLUS = 1, |
|
20 |
+ STRAND_MINUS, |
|
21 |
+ STRAND_NONE }; |
|
22 | 22 |
|
23 | 23 |
static char *codon_table[64] = |
24 | 24 |
{"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", |
... | ... |
@@ -287,7 +287,7 @@ parse_indels(unsigned char *bytes, int row, |
287 | 287 |
tally->count_ref[row] = tally->count_plus_ref[row] + |
288 | 288 |
tally->count_minus_ref[row]; |
289 | 289 |
tally->count_total[row] = tally->count_ref[row] + tally->count[row]; |
290 |
- tally->strand[row] = NON_CODON; |
|
290 |
+ tally->strand[row] = STRAND_NONE; |
|
291 | 291 |
SEXP seq_R = mkChar(read_string(&bytes)); |
292 | 292 |
if (insertion) { |
293 | 293 |
SET_STRING_ELT(tally->read_R, row, seq_R); |
... | ... |
@@ -328,7 +328,7 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R, |
328 | 328 |
{ |
329 | 329 |
int n_alleles = 0; |
330 | 330 |
char allele; |
331 |
- char stop = strand == 0 ? '\0' : (char) 255; |
|
331 |
+ char stop = strand == STRAND_NONE ? '\0' : (char) 255; |
|
332 | 332 |
|
333 | 333 |
while((allele = (char)read_char(bytes)) != stop) { |
334 | 334 |
if (allele == '_') { |
... | ... |
@@ -336,7 +336,7 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R, |
336 | 336 |
*delcount_minus = read_int(bytes); |
337 | 337 |
continue; |
338 | 338 |
} |
339 |
- if(strand == 0) |
|
339 |
+ if(strand == STRAND_NONE) |
|
340 | 340 |
SET_STRING_ELT(read_R, row, mkCharLen(&allele, 1)); |
341 | 341 |
else |
342 | 342 |
SET_STRING_ELT(read_R, row, mkCharLen(codon_table[(int)allele], 3)); |
... | ... |
@@ -504,7 +504,7 @@ static int parse_interval(IIT_T tally_iit, int index, |
504 | 504 |
bytes -= 4; /* rewind from read-ahead */ |
505 | 505 |
if (codon_plus_offset - allele_offset > 0) |
506 | 506 |
row += parse_alleles(base + allele_offset, row, ref_row, |
507 |
- param, tally, NON_CODON); |
|
507 |
+ param, tally, STRAND_NONE); |
|
508 | 508 |
if (deletion_offset - insertion_offset > 0) |
509 | 509 |
row += parse_indels(base + insertion_offset, row, |
510 | 510 |
param, tally, true); |
... | ... |
@@ -514,11 +514,11 @@ static int parse_interval(IIT_T tally_iit, int index, |
514 | 514 |
|
515 | 515 |
if (codon_minus_offset - codon_plus_offset > 0) { |
516 | 516 |
row += parse_alleles(base + codon_plus_offset, row, row, |
517 |
- param, tally, CODON_PLUS); |
|
517 |
+ param, tally, STRAND_PLUS); |
|
518 | 518 |
} |
519 | 519 |
if (next_offset - codon_minus_offset > 0) { |
520 | 520 |
row += parse_alleles(base + codon_minus_offset, row, row, |
521 |
- param, tally, CODON_MINUS); |
|
521 |
+ param, tally, STRAND_MINUS); |
|
522 | 522 |
} |
523 | 523 |
/* fill in position information */ |
524 | 524 |
for (int r = ref_row; r < row; r++) { |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110593 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-#define DEBUG2 1 |
|
1 |
+#define DEBUG2 0 |
|
2 | 2 |
|
3 | 3 |
#include <stdlib.h> /* for abs */ |
4 | 4 |
#include <string.h> /* for strlen */ |
... | ... |
@@ -7,21 +7,18 @@ |
7 | 7 |
#include "bytestream.h" |
8 | 8 |
|
9 | 9 |
/*I changed the order of these, because the XS spots are optional...*/ |
10 |
-enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, //8 |
|
11 |
- COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, //12 |
|
12 |
- MEAN_QUALITY, MEAN_QUALITY_REF, //14 |
|
13 |
- COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, //17 |
|
14 |
- COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS, //21 |
|
15 |
- READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, //24 |
|
16 |
- READ_POS_VAR_REF, MDFNE, MDFNE_REF, CODON_STRAND, //28 |
|
17 |
- COUNT_XS_PLUS, COUNT_XS_PLUS_REF, //30 |
|
18 |
- COUNT_XS_MINUS, COUNT_XS_MINUS_REF, N_BASE_COLS }; //33 |
|
19 |
- |
|
20 |
- |
|
21 |
-enum{ CODON_MINUS = -1, |
|
22 |
- NON_CODON, |
|
23 |
- CODON_PLUS}; |
|
24 |
- |
|
10 |
+enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, |
|
11 |
+ COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, |
|
12 |
+ COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS, |
|
13 |
+ READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, |
|
14 |
+ READ_POS_VAR_REF, MDFNE, MDFNE_REF, CODON_STRAND, |
|
15 |
+ COUNT_XS_PLUS, COUNT_XS_PLUS_REF, |
|
16 |
+ COUNT_XS_MINUS, COUNT_XS_MINUS_REF, |
|
17 |
+ COUNT_HIGH_NM, COUNT_HIGH_NM_REF, N_BASE_COLS }; |
|
18 |
+ |
|
19 |
+enum { CODON_PLUS = 1, |
|
20 |
+ CODON_MINUS, |
|
21 |
+ NON_CODON }; |
|
25 | 22 |
|
26 | 23 |
static char *codon_table[64] = |
27 | 24 |
{"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", |
... | ... |
@@ -33,10 +30,6 @@ static char *codon_table[64] = |
33 | 30 |
"TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", |
34 | 31 |
"TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"}; |
35 | 32 |
|
36 |
- |
|
37 |
- |
|
38 |
- |
|
39 |
- |
|
40 | 33 |
static int NUM_PTRS = 5; |
41 | 34 |
|
42 | 35 |
typedef struct TallyTable { |
... | ... |
@@ -49,11 +42,6 @@ typedef struct TallyTable { |
49 | 42 |
int *count; |
50 | 43 |
int *count_ref; |
51 | 44 |
int *count_total; |
52 |
- int *high_quality; |
|
53 |
- int *high_quality_ref; |
|
54 |
- int *high_quality_total; |
|
55 |
- double *mean_quality; |
|
56 |
- double *mean_quality_ref; |
|
57 | 45 |
int *count_plus; |
58 | 46 |
int *count_plus_ref; |
59 | 47 |
int *count_minus; |
... | ... |
@@ -70,6 +58,8 @@ typedef struct TallyTable { |
70 | 58 |
int *count_xs_plus_ref; |
71 | 59 |
int *count_xs_minus; |
72 | 60 |
int *count_xs_minus_ref; |
61 |
+ int *count_high_nm; |
|
62 |
+ int *count_high_nm_ref; |
|
73 | 63 |
int *strand; |
74 | 64 |
int **cycle_bins; |
75 | 65 |
} TallyTable; |
... | ... |
@@ -77,16 +67,15 @@ typedef struct TallyTable { |
77 | 67 |
typedef struct TallyParam { |
78 | 68 |
int *cycle_breaks; |
79 | 69 |
int n_cycle_bins; |
80 |
- int high_base_quality; |
|
81 | 70 |
int read_length; |
82 | 71 |
double *mdfne_buf; |
83 | 72 |
bool xs; |
73 |
+ int high_nm_score; |
|
84 | 74 |
} TallyParam; |
85 | 75 |
|
86 | 76 |
static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) { |
87 | 77 |
SEXP tally_R; /* the result list */ |
88 | 78 |
PROTECT(tally_R = allocVector(VECSXP, N_BASE_COLS + n_cycle_bins)); |
89 |
- |
|
90 | 79 |
SET_VECTOR_ELT(tally_R, SEQNAMES, allocVector(STRSXP, n_rows)); |
91 | 80 |
SET_VECTOR_ELT(tally_R, POS, allocVector(INTSXP, n_rows)); |
92 | 81 |
SET_VECTOR_ELT(tally_R, REF, allocVector(STRSXP, n_rows)); |
... | ... |
@@ -96,11 +85,6 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) { |
96 | 85 |
SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows)); |
97 | 86 |
SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows)); |
98 | 87 |
SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows)); |
99 |
- SET_VECTOR_ELT(tally_R, HIGH_QUALITY, allocVector(INTSXP, n_rows)); |
|
100 |
- SET_VECTOR_ELT(tally_R, HIGH_QUALITY_REF, allocVector(INTSXP, n_rows)); |
|
101 |
- SET_VECTOR_ELT(tally_R, HIGH_QUALITY_TOTAL, allocVector(INTSXP, n_rows)); |
|
102 |
- SET_VECTOR_ELT(tally_R, MEAN_QUALITY, allocVector(REALSXP, n_rows)); |
|
103 |
- SET_VECTOR_ELT(tally_R, MEAN_QUALITY_REF, allocVector(REALSXP, n_rows)); |
|
104 | 88 |
SET_VECTOR_ELT(tally_R, COUNT_PLUS, allocVector(INTSXP, n_rows)); |
105 | 89 |
SET_VECTOR_ELT(tally_R, COUNT_PLUS_REF, allocVector(INTSXP, n_rows)); |
106 | 90 |
SET_VECTOR_ELT(tally_R, COUNT_MINUS, allocVector(INTSXP, n_rows)); |
... | ... |
@@ -114,12 +98,15 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) { |
114 | 98 |
SET_VECTOR_ELT(tally_R, CODON_STRAND, allocVector(INTSXP, n_rows)); |
115 | 99 |
SET_VECTOR_ELT(tally_R, DELCOUNT_PLUS, allocVector(INTSXP, n_rows)); |
116 | 100 |
SET_VECTOR_ELT(tally_R, DELCOUNT_MINUS, allocVector(INTSXP, n_rows)); |
117 |
- SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows)); |
|
118 |
- SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows)); |
|
119 |
- SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows)); |
|
120 |
- SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows)); |
|
121 |
- |
|
122 |
- |
|
101 |
+ if (xs) { |
|
102 |
+ SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows)); |
|
103 |
+ SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows)); |
|
104 |
+ SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows)); |
|
105 |
+ SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows)); |
|
106 |
+ } |
|
107 |
+ SET_VECTOR_ELT(tally_R, COUNT_HIGH_NM, allocVector(INTSXP, n_rows)); |
|
108 |
+ SET_VECTOR_ELT(tally_R, COUNT_HIGH_NM_REF, allocVector(INTSXP, n_rows)); |
|
109 |
+ |
|
123 | 110 |
for (int bin = 0; bin < n_cycle_bins; bin++) { |
124 | 111 |
SEXP cycle_bin_R = allocVector(INTSXP, n_rows); |
125 | 112 |
SET_VECTOR_ELT(tally_R, bin + N_BASE_COLS, cycle_bin_R); |
... | ... |
@@ -130,7 +117,7 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) { |
130 | 117 |
} |
131 | 118 |
|
132 | 119 |
static TallyTable *TallyTable_new(SEXP tally_R, bool xs) { |
133 |
- TallyTable *tally = (TallyTable *) R_alloc(sizeof(TallyTable), 1); |
|
120 |
+ TallyTable *tally = (TallyTable *) S_alloc(sizeof(TallyTable), 1); |
|
134 | 121 |
int n_cycle_bins = length(tally_R) - N_BASE_COLS; |
135 | 122 |
|
136 | 123 |
tally->seqnames_R = VECTOR_ELT(tally_R, SEQNAMES); |
... | ... |
@@ -142,11 +129,6 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) { |
142 | 129 |
tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT)); |
143 | 130 |
tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF)); |
144 | 131 |
tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL)); |
145 |
- tally->high_quality = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY)); |
|
146 |
- tally->high_quality_ref = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY_REF)); |
|
147 |
- tally->high_quality_total = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY_TOTAL)); |
|
148 |
- tally->mean_quality = REAL(VECTOR_ELT(tally_R, MEAN_QUALITY)); |
|
149 |
- tally->mean_quality_ref = REAL(VECTOR_ELT(tally_R, MEAN_QUALITY_REF)); |
|
150 | 132 |
tally->count_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS)); |
151 | 133 |
tally->count_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS_REF)); |
152 | 134 |
tally->count_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_MINUS)); |
... | ... |
@@ -161,12 +143,18 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) { |
161 | 143 |
tally->delcount_plus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_PLUS)); |
162 | 144 |
tally->delcount_minus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_MINUS)); |
163 | 145 |
tally->cycle_bins = (int **) R_alloc(sizeof(int*), n_cycle_bins); |
164 |
- tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS)); |
|
165 |
- tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF)); |
|
166 |
- tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS)); |
|
167 |
- tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF)); |
|
146 |
+ if (xs) { |
|
147 |
+ tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS)); |
|
148 |
+ tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R, |
|
149 |
+ COUNT_XS_PLUS_REF)); |
|
150 |
+ tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS)); |
|
151 |
+ tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R, |
|
152 |
+ COUNT_XS_MINUS_REF)); |
|
153 |
+ } |
|
154 |
+ tally->count_high_nm = INTEGER(VECTOR_ELT(tally_R, COUNT_HIGH_NM)); |
|
155 |
+ tally->count_high_nm_ref = INTEGER(VECTOR_ELT(tally_R, |
|
156 |
+ COUNT_HIGH_NM_REF)); |
|
168 | 157 |
|
169 |
- |
|
170 | 158 |
for (int bin = 0; bin < n_cycle_bins; bin++) { |
171 | 159 |
tally->cycle_bins[bin] = INTEGER(VECTOR_ELT(tally_R, bin + N_BASE_COLS)); |
172 | 160 |
} |
... | ... |
@@ -181,11 +169,30 @@ read_total_counts(unsigned char **bytes, int row, int *count_total) { |
181 | 169 |
count_total[row] = count_total_plus + count_total_minus; |
182 | 170 |
} |
183 | 171 |
|
172 |
+static void |
|
173 |
+read_nm_counts(unsigned char **bytes, int row, int *count_high_nm, |
|
174 |
+ int high_nm_score) |
|
175 |
+{ |
|
176 |
+ int n_nm = read_int(bytes); |
|
177 |
+ count_high_nm[row] = 0; |
|
178 |
+ /* FIXME: currently bam_tally gives us counts whether we want them or not */ |
|
179 |
+ for (int index = 0; index < n_nm; index++) { |
|
180 |
+ int nm = read_int(bytes); |
|
181 |
+ int count = read_int(bytes); |
|
182 |
+ if (nm >= high_nm_score) { |
|
183 |
+ count_high_nm[row] += count; |
|
184 |
+ } |
|
185 |
+ } |
|
186 |
+} |
|
187 |
+ |
|
184 | 188 |
static void |
185 | 189 |
read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus, |
186 | 190 |
int *count_xs_minus) |
187 | 191 |
{ |
188 | 192 |
int n_xs = read_int(bytes); |
193 |
+ if (count_xs_plus == NULL) { |
|
194 |
+ return; |
|
195 |
+ } |
|
189 | 196 |
count_xs_plus[row] = 0; |
190 | 197 |
count_xs_minus[row] = 0; |
191 | 198 |
for (int index = 0; index < n_xs; index++) { |
... | ... |
@@ -197,14 +204,8 @@ read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus, |
197 | 204 |
count_xs_minus[row] = count; |
198 | 205 |
} |
199 | 206 |
} |
200 |
-#ifdef DEBUG2 |
|
201 |
- printf("row %d xs counts: %d plus %d minus", row, count_xs_plus[row], count_xs_minus[row]); |
|
202 |
-#endif |
|
203 | 207 |
} |
204 | 208 |
|
205 |
- |
|
206 |
- |
|
207 |
- |
|
208 | 209 |
static void |
209 | 210 |
read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
210 | 211 |
int *n_cycles, double *read_pos_mean, double *read_pos_var, |
... | ... |
@@ -274,9 +275,6 @@ parse_indels(unsigned char *bytes, int row, |
274 | 275 |
TallyParam param, TallyTable *tally, bool insertion) |
275 | 276 |
{ |
276 | 277 |
int indel_count = read_int(&bytes); |
277 |
-#ifdef DEBUG2 |
|
278 |
- printf("row %d indel count: %d", row, indel_count); |
|
279 |
-#endif |
|
280 | 278 |
for (int indel = 0; indel < indel_count; indel++, row++) { |
281 | 279 |
for (int b = 0; b < param.n_cycle_bins; b++) { |
282 | 280 |
tally->cycle_bins[b][row] = 0; |
... | ... |
@@ -302,72 +300,48 @@ parse_indels(unsigned char *bytes, int row, |
302 | 300 |
read_cycle_counts(&bytes, row, param, tally->n_cycles, |
303 | 301 |
tally->read_pos_mean, tally->read_pos_var, |
304 | 302 |
tally->mdfne, tally->cycle_bins); |
305 |
- /* quality is per-base and so not relevant for indels */ |
|
306 |
- tally->mean_quality[row] = NA_REAL; |
|
307 |
- tally->mean_quality_ref[row] = NA_REAL; |
|
308 |
- tally->high_quality[row] = NA_INTEGER; |
|
309 |
- tally->high_quality_ref[row] = NA_INTEGER; |
|
310 |
- tally->high_quality_total[row] = NA_INTEGER; |
|
303 |
+ read_nm_counts(&bytes, row, tally->count_high_nm, param.high_nm_score); |
|
304 |
+ read_xs_counts(&bytes, row, tally->count_xs_plus, tally->count_xs_minus); |
|
311 | 305 |
/* no position from which to tabulate the cycles */ |
312 | 306 |
tally->n_cycles_ref[row] = NA_INTEGER; |
313 | 307 |
tally->read_pos_mean_ref[row] = NA_REAL; |
314 | 308 |
tally->read_pos_var_ref[row] = NA_REAL; |
315 | 309 |
tally->mdfne_ref[row] = NA_REAL; |
316 |
- } |
|
317 |
- return indel_count; |
|
318 |
-} |
|
319 |
- |
|
320 |
-static void |
|
321 |
-read_quality_counts(unsigned char **bytes, int row, int *high_quality, |
|
322 |
- double *mean_quality, int high_base_quality) |
|
323 |
-{ |
|
324 |
- int n_qualities = read_int(bytes); |
|
325 |
- int total_quality = 0; |
|
326 |
- int total_quality_weight = 0; |
|
327 |
- high_quality[row] = 0; |
|
328 |
- for (int index = 0; index < n_qualities; index++) { |
|
329 |
- int quality = read_int(bytes); |
|
330 |
- int count = read_int(bytes); |
|
331 |
- if (quality >= high_base_quality) { |
|
332 |
- total_quality += quality * count; |
|
333 |
- total_quality_weight += count; |
|
334 |
- high_quality[row] += count; |
|
310 |
+ /* overlapping deletion count only relevant for SNVs */ |
|
311 |
+ tally->delcount_plus[row] = NA_INTEGER; |
|
312 |
+ tally->delcount_minus[row] = NA_INTEGER; |
|
313 |
+ /* information not available for "ref" allele */ |
|
314 |
+ tally->count_high_nm_ref[row] = NA_INTEGER; |
|
315 |
+ if (param.xs) { |
|
316 |
+ tally->count_xs_plus_ref[row] = NA_INTEGER; |
|
317 |
+ tally->count_xs_minus_ref[row] = NA_INTEGER; |
|
335 | 318 |
} |
336 | 319 |
} |
337 |
- mean_quality[row] = total_quality_weight > 0 ? |
|
338 |
- (double)total_quality / total_quality_weight : R_NaN; |
|
320 |
+ return indel_count; |
|
339 | 321 |
} |
340 | 322 |
|
341 | 323 |
static int |
342 | 324 |
read_allele_counts(unsigned char **bytes, int row, SEXP read_R, |
343 |
- int *count_plus, int *count_minus, int *count, int strand) |
|
325 |
+ int *count_plus, int *count_minus, int *count, |
|
326 |
+ int *delcount_plus, int *delcount_minus, |
|
327 |
+ int strand) |
|
344 | 328 |
{ |
345 | 329 |
int n_alleles = 0; |
346 | 330 |
char allele; |
347 | 331 |
char stop = strand == 0 ? '\0' : (char) 255; |
348 |
-#ifdef DEBUG2 |
|
349 |
- printf("Starting read_allele_counts (codon strand %d) at row %d. Total length of read_R is %d\n", strand, row, LENGTH(read_R)); |
|
350 |
-#endif |
|
332 |
+ |
|
351 | 333 |
while((allele = (char)read_char(bytes)) != stop) { |
352 |
-#ifdef DEBUG2 |
|
353 |
- printf("Parsing counts for allele: %d (%s) row %d \n", allele, &allele, row ); |
|
354 |
-#endif |
|
334 |
+ if (allele == '_') { |
|
335 |
+ *delcount_plus = read_int(bytes); |
|
336 |
+ *delcount_minus = read_int(bytes); |
|
337 |
+ continue; |
|
338 |
+ } |
|
355 | 339 |
if(strand == 0) |
356 | 340 |
SET_STRING_ELT(read_R, row, mkCharLen(&allele, 1)); |
357 | 341 |
else |
358 | 342 |
SET_STRING_ELT(read_R, row, mkCharLen(codon_table[(int)allele], 3)); |
359 |
-#ifdef DEBUG2 |
|
360 |
- printf("Reading allele plus count: "); |
|
361 |
-#endif |
|
362 | 343 |
count_plus[row] = read_int(bytes); |
363 |
-#ifdef DEBUG2 |
|
364 |
- printf("%d\n", count_plus[row]); |
|
365 |
- printf("Reading allele minus count: "); |
|
366 |
-#endif |
|
367 | 344 |
count_minus[row] = read_int(bytes); |
368 |
-#ifdef DEBUG2 |
|
369 |
- printf("%d\n", count_minus[row]); |
|
370 |
-#endif |
|
371 | 345 |
count[row] = count_plus[row] + count_minus[row]; |
372 | 346 |
row++; |
373 | 347 |
n_alleles++; |
... | ... |
@@ -376,69 +350,35 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R, |
376 | 350 |
return n_alleles; |
377 | 351 |
} |
378 | 352 |
|
379 |
- |
|
380 |
-//format _(int)(int) |
|
381 |
-static void |
|
382 |
-read_del_counts(unsigned char **bytes, int row, int *delcount_plus, |
|
383 |
- int *delcount_minus) |
|
384 |
-{ |
|
385 |
- |
|
386 |
-//This consumes a byte!!! |
|
387 |
- unsigned char mychar = read_char(bytes); |
|
388 |
-#ifdef DEBUG2 |
|
389 |
- printf("in read_del_counts, mchar is (as int) %d", mychar); |
|
390 |
-#endif |
|
391 |
- |
|
392 |
- if(mychar == '_') { |
|
393 |
- |
|
394 |
- |
|
395 |
- *delcount_plus = read_int(bytes); |
|
396 |
- *delcount_minus = read_int(bytes); |
|
397 |
-#ifdef DEBUG2 |
|
398 |
- printf("read_del_counts: '_' character detected. count plus %d count minus %d\n", *delcount_plus, *delcount_minus); |
|
399 |
-#endif |
|
400 |
- } else { |
|
401 |
- //backtrack char consumption if it wasn't a "_" |
|
402 |
- *bytes = *bytes-1; |
|
403 |
- *delcount_plus = 0; |
|
404 |
- *delcount_minus = 0; |
|
405 |
- } |
|
406 |
- return; |
|
407 |
-} |
|
408 |
- |
|
409 | 353 |
static int |
410 | 354 |
parse_alleles(unsigned char *bytes, int row, int ref_row, |
411 | 355 |
TallyParam param, TallyTable *tally, int strand) |
412 | 356 |
{ |
413 | 357 |
|
414 |
- bool have_ref_row = false; |
|
415 | 358 |
read_total_counts(&bytes, row, tally->count_total); |
359 |
+ int delcount_plus = 0, delcount_minus = 0; |
|
416 | 360 |
int n_alleles = read_allele_counts(&bytes, row, tally->read_R, |
417 | 361 |
tally->count_plus, tally->count_minus, |
418 | 362 |
tally->count, |
363 |
+ &delcount_plus, &delcount_minus, |
|
419 | 364 |
strand); |
420 |
- int delcount_plus, delcount_minus; |
|
421 |
-/* read_del_counts(&bytes, row, &delcount_plus, &delcount_minus);*/ |
|
422 | 365 |
for (int allele = 0; allele < n_alleles; allele++, row++) { |
423 | 366 |
tally->n_cycles[row] = 0; |
424 | 367 |
for (int b = 0; b < param.n_cycle_bins; b++) { |
425 | 368 |
tally->cycle_bins[b][row] = 0; |
426 | 369 |
} |
427 |
- tally->high_quality[row] = 0; |
|
428 |
- tally->mean_quality[row] = R_NaN; |
|
429 | 370 |
tally->strand[row] = strand; |
430 |
- tally->delcount_plus[row] = R_NaN;//delcount_plus; |
|
431 |
- tally->delcount_minus[row] = R_NaN;//delcount_minus; |
|
371 |
+ tally->delcount_plus[row] = delcount_plus; |
|
372 |
+ tally->delcount_minus[row] = delcount_minus; |
|
432 | 373 |
if (tally->count[row] > 0) { |
433 | 374 |
read_cycle_counts(&bytes, row, param, tally->n_cycles, |
434 | 375 |
tally->read_pos_mean, tally->read_pos_var, |
435 | 376 |
tally->mdfne, tally->cycle_bins); |
436 |
- read_quality_counts(&bytes, row, tally->high_quality, tally->mean_quality, |
|
437 |
- param.high_base_quality); |
|
377 |
+ read_nm_counts(&bytes, row, tally->count_high_nm, param.high_nm_score); |
|
438 | 378 |
if (param.xs) { |
439 |
- read_xs_counts(&bytes, row, tally->count_xs_plus, tally->count_xs_minus); |
|
379 |
+ read_xs_counts(&bytes, row, tally->count_xs_plus, |
|
380 |
+ tally->count_xs_minus); |
|
440 | 381 |
} |
441 |
- |
|
442 | 382 |
} else { |
443 | 383 |
tally->read_pos_mean[row] = R_NaN; |
444 | 384 |
tally->read_pos_var[row] = NA_REAL; |
... | ... |
@@ -446,27 +386,19 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
446 | 386 |
if (param.xs) { |
447 | 387 |
tally->count_xs_plus[row] = 0; |
448 | 388 |
tally->count_xs_minus[row] = 0; |
449 |
- |
|
450 | 389 |
} |
451 | 390 |
} |
452 |
- have_ref_row = true; |
|
453 |
- } |
|
454 |
- int high_quality_total = 0; |
|
455 |
- for (int r = ref_row; r < row; r++) { |
|
456 |
- high_quality_total += tally->high_quality[r]; |
|
457 | 391 |
} |
458 | 392 |
for (int r = ref_row; r < row; r++) { |
459 | 393 |
tally->n_cycles_ref[r] = tally->n_cycles[ref_row]; |
460 | 394 |
tally->count_total[r] = tally->count_total[ref_row]; |
461 |
- tally->mean_quality_ref[r] = tally->mean_quality[ref_row]; |
|
462 |
- tally->high_quality_ref[r] = tally->high_quality[ref_row]; |
|
463 |
- tally->high_quality_total[r] = high_quality_total; |
|
464 | 395 |
tally->count_plus_ref[r] = tally->count_plus[ref_row]; |
465 | 396 |
tally->count_minus_ref[r] = tally->count_minus[ref_row]; |
466 | 397 |
tally->count_ref[r] = tally->count_plus_ref[r] + tally->count_minus_ref[r]; |
467 | 398 |
tally->read_pos_mean_ref[r] = tally->read_pos_mean[ref_row]; |
468 | 399 |
tally->read_pos_var_ref[r] = tally->read_pos_var[ref_row]; |
469 | 400 |
tally->mdfne_ref[r] = tally->mdfne[ref_row]; |
401 |
+ tally->count_high_nm_ref[r] = tally->count_high_nm[ref_row]; |
|
470 | 402 |
if (param.xs) { |
471 | 403 |
tally->count_xs_plus_ref[r] = tally->count_xs_plus[ref_row]; |
472 | 404 |
tally->count_xs_minus_ref[r] = tally->count_xs_minus[ref_row]; |
... | ... |
@@ -474,12 +406,9 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
474 | 406 |
SET_STRING_ELT(tally->ref_R, r, STRING_ELT(tally->read_R, ref_row)); |
475 | 407 |
} |
476 | 408 |
|
477 |
-// if (have_ref_row) { |
|
478 | 409 |
/* clear the 'alt' columns for the 'ref' row with NAs */ |
479 | 410 |
SET_STRING_ELT(tally->read_R, ref_row, NA_STRING); |
480 | 411 |
tally->n_cycles[ref_row] = NA_INTEGER; |
481 |
- tally->mean_quality[ref_row] = NA_REAL; |
|
482 |
- tally->high_quality[ref_row] = NA_REAL; |
|
483 | 412 |
tally->count_plus[ref_row] = NA_INTEGER; |
484 | 413 |
tally->count_minus[ref_row] = NA_INTEGER; |
485 | 414 |
tally->count[ref_row] = NA_INTEGER; |
... | ... |
@@ -491,7 +420,6 @@ parse_alleles(unsigned char *bytes, int row, int ref_row, |
491 | 420 |
tally->count_xs_minus[ref_row] = NA_INTEGER; |
492 | 421 |
} |
493 | 422 |
|
494 |
-// } |
|
495 | 423 |
return n_alleles; |
496 | 424 |
} |
497 | 425 |
|
... | ... |
@@ -504,11 +432,9 @@ static int parse_allele_count(unsigned char *bytes) { |
504 | 432 |
int n_alleles = 1; /* always have a reference */ |
505 | 433 |
bytes += sizeof(int) * 4 + 1; /* skip total and reference */ |
506 | 434 |
while(bytes[0] != '\0') { |
507 |
-#ifdef DEBUG2 |
|
508 |
- printf("Found allele %s\n", &bytes[0]); |
|
509 |
-#endif |
|
510 |
- bytes += sizeof(int) * 2 + 1; |
|
511 |
- n_alleles++; |
|
435 |
+ if (bytes[0] != '_') |
|
436 |
+ n_alleles++; |
|
437 |
+ bytes += sizeof(int) * 2 + 1; |
|
512 | 438 |
} |
513 | 439 |
return n_alleles; |
514 | 440 |
} |
... | ... |
@@ -518,9 +444,6 @@ static int parse_codon_count(unsigned char *bytes) { |
518 | 444 |
int n_alleles = 1; /* always have a reference */ |
519 | 445 |
bytes += sizeof(int) * 4 + 1; /* skip reference */ |
520 | 446 |
while((int) bytes[0] != 255 ){ // && bytes [0] != '\0') { |
521 |
-#ifdef DEBUG2 |
|
522 |
- printf("Found codon %d\n", bytes[0]); |
|
523 |
-#endif |
|
524 | 447 |
bytes += sizeof(int) * 2 + 1; |
525 | 448 |
n_alleles++; |
526 | 449 |
} |
... | ... |
@@ -571,38 +494,12 @@ static int parse_interval(IIT_T tally_iit, int index, |
571 | 494 |
SEXP divstring_R; |
572 | 495 |
PROTECT(divstring_R = mkChar(IIT_divstring_from_index(tally_iit, index))); |
573 | 496 |
for (int position = 0; position < width; position++) { |
574 |
-#ifdef DEBUG2 |
|
575 |
- printf("Reading insertion offset: "); |
|
576 |
-#endif |
|
577 | 497 |
int insertion_offset = read_int(&bytes); |
578 |
-#ifdef DEBUG2 |
|
579 |
- printf("%d\n", insertion_offset); |
|
580 |
- printf("Reading deletion offset: "); |
|
581 |
-#endif |
|
582 | 498 |
int deletion_offset = read_int(&bytes); |
583 |
-#ifdef DEBUG2 |
|
584 |
- printf("%d\n", deletion_offset); |
|
585 |
- printf("Reading allele offset: "); |
|
586 |
-#endif |
|
587 | 499 |
int allele_offset = read_int(&bytes); |
588 |
-#ifdef DEBUG2 |
|
589 |
- printf("%d\n", allele_offset); |
|
590 |
- printf("Reading codon plus offset: "); |
|
591 |
-#endif |
|
592 | 500 |
int codon_plus_offset = read_int(&bytes); |
593 |
-#ifdef DEBUG2 |
|
594 |
- printf("%d\n", codon_plus_offset); |
|
595 |
- printf("Reading codon_minus offset: "); |
|
596 |
-#endif |
|
597 | 501 |
int codon_minus_offset = read_int(&bytes); |
598 |
-#ifdef DEBUG2 |
|
599 |
- printf("%d\n", codon_minus_offset); |
|
600 |
- printf("Reading 'next' offset: "); |
|
601 |
-#endif |
|
602 | 502 |
int next_offset = read_int(&bytes); |
603 |
-#ifdef DEBUG2 |
|
604 |
- printf("%d\n", next_offset); |
|
605 |
-#endif |
|
606 | 503 |
int ref_row = row; |
607 | 504 |
bytes -= 4; /* rewind from read-ahead */ |
608 | 505 |
if (codon_plus_offset - allele_offset > 0) |
... | ... |
@@ -616,12 +513,10 @@ static int parse_interval(IIT_T tally_iit, int index, |
616 | 513 |
param, tally, false); |
617 | 514 |
|
618 | 515 |
if (codon_minus_offset - codon_plus_offset > 0) { |
619 |
-// ref_row = row; |
|
620 | 516 |
row += parse_alleles(base + codon_plus_offset, row, row, |
621 | 517 |
param, tally, CODON_PLUS); |
622 | 518 |
} |
623 | 519 |
if (next_offset - codon_minus_offset > 0) { |
624 |
-// ref_row = row; |
|
625 | 520 |
row += parse_alleles(base + codon_minus_offset, row, row, |
626 | 521 |
param, tally, CODON_MINUS); |
627 | 522 |
} |
... | ... |
@@ -640,17 +535,9 @@ static SEXP parse_all(IIT_T tally_iit, TallyParam param) |
640 | 535 |
int n_rows = 0; |
641 | 536 |
/* loop over the IIT, getting the total number of rows |
642 | 537 |
this is (num alts + 1) for every position. */ |
643 |
-#ifdef DEBUG2 |
|
644 |
- printf("Total number of intervals: %d\n", IIT_total_nintervals(tally_iit)); |
|
645 |
-#endif |
|
646 | 538 |
for (int index = 1; index <= IIT_total_nintervals(tally_iit); index++) { |
647 | 539 |
n_rows += count_rows_for_interval(tally_iit, index); |
648 | 540 |
} |
649 |
- |
|
650 |
-#ifdef DEBUG2 |
|
651 |
- printf("Total number of rows: %d\n", n_rows); |
|
652 |
-#endif |
|
653 |
- |
|
654 | 541 |
|
655 | 542 |
SEXP tally_R; |
656 | 543 |
PROTECT(tally_R = R_TallyTable_new(n_rows, param.n_cycle_bins, param.xs)); |
... | ... |
@@ -699,26 +586,27 @@ static SEXP parse_some(IIT_T tally_iit, TallyParam param, |
699 | 586 |
/* FORMAT |
700 | 587 |
|
701 | 588 |
block header: |
702 |
- [0][ins][del][mm] |
|
589 |
+ [0][ins][del][mm][codon+][codon-] |
|
703 | 590 |
|
704 |
- mismatches for each position: |
|
705 |
- [t+][t-][rn][r+][r-]([an][a+][a-])*[\0]([c#]([cv][cc])*)*([q#]([qv][qc])*)* |
|
591 |
+ mismatches/codons: |
|
592 |
+ [t+][t-][rn][r+][r-]([an][a+][a-])* |
|
593 |
+ mismatches only: |
|
594 |
+ '_'[d+][d-][\0]([c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*)* |
|
706 | 595 |
|
707 | 596 |
insertions: |
708 |
- [i#]([seq][t+][t-][c#]([cv][cc])*)* |
|
597 |
+ [i#]([t+][t-][r+][r-][seq][c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])* |
|
709 | 598 |
|
710 | 599 |
deletions: |
711 |
- [d#]([seq][t+][t-][c#]([cv][cc])*)* |
|
600 |
+ [d#]([t+][t-][r+][r-][seq][c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])* |
|
712 | 601 |
*/ |
713 | 602 |
|
714 | 603 |
SEXP R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R, |
715 |
- SEXP high_base_quality_R, SEXP which_R, |
|
716 |
- SEXP read_length_R, SEXP xs_R) |
|
604 |
+ SEXP which_R, |
|
605 |
+ SEXP read_length_R, SEXP xs_R, SEXP high_nm_score_R) |
|
717 | 606 |
{ |
718 | 607 |
IIT_T tally_iit = (IIT_T) R_ExternalPtrAddr(tally_iit_R); |
719 | 608 |
SEXP tally_R; |
720 | 609 |
TallyParam param; |
721 |
- param.high_base_quality = asInteger(high_base_quality_R); |
|
722 | 610 |
param.cycle_breaks = |
723 | 611 |
cycle_breaks_R == R_NilValue ? NULL : INTEGER(cycle_breaks_R); |
724 | 612 |
param.n_cycle_bins = |
... | ... |
@@ -730,6 +618,7 @@ SEXP R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R, |
730 | 618 |
param.mdfne_buf = NULL; |
731 | 619 |
} |
732 | 620 |
param.xs = asLogical(xs_R); |
621 |
+ param.high_nm_score = asInteger(high_nm_score_R); |
|
733 | 622 |
|
734 | 623 |
if (which_R == R_NilValue) { |
735 | 624 |
tally_R = parse_all(tally_iit, param); |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110039 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-//#define DEBUG2 1 |
|
1 |
+#define DEBUG2 1 |
|
2 | 2 |
|
3 | 3 |
#include <stdlib.h> /* for abs */ |
4 | 4 |
#include <string.h> /* for strlen */ |
... | ... |
@@ -6,11 +6,17 @@ |
6 | 6 |
#include "iit.h" |
7 | 7 |
#include "bytestream.h" |
8 | 8 |
|
9 |
-enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, |
|
10 |
- COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, |
|
11 |
- MEAN_QUALITY, MEAN_QUALITY_REF, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, |
|
12 |
- COUNT_MINUS_REF, READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, |
|
13 |
- READ_POS_VAR_REF, MDFNE, MDFNE_REF, CODON_STRAND, N_BASE_COLS }; |
|
9 |
+/*I changed the order of these, because the XS spots are optional...*/ |
|
10 |
+enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, //8 |
|
11 |
+ COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, //12 |
|
12 |
+ MEAN_QUALITY, MEAN_QUALITY_REF, //14 |
|
13 |
+ COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, //17 |
|
14 |
+ COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS, //21 |
|
15 |
+ READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, //24 |
|
16 |
+ READ_POS_VAR_REF, MDFNE, MDFNE_REF, CODON_STRAND, //28 |
|
17 |
+ COUNT_XS_PLUS, COUNT_XS_PLUS_REF, //30 |
|
18 |
+ COUNT_XS_MINUS, COUNT_XS_MINUS_REF, N_BASE_COLS }; //33 |
|
19 |
+ |
|
14 | 20 |
|
15 | 21 |
enum{ CODON_MINUS = -1, |
16 | 22 |
NON_CODON, |
... | ... |
@@ -52,12 +58,18 @@ typedef struct TallyTable { |
52 | 58 |
int *count_plus_ref; |
53 | 59 |
int *count_minus; |
54 | 60 |
int *count_minus_ref; |
61 |
+ int *delcount_plus; |
|
62 |
+ int *delcount_minus; |
|
55 | 63 |
double *read_pos_mean; |
56 | 64 |
double *read_pos_mean_ref; |
57 | 65 |
double *read_pos_var; |
58 | 66 |
double *read_pos_var_ref; |
59 | 67 |
double *mdfne; |
60 | 68 |
double *mdfne_ref; |
69 |
+ int *count_xs_plus; |
|
70 |
+ int *count_xs_plus_ref; |
|
71 |
+ int *count_xs_minus; |
|
72 |
+ int *count_xs_minus_ref; |
|
61 | 73 |
int *strand; |
62 | 74 |
int **cycle_bins; |
63 | 75 |
} TallyTable; |
... | ... |
@@ -68,9 +80,10 @@ typedef struct TallyParam { |
68 | 80 |
int high_base_quality; |
69 | 81 |
int read_length; |
70 | 82 |
double *mdfne_buf; |
83 |
+ bool xs; |
|
71 | 84 |
} TallyParam; |
72 | 85 |
|
73 |
-static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) { |
|
86 |
+static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) { |
|
74 | 87 |
SEXP tally_R; /* the result list */ |
75 | 88 |
PROTECT(tally_R = allocVector(VECSXP, N_BASE_COLS + n_cycle_bins)); |
76 | 89 |
|
... | ... |
@@ -99,6 +112,14 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) { |
99 | 112 |
SET_VECTOR_ELT(tally_R, MDFNE, allocVector(REALSXP, n_rows)); |
100 | 113 |
SET_VECTOR_ELT(tally_R, MDFNE_REF, allocVector(REALSXP, n_rows)); |
101 | 114 |
SET_VECTOR_ELT(tally_R, CODON_STRAND, allocVector(INTSXP, n_rows)); |
115 |
+ SET_VECTOR_ELT(tally_R, DELCOUNT_PLUS, allocVector(INTSXP, n_rows)); |
|
116 |
+ SET_VECTOR_ELT(tally_R, DELCOUNT_MINUS, allocVector(INTSXP, n_rows)); |
|
117 |
+ SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows)); |
|
118 |
+ SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows)); |
|
119 |
+ SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows)); |
|
120 |
+ SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows)); |
|
121 |
+ |
|
122 |
+ |
|
102 | 123 |
for (int bin = 0; bin < n_cycle_bins; bin++) { |
103 | 124 |
SEXP cycle_bin_R = allocVector(INTSXP, n_rows); |
104 | 125 |
SET_VECTOR_ELT(tally_R, bin + N_BASE_COLS, cycle_bin_R); |
... | ... |
@@ -108,7 +129,7 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) { |
108 | 129 |
return tally_R; |
109 | 130 |
} |
110 | 131 |
|
111 |
-static TallyTable *TallyTable_new(SEXP tally_R) { |
|
132 |
+static TallyTable *TallyTable_new(SEXP tally_R, bool xs) { |
|
112 | 133 |
TallyTable *tally = (TallyTable *) R_alloc(sizeof(TallyTable), 1); |
113 | 134 |
int n_cycle_bins = length(tally_R) - N_BASE_COLS; |
114 | 135 |
|
... | ... |
@@ -137,7 +158,15 @@ static TallyTable *TallyTable_new(SEXP tally_R) { |
137 | 158 |
tally->mdfne = REAL(VECTOR_ELT(tally_R, MDFNE)); |
138 | 159 |
tally->mdfne_ref = REAL(VECTOR_ELT(tally_R, MDFNE_REF)); |
139 | 160 |
tally->strand = INTEGER(VECTOR_ELT(tally_R, CODON_STRAND)); |
161 |
+ tally->delcount_plus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_PLUS)); |
|
162 |
+ tally->delcount_minus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_MINUS)); |
|
140 | 163 |
tally->cycle_bins = (int **) R_alloc(sizeof(int*), n_cycle_bins); |
164 |
+ tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS)); |
|
165 |
+ tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF)); |
|
166 |
+ tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS)); |
|
167 |
+ tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF)); |
|
168 |
+ |
|
169 |
+ |
|
141 | 170 |
for (int bin = 0; bin < n_cycle_bins; bin++) { |
142 | 171 |
tally->cycle_bins[bin] = INTEGER(VECTOR_ELT(tally_R, bin + N_BASE_COLS)); |
143 | 172 |
} |
... | ... |
@@ -152,6 +181,30 @@ read_total_counts(unsigned char **bytes, int row, int *count_total) { |
152 | 181 |
count_total[row] = count_total_plus + count_total_minus; |
153 | 182 |
} |
154 | 183 |
|
184 |
+static void |
|
185 |
+read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus, |
|
186 |
+ int *count_xs_minus) |
|
187 |
+{ |
|
188 |
+ int n_xs = read_int(bytes); |
|
189 |
+ count_xs_plus[row] = 0; |
|
190 |
+ count_xs_minus[row] = 0; |
|
191 |
+ for (int index = 0; index < n_xs; index++) { |
|
192 |
+ int xs = read_int(bytes); |
|
193 |
+ int count = read_int(bytes); |
|
194 |
+ if (xs == 1) { |
|
195 |
+ count_xs_plus[row] = count; |
|
196 |
+ } else if (xs == 2) { |
|
197 |
+ count_xs_minus[row] = count; |
|
198 |
+ } |
|
199 |
+ } |
|
200 |
+#ifdef DEBUG2 |
|
201 |
+ printf("row %d xs counts: %d plus %d minus", row, count_xs_plus[row], count_xs_minus[row]); |
|
202 |
+#endif |
|
203 |
+} |
|
204 |
+ |
|
205 |
+ |
|
206 |
+ |
|
207 |
+ |
|
155 | 208 |
static void |
156 | 209 |
read_cycle_counts(unsigned char **bytes, int row, TallyParam param, |
157 | 210 |
int *n_cycles, double *read_pos_mean, double *read_pos_var, |
... | ... |
@@ -221,6 +274,9 @@ parse_indels(unsigned char *bytes, int row, |
221 | 274 |
TallyParam param, TallyTable *tally, bool insertion) |
222 | 275 |
{ |
223 | 276 |
int indel_count = read_int(&bytes); |
277 |
+#ifdef DEBUG2 |
|
278 |
+ printf("row %d indel count: %d", row, indel_count); |
|
279 |
+#endif |
|
224 | 280 |
for (int indel = 0; indel < indel_count; indel++, row++) { |
225 | 281 |
for (int b = 0; b < param.n_cycle_bins; b++) { |
226 | 282 |
tally->cycle_bins[b][row] = 0; |
... | ... |
@@ -231,7 +287,7 @@ parse_indels(unsigned char *b |