Browse code

stream past XS counts for indels (even when XS counting is disabled)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@113152 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 01/02/2016 20:11:05
Showing 1 changed files
... ...
@@ -195,19 +195,20 @@ read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus,
195 195
                int *count_xs_minus)
196 196
 {
197 197
     int n_xs = read_int(bytes);
198
-    if (count_xs_plus == NULL) {
199
-        return;
198
+    if (count_xs_plus != NULL) {
199
+        count_xs_plus[row] = 0;
200
+        count_xs_minus[row] = 0;        
200 201
     }
201
-    count_xs_plus[row] = 0;
202
-    count_xs_minus[row] = 0;
203 202
     for (int index = 0; index < n_xs; index++) {
204 203
 	int xs = read_int(bytes);
205 204
 	int count = read_int(bytes);
206
-	if (xs == 1) {
207
-	    count_xs_plus[row] = count;
208
-	} else if (xs == 2) {
209
-	    count_xs_minus[row] = count;
210
-	} 
205
+        if (count_xs_plus != NULL) {
206
+            if (xs == 1) {
207
+                count_xs_plus[row] = count;
208
+            } else if (xs == 2) {
209
+                count_xs_minus[row] = count;
210
+            }
211
+        }
211 212
     }
212 213
 }
213 214
 
Browse code

add raw total count statistic to tallies (everything else now qual filtered)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110891 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 24/11/2015 22:19:59
Showing 1 changed files
... ...
@@ -6,9 +6,8 @@
6 6
 #include "iit.h"
7 7
 #include "bytestream.h"
8 8
 
9
-/*I changed the order of these, because the XS spots are optional...*/
10 9
 enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF,
11
-       COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS,
10
+       RAW_COUNT_TOTAL, COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS,
12 11
        COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS,
13 12
        READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR,
14 13
        READ_POS_VAR_REF, MDFNE, MDFNE_REF,  CODON_STRAND,
... ...
@@ -41,6 +40,7 @@ typedef struct TallyTable {
41 40
   int *n_cycles_ref;
42 41
   int *count;
43 42
   int *count_ref;
43
+  int *raw_count_total;
44 44
   int *count_total;
45 45
   int *count_plus;
46 46
   int *count_plus_ref;
... ...
@@ -84,6 +84,7 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
84 84
   SET_VECTOR_ELT(tally_R, N_CYCLES_REF, allocVector(INTSXP, n_rows));
85 85
   SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows));
86 86
   SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows));
87
+  SET_VECTOR_ELT(tally_R, RAW_COUNT_TOTAL, allocVector(INTSXP, n_rows));
87 88
   SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows));
88 89
   SET_VECTOR_ELT(tally_R, COUNT_PLUS, allocVector(INTSXP, n_rows));
89 90
   SET_VECTOR_ELT(tally_R, COUNT_PLUS_REF, allocVector(INTSXP, n_rows));
... ...
@@ -128,6 +129,7 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
128 129
   tally->n_cycles_ref = INTEGER(VECTOR_ELT(tally_R, N_CYCLES_REF));
129 130
   tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT));
130 131
   tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF));
132
+  tally->raw_count_total = INTEGER(VECTOR_ELT(tally_R, RAW_COUNT_TOTAL));
131 133
   tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL));
132 134
   tally->count_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS));
133 135
   tally->count_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS_REF));
... ...
@@ -163,7 +165,10 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
163 165
 }
164 166
 
165 167
 static void
166
-read_total_counts(unsigned char **bytes, int row, int *count_total) {
168
+read_total_counts(unsigned char **bytes, int row, int *raw_count_total,
169
+                  int *count_total)
170
+{
171
+  raw_count_total[row] = read_int(bytes);
167 172
   int count_total_plus = read_int(bytes);
168 173
   int count_total_minus = read_int(bytes);
169 174
   count_total[row] = count_total_plus + count_total_minus;
... ...
@@ -286,7 +291,8 @@ parse_indels(unsigned char *bytes, int row,
286 291
     tally->count_minus_ref[row] = read_int(&bytes);
287 292
     tally->count_ref[row] = tally->count_plus_ref[row] +
288 293
 	tally->count_minus_ref[row];
289
-    tally->count_total[row] = tally->count_ref[row] + tally->count[row];
294
+    tally->raw_count_total[row] = tally->count_ref[row] + tally->count[row];
295
+    tally->count_total[row] = tally->raw_count_total[row];
290 296
     tally->strand[row] = STRAND_NONE;
291 297
     SEXP seq_R = mkChar(read_string(&bytes));
292 298
     if (insertion) {
... ...
@@ -354,8 +360,7 @@ static int
354 360
 parse_alleles(unsigned char *bytes, int row, int ref_row,
355 361
               TallyParam param, TallyTable *tally, int strand)
356 362
 {
357
-
358
-  read_total_counts(&bytes, row, tally->count_total);
363
+  read_total_counts(&bytes, row, tally->raw_count_total, tally->count_total);
359 364
   int delcount_plus = 0, delcount_minus = 0;
360 365
   int n_alleles = read_allele_counts(&bytes, row, tally->read_R,
361 366
                                      tally->count_plus, tally->count_minus,
... ...
@@ -383,6 +388,7 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
383 388
       tally->read_pos_mean[row] = R_NaN;
384 389
       tally->read_pos_var[row] = NA_REAL;
385 390
       tally->mdfne[row] = NA_REAL;
391
+      tally->count_high_nm[row] = 0;
386 392
       if (param.xs) {
387 393
         tally->count_xs_plus[row] = 0;
388 394
         tally->count_xs_minus[row] = 0;
... ...
@@ -391,6 +397,7 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
391 397
   }
392 398
   for (int r = ref_row; r < row; r++) {
393 399
     tally->n_cycles_ref[r] = tally->n_cycles[ref_row];
400
+    tally->raw_count_total[r] = tally->raw_count_total[ref_row];
394 401
     tally->count_total[r] = tally->count_total[ref_row];
395 402
     tally->count_plus_ref[r] = tally->count_plus[ref_row];
396 403
     tally->count_minus_ref[r] = tally->count_minus[ref_row];
... ...
@@ -430,7 +437,7 @@ static int parse_indel_count(unsigned char *bytes) {
430 437
 
431 438
 static int parse_allele_count(unsigned char *bytes) {
432 439
   int n_alleles = 1; /* always have a reference */
433
-  bytes += sizeof(int) * 4 + 1; /* skip total and reference */
440
+  bytes += sizeof(int) * 5 + 1; /* skip total and reference */
434 441
   while(bytes[0] != '\0') {
435 442
       if (bytes[0] != '_')
436 443
           n_alleles++;
Browse code

fixes for bam_tally update

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110596 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 13/11/2015 22:40:27
Showing 1 changed files
... ...
@@ -16,9 +16,9 @@ enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF,
16 16
        COUNT_XS_MINUS, COUNT_XS_MINUS_REF,
17 17
        COUNT_HIGH_NM, COUNT_HIGH_NM_REF, N_BASE_COLS };
18 18
 
19
-enum { CODON_PLUS = 1,
20
-       CODON_MINUS,
21
-       NON_CODON };
19
+enum { STRAND_PLUS = 1,
20
+       STRAND_MINUS,
21
+       STRAND_NONE };
22 22
 
23 23
 static char *codon_table[64] = 
24 24
   {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT",
... ...
@@ -287,7 +287,7 @@ parse_indels(unsigned char *bytes, int row,
287 287
     tally->count_ref[row] = tally->count_plus_ref[row] +
288 288
 	tally->count_minus_ref[row];
289 289
     tally->count_total[row] = tally->count_ref[row] + tally->count[row];
290
-    tally->strand[row] = NON_CODON;
290
+    tally->strand[row] = STRAND_NONE;
291 291
     SEXP seq_R = mkChar(read_string(&bytes));
292 292
     if (insertion) {
293 293
       SET_STRING_ELT(tally->read_R, row, seq_R);
... ...
@@ -328,7 +328,7 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
328 328
 {
329 329
   int n_alleles = 0;
330 330
   char allele;
331
-  char stop = strand == 0 ? '\0' : (char) 255;
331
+  char stop = strand == STRAND_NONE ? '\0' : (char) 255;
332 332
   
333 333
   while((allele = (char)read_char(bytes)) != stop) {
334 334
     if (allele == '_') {
... ...
@@ -336,7 +336,7 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
336 336
         *delcount_minus = read_int(bytes);
337 337
         continue;
338 338
     }
339
-    if(strand == 0)
339
+    if(strand == STRAND_NONE)
340 340
        SET_STRING_ELT(read_R, row, mkCharLen(&allele, 1));
341 341
     else
342 342
        SET_STRING_ELT(read_R, row, mkCharLen(codon_table[(int)allele], 3));
... ...
@@ -504,7 +504,7 @@ static int parse_interval(IIT_T tally_iit, int index,
504 504
     bytes -= 4; /* rewind from read-ahead */
505 505
     if (codon_plus_offset - allele_offset > 0)
506 506
       row += parse_alleles(base + allele_offset, row, ref_row,
507
-                           param, tally, NON_CODON);
507
+                           param, tally, STRAND_NONE);
508 508
     if (deletion_offset - insertion_offset > 0) 
509 509
       row += parse_indels(base + insertion_offset, row,
510 510
                           param, tally, true);
... ...
@@ -514,11 +514,11 @@ static int parse_interval(IIT_T tally_iit, int index,
514 514
 
515 515
     if (codon_minus_offset - codon_plus_offset > 0) {
516 516
       row += parse_alleles(base + codon_plus_offset, row, row,
517
-                           param, tally, CODON_PLUS);
517
+                           param, tally, STRAND_PLUS);
518 518
     }
519 519
     if (next_offset - codon_minus_offset > 0) { 
520 520
       row += parse_alleles(base + codon_minus_offset, row, row,
521
-                           param, tally, CODON_MINUS);
521
+                           param, tally, STRAND_MINUS);
522 522
     }
523 523
     /* fill in position information */
524 524
     for (int r = ref_row; r < row; r++) {
Browse code

work towards supporting new bam_tally; drops quality info (now cutoff based), adds xs/nm/del and codon-level counts

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110593 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 13/11/2015 21:50:11
Showing 1 changed files
... ...
@@ -1,4 +1,4 @@
1
-#define DEBUG2 1
1
+#define DEBUG2 0
2 2
 
3 3
 #include <stdlib.h> /* for abs */
4 4
 #include <string.h> /* for strlen */
... ...
@@ -7,21 +7,18 @@
7 7
 #include "bytestream.h"
8 8
 
9 9
 /*I changed the order of these, because the XS spots are optional...*/
10
-enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, //8
11
-       COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, //12
12
-       MEAN_QUALITY, MEAN_QUALITY_REF, //14
13
-       COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, //17
14
-       COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS, //21
15
-       READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, //24
16
-       READ_POS_VAR_REF, MDFNE, MDFNE_REF,  CODON_STRAND, //28
17
-       COUNT_XS_PLUS, COUNT_XS_PLUS_REF, //30
18
-       COUNT_XS_MINUS, COUNT_XS_MINUS_REF, N_BASE_COLS }; //33
19
-
20
-
21
-enum{ CODON_MINUS = -1,
22
-      NON_CODON,
23
-      CODON_PLUS};
24
-
10
+enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF,
11
+       COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS,
12
+       COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS,
13
+       READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR,
14
+       READ_POS_VAR_REF, MDFNE, MDFNE_REF,  CODON_STRAND,
15
+       COUNT_XS_PLUS, COUNT_XS_PLUS_REF,
16
+       COUNT_XS_MINUS, COUNT_XS_MINUS_REF,
17
+       COUNT_HIGH_NM, COUNT_HIGH_NM_REF, N_BASE_COLS };
18
+
19
+enum { CODON_PLUS = 1,
20
+       CODON_MINUS,
21
+       NON_CODON };
25 22
 
26 23
 static char *codon_table[64] = 
27 24
   {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT",
... ...
@@ -33,10 +30,6 @@ static char *codon_table[64] =
33 30
    "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT",
34 31
    "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
35 32
 
36
-
37
-
38
-
39
-
40 33
 static int NUM_PTRS = 5;
41 34
 
42 35
 typedef struct TallyTable {
... ...
@@ -49,11 +42,6 @@ typedef struct TallyTable {
49 42
   int *count;
50 43
   int *count_ref;
51 44
   int *count_total;
52
-  int *high_quality;
53
-  int *high_quality_ref;
54
-  int *high_quality_total;
55
-  double *mean_quality;
56
-  double *mean_quality_ref;
57 45
   int *count_plus;
58 46
   int *count_plus_ref;
59 47
   int *count_minus;
... ...
@@ -70,6 +58,8 @@ typedef struct TallyTable {
70 58
   int *count_xs_plus_ref;
71 59
   int *count_xs_minus;
72 60
   int *count_xs_minus_ref;
61
+  int *count_high_nm;
62
+  int *count_high_nm_ref;
73 63
   int *strand;
74 64
   int **cycle_bins;
75 65
 } TallyTable;
... ...
@@ -77,16 +67,15 @@ typedef struct TallyTable {
77 67
 typedef struct TallyParam {
78 68
   int *cycle_breaks;
79 69
   int n_cycle_bins;
80
-  int high_base_quality;
81 70
   int read_length;
82 71
   double *mdfne_buf;
83 72
   bool xs;
73
+  int high_nm_score;
84 74
 } TallyParam;
85 75
 
86 76
 static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
87 77
   SEXP tally_R; /* the result list */
88 78
   PROTECT(tally_R = allocVector(VECSXP, N_BASE_COLS + n_cycle_bins));
89
-  
90 79
   SET_VECTOR_ELT(tally_R, SEQNAMES, allocVector(STRSXP, n_rows));
91 80
   SET_VECTOR_ELT(tally_R, POS, allocVector(INTSXP, n_rows));
92 81
   SET_VECTOR_ELT(tally_R, REF, allocVector(STRSXP, n_rows));
... ...
@@ -96,11 +85,6 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
96 85
   SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows));
97 86
   SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows));
98 87
   SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows));
99
-  SET_VECTOR_ELT(tally_R, HIGH_QUALITY, allocVector(INTSXP, n_rows));
100
-  SET_VECTOR_ELT(tally_R, HIGH_QUALITY_REF, allocVector(INTSXP, n_rows));
101
-  SET_VECTOR_ELT(tally_R, HIGH_QUALITY_TOTAL, allocVector(INTSXP, n_rows));
102
-  SET_VECTOR_ELT(tally_R, MEAN_QUALITY, allocVector(REALSXP, n_rows));
103
-  SET_VECTOR_ELT(tally_R, MEAN_QUALITY_REF, allocVector(REALSXP, n_rows));
104 88
   SET_VECTOR_ELT(tally_R, COUNT_PLUS, allocVector(INTSXP, n_rows));
105 89
   SET_VECTOR_ELT(tally_R, COUNT_PLUS_REF, allocVector(INTSXP, n_rows));
106 90
   SET_VECTOR_ELT(tally_R, COUNT_MINUS, allocVector(INTSXP, n_rows));
... ...
@@ -114,12 +98,15 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
114 98
   SET_VECTOR_ELT(tally_R, CODON_STRAND, allocVector(INTSXP, n_rows));
115 99
   SET_VECTOR_ELT(tally_R, DELCOUNT_PLUS, allocVector(INTSXP, n_rows));
116 100
   SET_VECTOR_ELT(tally_R, DELCOUNT_MINUS, allocVector(INTSXP, n_rows));
117
-  SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows));
118
-  SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows));
119
-  SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows));
120
-  SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows));
121
- 
122
-
101
+  if (xs) {
102
+      SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows));
103
+      SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows));
104
+      SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows));
105
+      SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows));
106
+  }
107
+  SET_VECTOR_ELT(tally_R, COUNT_HIGH_NM, allocVector(INTSXP, n_rows));
108
+  SET_VECTOR_ELT(tally_R, COUNT_HIGH_NM_REF, allocVector(INTSXP, n_rows));
109
+  
123 110
   for (int bin = 0; bin < n_cycle_bins; bin++) {
124 111
     SEXP cycle_bin_R = allocVector(INTSXP, n_rows);
125 112
     SET_VECTOR_ELT(tally_R, bin + N_BASE_COLS, cycle_bin_R);
... ...
@@ -130,7 +117,7 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
130 117
 }
131 118
 
132 119
 static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
133
-  TallyTable *tally = (TallyTable *) R_alloc(sizeof(TallyTable), 1);
120
+  TallyTable *tally = (TallyTable *) S_alloc(sizeof(TallyTable), 1);
134 121
   int n_cycle_bins = length(tally_R) - N_BASE_COLS;
135 122
   
136 123
   tally->seqnames_R = VECTOR_ELT(tally_R, SEQNAMES);
... ...
@@ -142,11 +129,6 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
142 129
   tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT));
143 130
   tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF));
144 131
   tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL));
145
-  tally->high_quality = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY));
146
-  tally->high_quality_ref = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY_REF));
147
-  tally->high_quality_total = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY_TOTAL));
148
-  tally->mean_quality = REAL(VECTOR_ELT(tally_R, MEAN_QUALITY));
149
-  tally->mean_quality_ref = REAL(VECTOR_ELT(tally_R, MEAN_QUALITY_REF));
150 132
   tally->count_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS));
151 133
   tally->count_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS_REF));
152 134
   tally->count_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_MINUS));
... ...
@@ -161,12 +143,18 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
161 143
   tally->delcount_plus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_PLUS));
162 144
   tally->delcount_minus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_MINUS));
163 145
   tally->cycle_bins = (int **) R_alloc(sizeof(int*), n_cycle_bins);
164
-  tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS));
165
-  tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF));
166
-  tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS));
167
-  tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF));
146
+  if (xs) {
147
+      tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS));
148
+      tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R,
149
+                                                    COUNT_XS_PLUS_REF));
150
+      tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS));
151
+      tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R,
152
+                                                     COUNT_XS_MINUS_REF));
153
+  }
154
+  tally->count_high_nm = INTEGER(VECTOR_ELT(tally_R, COUNT_HIGH_NM));
155
+  tally->count_high_nm_ref = INTEGER(VECTOR_ELT(tally_R,
156
+                                                COUNT_HIGH_NM_REF));
168 157
   
169
-
170 158
   for (int bin = 0; bin < n_cycle_bins; bin++) {
171 159
     tally->cycle_bins[bin] = INTEGER(VECTOR_ELT(tally_R, bin + N_BASE_COLS));
172 160
   }
... ...
@@ -181,11 +169,30 @@ read_total_counts(unsigned char **bytes, int row, int *count_total) {
181 169
   count_total[row] = count_total_plus + count_total_minus;
182 170
 }
183 171
 
172
+static void
173
+read_nm_counts(unsigned char **bytes, int row, int *count_high_nm,
174
+               int high_nm_score)
175
+{
176
+    int n_nm = read_int(bytes);
177
+    count_high_nm[row] = 0;
178
+    /* FIXME: currently bam_tally gives us counts whether we want them or not */
179
+    for (int index = 0; index < n_nm; index++) {
180
+        int nm = read_int(bytes);
181
+	int count = read_int(bytes);
182
+        if (nm >= high_nm_score) {
183
+            count_high_nm[row] += count;
184
+        }
185
+    }
186
+}
187
+
184 188
 static void
185 189
 read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus,
186 190
                int *count_xs_minus)
187 191
 {
188 192
     int n_xs = read_int(bytes);
193
+    if (count_xs_plus == NULL) {
194
+        return;
195
+    }
189 196
     count_xs_plus[row] = 0;
190 197
     count_xs_minus[row] = 0;
191 198
     for (int index = 0; index < n_xs; index++) {
... ...
@@ -197,14 +204,8 @@ read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus,
197 204
 	    count_xs_minus[row] = count;
198 205
 	} 
199 206
     }
200
-#ifdef DEBUG2
201
-    printf("row %d xs counts: %d plus %d minus", row, count_xs_plus[row], count_xs_minus[row]);
202
-#endif
203 207
 }
204 208
 
205
-
206
-
207
-
208 209
 static void
209 210
 read_cycle_counts(unsigned char **bytes, int row, TallyParam param,
210 211
                   int *n_cycles, double *read_pos_mean, double *read_pos_var,
... ...
@@ -274,9 +275,6 @@ parse_indels(unsigned char *bytes, int row,
274 275
              TallyParam param, TallyTable *tally, bool insertion)
275 276
 {
276 277
   int indel_count = read_int(&bytes);
277
-#ifdef DEBUG2
278
-  printf("row %d indel count: %d", row, indel_count);
279
-#endif
280 278
   for (int indel = 0; indel < indel_count; indel++, row++) {
281 279
     for (int b = 0; b < param.n_cycle_bins; b++) {
282 280
       tally->cycle_bins[b][row] = 0;
... ...
@@ -302,72 +300,48 @@ parse_indels(unsigned char *bytes, int row,
302 300
     read_cycle_counts(&bytes, row, param, tally->n_cycles,
303 301
                       tally->read_pos_mean, tally->read_pos_var,
304 302
                       tally->mdfne, tally->cycle_bins);
305
-    /* quality is per-base and so not relevant for indels */
306
-    tally->mean_quality[row] = NA_REAL;
307
-    tally->mean_quality_ref[row] = NA_REAL;
308
-    tally->high_quality[row] = NA_INTEGER;
309
-    tally->high_quality_ref[row] = NA_INTEGER;
310
-    tally->high_quality_total[row] = NA_INTEGER;
303
+    read_nm_counts(&bytes, row, tally->count_high_nm, param.high_nm_score);
304
+    read_xs_counts(&bytes, row, tally->count_xs_plus, tally->count_xs_minus);
311 305
     /* no position from which to tabulate the cycles */
312 306
     tally->n_cycles_ref[row] = NA_INTEGER;
313 307
     tally->read_pos_mean_ref[row] = NA_REAL;
314 308
     tally->read_pos_var_ref[row] = NA_REAL;
315 309
     tally->mdfne_ref[row] = NA_REAL;
316
-  }
317
-  return indel_count;
318
-}
319
-
320
-static void
321
-read_quality_counts(unsigned char **bytes, int row, int *high_quality,
322
-                    double *mean_quality, int high_base_quality)
323
-{
324
-  int n_qualities = read_int(bytes);
325
-  int total_quality = 0;
326
-  int total_quality_weight = 0;
327
-  high_quality[row] = 0;
328
-  for (int index = 0; index < n_qualities; index++) {
329
-    int quality = read_int(bytes);
330
-    int count = read_int(bytes);
331
-    if (quality >= high_base_quality) {
332
-	total_quality += quality * count;
333
-	total_quality_weight += count;
334
-	high_quality[row] += count;
310
+    /* overlapping deletion count only relevant for SNVs */
311
+    tally->delcount_plus[row] = NA_INTEGER;
312
+    tally->delcount_minus[row] = NA_INTEGER;
313
+    /* information not available for "ref" allele */
314
+    tally->count_high_nm_ref[row] = NA_INTEGER;
315
+    if (param.xs) {
316
+        tally->count_xs_plus_ref[row] = NA_INTEGER;
317
+        tally->count_xs_minus_ref[row] = NA_INTEGER;
335 318
     }
336 319
   }
337
-  mean_quality[row] = total_quality_weight > 0 ?
338
-    (double)total_quality / total_quality_weight : R_NaN;
320
+  return indel_count;
339 321
 }
340 322
 
341 323
 static int
342 324
 read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
343
-                   int *count_plus, int *count_minus, int *count, int strand)
325
+                   int *count_plus, int *count_minus, int *count,
326
+                   int *delcount_plus, int *delcount_minus,
327
+                   int strand)
344 328
 {
345 329
   int n_alleles = 0;
346 330
   char allele;
347 331
   char stop = strand == 0 ? '\0' : (char) 255;
348
-#ifdef DEBUG2
349
-  printf("Starting read_allele_counts (codon strand %d) at row %d. Total length of read_R is %d\n", strand, row, LENGTH(read_R));
350
-#endif
332
+  
351 333
   while((allele = (char)read_char(bytes)) != stop) {
352
-#ifdef DEBUG2
353
-      printf("Parsing counts for allele: %d (%s) row %d \n", allele, &allele, row );
354
-#endif
334
+    if (allele == '_') {
335
+        *delcount_plus = read_int(bytes);
336
+        *delcount_minus = read_int(bytes);
337
+        continue;
338
+    }
355 339
     if(strand == 0)
356 340
        SET_STRING_ELT(read_R, row, mkCharLen(&allele, 1));
357 341
     else
358 342
        SET_STRING_ELT(read_R, row, mkCharLen(codon_table[(int)allele], 3));
359
-#ifdef DEBUG2
360
-    printf("Reading allele plus count: ");
361
-#endif
362 343
     count_plus[row] = read_int(bytes);
363
-#ifdef DEBUG2
364
-    printf("%d\n", count_plus[row]);
365
-    printf("Reading allele minus count: ");
366
-#endif
367 344
     count_minus[row] = read_int(bytes);
368
-#ifdef DEBUG2
369
-    printf("%d\n", count_minus[row]);
370
-#endif
371 345
     count[row] = count_plus[row] + count_minus[row];
372 346
     row++;
373 347
     n_alleles++;
... ...
@@ -376,69 +350,35 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
376 350
   return n_alleles;
377 351
 }
378 352
 
379
-
380
-//format _(int)(int) 
381
-static void
382
-read_del_counts(unsigned char **bytes, int row, int *delcount_plus,
383
-                int *delcount_minus)
384
-{
385
-
386
-//This consumes a byte!!!
387
-    unsigned char mychar = read_char(bytes);
388
-#ifdef DEBUG2
389
-    printf("in read_del_counts, mchar is (as int) %d", mychar);
390
-#endif 
391
-
392
-    if(mychar == '_') {
393
-
394
-
395
-	*delcount_plus = read_int(bytes);
396
-	*delcount_minus = read_int(bytes);
397
-#ifdef DEBUG2 
398
-	printf("read_del_counts: '_' character detected. count plus %d count minus %d\n", *delcount_plus, *delcount_minus);
399
-#endif
400
-    } else {
401
-	//backtrack char consumption if it wasn't a "_"
402
-	*bytes = *bytes-1;
403
-	*delcount_plus = 0;
404
-	*delcount_minus = 0;
405
-    }
406
-    return;
407
-}
408
-
409 353
 static int
410 354
 parse_alleles(unsigned char *bytes, int row, int ref_row,
411 355
               TallyParam param, TallyTable *tally, int strand)
412 356
 {
413 357
 
414
-  bool have_ref_row = false;
415 358
   read_total_counts(&bytes, row, tally->count_total);
359
+  int delcount_plus = 0, delcount_minus = 0;
416 360
   int n_alleles = read_allele_counts(&bytes, row, tally->read_R,
417 361
                                      tally->count_plus, tally->count_minus,
418 362
 	                             tally->count,
363
+                                     &delcount_plus, &delcount_minus,
419 364
 	                             strand);
420
-  int delcount_plus, delcount_minus;
421
-/*  read_del_counts(&bytes, row, &delcount_plus, &delcount_minus);*/
422 365
   for (int allele = 0; allele < n_alleles; allele++, row++) {
423 366
     tally->n_cycles[row] = 0;
424 367
     for (int b = 0; b < param.n_cycle_bins; b++) {
425 368
       tally->cycle_bins[b][row] = 0;
426 369
     }
427
-    tally->high_quality[row] = 0;
428
-    tally->mean_quality[row] = R_NaN;
429 370
     tally->strand[row] = strand;
430
-    tally->delcount_plus[row] = R_NaN;//delcount_plus;
431
-    tally->delcount_minus[row] = R_NaN;//delcount_minus;
371
+    tally->delcount_plus[row] = delcount_plus;
372
+    tally->delcount_minus[row] = delcount_minus;
432 373
     if (tally->count[row] > 0) {
433 374
       read_cycle_counts(&bytes, row, param, tally->n_cycles,
434 375
                         tally->read_pos_mean, tally->read_pos_var,
435 376
                         tally->mdfne, tally->cycle_bins);
436
-      read_quality_counts(&bytes, row, tally->high_quality, tally->mean_quality,
437
-                          param.high_base_quality);
377
+      read_nm_counts(&bytes, row, tally->count_high_nm, param.high_nm_score);
438 378
       if (param.xs) {
439
-        read_xs_counts(&bytes, row, tally->count_xs_plus, tally->count_xs_minus);
379
+        read_xs_counts(&bytes, row, tally->count_xs_plus,
380
+                       tally->count_xs_minus);
440 381
       }
441
-
442 382
     } else {
443 383
       tally->read_pos_mean[row] = R_NaN;
444 384
       tally->read_pos_var[row] = NA_REAL;
... ...
@@ -446,27 +386,19 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
446 386
       if (param.xs) {
447 387
         tally->count_xs_plus[row] = 0;
448 388
         tally->count_xs_minus[row] = 0;
449
-      
450 389
       }
451 390
     }
452
-    have_ref_row = true;
453
-  }
454
-  int high_quality_total = 0;
455
-  for (int r = ref_row; r < row; r++) {
456
-    high_quality_total += tally->high_quality[r];
457 391
   }
458 392
   for (int r = ref_row; r < row; r++) {
459 393
     tally->n_cycles_ref[r] = tally->n_cycles[ref_row];
460 394
     tally->count_total[r] = tally->count_total[ref_row];
461
-    tally->mean_quality_ref[r] = tally->mean_quality[ref_row];
462
-    tally->high_quality_ref[r] = tally->high_quality[ref_row];
463
-    tally->high_quality_total[r] = high_quality_total;
464 395
     tally->count_plus_ref[r] = tally->count_plus[ref_row];
465 396
     tally->count_minus_ref[r] = tally->count_minus[ref_row];
466 397
     tally->count_ref[r] = tally->count_plus_ref[r] + tally->count_minus_ref[r];
467 398
     tally->read_pos_mean_ref[r] = tally->read_pos_mean[ref_row];
468 399
     tally->read_pos_var_ref[r] = tally->read_pos_var[ref_row];
469 400
     tally->mdfne_ref[r] = tally->mdfne[ref_row];
401
+    tally->count_high_nm_ref[r] = tally->count_high_nm[ref_row];
470 402
     if (param.xs) {
471 403
       tally->count_xs_plus_ref[r] = tally->count_xs_plus[ref_row];
472 404
       tally->count_xs_minus_ref[r] = tally->count_xs_minus[ref_row];
... ...
@@ -474,12 +406,9 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
474 406
     SET_STRING_ELT(tally->ref_R, r, STRING_ELT(tally->read_R, ref_row));  
475 407
   }
476 408
 
477
-//  if (have_ref_row) {
478 409
     /* clear the 'alt' columns for the 'ref' row with NAs */
479 410
     SET_STRING_ELT(tally->read_R, ref_row, NA_STRING);
480 411
     tally->n_cycles[ref_row] = NA_INTEGER;
481
-    tally->mean_quality[ref_row] = NA_REAL;
482
-    tally->high_quality[ref_row] = NA_REAL;
483 412
     tally->count_plus[ref_row] = NA_INTEGER;
484 413
     tally->count_minus[ref_row] = NA_INTEGER;
485 414
     tally->count[ref_row] = NA_INTEGER;
... ...
@@ -491,7 +420,6 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
491 420
       tally->count_xs_minus[ref_row] = NA_INTEGER;
492 421
     }
493 422
     
494
-// }
495 423
   return n_alleles;
496 424
 }
497 425
     
... ...
@@ -504,11 +432,9 @@ static int parse_allele_count(unsigned char *bytes) {
504 432
   int n_alleles = 1; /* always have a reference */
505 433
   bytes += sizeof(int) * 4 + 1; /* skip total and reference */
506 434
   while(bytes[0] != '\0') {
507
-#ifdef DEBUG2
508
-    printf("Found allele %s\n", &bytes[0]);
509
-#endif
510
-    bytes += sizeof(int) * 2 + 1;
511
-    n_alleles++;
435
+      if (bytes[0] != '_')
436
+          n_alleles++;
437
+      bytes += sizeof(int) * 2 + 1;
512 438
   }
513 439
   return n_alleles;
514 440
 }
... ...
@@ -518,9 +444,6 @@ static int parse_codon_count(unsigned char *bytes) {
518 444
   int n_alleles = 1; /* always have a reference */
519 445
   bytes += sizeof(int) * 4 + 1; /* skip reference */
520 446
   while((int) bytes[0] != 255 ){ // && bytes [0] != '\0') {
521
-#ifdef DEBUG2
522
-    printf("Found codon %d\n", bytes[0]);
523
-#endif
524 447
     bytes += sizeof(int) * 2 + 1;
525 448
     n_alleles++;
526 449
   }
... ...
@@ -571,38 +494,12 @@ static int parse_interval(IIT_T tally_iit, int index,
571 494
   SEXP divstring_R;
572 495
   PROTECT(divstring_R = mkChar(IIT_divstring_from_index(tally_iit, index)));
573 496
   for (int position = 0; position < width; position++) {
574
-#ifdef DEBUG2
575
-    printf("Reading insertion offset: ");
576
-#endif
577 497
     int insertion_offset = read_int(&bytes);
578
-#ifdef DEBUG2
579
-    printf("%d\n", insertion_offset);
580
-    printf("Reading deletion offset: ");
581
-#endif
582 498
     int deletion_offset = read_int(&bytes);
583
-#ifdef DEBUG2
584
-    printf("%d\n", deletion_offset);
585
-    printf("Reading allele offset: ");
586
-#endif
587 499
     int allele_offset = read_int(&bytes);
588
-#ifdef DEBUG2
589
-    printf("%d\n", allele_offset);
590
-    printf("Reading codon plus offset: ");
591
-#endif
592 500
     int codon_plus_offset = read_int(&bytes);
593
-#ifdef DEBUG2
594
-    printf("%d\n", codon_plus_offset);
595
-    printf("Reading codon_minus offset: ");
596
-#endif
597 501
     int codon_minus_offset = read_int(&bytes);
598
-#ifdef DEBUG2
599
-    printf("%d\n", codon_minus_offset);
600
-    printf("Reading 'next' offset: ");
601
-#endif
602 502
     int next_offset = read_int(&bytes);
603
-#ifdef DEBUG2
604
-    printf("%d\n", next_offset);
605
-#endif
606 503
     int ref_row = row;
607 504
     bytes -= 4; /* rewind from read-ahead */
608 505
     if (codon_plus_offset - allele_offset > 0)
... ...
@@ -616,12 +513,10 @@ static int parse_interval(IIT_T tally_iit, int index,
616 513
                           param, tally, false);
617 514
 
618 515
     if (codon_minus_offset - codon_plus_offset > 0) {
619
-//      ref_row = row;
620 516
       row += parse_alleles(base + codon_plus_offset, row, row,
621 517
                            param, tally, CODON_PLUS);
622 518
     }
623 519
     if (next_offset - codon_minus_offset > 0) { 
624
-//      ref_row = row;
625 520
       row += parse_alleles(base + codon_minus_offset, row, row,
626 521
                            param, tally, CODON_MINUS);
627 522
     }
... ...
@@ -640,17 +535,9 @@ static SEXP parse_all(IIT_T tally_iit, TallyParam param)
640 535
   int n_rows = 0;
641 536
   /* loop over the IIT, getting the total number of rows
642 537
      this is (num alts + 1) for every position. */
643
-#ifdef DEBUG2
644
-  printf("Total number of intervals: %d\n", IIT_total_nintervals(tally_iit));
645
-#endif
646 538
   for (int index = 1; index <= IIT_total_nintervals(tally_iit); index++) {
647 539
     n_rows += count_rows_for_interval(tally_iit, index);
648 540
   }
649
-
650
-#ifdef DEBUG2
651
-  printf("Total number of rows: %d\n", n_rows);
652
-#endif
653
-
654 541
   
655 542
   SEXP tally_R;
656 543
   PROTECT(tally_R = R_TallyTable_new(n_rows, param.n_cycle_bins, param.xs));
... ...
@@ -699,26 +586,27 @@ static SEXP parse_some(IIT_T tally_iit, TallyParam param,
699 586
 /* FORMAT
700 587
 
701 588
    block header:
702
-   [0][ins][del][mm]
589
+   [0][ins][del][mm][codon+][codon-]
703 590
 
704
-   mismatches for each position:
705
-   [t+][t-][rn][r+][r-]([an][a+][a-])*[\0]([c#]([cv][cc])*)*([q#]([qv][qc])*)*
591
+   mismatches/codons:
592
+   [t+][t-][rn][r+][r-]([an][a+][a-])*
593
+   mismatches only:
594
+   '_'[d+][d-][\0]([c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*)*
706 595
 
707 596
    insertions:
708
-   [i#]([seq][t+][t-][c#]([cv][cc])*)*
597
+   [i#]([t+][t-][r+][r-][seq][c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*
709 598
 
710 599
    deletions:
711
-   [d#]([seq][t+][t-][c#]([cv][cc])*)*
600
+   [d#]([t+][t-][r+][r-][seq][c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*
712 601
 */
713 602
 
714 603
 SEXP R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R,
715
-                       SEXP high_base_quality_R, SEXP which_R,
716
-                       SEXP read_length_R, SEXP xs_R)
604
+                       SEXP which_R,
605
+                       SEXP read_length_R, SEXP xs_R, SEXP high_nm_score_R)
717 606
 {
718 607
   IIT_T tally_iit = (IIT_T) R_ExternalPtrAddr(tally_iit_R);
719 608
   SEXP tally_R;
720 609
   TallyParam param;
721
-  param.high_base_quality = asInteger(high_base_quality_R);
722 610
   param.cycle_breaks =
723 611
     cycle_breaks_R == R_NilValue ? NULL : INTEGER(cycle_breaks_R);
724 612
   param.n_cycle_bins =
... ...
@@ -730,6 +618,7 @@ SEXP R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R,
730 618
     param.mdfne_buf = NULL;
731 619
   }
732 620
   param.xs = asLogical(xs_R);
621
+  param.high_nm_score = asInteger(high_nm_score_R);
733 622
   
734 623
   if (which_R == R_NilValue) {
735 624
     tally_R = parse_all(tally_iit, param);
Browse code

resurrect improvements that we reverted before the April release, expect breakage for a while

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110039 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 28/10/2015 18:51:00
Showing 1 changed files
... ...
@@ -1,4 +1,4 @@
1
-//#define DEBUG2 1
1
+#define DEBUG2 1
2 2
 
3 3
 #include <stdlib.h> /* for abs */
4 4
 #include <string.h> /* for strlen */
... ...
@@ -6,11 +6,17 @@
6 6
 #include "iit.h"
7 7
 #include "bytestream.h"
8 8
 
9
-enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF,
10
-       COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL,
11
-       MEAN_QUALITY, MEAN_QUALITY_REF, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS,
12
-       COUNT_MINUS_REF, READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR,
13
-       READ_POS_VAR_REF, MDFNE, MDFNE_REF, CODON_STRAND, N_BASE_COLS };
9
+/*I changed the order of these, because the XS spots are optional...*/
10
+enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, //8
11
+       COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, //12
12
+       MEAN_QUALITY, MEAN_QUALITY_REF, //14
13
+       COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, //17
14
+       COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS, //21
15
+       READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, //24
16
+       READ_POS_VAR_REF, MDFNE, MDFNE_REF,  CODON_STRAND, //28
17
+       COUNT_XS_PLUS, COUNT_XS_PLUS_REF, //30
18
+       COUNT_XS_MINUS, COUNT_XS_MINUS_REF, N_BASE_COLS }; //33
19
+
14 20
 
15 21
 enum{ CODON_MINUS = -1,
16 22
       NON_CODON,
... ...
@@ -52,12 +58,18 @@ typedef struct TallyTable {
52 58
   int *count_plus_ref;
53 59
   int *count_minus;
54 60
   int *count_minus_ref;
61
+  int *delcount_plus;
62
+  int *delcount_minus;
55 63
   double *read_pos_mean;
56 64
   double *read_pos_mean_ref;
57 65
   double *read_pos_var;
58 66
   double *read_pos_var_ref;
59 67
   double *mdfne;
60 68
   double *mdfne_ref;
69
+  int *count_xs_plus;
70
+  int *count_xs_plus_ref;
71
+  int *count_xs_minus;
72
+  int *count_xs_minus_ref;
61 73
   int *strand;
62 74
   int **cycle_bins;
63 75
 } TallyTable;
... ...
@@ -68,9 +80,10 @@ typedef struct TallyParam {
68 80
   int high_base_quality;
69 81
   int read_length;
70 82
   double *mdfne_buf;
83
+  bool xs;
71 84
 } TallyParam;
72 85
 
73
-static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) {
86
+static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
74 87
   SEXP tally_R; /* the result list */
75 88
   PROTECT(tally_R = allocVector(VECSXP, N_BASE_COLS + n_cycle_bins));
76 89
   
... ...
@@ -99,6 +112,14 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) {
99 112
   SET_VECTOR_ELT(tally_R, MDFNE, allocVector(REALSXP, n_rows));
100 113
   SET_VECTOR_ELT(tally_R, MDFNE_REF, allocVector(REALSXP, n_rows));
101 114
   SET_VECTOR_ELT(tally_R, CODON_STRAND, allocVector(INTSXP, n_rows));
115
+  SET_VECTOR_ELT(tally_R, DELCOUNT_PLUS, allocVector(INTSXP, n_rows));
116
+  SET_VECTOR_ELT(tally_R, DELCOUNT_MINUS, allocVector(INTSXP, n_rows));
117
+  SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows));
118
+  SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows));
119
+  SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows));
120
+  SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows));
121
+ 
122
+
102 123
   for (int bin = 0; bin < n_cycle_bins; bin++) {
103 124
     SEXP cycle_bin_R = allocVector(INTSXP, n_rows);
104 125
     SET_VECTOR_ELT(tally_R, bin + N_BASE_COLS, cycle_bin_R);
... ...
@@ -108,7 +129,7 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins) {
108 129
   return tally_R;
109 130
 }
110 131
 
111
-static TallyTable *TallyTable_new(SEXP tally_R) {
132
+static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
112 133
   TallyTable *tally = (TallyTable *) R_alloc(sizeof(TallyTable), 1);
113 134
   int n_cycle_bins = length(tally_R) - N_BASE_COLS;
114 135
   
... ...
@@ -137,7 +158,15 @@ static TallyTable *TallyTable_new(SEXP tally_R) {
137 158
   tally->mdfne = REAL(VECTOR_ELT(tally_R, MDFNE));
138 159
   tally->mdfne_ref = REAL(VECTOR_ELT(tally_R, MDFNE_REF));
139 160
   tally->strand = INTEGER(VECTOR_ELT(tally_R, CODON_STRAND));
161
+  tally->delcount_plus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_PLUS));
162
+  tally->delcount_minus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_MINUS));
140 163
   tally->cycle_bins = (int **) R_alloc(sizeof(int*), n_cycle_bins);
164
+  tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS));
165
+  tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF));
166
+  tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS));
167
+  tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF));
168
+  
169
+
141 170
   for (int bin = 0; bin < n_cycle_bins; bin++) {
142 171
     tally->cycle_bins[bin] = INTEGER(VECTOR_ELT(tally_R, bin + N_BASE_COLS));
143 172
   }
... ...
@@ -152,6 +181,30 @@ read_total_counts(unsigned char **bytes, int row, int *count_total) {
152 181
   count_total[row] = count_total_plus + count_total_minus;
153 182
 }
154 183
 
184
+static void
185
+read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus,
186
+               int *count_xs_minus)
187
+{
188
+    int n_xs = read_int(bytes);
189
+    count_xs_plus[row] = 0;
190
+    count_xs_minus[row] = 0;
191
+    for (int index = 0; index < n_xs; index++) {
192
+	int xs = read_int(bytes);
193
+	int count = read_int(bytes);
194
+	if (xs == 1) {
195
+	    count_xs_plus[row] = count;
196
+	} else if (xs == 2) {
197
+	    count_xs_minus[row] = count;
198
+	} 
199
+    }
200
+#ifdef DEBUG2
201
+    printf("row %d xs counts: %d plus %d minus", row, count_xs_plus[row], count_xs_minus[row]);
202
+#endif
203
+}
204
+
205
+
206
+
207
+
155 208
 static void
156 209
 read_cycle_counts(unsigned char **bytes, int row, TallyParam param,
157 210
                   int *n_cycles, double *read_pos_mean, double *read_pos_var,
... ...
@@ -221,6 +274,9 @@ parse_indels(unsigned char *bytes, int row,
221 274
              TallyParam param, TallyTable *tally, bool insertion)
222 275
 {
223 276
   int indel_count = read_int(&bytes);
277
+#ifdef DEBUG2
278
+  printf("row %d indel count: %d", row, indel_count);
279
+#endif
224 280
   for (int indel = 0; indel < indel_count; indel++, row++) {
225 281
     for (int b = 0; b < param.n_cycle_bins; b++) {
226 282
       tally->cycle_bins[b][row] = 0;
... ...
@@ -231,7 +287,7 @@ parse_indels(unsigned char *b