Browse code

work towards supporting new bam_tally; drops quality info (now cutoff based), adds xs/nm/del and codon-level counts

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110593 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 13/11/2015 21:50:11
Showing 8 changed files

... ...
@@ -9,7 +9,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read
9 9
         methods to work with GMAP and GSNAP from within R. In addition,
10 10
         it provides methods to tally alignment results on a
11 11
         per-nucleotide basis using the bam_tally tool.
12
-Version: 1.13.1
12
+Version: 1.13.2
13 13
 Depends: R (>= 2.15.0), methods, GenomeInfoDb (>= 1.1.3),
14 14
         GenomicRanges (>= 1.17.12)
15 15
 Imports: S4Vectors, IRanges, Rsamtools (>= 1.17.8), rtracklayer (>= 1.25.5),
... ...
@@ -24,7 +24,8 @@ setClass("BamTallyParam",
24 24
                         xs = "logical",
25 25
                         read_pos = "logical",
26 26
                         min_base_quality = "integer",
27
-                        noncovered = "logical"))
27
+                        noncovered = "logical",
28
+                        nm = "logical"))
28 29
 
29 30
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
30 31
 ### Constructor
... ...
@@ -107,7 +108,8 @@ BamTallyParam <- function(genome, which = GRanges(),
107 108
                           indels = FALSE, include_soft_clips = 0L,
108 109
                           exon_iit = NULL, IIT_BPPARAM = NULL,
109 110
                           xs = FALSE, read_pos = FALSE,
110
-                          min_base_quality = 0L, noncovered = FALSE)
111
+                          min_base_quality = 0L, noncovered = FALSE,
112
+                          nm = FALSE)
111 113
 {
112 114
   if (!is.null(desired_read_group) && !isSingleString(desired_read_group))
113 115
     stop("'desired_read_group' must be NULL or a single, non-NA string")
... ...
@@ -139,6 +141,8 @@ BamTallyParam <- function(genome, which = GRanges(),
139 141
     stop("min_base_quality must be a single, non-negative, non-NA number")
140 142
   if (!isTRUEorFALSE(noncovered))
141 143
     stop("noncovered must be TRUE or FALSE")
144
+  if (!isTRUEorFALSE(nm))
145
+    stop("nm must be TRUE or FALSE")
142 146
   args <- names(formals(sys.function()))
143 147
   params <- mget(args, environment())
144 148
   params$genome <- as(genome, "GmapGenome")
... ...
@@ -11,11 +11,12 @@ setClass("TallyIIT", representation(ptr = "externalptr",
11 11
                                     genome = "GmapGenome",
12 12
                                     bam = "BamFile",
13 13
                                     xs = "logical",
14
-                                    read_pos = "logical"))
14
+                                    read_pos = "logical",
15
+                                    nm = "logical"))
15 16
 
16
-TallyIIT <- function(ptr, genome, bam, xs, read_pos) {
17
+TallyIIT <- function(ptr, genome, bam, xs, read_pos, nm) {
17 18
   new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs,
18
-      read_pos = read_pos)
19
+      read_pos = read_pos, nm = nm)
19 20
 }
20 21
 
21 22
 setMethod("genome", "TallyIIT", function(x) x@genome)
... ...
@@ -72,16 +73,22 @@ setMethod("bam_tally", "GmapBamReader",
72 73
             
73 74
             TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome,
74 75
                      as(x, "BamFile"), xs=param_list$xs,
75
-                     read_pos=param_list$read_pos)
76
+                     read_pos=param_list$read_pos, nm=param_list$nm)
76 77
           })
77 78
 
78
-variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L,
79
-                           keep_ref_rows = FALSE, read_length = NA_integer_)
79
+variantSummary <- function(x, read_pos_breaks = NULL,
80
+                           keep_ref_rows = FALSE, read_length = NA_integer_,
81
+                           high_nm_score = NA_integer_)
80 82
 {
81 83
   read_length <- as.integer(read_length)
82 84
   if (length(read_length) != 1L) {
83 85
     stop("'read_length' must be a single integer")
84 86
   }
87
+  high_nm_score <- as.integer(high_nm_score)
88
+  stopifnot(length(high_nm_score) == 1L)
89
+  if (!is.na(high_nm_score) && !x@nm) {
90
+      stop("'high_nm_score is not NA but NM scores were not tallied")
91
+  }
85 92
   if (length(read_pos_breaks) > 0L) {
86 93
     if (!x@read_pos) {
87 94
       stop("'read_pos_breaks' non-empty, but read positions were not tallied")
... ...
@@ -97,25 +104,23 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L,
97 104
 
98 105
   tally <- .Call(R_tally_iit_parse, x@ptr,
99 106
                  read_pos_breaks,
100
-                 normArgSingleInteger(high_base_quality),
101
-                 NULL, read_length, x@xs)
107
+                 NULL, read_length, x@xs, high_nm_score)
102 108
   
103 109
   tally_names <- c("seqnames", "pos", "ref", "alt",
104 110
                    "n.read.pos", "n.read.pos.ref",
105
-                   "raw.count", "raw.count.ref",
106
-                   "raw.count.total",
107
-                   "high.quality", "high.quality.ref",
108
-                   "high.quality.total", "mean.quality",
109
-                   "mean.quality.ref",
111
+                   "count", "count.ref", "count.total",
110 112
                    "count.plus", "count.plus.ref",
111 113
                    "count.minus", "count.minus.ref",
112 114
                    "del.count.plus", "del.count.minus",
113 115
                    "read.pos.mean", "read.pos.mean.ref",
114 116
                    "read.pos.var", "read.pos.var.ref",
115
-                   "mdfne", "mdfne.ref", "codon.dir",
117
+                   "mdfne", "mdfne.ref", "codon.strand",
116 118
                    "count.xs.plus", "count.xs.plus.ref",
117
-                   "count.xs.minus", "count.xs.minus.ref")
118
-  
119
+                   "count.xs.minus", "count.xs.minus.ref",
120
+                   "count.high.nm", "count.high.nm.ref")
121
+
122
+  tally$codon.strand <- structure(tally$codon.strand, class="factor",
123
+                                  levels=strand())
119 124
   break_names <- character()
120 125
   if (length(read_pos_breaks) > 0L) {
121 126
     read_pos_breaks <- as.integer(read_pos_breaks)
... ...
@@ -124,6 +129,7 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L,
124 129
     tally_names <- c(tally_names, break_names)
125 130
   }
126 131
   names(tally) <- tally_names
132
+  tally <- Filter(Negate(is.null), tally)  
127 133
 
128 134
   if (!keep_ref_rows) {
129 135
     variant_rows <- !is.na(tally$alt)
... ...
@@ -131,23 +137,20 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L,
131 137
       tally <- lapply(tally, `[`, variant_rows)
132 138
   }
133 139
   
134
-  meta_names <- setdiff(tally_names,
135
-                        c("seqnames", "pos", "ref", "alt", "high.quality",
136
-                          "high.quality.ref", "high.quality.total"))
140
+  meta_names <- setdiff(names(tally),
141
+                        c("seqnames", "pos", "ref", "alt", "count",
142
+                          "count.ref", "count.total"))
137 143
   genome <- genome(x)
138 144
   indel <- nchar(tally$ref) == 0L | nchar(tally$alt) == 0L
139 145
   metacols <- DataFrame(tally[meta_names])
140 146
   mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks)
141 147
 
142
-  samecols = tally[["ref"]] != tally[["alt"]]
143 148
   gr <- with(tally,
144 149
              VRanges(seqnames,
145 150
                      IRanges(pos,
146 151
                              width = ifelse(nchar(alt) == 0, nchar(ref), 1L)),
147 152
                      ref, alt,
148
-                     ifelse(indel, raw.count.total, high.quality.total),
149
-                     ifelse(indel, raw.count.ref, high.quality.ref),
150
-                     ifelse(indel, raw.count, high.quality),
153
+                     count.total, count.ref, count,
151 154
                      seqlengths = seqlengths(genome)))
152 155
   mcols(gr) <- metacols
153 156
   checkTallyConsistency(gr)
... ...
@@ -160,11 +163,8 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L,
160 163
 
161 164
 checkTallyConsistency <- function(x) {
162 165
   with(mcols(x), {
163
-    stopifnot(all(raw.count + raw.count.ref <= raw.count.total, na.rm=TRUE))
164
-    stopifnot(all(altDepth(x) <= raw.count, na.rm=TRUE))
165
-    stopifnot(all(refDepth(x) <= raw.count.ref, na.rm=TRUE))
166
-    stopifnot(all(count.plus + count.minus == raw.count, na.rm=TRUE))
167
-    stopifnot(all(count.plus.ref + count.minus.ref == raw.count.ref))
166
+    stopifnot(all(count.plus + count.minus == altDepth(x), na.rm=TRUE))
167
+    stopifnot(all(count.plus.ref + count.minus.ref == refDepth(x)))
168 168
   })
169 169
 }
170 170
 
... ...
@@ -228,7 +228,7 @@ normArgSingleCharacterOrNULL <- function(x) {
228 228
                          blocksize = 1000L, verbosep = FALSE,
229 229
                          include_soft_clips = 0L,
230 230
                          exon_iit = NULL, xs = FALSE, read_pos = FALSE,
231
-                         min_base_quality = 0L, noncovered = FALSE)
231
+                         min_base_quality = 0L, noncovered = FALSE, nm = FALSE)
232 232
 {
233 233
   if (!is(bamreader, "GmapBamReader"))
234 234
     stop("'bamreader' must be a GmapBamReader")
... ...
@@ -262,7 +262,8 @@ normArgSingleCharacterOrNULL <- function(x) {
262 262
         normArgTRUEorFALSE(xs),
263 263
         normArgTRUEorFALSE(read_pos),
264 264
         normArgSingleInteger(min_base_quality),
265
-        normArgTRUEorFALSE(noncovered))
265
+        normArgTRUEorFALSE(noncovered),
266
+        normArgTRUEorFALSE(nm))
266 267
 }
267 268
 
268 269
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
... ...
@@ -7,7 +7,7 @@
7 7
 static const R_CallMethodDef callMethods[] = {
8 8
 
9 9
   /* bamtally.c */
10
-  CALLMETHOD_DEF(R_Bamtally_iit, 25),
10
+  CALLMETHOD_DEF(R_Bamtally_iit, 26),
11 11
   CALLMETHOD_DEF(R_tally_iit_parse, 6),
12 12
   
13 13
   /* bamreader.c */
... ...
@@ -26,7 +26,8 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
26 26
                 SEXP verbosep_R, SEXP max_softclip_R,
27 27
                 SEXP genome_iit_file_R,
28 28
                 SEXP print_xs_scores_p_R, SEXP print_cycles_p_R,
29
-                SEXP minimum_quality_score_R, SEXP noncovered_R)
29
+                SEXP minimum_quality_score_R, SEXP noncovered_R,
30
+                SEXP print_nm_scores_p_R)
30 31
 {
31 32
   Bamreader_T bamreader = (Bamreader_T) R_ExternalPtrAddr(bamreader_R);
32 33
   const char *genome_dir =
... ...
@@ -54,6 +55,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
54 55
   bool print_cycles_p = asLogical(print_cycles_p_R);
55 56
   int minimum_quality_score = asInteger(minimum_quality_score_R);
56 57
   bool noncovered_p = asLogical(noncovered_R);
58
+  bool print_nm_scores_p = asLogical(print_nm_scores_p_R);
57 59
 
58 60
   Genome_T genome = createGenome(genome_dir, db);
59 61
   IIT_T chromosome_iit = readChromosomeIIT(genome_dir, db);
... ...
@@ -88,7 +90,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
88 90
                                  print_indels_p, blocksize, verbosep,
89 91
                                  /*readlevel_p*/false, max_softclip,
90 92
                                  print_cycles_p,
91
-                                 /*print_nm_scores_p*/ false,
93
+                                 print_nm_scores_p,
92 94
                                  print_xs_scores_p, noncovered_p);
93 95
   IIT_free(&chromosome_iit);
94 96
   Genome_free(&genome);
... ...
@@ -23,7 +23,8 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
23 23
                 SEXP verbosep_R, SEXP max_softclip_R,
24 24
                 SEXP genome_iit_file_R,
25 25
                 SEXP print_xs_scores_p_R, SEXP print_cycles_p_R,
26
-                SEXP minimum_quality_score_R, SEXP nonconvered_R);
26
+                SEXP minimum_quality_score_R, SEXP nonconvered_R,
27
+                SEXP print_nm_scores_p_R);
27 28
 
28 29
 SEXP
29 30
 R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R,
... ...
@@ -1,4 +1,4 @@
1
-static char rcsid[] = "$Id: bamtally.c 172388 2015-08-21 20:10:50Z twu $";
1
+static char rcsid[] = "$Id: bamtally.c 178481 2015-11-09 20:55:28Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
... ...
@@ -859,13 +859,18 @@ pass_variant_filter_p (long int nmatches, long int delcounts_plus, long int delc
859 859
   if (variant_strands == 0) {
860 860
     return true;
861 861
   } else if (variant_strands == 1) {
862
-    if (mismatches_by_shift == NULL) {
862
+    /* Not sure if we should look at insertions_byshift also */
863
+    if (mismatches_by_shift == NULL && delcounts_plus == 0 && delcounts_minus == 0) {
863 864
       return false;
864 865
     } else {
865 866
       return true;
866 867
     }
867 868
 
868 869
   } else {
870
+    if (delcounts_plus > 0 && delcounts_minus > 0) {
871
+      return true;
872
+    }
873
+
869 874
     plus_strand_p[0] = plus_strand_p[1] = plus_strand_p[2] = plus_strand_p[3] = false;
870 875
     minus_strand_p[0] = minus_strand_p[1] = minus_strand_p[2] = minus_strand_p[3] = false;
871 876
 
... ...
@@ -873,64 +878,71 @@ pass_variant_filter_p (long int nmatches, long int delcounts_plus, long int delc
873 878
     for (ptr = mismatches_by_shift; ptr != NULL; ptr = List_next(ptr)) {
874 879
       mismatch = (Mismatch_T) List_head(ptr);
875 880
       if (mismatch->shift > 0) {
881
+	if (delcounts_minus > 0) {
882
+	  return true;
883
+	}
876 884
 	switch (mismatch->nt) {
877 885
 	case 'A':
878
-	  if (minus_strand_p[0] == true) {
886
+	  if (minus_strand_p[/*A*/0] == true) {
879 887
 	    return true;
880 888
 	  } else {
881
-	    plus_strand_p[0] = true;
889
+	    plus_strand_p[/*A*/0] = true;
882 890
 	  }
883 891
 	  break;
884 892
 	case 'C':
885
-	  if (minus_strand_p[1] == true) {
893
+	  if (minus_strand_p[/*C*/1] == true) {
886 894
 	    return true;
887 895
 	  } else {
888
-	    plus_strand_p[1] = true;
896
+	    plus_strand_p[/*C*/1] = true;
889 897
 	  }
890 898
 	  break;
891 899
 	case 'G': 
892
-	  if (minus_strand_p[2] == true) {
900
+	  if (minus_strand_p[/*G*/2] == true) {
893 901
 	    return true;
894 902
 	  } else {
895
-	    plus_strand_p[2] = true;
903
+	    plus_strand_p[/*G*/2] = true;
896 904
 	  }
897 905
 	  break;
898 906
 	case 'T':
899
-	  if (minus_strand_p[3] == true) {
907
+	  if (minus_strand_p[/*T*/3] == true) {
900 908
 	    return true;
901 909
 	  } else {
902
-	    plus_strand_p[3] = true;
910
+	    plus_strand_p[/*T*/3] = true;
903 911
 	  }
904 912
 	  break;
905 913
 	}
914
+
906 915
       } else if (mismatch->shift < 0) {
916
+	if (delcounts_plus > 0) {
917
+	  return true;
918
+	}
907 919
 	switch (mismatch->nt) {
908 920
 	case 'A':
909
-	  if (plus_strand_p[0] == true) {
921
+	  if (plus_strand_p[/*A*/0] == true) {
910 922
 	    return true;
911 923
 	  } else {
912
-	    minus_strand_p[0] = true;
924
+	    minus_strand_p[/*A*/0] = true;
913 925
 	  }
914 926
 	  break;
915 927
 	case 'C':
916
-	  if (plus_strand_p[1] == true) {
928
+	  if (plus_strand_p[/*C*/1] == true) {
917 929
 	    return true;
918 930
 	  } else {
919
-	    minus_strand_p[1] = true;
931
+	    minus_strand_p[/*C*/1] = true;
920 932
 	  }
921 933
 	  break;
922 934
 	case 'G':
923
-	  if (plus_strand_p[2] == true) {
935
+	  if (plus_strand_p[/*G*/2] == true) {
924 936
 	    return true;
925 937
 	  } else {
926
-	    minus_strand_p[2] = true;
938
+	    minus_strand_p[/*G*/2] = true;
927 939
 	  }
928 940
 	  break;
929 941
 	case 'T':
930
-	  if (plus_strand_p[3] == true) {
942
+	  if (plus_strand_p[/*T*/3] == true) {
931 943
 	    return true;
932 944
 	  } else {
933
-	    minus_strand_p[3] = true;
945
+	    minus_strand_p[/*T*/3] = true;
934 946
 	  }
935 947
 	  break;
936 948
 	}
... ...
@@ -5930,6 +5942,11 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
5930 5942
   ignore_duplicates_p = true;
5931 5943
 #endif
5932 5944
 
5945
+  /*
5946
+  fprintf(stderr,"Called with alloclength %d, minimum_mapq %d, good_unique_mapq %d, minimum_quality_score %d, maximum_nhits %d, need_concordant_p %d, need_unique_p %d, need_primary_p %d, ignore_duplicates_p %d, min_depth %d, variant_strands %d, ignore_query_Ns_p %d, blocksize %d, verbosep %d, readlevel_p %d, max_softclip %d\n",
5947
+	  alloclength,minimum_mapq,good_unique_mapq,minimum_quality_score,maximum_nhits,need_concordant_p,need_unique_p,need_primary_p,ignore_duplicates_p,min_depth,variant_strands,ignore_query_Ns_p,blocksize,verbosep,readlevel_p,max_softclip);
5948
+  */
5949
+
5933 5950
 
5934 5951
   /* Create tally at position N to store n_fromleft */
5935 5952
   alloc_tallies_alloc = (Tally_T *) CALLOC(alloclength + 2*max_softclip + 1,sizeof(Tally_T));
... ...
@@ -1,4 +1,4 @@
1
-#define DEBUG2 1
1
+#define DEBUG2 0
2 2
 
3 3
 #include <stdlib.h> /* for abs */
4 4
 #include <string.h> /* for strlen */
... ...
@@ -7,21 +7,18 @@
7 7
 #include "bytestream.h"
8 8
 
9 9
 /*I changed the order of these, because the XS spots are optional...*/
10
-enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF, //8
11
-       COUNT_TOTAL, HIGH_QUALITY, HIGH_QUALITY_REF, HIGH_QUALITY_TOTAL, //12
12
-       MEAN_QUALITY, MEAN_QUALITY_REF, //14
13
-       COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS, //17
14
-       COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS, //21
15
-       READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR, //24
16
-       READ_POS_VAR_REF, MDFNE, MDFNE_REF,  CODON_STRAND, //28
17
-       COUNT_XS_PLUS, COUNT_XS_PLUS_REF, //30
18
-       COUNT_XS_MINUS, COUNT_XS_MINUS_REF, N_BASE_COLS }; //33
19
-
20
-
21
-enum{ CODON_MINUS = -1,
22
-      NON_CODON,
23
-      CODON_PLUS};
24
-
10
+enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF,
11
+       COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS,
12
+       COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS,
13
+       READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR,
14
+       READ_POS_VAR_REF, MDFNE, MDFNE_REF,  CODON_STRAND,
15
+       COUNT_XS_PLUS, COUNT_XS_PLUS_REF,
16
+       COUNT_XS_MINUS, COUNT_XS_MINUS_REF,
17
+       COUNT_HIGH_NM, COUNT_HIGH_NM_REF, N_BASE_COLS };
18
+
19
+enum { CODON_PLUS = 1,
20
+       CODON_MINUS,
21
+       NON_CODON };
25 22
 
26 23
 static char *codon_table[64] = 
27 24
   {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT",
... ...
@@ -33,10 +30,6 @@ static char *codon_table[64] =
33 30
    "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT",
34 31
    "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
35 32
 
36
-
37
-
38
-
39
-
40 33
 static int NUM_PTRS = 5;
41 34
 
42 35
 typedef struct TallyTable {
... ...
@@ -49,11 +42,6 @@ typedef struct TallyTable {
49 42
   int *count;
50 43
   int *count_ref;
51 44
   int *count_total;
52
-  int *high_quality;
53
-  int *high_quality_ref;
54
-  int *high_quality_total;
55
-  double *mean_quality;
56
-  double *mean_quality_ref;
57 45
   int *count_plus;
58 46
   int *count_plus_ref;
59 47
   int *count_minus;
... ...
@@ -70,6 +58,8 @@ typedef struct TallyTable {
70 58
   int *count_xs_plus_ref;
71 59
   int *count_xs_minus;
72 60
   int *count_xs_minus_ref;
61
+  int *count_high_nm;
62
+  int *count_high_nm_ref;
73 63
   int *strand;
74 64
   int **cycle_bins;
75 65
 } TallyTable;
... ...
@@ -77,16 +67,15 @@ typedef struct TallyTable {
77 67
 typedef struct TallyParam {
78 68
   int *cycle_breaks;
79 69
   int n_cycle_bins;
80
-  int high_base_quality;
81 70
   int read_length;
82 71
   double *mdfne_buf;
83 72
   bool xs;
73
+  int high_nm_score;
84 74
 } TallyParam;
85 75
 
86 76
 static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
87 77
   SEXP tally_R; /* the result list */
88 78
   PROTECT(tally_R = allocVector(VECSXP, N_BASE_COLS + n_cycle_bins));
89
-  
90 79
   SET_VECTOR_ELT(tally_R, SEQNAMES, allocVector(STRSXP, n_rows));
91 80
   SET_VECTOR_ELT(tally_R, POS, allocVector(INTSXP, n_rows));
92 81
   SET_VECTOR_ELT(tally_R, REF, allocVector(STRSXP, n_rows));
... ...
@@ -96,11 +85,6 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
96 85
   SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows));
97 86
   SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows));
98 87
   SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows));
99
-  SET_VECTOR_ELT(tally_R, HIGH_QUALITY, allocVector(INTSXP, n_rows));
100
-  SET_VECTOR_ELT(tally_R, HIGH_QUALITY_REF, allocVector(INTSXP, n_rows));
101
-  SET_VECTOR_ELT(tally_R, HIGH_QUALITY_TOTAL, allocVector(INTSXP, n_rows));
102
-  SET_VECTOR_ELT(tally_R, MEAN_QUALITY, allocVector(REALSXP, n_rows));
103
-  SET_VECTOR_ELT(tally_R, MEAN_QUALITY_REF, allocVector(REALSXP, n_rows));
104 88
   SET_VECTOR_ELT(tally_R, COUNT_PLUS, allocVector(INTSXP, n_rows));
105 89
   SET_VECTOR_ELT(tally_R, COUNT_PLUS_REF, allocVector(INTSXP, n_rows));
106 90
   SET_VECTOR_ELT(tally_R, COUNT_MINUS, allocVector(INTSXP, n_rows));
... ...
@@ -114,12 +98,15 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
114 98
   SET_VECTOR_ELT(tally_R, CODON_STRAND, allocVector(INTSXP, n_rows));
115 99
   SET_VECTOR_ELT(tally_R, DELCOUNT_PLUS, allocVector(INTSXP, n_rows));
116 100
   SET_VECTOR_ELT(tally_R, DELCOUNT_MINUS, allocVector(INTSXP, n_rows));
117
-  SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows));
118
-  SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows));
119
-  SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows));
120
-  SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows));
121
- 
122
-
101
+  if (xs) {
102
+      SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows));
103
+      SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows));
104
+      SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows));
105
+      SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows));
106
+  }
107
+  SET_VECTOR_ELT(tally_R, COUNT_HIGH_NM, allocVector(INTSXP, n_rows));
108
+  SET_VECTOR_ELT(tally_R, COUNT_HIGH_NM_REF, allocVector(INTSXP, n_rows));
109
+  
123 110
   for (int bin = 0; bin < n_cycle_bins; bin++) {
124 111
     SEXP cycle_bin_R = allocVector(INTSXP, n_rows);
125 112
     SET_VECTOR_ELT(tally_R, bin + N_BASE_COLS, cycle_bin_R);
... ...
@@ -130,7 +117,7 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
130 117
 }
131 118
 
132 119
 static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
133
-  TallyTable *tally = (TallyTable *) R_alloc(sizeof(TallyTable), 1);
120
+  TallyTable *tally = (TallyTable *) S_alloc(sizeof(TallyTable), 1);
134 121
   int n_cycle_bins = length(tally_R) - N_BASE_COLS;
135 122
   
136 123
   tally->seqnames_R = VECTOR_ELT(tally_R, SEQNAMES);
... ...
@@ -142,11 +129,6 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
142 129
   tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT));
143 130
   tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF));
144 131
   tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL));
145
-  tally->high_quality = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY));
146
-  tally->high_quality_ref = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY_REF));
147
-  tally->high_quality_total = INTEGER(VECTOR_ELT(tally_R, HIGH_QUALITY_TOTAL));
148
-  tally->mean_quality = REAL(VECTOR_ELT(tally_R, MEAN_QUALITY));
149
-  tally->mean_quality_ref = REAL(VECTOR_ELT(tally_R, MEAN_QUALITY_REF));
150 132
   tally->count_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS));
151 133
   tally->count_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS_REF));
152 134
   tally->count_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_MINUS));
... ...
@@ -161,12 +143,18 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
161 143
   tally->delcount_plus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_PLUS));
162 144
   tally->delcount_minus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_MINUS));
163 145
   tally->cycle_bins = (int **) R_alloc(sizeof(int*), n_cycle_bins);
164
-  tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS));
165
-  tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF));
166
-  tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS));
167
-  tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF));
146
+  if (xs) {
147
+      tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS));
148
+      tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R,
149
+                                                    COUNT_XS_PLUS_REF));
150
+      tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS));
151
+      tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R,
152
+                                                     COUNT_XS_MINUS_REF));
153
+  }
154
+  tally->count_high_nm = INTEGER(VECTOR_ELT(tally_R, COUNT_HIGH_NM));
155
+  tally->count_high_nm_ref = INTEGER(VECTOR_ELT(tally_R,
156
+                                                COUNT_HIGH_NM_REF));
168 157
   
169
-
170 158
   for (int bin = 0; bin < n_cycle_bins; bin++) {
171 159
     tally->cycle_bins[bin] = INTEGER(VECTOR_ELT(tally_R, bin + N_BASE_COLS));
172 160
   }
... ...
@@ -181,11 +169,30 @@ read_total_counts(unsigned char **bytes, int row, int *count_total) {
181 169
   count_total[row] = count_total_plus + count_total_minus;
182 170
 }
183 171
 
172
+static void
173
+read_nm_counts(unsigned char **bytes, int row, int *count_high_nm,
174
+               int high_nm_score)
175
+{
176
+    int n_nm = read_int(bytes);
177
+    count_high_nm[row] = 0;
178
+    /* FIXME: currently bam_tally gives us counts whether we want them or not */
179
+    for (int index = 0; index < n_nm; index++) {
180
+        int nm = read_int(bytes);
181
+	int count = read_int(bytes);
182
+        if (nm >= high_nm_score) {
183
+            count_high_nm[row] += count;
184
+        }
185
+    }
186
+}
187
+
184 188
 static void
185 189
 read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus,
186 190
                int *count_xs_minus)
187 191
 {
188 192
     int n_xs = read_int(bytes);
193
+    if (count_xs_plus == NULL) {
194
+        return;
195
+    }
189 196
     count_xs_plus[row] = 0;
190 197
     count_xs_minus[row] = 0;
191 198
     for (int index = 0; index < n_xs; index++) {
... ...
@@ -197,14 +204,8 @@ read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus,
197 204
 	    count_xs_minus[row] = count;
198 205
 	} 
199 206
     }
200
-#ifdef DEBUG2
201
-    printf("row %d xs counts: %d plus %d minus", row, count_xs_plus[row], count_xs_minus[row]);
202
-#endif
203 207
 }
204 208
 
205
-
206
-
207
-
208 209
 static void
209 210
 read_cycle_counts(unsigned char **bytes, int row, TallyParam param,
210 211
                   int *n_cycles, double *read_pos_mean, double *read_pos_var,
... ...
@@ -274,9 +275,6 @@ parse_indels(unsigned char *bytes, int row,
274 275
              TallyParam param, TallyTable *tally, bool insertion)
275 276
 {
276 277
   int indel_count = read_int(&bytes);
277
-#ifdef DEBUG2
278
-  printf("row %d indel count: %d", row, indel_count);
279
-#endif
280 278
   for (int indel = 0; indel < indel_count; indel++, row++) {
281 279
     for (int b = 0; b < param.n_cycle_bins; b++) {
282 280
       tally->cycle_bins[b][row] = 0;
... ...
@@ -302,72 +300,48 @@ parse_indels(unsigned char *bytes, int row,
302 300
     read_cycle_counts(&bytes, row, param, tally->n_cycles,
303 301
                       tally->read_pos_mean, tally->read_pos_var,
304 302
                       tally->mdfne, tally->cycle_bins);
305
-    /* quality is per-base and so not relevant for indels */
306
-    tally->mean_quality[row] = NA_REAL;
307
-    tally->mean_quality_ref[row] = NA_REAL;
308
-    tally->high_quality[row] = NA_INTEGER;
309
-    tally->high_quality_ref[row] = NA_INTEGER;
310
-    tally->high_quality_total[row] = NA_INTEGER;
303
+    read_nm_counts(&bytes, row, tally->count_high_nm, param.high_nm_score);
304
+    read_xs_counts(&bytes, row, tally->count_xs_plus, tally->count_xs_minus);
311 305
     /* no position from which to tabulate the cycles */
312 306
     tally->n_cycles_ref[row] = NA_INTEGER;
313 307
     tally->read_pos_mean_ref[row] = NA_REAL;
314 308
     tally->read_pos_var_ref[row] = NA_REAL;
315 309
     tally->mdfne_ref[row] = NA_REAL;
316
-  }
317
-  return indel_count;
318
-}
319
-
320
-static void
321
-read_quality_counts(unsigned char **bytes, int row, int *high_quality,
322
-                    double *mean_quality, int high_base_quality)
323
-{
324
-  int n_qualities = read_int(bytes);
325
-  int total_quality = 0;
326
-  int total_quality_weight = 0;
327
-  high_quality[row] = 0;
328
-  for (int index = 0; index < n_qualities; index++) {
329
-    int quality = read_int(bytes);
330
-    int count = read_int(bytes);
331
-    if (quality >= high_base_quality) {
332
-	total_quality += quality * count;
333
-	total_quality_weight += count;
334
-	high_quality[row] += count;
310
+    /* overlapping deletion count only relevant for SNVs */
311
+    tally->delcount_plus[row] = NA_INTEGER;
312
+    tally->delcount_minus[row] = NA_INTEGER;
313
+    /* information not available for "ref" allele */
314
+    tally->count_high_nm_ref[row] = NA_INTEGER;
315
+    if (param.xs) {
316
+        tally->count_xs_plus_ref[row] = NA_INTEGER;
317
+        tally->count_xs_minus_ref[row] = NA_INTEGER;
335 318
     }
336 319
   }
337
-  mean_quality[row] = total_quality_weight > 0 ?
338
-    (double)total_quality / total_quality_weight : R_NaN;
320
+  return indel_count;
339 321
 }
340 322
 
341 323
 static int
342 324
 read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
343
-                   int *count_plus, int *count_minus, int *count, int strand)
325
+                   int *count_plus, int *count_minus, int *count,
326
+                   int *delcount_plus, int *delcount_minus,
327
+                   int strand)
344 328
 {
345 329
   int n_alleles = 0;
346 330
   char allele;
347 331
   char stop = strand == 0 ? '\0' : (char) 255;
348
-#ifdef DEBUG2
349
-  printf("Starting read_allele_counts (codon strand %d) at row %d. Total length of read_R is %d\n", strand, row, LENGTH(read_R));
350
-#endif
332
+  
351 333
   while((allele = (char)read_char(bytes)) != stop) {
352
-#ifdef DEBUG2
353
-      printf("Parsing counts for allele: %d (%s) row %d \n", allele, &allele, row );
354
-#endif
334
+    if (allele == '_') {
335
+        *delcount_plus = read_int(bytes);
336
+        *delcount_minus = read_int(bytes);
337
+        continue;
338
+    }
355 339
     if(strand == 0)
356 340
        SET_STRING_ELT(read_R, row, mkCharLen(&allele, 1));
357 341
     else
358 342
        SET_STRING_ELT(read_R, row, mkCharLen(codon_table[(int)allele], 3));
359
-#ifdef DEBUG2
360
-    printf("Reading allele plus count: ");
361
-#endif
362 343
     count_plus[row] = read_int(bytes);
363
-#ifdef DEBUG2
364
-    printf("%d\n", count_plus[row]);
365
-    printf("Reading allele minus count: ");
366
-#endif
367 344
     count_minus[row] = read_int(bytes);
368
-#ifdef DEBUG2
369
-    printf("%d\n", count_minus[row]);
370
-#endif
371 345
     count[row] = count_plus[row] + count_minus[row];
372 346
     row++;
373 347
     n_alleles++;
... ...
@@ -376,69 +350,35 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
376 350
   return n_alleles;
377 351
 }
378 352
 
379
-
380
-//format _(int)(int) 
381
-static void
382
-read_del_counts(unsigned char **bytes, int row, int *delcount_plus,
383
-                int *delcount_minus)
384
-{
385
-
386
-//This consumes a byte!!!
387
-    unsigned char mychar = read_char(bytes);
388
-#ifdef DEBUG2
389
-    printf("in read_del_counts, mchar is (as int) %d", mychar);
390
-#endif 
391
-
392
-    if(mychar == '_') {
393
-
394
-
395
-	*delcount_plus = read_int(bytes);
396
-	*delcount_minus = read_int(bytes);
397
-#ifdef DEBUG2 
398
-	printf("read_del_counts: '_' character detected. count plus %d count minus %d\n", *delcount_plus, *delcount_minus);
399
-#endif
400
-    } else {
401
-	//backtrack char consumption if it wasn't a "_"
402
-	*bytes = *bytes-1;
403
-	*delcount_plus = 0;
404
-	*delcount_minus = 0;
405
-    }
406
-    return;
407
-}
408
-
409 353
 static int
410 354
 parse_alleles(unsigned char *bytes, int row, int ref_row,
411 355
               TallyParam param, TallyTable *tally, int strand)
412 356
 {
413 357
 
414
-  bool have_ref_row = false;
415 358
   read_total_counts(&bytes, row, tally->count_total);
359
+  int delcount_plus = 0, delcount_minus = 0;
416 360
   int n_alleles = read_allele_counts(&bytes, row, tally->read_R,
417 361
                                      tally->count_plus, tally->count_minus,
418 362
 	                             tally->count,
363
+                                     &delcount_plus, &delcount_minus,
419 364
 	                             strand);
420
-  int delcount_plus, delcount_minus;
421
-/*  read_del_counts(&bytes, row, &delcount_plus, &delcount_minus);*/
422 365
   for (int allele = 0; allele < n_alleles; allele++, row++) {
423 366
     tally->n_cycles[row] = 0;
424 367
     for (int b = 0; b < param.n_cycle_bins; b++) {
425 368
       tally->cycle_bins[b][row] = 0;
426 369
     }
427
-    tally->high_quality[row] = 0;
428
-    tally->mean_quality[row] = R_NaN;
429 370
     tally->strand[row] = strand;
430
-    tally->delcount_plus[row] = R_NaN;//delcount_plus;
431
-    tally->delcount_minus[row] = R_NaN;//delcount_minus;
371
+    tally->delcount_plus[row] = delcount_plus;
372
+    tally->delcount_minus[row] = delcount_minus;
432 373
     if (tally->count[row] > 0) {
433 374
       read_cycle_counts(&bytes, row, param, tally->n_cycles,
434 375
                         tally->read_pos_mean, tally->read_pos_var,
435 376
                         tally->mdfne, tally->cycle_bins);
436
-      read_quality_counts(&bytes, row, tally->high_quality, tally->mean_quality,
437
-                          param.high_base_quality);
377
+      read_nm_counts(&bytes, row, tally->count_high_nm, param.high_nm_score);
438 378
       if (param.xs) {
439
-        read_xs_counts(&bytes, row, tally->count_xs_plus, tally->count_xs_minus);
379
+        read_xs_counts(&bytes, row, tally->count_xs_plus,
380
+                       tally->count_xs_minus);
440 381
       }
441
-
442 382
     } else {
443 383
       tally->read_pos_mean[row] = R_NaN;
444 384
       tally->read_pos_var[row] = NA_REAL;
... ...
@@ -446,27 +386,19 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
446 386
       if (param.xs) {
447 387
         tally->count_xs_plus[row] = 0;
448 388
         tally->count_xs_minus[row] = 0;
449
-      
450 389
       }
451 390
     }
452
-    have_ref_row = true;
453
-  }
454
-  int high_quality_total = 0;
455
-  for (int r = ref_row; r < row; r++) {
456
-    high_quality_total += tally->high_quality[r];
457 391
   }
458 392
   for (int r = ref_row; r < row; r++) {
459 393
     tally->n_cycles_ref[r] = tally->n_cycles[ref_row];
460 394
     tally->count_total[r] = tally->count_total[ref_row];
461
-    tally->mean_quality_ref[r] = tally->mean_quality[ref_row];
462
-    tally->high_quality_ref[r] = tally->high_quality[ref_row];
463
-    tally->high_quality_total[r] = high_quality_total;
464 395
     tally->count_plus_ref[r] = tally->count_plus[ref_row];
465 396
     tally->count_minus_ref[r] = tally->count_minus[ref_row];
466 397
     tally->count_ref[r] = tally->count_plus_ref[r] + tally->count_minus_ref[r];
467 398
     tally->read_pos_mean_ref[r] = tally->read_pos_mean[ref_row];
468 399
     tally->read_pos_var_ref[r] = tally->read_pos_var[ref_row];
469 400
     tally->mdfne_ref[r] = tally->mdfne[ref_row];
401
+    tally->count_high_nm_ref[r] = tally->count_high_nm[ref_row];
470 402
     if (param.xs) {
471 403
       tally->count_xs_plus_ref[r] = tally->count_xs_plus[ref_row];
472 404
       tally->count_xs_minus_ref[r] = tally->count_xs_minus[ref_row];
... ...
@@ -474,12 +406,9 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
474 406
     SET_STRING_ELT(tally->ref_R, r, STRING_ELT(tally->read_R, ref_row));  
475 407
   }
476 408
 
477
-//  if (have_ref_row) {
478 409
     /* clear the 'alt' columns for the 'ref' row with NAs */
479 410
     SET_STRING_ELT(tally->read_R, ref_row, NA_STRING);
480 411
     tally->n_cycles[ref_row] = NA_INTEGER;
481
-    tally->mean_quality[ref_row] = NA_REAL;
482
-    tally->high_quality[ref_row] = NA_REAL;
483 412
     tally->count_plus[ref_row] = NA_INTEGER;
484 413
     tally->count_minus[ref_row] = NA_INTEGER;
485 414
     tally->count[ref_row] = NA_INTEGER;
... ...
@@ -491,7 +420,6 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
491 420
       tally->count_xs_minus[ref_row] = NA_INTEGER;
492 421
     }
493 422
     
494
-// }
495 423
   return n_alleles;
496 424
 }
497 425
     
... ...
@@ -504,11 +432,9 @@ static int parse_allele_count(unsigned char *bytes) {
504 432
   int n_alleles = 1; /* always have a reference */
505 433
   bytes += sizeof(int) * 4 + 1; /* skip total and reference */
506 434
   while(bytes[0] != '\0') {
507
-#ifdef DEBUG2
508
-    printf("Found allele %s\n", &bytes[0]);
509
-#endif
510
-    bytes += sizeof(int) * 2 + 1;
511
-    n_alleles++;
435
+      if (bytes[0] != '_')
436
+          n_alleles++;
437
+      bytes += sizeof(int) * 2 + 1;
512 438
   }
513 439
   return n_alleles;
514 440
 }
... ...
@@ -518,9 +444,6 @@ static int parse_codon_count(unsigned char *bytes) {
518 444
   int n_alleles = 1; /* always have a reference */
519 445
   bytes += sizeof(int) * 4 + 1; /* skip reference */
520 446
   while((int) bytes[0] != 255 ){ // && bytes [0] != '\0') {
521
-#ifdef DEBUG2
522
-    printf("Found codon %d\n", bytes[0]);
523
-#endif
524 447
     bytes += sizeof(int) * 2 + 1;
525 448
     n_alleles++;
526 449
   }
... ...
@@ -571,38 +494,12 @@ static int parse_interval(IIT_T tally_iit, int index,
571 494
   SEXP divstring_R;
572 495
   PROTECT(divstring_R = mkChar(IIT_divstring_from_index(tally_iit, index)));
573 496
   for (int position = 0; position < width; position++) {
574
-#ifdef DEBUG2
575
-    printf("Reading insertion offset: ");
576
-#endif
577 497
     int insertion_offset = read_int(&bytes);
578
-#ifdef DEBUG2
579
-    printf("%d\n", insertion_offset);
580
-    printf("Reading deletion offset: ");
581
-#endif
582 498
     int deletion_offset = read_int(&bytes);
583
-#ifdef DEBUG2
584
-    printf("%d\n", deletion_offset);
585
-    printf("Reading allele offset: ");
586
-#endif
587 499
     int allele_offset = read_int(&bytes);
588
-#ifdef DEBUG2
589
-    printf("%d\n", allele_offset);
590
-    printf("Reading codon plus offset: ");
591
-#endif
592 500
     int codon_plus_offset = read_int(&bytes);
593
-#ifdef DEBUG2
594
-    printf("%d\n", codon_plus_offset);
595
-    printf("Reading codon_minus offset: ");
596
-#endif
597 501
     int codon_minus_offset = read_int(&bytes);
598
-#ifdef DEBUG2
599
-    printf("%d\n", codon_minus_offset);
600
-    printf("Reading 'next' offset: ");
601
-#endif
602 502
     int next_offset = read_int(&bytes);
603
-#ifdef DEBUG2
604
-    printf("%d\n", next_offset);
605
-#endif
606 503
     int ref_row = row;
607 504
     bytes -= 4; /* rewind from read-ahead */
608 505
     if (codon_plus_offset - allele_offset > 0)
... ...
@@ -616,12 +513,10 @@ static int parse_interval(IIT_T tally_iit, int index,
616 513
                           param, tally, false);
617 514
 
618 515
     if (codon_minus_offset - codon_plus_offset > 0) {
619
-//      ref_row = row;
620 516
       row += parse_alleles(base + codon_plus_offset, row, row,
621 517
                            param, tally, CODON_PLUS);
622 518
     }
623 519
     if (next_offset - codon_minus_offset > 0) { 
624
-//      ref_row = row;
625 520
       row += parse_alleles(base + codon_minus_offset, row, row,
626 521
                            param, tally, CODON_MINUS);
627 522
     }
... ...
@@ -640,17 +535,9 @@ static SEXP parse_all(IIT_T tally_iit, TallyParam param)
640 535
   int n_rows = 0;
641 536
   /* loop over the IIT, getting the total number of rows
642 537
      this is (num alts + 1) for every position. */
643
-#ifdef DEBUG2
644
-  printf("Total number of intervals: %d\n", IIT_total_nintervals(tally_iit));
645
-#endif
646 538
   for (int index = 1; index <= IIT_total_nintervals(tally_iit); index++) {
647 539
     n_rows += count_rows_for_interval(tally_iit, index);
648 540
   }
649
-
650
-#ifdef DEBUG2
651
-  printf("Total number of rows: %d\n", n_rows);
652
-#endif
653
-
654 541
   
655 542
   SEXP tally_R;
656 543
   PROTECT(tally_R = R_TallyTable_new(n_rows, param.n_cycle_bins, param.xs));
... ...
@@ -699,26 +586,27 @@ static SEXP parse_some(IIT_T tally_iit, TallyParam param,
699 586
 /* FORMAT
700 587
 
701 588
    block header:
702
-   [0][ins][del][mm]
589
+   [0][ins][del][mm][codon+][codon-]
703 590
 
704
-   mismatches for each position:
705
-   [t+][t-][rn][r+][r-]([an][a+][a-])*[\0]([c#]([cv][cc])*)*([q#]([qv][qc])*)*
591
+   mismatches/codons:
592
+   [t+][t-][rn][r+][r-]([an][a+][a-])*
593
+   mismatches only:
594
+   '_'[d+][d-][\0]([c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*)*
706 595
 
707 596
    insertions:
708
-   [i#]([seq][t+][t-][c#]([cv][cc])*)*
597
+   [i#]([t+][t-][r+][r-][seq][c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*
709 598
 
710 599
    deletions:
711
-   [d#]([seq][t+][t-][c#]([cv][cc])*)*
600
+   [d#]([t+][t-][r+][r-][seq][c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*
712 601
 */
713 602
 
714 603
 SEXP R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R,
715
-                       SEXP high_base_quality_R, SEXP which_R,
716
-                       SEXP read_length_R, SEXP xs_R)
604
+                       SEXP which_R,
605
+                       SEXP read_length_R, SEXP xs_R, SEXP high_nm_score_R)
717 606
 {
718 607
   IIT_T tally_iit = (IIT_T) R_ExternalPtrAddr(tally_iit_R);
719 608
   SEXP tally_R;
720 609
   TallyParam param;
721
-  param.high_base_quality = asInteger(high_base_quality_R);
722 610
   param.cycle_breaks =
723 611
     cycle_breaks_R == R_NilValue ? NULL : INTEGER(cycle_breaks_R);
724 612
   param.n_cycle_bins =
... ...
@@ -730,6 +618,7 @@ SEXP R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R,
730 618
     param.mdfne_buf = NULL;
731 619
   }
732 620
   param.xs = asLogical(xs_R);
621
+  param.high_nm_score = asInteger(high_nm_score_R);
733 622
   
734 623
   if (which_R == R_NilValue) {
735 624
     tally_R = parse_all(tally_iit, param);