Browse code

add raw total count statistic to tallies (everything else now qual filtered)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110891 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 24/11/2015 22:19:59
Showing8 changed files

... ...
@@ -9,7 +9,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read
9 9
         methods to work with GMAP and GSNAP from within R. In addition,
10 10
         it provides methods to tally alignment results on a
11 11
         per-nucleotide basis using the bam_tally tool.
12
-Version: 1.13.4
12
+Version: 1.13.5
13 13
 Depends: R (>= 2.15.0), methods, GenomeInfoDb (>= 1.1.3),
14 14
         GenomicRanges (>= 1.17.12)
15 15
 Imports: S4Vectors, IRanges, Rsamtools (>= 1.17.8), rtracklayer (>= 1.31.2),
... ...
@@ -110,7 +110,7 @@ variantSummary <- function(x, read_pos_breaks = NULL,
110 110
   
111 111
   tally_names <- c("seqnames", "pos", "ref", "alt",
112 112
                    "n.read.pos", "n.read.pos.ref",
113
-                   "count", "count.ref", "count.total",
113
+                   "count", "count.ref", "raw.count.total", "count.total",
114 114
                    "count.plus", "count.plus.ref",
115 115
                    "count.minus", "count.minus.ref",
116 116
                    "count.del.plus", "count.del.minus",
... ...
@@ -280,6 +280,7 @@ variantSummaryColumnDescriptions <-
280 280
   desc <- c(
281 281
     n.read.pos = "Number of unique read positions for the ALT",
282 282
     n.read.pos.ref = "Number of unique read positions for the REF",
283
+    raw.count.total = "Total depth, before quality filtering",  
283 284
     count.plus = "Positive strand ALT count",
284 285
     count.plus.ref = "Positive strand REF count",
285 286
     count.minus = "Negative strand ALT count",
... ...
@@ -1,4 +1,4 @@
1
-static char rcsid[] = "$Id: bamread.c 163197 2015-04-13 21:57:42Z twu $";
1
+static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
... ...
@@ -644,6 +644,7 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr
644 644
 struct Bamline_T {
645 645
   char *acc;
646 646
   unsigned int flag;
647
+  int hiti;
647 648
   int nhits;
648 649
   bool good_unique_p;			/* Good above good_unique_mapq.  Dependent on second_mapq. */
649 650
   int mapq;
... ...
@@ -746,6 +747,11 @@ Bamline_firstend_p (Bamline_T this) {
746 747
   }
747 748
 }
748 749
 
750
+int
751
+Bamline_hiti (Bamline_T this) {
752
+  return this->hiti;
753
+}
754
+
749 755
 int
750 756
 Bamline_nhits (Bamline_T this) {
751 757
   return this->nhits;
... ...
@@ -1331,6 +1337,22 @@ Bamline_splice_strand (Bamline_T this) {
1331 1337
 }
1332 1338
 
1333 1339
 
1340
+static int
1341
+aux_hiti (T this) {
1342
+#ifndef HAVE_SAMTOOLS_LIB
1343
+  return 1;
1344
+#else
1345
+  uint8_t *s;
1346
+
1347
+  s = bam_aux_get(this->bam,"HI");
1348
+  if (s == NULL) {
1349
+    return 1;
1350
+  } else {
1351
+    return bam_aux2i(s);
1352
+  }
1353
+#endif
1354
+}
1355
+
1334 1356
 static int
1335 1357
 aux_nhits (T this) {
1336 1358
 #ifndef HAVE_SAMTOOLS_LIB
... ...
@@ -1820,7 +1842,7 @@ Bamline_free (Bamline_T *old) {
1820 1842
 
1821 1843
 
1822 1844
 static Bamline_T
1823
-Bamline_new (char *acc, unsigned int flag, int nhits, bool good_unique_p, int mapq,
1845
+Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique_p, int mapq,
1824 1846
 	     int nm, char splice_strand, char *chr, Genomicpos_T chrpos_low,
1825 1847
 	     char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length,
1826 1848
 	     Intlist_T cigar_types, Uintlist_T cigar_npositions, int cigar_querylength, int readlength,
... ...
@@ -1832,6 +1854,7 @@ Bamline_new (char *acc, unsigned int flag, int nhits, bool good_unique_p, int ma
1832 1854
   strcpy(new->acc,acc);
1833 1855
 
1834 1856
   new->flag = flag;
1857
+  new->hiti = hiti;
1835 1858
   new->nhits = nhits;
1836 1859
   new->good_unique_p = good_unique_p;
1837 1860
 
... ...
@@ -1919,7 +1942,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
1919 1942
   char *acc, *chr, *mate_chr, splice_strand;
1920 1943
   int nm;
1921 1944
   unsigned int flag;
1922
-  int nhits;
1945
+  int hiti, nhits;
1923 1946
   bool good_unique_p;
1924 1947
   int mapq;
1925 1948
   Genomicpos_T chrpos_low, mate_chrpos_low;
... ...
@@ -1983,10 +2006,11 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
1983 2006
 	FREE(read);
1984 2007
       } else {
1985 2008
 	debug1(fprintf(stderr,"Success\n"));
2009
+	hiti = aux_hiti(this);
1986 2010
 	nm = aux_nm(this);
1987 2011
 	splice_strand = aux_splice_strand(this);
1988 2012
 	good_unique_p = aux_good_unique_p(this,good_unique_mapq);
1989
-	return Bamline_new(acc,flag,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2013
+	return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
1990 2014
 			   mate_chr,mate_chrpos_low,insert_length,
1991 2015
 			   cigar_types,cigarlengths,cigar_querylength,
1992 2016
 			   readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
... ...
@@ -2009,10 +2033,11 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
2009 2033
 	  (need_unique_p == false || nhits == 1) &&
2010 2034
 	  (need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
2011 2035
 	  (need_concordant_p == false || concordantp(flag) == true)) {
2036
+	hiti = aux_hiti(this);
2012 2037
 	nm = aux_nm(this);
2013 2038
 	splice_strand = aux_splice_strand(this);
2014 2039
 	good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2015
-	return Bamline_new(acc,flag,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2040
+	return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2016 2041
 			   mate_chr,mate_chrpos_low,insert_length,
2017 2042
 			   cigar_types,cigarlengths,cigar_querylength,
2018 2043
 			   readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
... ...
@@ -2037,7 +2062,7 @@ Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int m
2037 2062
   char *acc, *chr, *mate_chr, splice_strand;
2038 2063
   int nm;
2039 2064
   unsigned int flag;
2040
-  int nhits;
2065
+  int hiti, nhits;
2041 2066
   bool good_unique_p;
2042 2067
   int mapq;
2043 2068
   Genomicpos_T chrpos_low, mate_chrpos_low;
... ...
@@ -2104,10 +2129,11 @@ Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int m
2104 2129
 	  FREE(read);
2105 2130
 	} else {
2106 2131
 	  debug1(fprintf(stderr,"Success\n"));
2132
+	  hiti = aux_hiti(this);
2107 2133
 	  nm = aux_nm(this);
2108 2134
 	  splice_strand = aux_splice_strand(this);
2109 2135
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2110
-	  return Bamline_new(acc,flag,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2136
+	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2111 2137
 			     mate_chr,mate_chrpos_low,insert_length,
2112 2138
 			     cigar_types,cigarlengths,cigar_querylength,
2113 2139
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
... ...
@@ -2134,10 +2160,11 @@ Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int m
2134 2160
 	    (need_unique_p == false || nhits == 1) &&
2135 2161
 	    (need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
2136 2162
 	    (need_concordant_p == false || concordantp(flag) == true)) {
2163
+	  hiti = aux_hiti(this);
2137 2164
 	  nm = aux_nm(this);
2138 2165
 	  splice_strand = aux_splice_strand(this);
2139 2166
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2140
-	  return Bamline_new(acc,flag,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2167
+	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2141 2168
 			     mate_chr,mate_chrpos_low,insert_length,
2142 2169
 			     cigar_types,cigarlengths,cigar_querylength,
2143 2170
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
... ...
@@ -2166,7 +2193,7 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq,
2166 2193
   char *acc, *chr, *mate_chr, splice_strand;
2167 2194
   int nm;
2168 2195
   unsigned int flag;
2169
-  int nhits;
2196
+  int hiti, nhits;
2170 2197
   bool good_unique_p;
2171 2198
   int mapq;
2172 2199
   Genomicpos_T chrpos_low, mate_chrpos_low;
... ...
@@ -2233,10 +2260,11 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq,
2233 2260
 	  FREE(read);
2234 2261
 	} else {
2235 2262
 	  debug1(fprintf(stderr,"Success\n"));
2263
+	  hiti = aux_hiti(this);
2236 2264
 	  nm = aux_nm(this);
2237 2265
 	  splice_strand = aux_splice_strand(this);
2238 2266
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2239
-	  return Bamline_new(acc,flag,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2267
+	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2240 2268
 			     mate_chr,mate_chrpos_low,insert_length,
2241 2269
 			     cigar_types,cigarlengths,cigar_querylength,
2242 2270
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
... ...
@@ -2264,10 +2292,11 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq,
2264 2292
 	    (need_unique_p == false || nhits == 1) &&
2265 2293
 	    (need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
2266 2294
 	    (need_concordant_p == false || concordantp(flag) == true)) {
2295
+	  hiti = aux_hiti(this);
2267 2296
 	  nm = aux_nm(this);
2268 2297
 	  splice_strand = aux_splice_strand(this);
2269 2298
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2270
-	  return Bamline_new(acc,flag,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2299
+	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2271 2300
 			     mate_chr,mate_chrpos_low,insert_length,
2272 2301
 			     cigar_types,cigarlengths,cigar_querylength,
2273 2302
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
... ...
@@ -2449,7 +2478,7 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d
2449 2478
   char *acc, *chr, *mate_chr, splice_strand;
2450 2479
   int nm;
2451 2480
   unsigned int flag;
2452
-  int nhits;
2481
+  int hiti, nhits;
2453 2482
   int mapq;
2454 2483
   Genomicpos_T chrpos_low, mate_chrpos_low;
2455 2484
   int insert_length;
... ...
@@ -2471,11 +2500,12 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d
2471 2500
 	       &cigar_types,&cigarlengths,&cigar_querylength,
2472 2501
 	       &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
2473 2502
 	       &terminalp);
2503
+    hiti = aux_hiti(this);
2474 2504
     nm = aux_nm(this);
2475 2505
     splice_strand = aux_splice_strand(this);
2476 2506
     if (!strcmp(acc,desired_acc) && chrpos_low == desired_chrpos) {
2477 2507
       Bamread_unlimit_region(this);
2478
-      return Bamline_new(acc,flag,nhits,/*good_unique_p*/true,mapq,nm,splice_strand,chr,chrpos_low,
2508
+      return Bamline_new(acc,flag,hiti,nhits,/*good_unique_p*/true,mapq,nm,splice_strand,chr,chrpos_low,
2479 2509
 			 mate_chr,mate_chrpos_low,insert_length,
2480 2510
 			 cigar_types,cigarlengths,cigar_querylength,
2481 2511
 			 readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
... ...
@@ -2537,7 +2567,7 @@ Bamstore_new (Genomicpos_T chrpos) {
2537 2567
 
2538 2568
 Bamline_T
2539 2569
 Bamstore_get (Table_T bamstore_chrtable, char *chr, Genomicpos_T low, char *acc,
2540
-	      Genomicpos_T mate_low) {
2570
+	      Genomicpos_T mate_low, int hiti) {
2541 2571
   List_T p, list = NULL;
2542 2572
   Bamline_T wanted = NULL, bamline;
2543 2573
   Bamstore_T bamstore;
... ...
@@ -2560,7 +2590,7 @@ Bamstore_get (Table_T bamstore_chrtable, char *chr, Genomicpos_T low, char *acc,
2560 2590
   } else {
2561 2591
     for (p = bamstore->bamlines; p != NULL; p = List_next(p)) {
2562 2592
       bamline = (Bamline_T) List_head(p);
2563
-      if (strcmp(Bamline_acc(bamline),acc) == 0 && Bamline_mate_chrpos_low(bamline) == mate_low) {
2593
+      if (strcmp(Bamline_acc(bamline),acc) == 0 && Bamline_mate_chrpos_low(bamline) == mate_low && bamline->hiti == hiti) {
2564 2594
 	wanted = bamline;
2565 2595
       } else {
2566 2596
 	list = List_push(list,(void *) bamline);
... ...
@@ -2637,7 +2667,10 @@ struct Bampair_T {
2637 2667
   Bamline_T bamline_high;
2638 2668
   Genomicpos_T chrpos_low;
2639 2669
   Genomicpos_T chrpos_high;
2670
+  Genomicpos_T chrpos_low_noclip;
2671
+  Genomicpos_T chrpos_high_noclip;
2640 2672
   int level;
2673
+  bool plusp;			/* Based on first read */
2641 2674
 };
2642 2675
 
2643 2676
 char *
... ...
@@ -2671,11 +2704,26 @@ Bampair_chrpos_high (Bampair_T this) {
2671 2704
   return this->chrpos_high;
2672 2705
 }
2673 2706
 
2707
+Genomicpos_T
2708
+Bampair_chrpos_low_noclip (Bampair_T this) {
2709
+  return this->chrpos_low_noclip;
2710
+}
2711
+
2712
+Genomicpos_T
2713
+Bampair_chrpos_high_noclip (Bampair_T this) {
2714
+  return this->chrpos_high_noclip;
2715
+}
2716
+
2674 2717
 int
2675 2718
 Bampair_level (Bampair_T this) {
2676 2719
   return this->level;
2677 2720
 }
2678 2721
 
2722
+bool
2723
+Bampair_plusp (Bampair_T this) {
2724
+  return this->plusp;
2725
+}
2726
+
2679 2727
 bool
2680 2728
 Bampair_good_unique_p (Bampair_T this) {
2681 2729
   if (this->bamline_low != NULL && this->bamline_low->good_unique_p == false) {
... ...
@@ -2720,14 +2768,77 @@ Bampair_new (Bamline_T bamline_low, Bamline_T bamline_high) {
2720 2768
   if (bamline_low == NULL) {
2721 2769
     new->chrpos_low = bamline_high->chrpos_low;
2722 2770
     new->chrpos_high = Bamline_chrpos_high(bamline_high);
2771
+    new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_high);
2772
+    new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_high);
2773
+
2774
+    if (Bamline_firstend_p(bamline_high) == true) {
2775
+      if (bamline_high->flag & QUERY_MINUSP) {
2776
+	new->plusp = false;
2777
+      } else {
2778
+	new->plusp = true;
2779
+      }
2780
+    } else {
2781
+      if (bamline_high->flag & QUERY_MINUSP) {
2782
+	new->plusp = true;
2783
+      } else {
2784
+	new->plusp = false;
2785
+      }
2786
+    }
2787
+
2723 2788
   } else if (bamline_high == NULL) {
2724 2789
     new->chrpos_low = bamline_low->chrpos_low;
2725 2790
     new->chrpos_high = Bamline_chrpos_high(bamline_low);
2791
+    new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_low);
2792
+    new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_low);
2793
+
2794
+    if (Bamline_firstend_p(bamline_low) == true) {
2795
+      if (bamline_low->flag & QUERY_MINUSP) {
2796
+	new->plusp = false;
2797
+      } else {
2798
+	new->plusp = true;
2799
+      }
2800
+    } else {
2801
+      if (bamline_low->flag & QUERY_MINUSP) {
2802
+	new->plusp = true;
2803
+      } else {
2804
+	new->plusp = false;
2805
+      }
2806
+    }
2807
+
2726 2808
   } else {
2727 2809
     new->chrpos_low = bamline_low->chrpos_low;
2728 2810
     new->chrpos_high = Bamline_chrpos_high(bamline_high);
2811
+    new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_low);
2812
+    new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_high);
2813
+
2814
+    if (Bamline_firstend_p(bamline_low) == true && Bamline_firstend_p(bamline_high) == false) {
2815
+      if (bamline_low->flag & QUERY_MINUSP) {
2816
+	new->plusp = false;
2817
+      } else {
2818
+	new->plusp = true;
2819
+      }
2820
+
2821
+    } else if (Bamline_firstend_p(bamline_low) == false && Bamline_firstend_p(bamline_high) == true) {
2822
+      if (bamline_high->flag & QUERY_MINUSP) {
2823
+	new->plusp = false;
2824
+      } else {
2825
+	new->plusp = true;
2826
+      }
2827
+
2828
+    } else if (Bamline_firstend_p(bamline_low) == true && Bamline_firstend_p(bamline_high) == true) {
2829
+      fprintf(stderr,"For bampair %s, both ends are first ends.  Flags are %d and %d\n",
2830
+	      bamline_low->acc,bamline_low->flag,bamline_high->flag);
2831
+      new->plusp = false;
2832
+
2833
+    } else {
2834
+      fprintf(stderr,"For bampair %s , both ends are second ends.  Flags are %d and %d\n",
2835
+	      bamline_low->acc,bamline_low->flag,bamline_high->flag);
2836
+      new->plusp = false;
2837
+    }
2729 2838
   }
2839
+
2730 2840
   new->level = -1;
2841
+
2731 2842
   return new;
2732 2843
 }
2733 2844
 
... ...
@@ -2756,31 +2867,92 @@ Bampair_print (FILE *fp, Bampair_T this, int quality_score_adj) {
2756 2867
 
2757 2868
 
2758 2869
 void
2759
-Bampair_details (Uintlist_T *chrpos_lows, Uintlist_T *chrpos_highs,
2870
+Bampair_details (Uintlist_T *chrpos_first_lows, Uintlist_T *chrpos_first_highs,
2871
+		 Uintlist_T *chrpos_second_lows, Uintlist_T *chrpos_second_highs,
2872
+		 Uintlist_T *chrpos_overlap_lows, Uintlist_T *chrpos_overlap_highs,
2760 2873
 		 Uintlist_T *splice_lows, Uintlist_T *splice_highs, Intlist_T *splice_signs,
2761 2874
 		 Bampair_T this) {
2762
-  *chrpos_lows = (Uintlist_T) NULL;
2763
-  *chrpos_highs = (Uintlist_T) NULL;
2875
+  Uintlist_T p1, p2, q1, q2;
2876
+  Chrpos_T low1, high1, low2, high2;
2877
+
2878
+  *chrpos_first_lows = (Uintlist_T) NULL;
2879
+  *chrpos_first_highs = (Uintlist_T) NULL;
2880
+  *chrpos_second_lows = (Uintlist_T) NULL;
2881
+  *chrpos_second_highs = (Uintlist_T) NULL;
2882
+  *chrpos_overlap_lows = (Uintlist_T) NULL;
2883
+  *chrpos_overlap_highs = (Uintlist_T) NULL;
2764 2884
   *splice_lows = (Uintlist_T) NULL;
2765 2885
   *splice_highs = (Uintlist_T) NULL;
2766 2886
   *splice_signs = (Intlist_T) NULL;
2767 2887
 
2768 2888
   if (this->bamline_low != NULL) {
2769
-    Bamline_regions(&(*chrpos_lows),&(*chrpos_highs),this->bamline_low);
2889
+    if (1 || Bamline_firstend_p(this->bamline_low) == true) {
2890
+      Bamline_regions(&(*chrpos_first_lows),&(*chrpos_first_highs),this->bamline_low);
2891
+    } else {
2892
+      Bamline_regions(&(*chrpos_second_lows),&(*chrpos_second_highs),this->bamline_low);
2893
+    }
2770 2894
     Bamline_splices(&(*splice_lows),&(*splice_highs),&(*splice_signs),this->bamline_low);
2771 2895
   }
2772 2896
 
2773 2897
   if (this->bamline_high != NULL) {
2774
-    Bamline_regions(&(*chrpos_lows),&(*chrpos_highs),this->bamline_high);
2898
+    if (0 && Bamline_firstend_p(this->bamline_high) == true) {
2899
+      Bamline_regions(&(*chrpos_first_lows),&(*chrpos_first_highs),this->bamline_high);
2900
+    } else {
2901
+      Bamline_regions(&(*chrpos_second_lows),&(*chrpos_second_highs),this->bamline_high);
2902
+    }
2775 2903
     Bamline_splices(&(*splice_lows),&(*splice_highs),&(*splice_signs),this->bamline_high);
2776 2904
   }
2777 2905
 
2778
-  *chrpos_lows = Uintlist_reverse(*chrpos_lows);
2779
-  *chrpos_highs = Uintlist_reverse(*chrpos_highs);
2906
+  *chrpos_first_lows = Uintlist_reverse(*chrpos_first_lows);
2907
+  *chrpos_first_highs = Uintlist_reverse(*chrpos_first_highs);
2908
+  *chrpos_second_lows = Uintlist_reverse(*chrpos_second_lows);
2909
+  *chrpos_second_highs = Uintlist_reverse(*chrpos_second_highs);
2780 2910
   *splice_lows = Uintlist_reverse(*splice_lows);
2781 2911
   *splice_highs = Uintlist_reverse(*splice_highs);
2782 2912
   *splice_signs = Intlist_reverse(*splice_signs);
2783 2913
 
2914
+  if (this->bamline_low != NULL && this->bamline_high != NULL) {
2915
+    p1 = *chrpos_first_lows;
2916
+    q1 = *chrpos_first_highs;
2917
+    p2 = *chrpos_second_lows;
2918
+    q2 = *chrpos_second_highs;
2919
+
2920
+    while (p1 != NULL && p2 != NULL) {
2921
+      low1 = Uintlist_head(p1);
2922
+      high1 = Uintlist_head(q1);
2923
+      low2 = Uintlist_head(p2);
2924
+      high2 = Uintlist_head(q2);
2925
+
2926
+      if (low2 >= high1) {
2927
+	p1 = Uintlist_next(p1);	q1 = Uintlist_next(q1); /* Advance first read */
2928
+      } else if (low1 >= high2) {
2929
+	p2 = Uintlist_next(p2);	q2 = Uintlist_next(q2); /* Advance second read */
2930
+      } else if (low1 <= low2) {
2931
+	*chrpos_overlap_lows = Uintlist_push(*chrpos_overlap_lows,low2);
2932
+	if (high2 <= high1) {
2933
+	  *chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high2);
2934
+	  p2 = Uintlist_next(p2); q2 = Uintlist_next(q2); /* Advance second read */
2935
+	} else {
2936
+	  *chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high1);
2937
+	  p1 = Uintlist_next(p1); q1 = Uintlist_next(q1); /* Advance first read */
2938
+	}
2939
+      } else {
2940
+	*chrpos_overlap_lows = Uintlist_push(*chrpos_overlap_lows,low1);
2941
+	if (high1 <= high2) {
2942
+	  *chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high1);
2943
+	  p1 = Uintlist_next(p1); q1 = Uintlist_next(q1); /* Advance first read */
2944
+	} else {
2945
+	  *chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high2);
2946
+	  p2 = Uintlist_next(p2); q2 = Uintlist_next(q2); /* Advance second read */
2947
+	}
2948
+      }
2949
+    }
2950
+
2951
+    *chrpos_overlap_lows = Uintlist_reverse(*chrpos_overlap_lows);
2952
+    *chrpos_overlap_highs = Uintlist_reverse(*chrpos_overlap_highs);
2953
+  }
2954
+
2955
+
2784 2956
   return;
2785 2957
 }
2786 2958
 
... ...
@@ -2822,7 +2994,7 @@ Bamread_all_pairs (T bamreader, char *desired_read_group, int minimum_mapq, int
2822 2994
     } else {
2823 2995
       /* This is the high end */
2824 2996
       bamline_low = Bamstore_get(bamstore_chrtable,Bamline_chr(bamline),Bamline_mate_chrpos_low(bamline),
2825
-				 Bamline_acc(bamline),Bamline_chrpos_low(bamline));
2997
+				 Bamline_acc(bamline),Bamline_chrpos_low(bamline),bamline->hiti);
2826 2998
       if (bamline_low == NULL) {
2827 2999
 #if 0
2828 3000
 	fprintf(stderr,"Hmm...low end not found for %s at %s:%u\n",
... ...
@@ -1,4 +1,4 @@
1
-/* $Id: bamread.h 160725 2015-03-11 16:45:17Z twu $ */
1
+/* $Id: bamread.h 178960 2015-11-16 19:52:26Z twu $ */
2 2
 #ifndef BAMREAD_INCLUDED
3 3
 #define BAMREAD_INCLUDED
4 4
 /* Cannot use bool, since it appears to conflict with samtools */
... ...
@@ -61,6 +61,8 @@ Bamline_paired_read_p (Bamline_T this);
61 61
 extern int
62 62
 Bamline_firstend_p (Bamline_T this);
63 63
 extern int
64
+Bamline_hiti (Bamline_T this);
65
+extern int
64 66
 Bamline_nhits (Bamline_T this);
65 67
 extern bool
66 68
 Bamline_good_unique_p (Bamline_T this);
... ...
@@ -169,7 +171,7 @@ Bamstore_new (Genomicpos_T chrpos);
169 171
 
170 172
 extern Bamline_T
171 173
 Bamstore_get (Table_T bamstore_chrtable, char *chr, Genomicpos_T low, char *acc,
172
-	      Genomicpos_T mate_low);
174
+	      Genomicpos_T mate_low, int hiti);
173 175
 
174 176
 extern void
175 177
 Bamstore_add_at_low (Table_T bamstore_chrtable, char *chr, Genomicpos_T low,
... ...
@@ -191,9 +193,16 @@ extern Genomicpos_T
191 193
 Bampair_chrpos_low (Bampair_T this);
192 194
 extern Genomicpos_T
193 195
 Bampair_chrpos_high (Bampair_T this);
196
+extern Genomicpos_T
197
+Bampair_chrpos_low_noclip (Bampair_T this);
198
+extern Genomicpos_T
199
+Bampair_chrpos_high_noclip (Bampair_T this);
200
+
194 201
 extern int
195 202
 Bampair_level (Bampair_T this);
196 203
 extern bool
204
+Bampair_plusp (Bampair_T this);
205
+extern bool
197 206
 Bampair_good_unique_p (Bampair_T this);
198 207
 extern bool
199 208
 Bampair_uniquep (Bampair_T this);
... ...
@@ -205,7 +214,9 @@ Bampair_free (Bampair_T *old);
205 214
 extern void
206 215
 Bampair_print (FILE *fp, Bampair_T this, int quality_score_adj);
207 216
 extern void
208
-Bampair_details (Uintlist_T *chrpos_lows, Uintlist_T *chrpos_highs,
217
+Bampair_details (Uintlist_T *chrpos_first_lows, Uintlist_T *chrpos_first_highs,
218
+		 Uintlist_T *chrpos_second_lows, Uintlist_T *chrpos_second_highs,
219
+		 Uintlist_T *chrpos_overlap_lows, Uintlist_T *chrpos_overlap_highs,
209 220
 		 Uintlist_T *splice_lows, Uintlist_T *splice_highs, Intlist_T *splice_signs,
210 221
 		 Bampair_T this);
211 222
 
... ...
@@ -1,4 +1,4 @@
1
-static char rcsid[] = "$Id: bamtally.c 178481 2015-11-09 20:55:28Z twu $";
1
+static char rcsid[] = "$Id: bamtally.c 179050 2015-11-17 21:30:52Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
... ...
@@ -837,14 +837,14 @@ compute_probs_array (double *probs, double *loglik, char refnt,
837 837
 
838 838
 
839 839
 static bool
840
-pass_variant_filter_p (long int nmatches, long int delcounts_plus, long int delcounts_minus,
840
+pass_variant_filter_p (long int nmatches_passing, long int delcounts_plus, long int delcounts_minus,
841 841
 		       List_T mismatches_by_shift, int min_depth, int variant_strands) {
842 842
   long int depth;
843 843
   bool plus_strand_p[4], minus_strand_p[4];
844 844
   List_T ptr;
845 845
   Mismatch_T mismatch;
846 846
 
847
-  depth = nmatches + delcounts_plus + delcounts_minus;
847
+  depth = nmatches_passing + delcounts_plus + delcounts_minus;
848 848
   ptr = mismatches_by_shift;
849 849
   while (ptr != NULL) {
850 850
     mismatch = (Mismatch_T) List_head(ptr);
... ...
@@ -982,10 +982,10 @@ print_allele_counts_simple (FILE *fp, Tally_T this, Genome_T genome, Genomicpos_
982 982
     fprintf(fp,"\n");
983 983
   }
984 984
 
985
-  if (this->nmatches + this->delcounts_plus + this->delcounts_minus == 0) {
985
+  if (this->nmatches_passing + this->delcounts_plus + this->delcounts_minus == 0) {
986 986
     fprintf(fp,"%c0",Genome_get_char(genome,chroffset+chrpos-1U));
987 987
   } else {
988
-    fprintf(fp,"%c%d",this->refnt,this->nmatches);
988
+    fprintf(fp,"%c%d",this->refnt,this->nmatches_passing);
989 989
   }
990 990
 
991 991
   for (p = this->mismatches_byshift; p != NULL; p = List_next(p)) {
... ...
@@ -1012,7 +1012,7 @@ pass_difference_filter_p (double *probs, double *loglik, Tally_T this,
1012 1012
   } else {
1013 1013
 #if 0
1014 1014
     if (this->use_array_p == false) {
1015
-      bestg = compute_probs_list(probs,loglik,this->refnt,this->nmatches,
1015
+      bestg = compute_probs_list(probs,loglik,this->refnt,this->nmatches_passing,
1016 1016
 				 this->mismatches_byquality,quality_score_adj);
1017 1017
     } else {
1018 1018
       bestg = compute_probs_array(probs,loglik,this->refnt,this->matches_byquality,this->n_matches_byquality,
... ...
@@ -1097,7 +1097,7 @@ print_runlength (Tally_T *block_tallies, Genomicpos_T *exonstart, Genomicpos_T l
1097 1097
 
1098 1098
   for (blocki = 0; blocki < lasti; blocki++) {
1099 1099
     this = block_tallies[blocki];
1100
-    if (this->nmatches > 0 || this->delcounts_plus + this->delcounts_minus > 0 ||
1100
+    if (this->nmatches_passing > 0 || this->delcounts_plus + this->delcounts_minus > 0 ||
1101 1101
 	this->mismatches_byshift != NULL || this->insertions_byshift != NULL || this->deletions_byshift != NULL) {
1102 1102
       chrpos = blockstart + blocki;
1103 1103
       if (chrpos > lastpos + 1U) {
... ...
@@ -1227,7 +1227,7 @@ block_total (Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T block
1227 1227
   total = 0;
1228 1228
   for (blocki = 0; blocki < lasti; blocki++) {
1229 1229
     this = block_tallies[blocki];
1230
-    total += this->nmatches + this->delcounts_plus + this->delcounts_minus;
1230
+    total += this->nmatches_passing + this->delcounts_plus + this->delcounts_minus;
1231 1231
     for (ptr = this->mismatches_byshift; ptr != NULL; ptr = List_next(ptr)) {
1232 1232
       mismatch = (Mismatch_T) List_head(ptr);
1233 1233
       total += mismatch->count;
... ...
@@ -3022,12 +3022,12 @@ print_block (Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T block
3022 3022
   for (blocki = 0; blocki < lasti; blocki++) {
3023 3023
     this = block_tallies[blocki];
3024 3024
     if (print_noncovered_p == true ||
3025
-	(pass_variant_filter_p(this->nmatches,this->delcounts_plus,this->delcounts_minus,
3025
+	(pass_variant_filter_p(this->nmatches_passing,this->delcounts_plus,this->delcounts_minus,
3026 3026
 			       this->mismatches_byshift,min_depth,variant_strands) == true &&
3027 3027
 	 pass_difference_filter_p(probs,loglik,this,genome,
3028 3028
 				  printchr,chroffset,chrpos,
3029 3029
 				  quality_score_adj,genomic_diff_p) == true)) {
3030
-      total += this->nmatches + this->delcounts_plus + this->delcounts_minus;
3030
+      total += this->nmatches_passing + this->delcounts_plus + this->delcounts_minus;
3031 3031
       for (ptr = this->mismatches_byshift; ptr != NULL; ptr = List_next(ptr)) {
3032 3032
 	mismatch = (Mismatch_T) List_head(ptr);
3033 3033
 	total += mismatch->count;
... ...
@@ -3479,7 +3479,7 @@ print_block (Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T block
3479 3479
 
3480 3480
       /* Handle mismatches */
3481 3481
       if (print_noncovered_p == false && 
3482
-	  pass_variant_filter_p(this->nmatches,this->delcounts_plus,this->delcounts_minus,
3482
+	  pass_variant_filter_p(this->nmatches_passing,this->delcounts_plus,this->delcounts_minus,
3483 3483
 				this->mismatches_byshift,min_depth,variant_strands) == false) {
3484 3484
 	if (blockp == true) {
3485 3485
 	  printf("\n");
... ...
@@ -3500,11 +3500,13 @@ print_block (Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T block
3500 3500
 	}
3501 3501
 
3502 3502
 	if (signed_counts_p == false) {
3503
-	  total = this->nmatches + this->delcounts_plus + this->delcounts_minus;
3503
+	  total = 0;
3504 3504
 	  for (ptr = this->mismatches_byshift; ptr != NULL; ptr = List_next(ptr)) {
3505 3505
 	    mismatch = (Mismatch_T) List_head(ptr);
3506 3506
 	    total += mismatch->count;
3507 3507
 	  }
3508
+	  assert(total == this->nmismatches_passing);
3509
+	  total += this->nmatches_passing + this->delcounts_plus + this->delcounts_minus;
3508 3510
 	  if (print_totals_p == true) {
3509 3511
 	    printf("%ld\t",total);
3510 3512
 	  }
... ...
@@ -3528,7 +3530,7 @@ print_block (Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T block
3528 3530
 	      total_matches_minus += this->matches_byshift_minus[i];
3529 3531
 	    }
3530 3532
 	  }
3531
-	  assert(this->nmatches == total_matches_plus + total_matches_minus);
3533
+	  assert(this->nmatches_passing == total_matches_plus + total_matches_minus);
3532 3534
 
3533 3535
 	  total_mismatches_plus = total_mismatches_minus = 0;
3534 3536
 	  for (ptr = this->mismatches_byshift; ptr != NULL; ptr = List_next(ptr)) {
... ...
@@ -3539,6 +3541,7 @@ print_block (Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T block
3539 3541
 	      total_mismatches_minus += mismatch->count;
3540 3542
 	    }
3541 3543
 	  }
3544
+	  assert(this->nmismatches_passing == total_mismatches_plus + total_mismatches_minus);
3542 3545
 	  if (print_totals_p == true) {
3543 3546
 	    printf("%ld|%ld\t",
3544 3547
 		   total_matches_plus+this->delcounts_plus+total_mismatches_plus,
... ...
@@ -3551,16 +3554,20 @@ print_block (Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T block
3551 3554
 	if (readlevel_p == true || totals_only_p == true) {
3552 3555
 	  /* Don't print details */
3553 3556
 
3554
-	} else if (this->nmatches + this->delcounts_plus + this->delcounts_minus == 0) {
3557
+	} else if (this->nmatches_passing + this->delcounts_plus + this->delcounts_minus == 0) {
3555 3558
 	  /* Genome_T expects zero-based numbers */
3556 3559
 	  if (signed_counts_p == false) {
3557 3560
 	    printf("%c0",Genome_get_char(genome,chroffset+chrpos-1U));
3558 3561
 	  } else {
3559 3562
 	    printf("%c0|0",Genome_get_char(genome,chroffset+chrpos-1U));
3560 3563
 	  }
3564
+
3565
+	  /* Depth */
3566
+	  /* printf(" %d",this->nmatches_total + this->nmismatches_total); */
3567
+
3561 3568
 	} else {
3562 3569
 	  if (signed_counts_p == false) {
3563
-	    printf("%c%d",this->refnt,this->nmatches);
3570
+	    printf("%c%d",this->refnt,this->nmatches_passing);
3564 3571
 	  } else {
3565 3572
 	    printf("%c%ld|%ld",this->refnt,total_matches_plus,total_matches_minus);
3566 3573
 	  }
... ...
@@ -3691,6 +3698,9 @@ print_block (Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T block
3691 3698
 	      }
3692 3699
 	    }
3693 3700
           }
3701
+
3702
+	  /* Depth */
3703
+	  /* printf(" %d",this->nmatches_total + this->nmismatches_total); */
3694 3704
 	}
3695 3705
 
3696 3706
 	if (this->mismatches_byshift != NULL) {
... ...
@@ -3924,12 +3934,12 @@ tally_block (long int *tally_matches, long int *tally_mismatches,
3924 3934
   for (blocki = 0; blocki < lasti; blocki++) {
3925 3935
     this = block_tallies[blocki];
3926 3936
     if (print_noncovered_p == true ||
3927
-	(pass_variant_filter_p(this->nmatches,this->delcounts_plus,this->delcounts_minus,
3937
+	(pass_variant_filter_p(this->nmatches_passing,this->delcounts_plus,this->delcounts_minus,
3928 3938
 			       this->mismatches_byshift,min_depth,variant_strands) == true &&
3929 3939
 	 pass_difference_filter_p(probs,loglik,this,genome,
3930 3940
 				  printchr,chroffset,chrpos,
3931 3941
 				  quality_score_adj,genomic_diff_p) == true)) {
3932
-      total += this->nmatches + this->delcounts_plus + this->delcounts_minus;
3942
+      total += this->nmatches_passing + this->delcounts_plus + this->delcounts_minus;
3933 3943
       for (ptr = this->mismatches_byshift; ptr != NULL; ptr = List_next(ptr)) {
3934 3944
 	mismatch = (Mismatch_T) List_head(ptr);
3935 3945
 	total += mismatch->count;
... ...
@@ -3950,7 +3960,7 @@ tally_block (long int *tally_matches, long int *tally_mismatches,
3950 3960
       chrpos = blockstart + blocki;
3951 3961
 
3952 3962
       if (print_noncovered_p == false &&
3953
-	  pass_variant_filter_p(this->nmatches,this->delcounts_plus,this->delcounts_minus,
3963
+	  pass_variant_filter_p(this->nmatches_passing,this->delcounts_plus,this->delcounts_minus,
3954 3964
 				this->mismatches_byshift,min_depth,variant_strands) == false) {
3955 3965
 	/* Skip */
3956 3966
 
... ...
@@ -3975,7 +3985,7 @@ tally_block (long int *tally_matches, long int *tally_mismatches,
3975 3985
 	    binx_mismatches[bini] += mismatch->count;
3976 3986
 	}
3977 3987
 #else
3978
-	tally_matches[chrpos - chrstart] += this->nmatches + this->delcounts_plus + this->delcounts_minus;
3988
+	tally_matches[chrpos - chrstart] += this->nmatches_passing + this->delcounts_plus + this->delcounts_minus;
3979 3989
 
3980 3990
 	for (ptr = this->mismatches_byshift; ptr != NULL; ptr = List_next(ptr)) {
3981 3991
 	    mismatch = (Mismatch_T) List_head(ptr);
... ...
@@ -3993,6 +4003,7 @@ tally_block (long int *tally_matches, long int *tally_mismatches,
3993 4003
 }
3994 4004
 
3995 4005
 
4006
+/* signed_counts_p assumed to be true */
3996 4007
 static void
3997 4008
 iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
3998 4009
 	   Tally_T *block_tallies, Genomicpos_T blockstart, Genomicpos_T blockptr,
... ...
@@ -4030,9 +4041,9 @@ iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
4030 4041
   for (blocki = 0; blocki < lasti; blocki++) {
4031 4042
     this = block_tallies[blocki];
4032 4043
     if (print_noncovered_p == true ||
4033
-	pass_variant_filter_p(this->nmatches,this->delcounts_plus,this->delcounts_minus,
4044
+	pass_variant_filter_p(this->nmatches_passing,this->delcounts_plus,this->delcounts_minus,
4034 4045
 			      this->mismatches_byshift,min_depth,variant_strands) == true) {
4035
-      total += this->nmatches + this->delcounts_plus + this->delcounts_minus;
4046
+      total += this->nmatches_passing + this->delcounts_plus + this->delcounts_minus;
4036 4047
       for (ptr = this->mismatches_byshift; ptr != NULL; ptr = List_next(ptr)) {
4037 4048
 	mismatch = (Mismatch_T) List_head(ptr);
4038 4049
 	total += mismatch->count;
... ...
@@ -4462,7 +4473,7 @@ iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
4462 4473
       debug2(printf("Pointers for allele counts:\n"));
4463 4474
       pointers = push_int(&ignore,pointers,nbytes);
4464 4475
       if (print_noncovered_p == true ||
4465
-	  pass_variant_filter_p(this->nmatches,this->delcounts_plus,this->delcounts_minus,
4476
+	  pass_variant_filter_p(this->nmatches_passing,this->delcounts_plus,this->delcounts_minus,
4466 4477
 				this->mismatches_byshift,min_depth,variant_strands) == true) {
4467 4478
 	total_matches_plus = total_matches_minus = 0;
4468 4479
 	if (this->use_array_p == false) {
... ...
@@ -4482,7 +4493,7 @@ iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
4482 4493
 	    total_matches_minus += this->matches_byshift_minus[shift];
4483 4494
 	  }
4484 4495
 	}
4485
-	assert(this->nmatches == total_matches_plus + total_matches_minus);
4496
+	assert(this->nmatches_passing == total_matches_plus + total_matches_minus);
4486 4497
 
4487 4498
 	total_mismatches_plus = total_mismatches_minus = 0;
4488 4499
 	for (ptr = this->mismatches_byshift; ptr != NULL; ptr = List_next(ptr)) {
... ...
@@ -4493,6 +4504,11 @@ iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
4493 4504
 	    total_mismatches_minus += mismatch->count;
4494 4505
 	  }
4495 4506
 	}
4507
+	assert(this->nmismatches_passing == total_mismatches_plus + total_mismatches_minus);
4508
+
4509
+	/* Total depth */
4510
+	debug2(printf("Total depth:\n"));
4511
+	bytes = push_int(&nbytes,bytes,this->nmatches_total + this->nmismatches_total);
4496 4512
 
4497 4513
 	/* Total signed counts at this position */
4498 4514
 	debug2(printf("Total signed counts:\n"));
... ...
@@ -4500,7 +4516,7 @@ iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
4500 4516
 	bytes = push_int(&nbytes,bytes,total_matches_minus + this->delcounts_minus + total_mismatches_minus);
4501 4517
 	
4502 4518
 	/* Reference nucleotide and counts */
4503
-	if (this->nmatches == 0) {
4519
+	if (this->nmatches_passing == 0) {
4504 4520
 	  bytes = push_char(&nbytes,bytes,Genome_get_char(genome,chroffset+chrpos-1U));
4505 4521
 	  bytes = push_int(&nbytes,bytes,/*plus*/0);
4506 4522
 	  bytes = push_int(&nbytes,bytes,/*minus*/0);
... ...
@@ -4543,7 +4559,7 @@ iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
4543 4559
 	bytes = push_char(&nbytes,bytes,'\0');
4544 4560
 	
4545 4561
 	/* Cycles, nm, and optionally xs for reference */
4546
-	if (this->nmatches + this->delcounts_plus + this->delcounts_minus > 0) {
4562
+	if (this->nmatches_passing + this->delcounts_plus + this->delcounts_minus > 0) {
4547 4563
 	  if (this->use_array_p == false) {
4548 4564
 	    length = List_length(this->list_matches_byshift);
4549 4565
 	    match_array = (Match_T *) List_to_array(this->list_matches_byshift,NULL);
... ...
@@ -4728,7 +4744,7 @@ iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
4728 4744
       if (map_iit != NULL) {
4729 4745
 	bytes = report_plus_genes(bytes,&nbytes,/*tally2*/this,block_tallies,blockstart,prev_block_tallies,
4730 4746
 				  prev_blockstart,prev_blockptr,plus_genes,chrpos,genome,chroffset,/*signed_counts_p*/true,
4731
-				  /*print_cycles_p*/true,/*print_nm_scores_p*/true,print_xs_scores_p,
4747
+				  print_cycles_p,print_nm_scores_p,print_xs_scores_p,
4732 4748
 				  quality_score_adj,/*output_type*/OUTPUT_IIT);
4733 4749
       }
4734 4750
 
... ...
@@ -4737,7 +4753,7 @@ iit_block (List_T *intervallist, List_T *labellist, List_T *datalist,
4737 4753
       if (map_iit != NULL) {
4738 4754
 	bytes = report_minus_genes(bytes,&nbytes,/*tally2*/this,block_tallies,blockstart,prev_block_tallies,
4739 4755
 				   prev_blockstart,prev_blockptr,minus_genes,chrpos,genome,chroffset,/*signed_counts_p*/true,
4740
-				   /*print_cycles_p*/true,/*print_nm_scores_p*/true,print_xs_scores_p,
4756
+				   print_cycles_p,print_nm_scores_p,print_xs_scores_p,
4741 4757
 				   quality_score_adj,/*output_type*/OUTPUT_IIT);
4742 4758
       }
4743 4759
 
... ...
@@ -4966,7 +4982,7 @@ transfer_position_lh (Tally_T *alloc_tallies_low, Tally_T *alloc_tallies_high,
4966 4982
 static void
4967 4983
 revise_position (char querynt, char genomicnt, int nm, int xs, int signed_shift,
4968 4984
 		 Tally_T this, Tally_T right, bool ignore_query_Ns_p, bool readlevel_p,
4969
-		 unsigned int linei, int ncounts) {
4985
+		 unsigned int linei, int n_passing_counts, int n_total_counts) {
4970 4986
   Match_T match;
4971 4987
   Mismatch_T mismatch;
4972 4988
   List_T ptr;
... ...
@@ -4975,20 +4991,22 @@ revise_position (char querynt, char genomicnt, int nm, int xs, int signed_shift,
4975 4991
 
4976 4992
   if (right != NULL) {
4977 4993
     if (signed_shift > 0) {
4978
-      right->n_fromleft_plus += ncounts;
4994
+      right->n_fromleft_plus += n_passing_counts;
4979 4995
     } else {
4980
-      right->n_fromleft_minus += ncounts;
4996
+      right->n_fromleft_minus += n_passing_counts;
4981 4997
     }
4982 4998
   }
4983 4999
 
4984 5000
   if (readlevel_p == true || totals_only_p == true) {
4985 5001
     /* Don't store any details.  Also, nmatches captures both matches and mismatches */
4986
-    this->nmatches += ncounts;
5002
+    this->nmatches_passing += n_passing_counts;
5003
+    this->nmatches_total += n_total_counts;
4987 5004
 
4988 5005
   } else if (toupper(querynt) == toupper(genomicnt)) {
4989 5006
     /* Count matches, even if query quality score was low */
4990
-    this->nmatches += ncounts;
4991
-    if (this->nmatches == ARRAY_THRESHOLD) {
5007
+    this->nmatches_passing += n_passing_counts;
5008
+    this->nmatches_total += n_total_counts;
5009
+    if (this->nmatches_passing == ARRAY_THRESHOLD) {
4992 5010
       /* Convert list to array */
4993 5011
       /* Find maximum shifts */
4994 5012
       max_shift_plus = max_shift_minus = 0;
... ...
@@ -5081,32 +5099,32 @@ revise_position (char querynt, char genomicnt, int nm, int xs, int signed_shift,
5081 5099
 
5082 5100
     if (this->use_array_p == false) {
5083 5101
       if ((match = find_match_byshift(this->list_matches_byshift,signed_shift)) != NULL) {
5084
-	match->count += ncounts;
5102
+	match->count += n_passing_counts;
5085 5103
       } else {
5086 5104
 #ifdef USE_MATCHPOOL
5087
-        this->list_matches_byshift = Matchpool_push(this->list_matches_byshift,this->matchpool,signed_shift,nm,xs,ncounts);
5105
+        this->list_matches_byshift = Matchpool_push(this->list_matches_byshift,this->matchpool,signed_shift,nm,xs,n_passing_counts);
5088 5106
 #else
5089
-	this->list_matches_byshift = List_push(this->list_matches_byshift,(void *) Match_new(signed_shift,nm,xs,ncounts));
5107
+	this->list_matches_byshift = List_push(this->list_matches_byshift,(void *) Match_new(signed_shift,nm,xs,n_passing_counts));
5090 5108
 #endif
5091 5109
       }
5092 5110
 
5093 5111
       if ((match = find_match_bynm(this->list_matches_bynm,nm)) != NULL) {
5094
-	match->count += ncounts;
5112
+	match->count += n_passing_counts;
5095 5113
       } else {
5096 5114
 #ifdef USE_MATCHPOOL
5097
-        this->list_matches_bynm = Matchpool_push(this->list_matches_bynm,this->matchpool,signed_shift,nm,xs,ncounts);
5115
+        this->list_matches_bynm = Matchpool_push(this->list_matches_bynm,this->matchpool,signed_shift,nm,xs,n_passing_counts);
5098 5116
 #else
5099
-        this->list_matches_bynm = List_push(this->list_matches_bynm,(void *) Match_new(signed_shift,nm,xs,ncounts));
5117
+        this->list_matches_bynm = List_push(this->list_matches_bynm,(void *) Match_new(signed_shift,nm,xs,n_passing_counts));
5100 5118
 #endif
5101 5119
       }
5102 5120
 
5103 5121
       if ((match = find_match_byxs(this->list_matches_byxs,xs)) != NULL) {
5104
-	match->count += ncounts;
5122
+	match->count += n_passing_counts;
5105 5123
       } else {
5106 5124
 #ifdef USE_MATCHPOOL
5107
-        this->list_matches_byxs = Matchpool_push(this->list_matches_byxs,this->matchpool,signed_shift,nm,xs,ncounts);
5125
+        this->list_matches_byxs = Matchpool_push(this->list_matches_byxs,this->matchpool,signed_shift,nm,xs,n_passing_counts);
5108 5126
 #else
5109
-        this->list_matches_byxs = List_push(this->list_matches_byxs,(void *) Match_new(signed_shift,nm,xs,ncounts));
5127
+        this->list_matches_byxs = List_push(this->list_matches_byxs,(void *) Match_new(signed_shift,nm,xs,n_passing_counts));
5110 5128
 #endif
5111 5129
       }
5112 5130
 
... ...
@@ -5123,7 +5141,7 @@ revise_position (char querynt, char genomicnt, int nm, int xs, int signed_shift,
5123 5141
 	  FREE(this->matches_byshift_plus);
5124 5142
 	  this->matches_byshift_plus = newarray;
5125 5143
 	}
5126
-	this->matches_byshift_plus[signed_shift] += ncounts;
5144
+	this->matches_byshift_plus[signed_shift] += n_passing_counts;
5127 5145
       } else {
5128 5146
 	if (-signed_shift >= this->n_matches_byshift_minus) {
5129 5147
 	  /* Resize array */
... ...
@@ -5136,11 +5154,11 @@ revise_position (char querynt, char genomicnt, int nm, int xs, int signed_shift,
5136 5154
 	  FREE(this->matches_byshift_minus);
5137 5155
 	  this->matches_byshift_minus = newarray;
5138 5156
 	}
5139
-	this->matches_byshift_minus[-signed_shift] += ncounts;
5157
+	this->matches_byshift_minus[-signed_shift] += n_passing_counts;
5140 5158
       }
5141 5159
 
5142
-      this->matches_bynm[nm] += ncounts;
5143
-      this->matches_byxs[xs] += ncounts;
5160
+      this->matches_bynm[nm] += n_passing_counts;
5161
+      this->matches_byxs[xs] += n_passing_counts;
5144 5162
     }
5145 5163
 
5146 5164
   } else if (toupper(querynt) == 'N' && ignore_query_Ns_p == true) {
... ...
@@ -5148,36 +5166,39 @@ revise_position (char querynt, char genomicnt, int nm, int xs, int signed_shift,
5148 5166
 
5149 5167
   } else {
5150 5168
 
5169
+    this->nmismatches_passing += n_passing_counts;
5170
+    this->nmismatches_total += n_total_counts;
5171
+
5151 5172
     if ((mismatch = find_mismatch_byshift(this->mismatches_byshift,toupper(querynt),signed_shift)) != NULL) {
5152
-      mismatch->count += ncounts;
5173
+      mismatch->count += n_passing_counts;
5153 5174
     } else {
5154 5175
 #ifdef USE_MISMATCHPOOL
5155 5176
       this->mismatches_byshift = Mismatchpool_push(this->mismatches_byshift,this->mismatchpool,
5156
-                                           	   toupper(querynt),signed_shift,nm,xs,ncounts);
5177
+                                           	   toupper(querynt),signed_shift,nm,xs,n_passing_counts);
5157 5178
 #else
5158
-      this->mismatches_byshift = List_push(this->mismatches_byshift,(void *) Mismatch_new(toupper(querynt),signed_shift,nm,xs,ncounts));
5179
+      this->mismatches_byshift = List_push(this->mismatches_byshift,(void *) Mismatch_new(toupper(querynt),signed_shift,nm,xs,n_passing_counts));
5159 5180
 #endif
5160 5181
     }
5161 5182
 
5162 5183
     if ((mismatch = find_mismatch_bynm(this->mismatches_bynm,toupper(querynt),nm)) != NULL) {
5163
-      mismatch->count += ncounts;
5184
+      mismatch->count += n_passing_counts;
5164 5185
     } else {
5165 5186
 #ifdef USE_MISMATCHPOOL
5166 5187
       this->mismatches_bynm = Mismatchpool_push(this->mismatches_bynm,this->mismatchpool,
5167
-						toupper(querynt),signed_shift,nm,xs,ncounts);
5188
+						toupper(querynt),signed_shift,nm,xs,n_passing_counts);
5168 5189
 #else
5169
-      this->mismatches_bynm = List_push(this->mismatches_bynm,(void *) Mismatch_new(toupper(querynt),signed_shift,nm,xs,ncounts));
5190
+      this->mismatches_bynm = List_push(this->mismatches_bynm,(void *) Mismatch_new(toupper(querynt),signed_shift,nm,xs,n_passing_counts));
5170 5191
 #endif
5171 5192
     }
5172 5193
 
5173 5194
     if ((mismatch = find_mismatch_byxs(this->mismatches_byxs,toupper(querynt),xs)) != NULL) {
5174
-      mismatch->count += ncounts;
5195
+      mismatch->count += n_passing_counts;
5175 5196
     } else {
5176 5197
 #ifdef USE_MISMATCHPOOL
5177 5198
       this->mismatches_byxs = Mismatchpool_push(this->mismatches_byxs,this->mismatchpool,
5178
-						toupper(querynt),signed_shift,nm,xs,ncounts);
5199
+						toupper(querynt),signed_shift,nm,xs,n_passing_counts);
5179 5200
 #else
5180
-      this->mismatches_byxs = List_push(this->mismatches_byxs,(void *) Mismatch_new(toupper(querynt),signed_shift,nm,xs,ncounts));
5201
+      this->mismatches_byxs = List_push(this->mismatches_byxs,(void *) Mismatch_new(toupper(querynt),signed_shift,nm,xs,n_passing_counts));
5181 5202
 #endif
5182 5203
     }
5183 5204
   }
... ...
@@ -5191,25 +5212,25 @@ revise_position (char querynt, char genomicnt, int nm, int xs, int signed_shift,
5191 5212
 
5192 5213
 static void
5193 5214
 revise_insertion (Genomicpos_T chrpos, char *query_insert, int mlength, int signed_shift, int nm, int xs,
5194
-		  Tally_T this, int ncounts) {
5215
+		  Tally_T this, int n_passing_counts) {
5195 5216
   Insertion_T ins;
5196 5217
 
5197 5218
   if ((ins = find_insertion_byshift(this->insertions_byshift,query_insert,mlength,signed_shift)) != NULL) {
5198
-    ins->count += ncounts;
5219
+    ins->count += n_passing_counts;
5199 5220
   } else {
5200
-    this->insertions_byshift = List_push(this->insertions_byshift,(void *) Insertion_new(chrpos,query_insert,mlength,signed_shift,nm,xs,ncounts));
5221
+    this->insertions_byshift = List_push(this->insertions_byshift,(void *) Insertion_new(chrpos,query_insert,mlength,signed_shift,nm,xs,n_passing_counts));
5201 5222
   }
5202 5223
 
5203 5224
   if ((ins = find_insertion_bynm(this->insertions_bynm,query_insert,mlength,nm)) != NULL) {
5204
-    ins->count += ncounts;
5225
+    ins->count += n_passing_counts;
5205 5226
   } else {
5206
-    this->insertions_bynm = List_push(this->insertions_bynm,(void *) Insertion_new(chrpos,query_insert,mlength,signed_shift,nm,xs,ncounts));
5227
+    this->insertions_bynm = List_push(this->insertions_bynm,(void *) Insertion_new(chrpos,query_insert,mlength,signed_shift,nm,xs,n_passing_counts));
5207 5228
   }
5208 5229
 
5209 5230
   if ((ins = find_insertion_byxs(this->insertions_byxs,query_insert,mlength,xs)) != NULL) {
5210
-    ins->count += ncounts;
5231
+    ins->count += n_passing_counts;
5211 5232
   } else {
5212
-    this->insertions_byxs = List_push(this->insertions_byxs,(void *) Insertion_new(chrpos,query_insert,mlength,signed_shift,nm,xs,ncounts));
5233
+    this->insertions_byxs = List_push(this->insertions_byxs,(void *) Insertion_new(chrpos,query_insert,mlength,signed_shift,nm,xs,n_passing_counts));
5213 5234
   }
5214 5235
 
5215 5236
   return;
... ...
@@ -5217,25 +5238,25 @@ revise_insertion (Genomicpos_T chrpos, char *query_insert, int mlength, int sign
5217 5238
 
5218 5239
 static void
5219 5240
 revise_deletion (Genomicpos_T chrpos, char *deletion, int mlength, int signed_shift, int nm, int xs, Tally_T this,
5220
-		 int ncounts) {
5241
+		 int n_passing_counts) {
5221 5242
   Deletion_T del;
5222 5243
 
5223 5244
   if ((del = find_deletion_byshift(this->deletions_byshift,deletion,mlength,signed_shift)) != NULL) {
5224
-    del->count += ncounts;
5245
+    del->count += n_passing_counts;
5225 5246
   } else {
5226
-    this->deletions_byshift = List_push(this->deletions_byshift,(void *) Deletion_new(chrpos,deletion,mlength,signed_shift,nm,xs,ncounts));
5247
+    this->deletions_byshift = List_push(this->deletions_byshift,(void *) Deletion_new(chrpos,deletion,mlength,signed_shift,nm,xs,n_passing_counts));
5227 5248
   }
5228 5249
 
5229 5250
   if ((del = find_deletion_bynm(this->deletions_bynm,deletion,mlength,nm)) != NULL) {
5230
-    del->count += ncounts;
5251
+    del->count += n_passing_counts;
5231 5252
   } else {
5232
-    this->deletions_bynm = List_push(this->deletions_bynm,(void *) Deletion_new(chrpos,deletion,mlength,signed_shift,nm,xs,ncounts));
5253
+    this->deletions_bynm = List_push(this->deletions_bynm,(void *) Deletion_new(chrpos,deletion,mlength,signed_shift,nm,xs,n_passing_counts));
5233 5254
   }
5234 5255
 
5235 5256
   if ((del = find_deletion_byxs(this->deletions_byxs,deletion,mlength,xs)) != NULL) {
5236
-    del->count += ncounts;
5257
+    del->count += n_passing_counts;
5237 5258
   } else {
5238
-    this->deletions_byxs = List_push(this->deletions_byxs,(void *) Deletion_new(chrpos,deletion,mlength,signed_shift,nm,xs,ncounts));
5259
+    this->deletions_byxs = List_push(this->deletions_byxs,(void *) Deletion_new(chrpos,deletion,mlength,signed_shift,nm,xs,n_passing_counts));
5239 5260
   }
5240 5261
 
5241 5262
   return;
... ...
@@ -5261,7 +5282,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5261 5282
 	     char *shortread, int nm, char splice_strand, bool terminalp, Genomicpos_T alloc_low,
5262 5283
 	     Genome_T genome, Genomicpos_T chroffset, bool ignore_query_Ns_p,
5263 5284
 	     bool print_indels_p, bool readlevel_p, int max_softclip, unsigned int linei,
5264
-	     __m128i *counts, int nreps) {
5285
+	     __m128i *passing_counts, int nreps) {
5265 5286
   Tally_T this, right;
5266 5287
   int alloci;
5267 5288
   int shift, signed_shift;
... ...
@@ -5272,7 +5293,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5272 5293
   Intlist_T a;
5273 5294
   Uintlist_T b;
5274 5295
   unsigned int mlength;
5275
-  int ncounts;
5296
+  int n_passing_counts, n_total_counts;
5276 5297
   int type;
5277 5298
   int quality_score;
5278 5299
   int xs;
... ...
@@ -5307,7 +5328,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5307 5328
 
5308 5329
   p = shortread;
5309 5330
   r = 0;
5310
-  if (counts) _mm_store_si128((__m128i *) block,counts[0]);
5331
+  if (passing_counts) _mm_store_si128((__m128i *) block,passing_counts[0]);
5311 5332
   if (max_softclip > 0 && types != NULL) {
5312 5333
     if (Intlist_head(types) == 'S') {
5313 5334
       /* Revise pos, so we handle the initial soft clip */
... ...
@@ -5317,7 +5338,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5317 5338
 	pos = 0U;
5318 5339
 	p += (mlength - pos);
5319 5340
 	r += (mlength - pos);
5320
-	if (counts) _mm_store_si128((__m128i *) block,counts[r/8]);
5341
+	if (passing_counts) _mm_store_si128((__m128i *) block,passing_counts[r/8]);
5321 5342
 	Uintlist_head_set(npositions,pos);
5322 5343
       } else {
5323 5344
 	pos -= mlength;
... ...
@@ -5330,7 +5351,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5330 5351
 	pos += (mlength - max_softclip);
5331 5352
 	p += (mlength - max_softclip);
5332 5353
 	r += (mlength - max_softclip);
5333
-	if (counts) _mm_store_si128((__m128i *) block,counts[r/8]);
5354
+	if (passing_counts) _mm_store_si128((__m128i *) block,passing_counts[r/8]);
5334 5355
 	Uintlist_head_set(npositions,max_softclip);
5335 5356
       }
5336 5357
     }
... ...
@@ -5343,7 +5364,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5343 5364
       /* pos += mlength; -- SAM assumes genome coordinates are of clipped region */
5344 5365
       p += mlength;
5345 5366
       r += mlength;
5346
-      if (counts) _mm_store_si128((__m128i *) block,counts[r/8]);
5367
+      if (passing_counts) _mm_store_si128((__m128i *) block,passing_counts[r/8]);
5347 5368
       shift += ((strand == '+') ? +mlength : -mlength);
5348 5369
     } else if (type == 'H') {
5349 5370
       /* pos += mlength; -- SAM assumes genome coordinates are of clipped region */
... ...
@@ -5368,13 +5389,13 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5368 5389
 	this = alloc_tallies[alloci];
5369 5390
 
5370 5391
 	signed_shift = (strand == '+') ? shift : -shift;
5371
-	/* quality_score = (int) count_average(counts,r,mlength); */
5392
+	/* quality_score = (int) count_average(passing_counts,r,mlength); */
5372 5393
 	revise_insertion(pos,/*query_insert*/p,mlength,signed_shift,nm,xs,this,nreps);
5373 5394
       }
5374 5395
 
5375 5396
       p += mlength;
5376 5397
       r += mlength;
5377
-      if (counts) _mm_store_si128((__m128i *) block,counts[r/8]);
5398
+      if (passing_counts) _mm_store_si128((__m128i *) block,passing_counts[r/8]);
5378 5399
       shift += ((strand == '+') ? +mlength : -mlength);
5379 5400
 
5380 5401
     } else if (type == 'D') {
... ...
@@ -5421,7 +5442,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5421 5442
       if (0 /* mlength < min_mlength */) {
5422 5443
 	p += mlength;
5423 5444
 	r += mlength;
5424
-	if (counts) _mm_store_si128((__m128i *) block,counts[r/8]);
5445
+	if (passing_counts) _mm_store_si128((__m128i *) block,passing_counts[r/8]);
5425 5446
 	pos += mlength;
5426 5447
 	shift += ((strand == '+') ? +mlength : -mlength);
5427 5448
 
... ...
@@ -5460,7 +5481,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5460 5481
 	  if (genome == NULL) {
5461 5482
 	    this->refnt = ' ';
5462 5483
 
5463
-	  } else if (this->nmatches == 0 && this->mismatches_byshift == NULL) {
5484
+	  } else if (this->nmatches_passing == 0 && this->mismatches_byshift == NULL) {
5464 5485
 	    this->refnt = toupper(*q);
5465 5486
 	    debug1(printf("Line %d assigning %c at %u to tally %p\n",linei,this->refnt,pos+1U,this));
5466 5487
 	    
... ...
@@ -5468,19 +5489,20 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5468 5489
 	    fprintf(stderr,"Two different genomic chars %c and %c at position %u\n",
5469 5490
 		    this->refnt,*q,pos+1U);
5470 5491
 	    fprintf(stderr,"Have seen %d matches and %d types of mismatches here so far\n",
5471
-		    this->nmatches,List_length(this->mismatches_byshift));
5492
+		    this->nmatches_passing,List_length(this->mismatches_byshift));
5472 5493
 	    abort();
5473 5494
 	  }
5474 5495
 
5475 5496
 	  signed_shift = (strand == '+') ? shift : -shift;
5476
-	  if (counts) {
5477
-	    ncounts = block[r % 8];
5497
+	  if (passing_counts) {
5498
+	    n_passing_counts = block[r % 8];
5478 5499
 	  } else {
5479
-	    ncounts = nreps;
5500
+	    n_passing_counts = nreps;
5480 5501
 	  }
5481
-	  debug1(printf("ncounts at r %d is %d\n",r,ncounts));
5502
+	  n_total_counts = nreps;
5503
+	  debug1(printf("ncounts at r %d is %d\n",r,n_passing_counts));
5482 5504
 	  revise_position(/*querynt*/*p,/*genomicnt*/*q,nm,xs,signed_shift,
5483
-			  this,right,ignore_query_Ns_p,readlevel_p,linei,ncounts);
5505
+			  this,right,ignore_query_Ns_p,readlevel_p,linei,n_passing_counts,n_total_counts);
5484 5506
 	  if (readlevel_p == true && pos >= chrstart && pos <= chrend) {
5485 5507
 	    FREE(genomic);
5486 5508
 	    return;
... ...
@@ -5489,7 +5511,7 @@ revise_read (Tally_T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend,
5489 5511
 	  p++;
5490 5512
 	  q++;
5491 5513
 	  if (++r % 8 == 0) {
5492
-	    if (counts) _mm_store_si128((__m128i *) block,counts[r/8]);
5514
+	    if (passing_counts) _mm_store_si128((__m128i *) block,passing_counts[r/8]);
5493 5515
 	  }
5494 5516
 	  pos++;
5495 5517
 	  shift += ((strand == '+') ? +1 : -1);
... ...
@@ -5718,19 +5740,19 @@ revise_read_lh (Tally_T *alloc_tallies_low, Tally_T *alloc_tallies_high, Genomic
5718 5740
 	  if (genome == NULL) {
5719 5741
 	    this->refnt = ' ';
5720 5742
 
5721
-	  } else if (this->nmatches == 0 && this->mismatches_byshift == NULL) {
5743
+	  } else if (this->nmatches_passing == 0 && this->mismatches_byshift == NULL) {
5722 5744
 	    this->refnt = toupper(*q);
5723 5745
 	    
5724 5746
 	  } else if (this->refnt != toupper(*q)) {
5725 5747
 	    fprintf(stderr,"Two different genomic chars %c and %c at position %u\n",
5726 5748
 		    this->refnt,*q,pos+1U);
5727 5749
 	    fprintf(stderr,"Have seen %d matches and %d types of mismatches here so far\n",
5728
-		    this->nmatches,List_length(this->mismatches_byshift));
5750
+		    this->nmatches_passing,List_length(this->mismatches_byshift));
5729 5751
 	    abort();
5730 5752
 	  }
5731 5753
 
5732 5754
 	  revise_position(/*querynt*/*p,/*genomicnt*/*q,nm,xs,signed_shift,
5733
-			  this,right,ignore_query_Ns_p,readlevel_p,linei,/*ncounts*/1);
5755
+			  this,right,ignore_query_Ns_p,readlevel_p,linei,/*n_passing_counts*/1,/*n_total_counts*/1);
5734 5756
 	  if (readlevel_p == true && pos >= chrstart && pos <= chrend) {
5735 5757
 	    FREE(genomic);
5736 5758
 	    return;
... ...
@@ -5780,7 +5802,7 @@ best_mapping_p (Tableuint_T resolve_low_table, Tableuint_T resolve_high_table, c
5780 5802
 
5781 5803
 
5782 5804
 static void
5783
-get_passing_counts (__m128i *counts, int ncounts, int readlength, Bamline_T *bamlines, int nreps, int minimum_quality_score) {
5805
+get_passing_counts (__m128i *passing_counts, int readlength, Bamline_T *bamlines, int nreps, int minimum_quality_score) {
5784 5806
   int i, r, b;
5785 5807
   char *quality_string, buffer[16];
5786 5808
   int x;
... ...
@@ -5791,24 +5813,20 @@ get_passing_counts (__m128i *counts, int ncounts, int readlength, Bamline_T *bam
5791 5813
   threshold = _mm_set1_epi8(minimum_quality_score);
5792 5814
   ones = _mm_set1_epi8(1);
5793 5815
 
5794
-  /* Initialize counts */
5816
+  /* Initialize passing_counts */
5795 5817
   b = 0; r = 0;
5796 5818
   while (r + 16 < readlength) {
5797
-    counts[b++] = _mm_set1_epi16(0);
5798
-    counts[b++] = _mm_set1_epi16(0);
5819
+    passing_counts[b++] = _mm_set1_epi16(0);
5820
+    passing_counts[b++] = _mm_set1_epi16(0);
5799 5821
     r += 16;
5800 5822
   }
5801 5823
   if (r < readlength) {
5802
-    counts[b++] = _mm_set1_epi16(0);
5803
-    r += 8;
5804
-    if (r < readlength) {
5805
-      counts[b] = _mm_set1_epi16(0);
5824
+    passing_counts[b] = _mm_set1_epi16(0);
5825
+    if (r + 8 < readlength) {
5826
+      b++;
5827
+      passing_counts[b] = _mm_set1_epi16(0);
5806 5828
     }
5807 5829
   }
5808
-  if (b > ncounts) {
5809
-    fprintf(stderr,"Allocated only %d vectors, but need %d\n",ncounts,b);
5810
-    abort();
5811
-  }
5812 5830
 
5813 5831
   for (i = 0; i < nreps; i++) {
5814 5832
     quality_string = Bamline_quality_string(bamlines[0]);
... ...
@@ -5824,11 +5842,11 @@ get_passing_counts (__m128i *counts, int ncounts, int readlength, Bamline_T *bam
5824 5842
 
5825 5843
       /* Tally the lower 8 bytes */
5826 5844
       cmp16 = _mm_cvtepi8_epi16(cmp8);
5827
-      counts[b] = _mm_add_epi16(counts[b],cmp16); b++;
5845
+      passing_counts[b] = _mm_add_epi16(passing_counts[b],cmp16); b++;
5828 5846
 
5829 5847
       /* Tally the upper 8 bytes */
5830 5848
       cmp16 = _mm_cvtepi8_epi16(_mm_srli_si128(cmp8,8));
5831
-      counts[b] = _mm_add_epi16(counts[b],cmp16); b++;
5849
+      passing_counts[b] = _mm_add_epi16(passing_counts[b],cmp16); b++;
5832 5850
 
5833 5851
       r += 16;
5834 5852
     }
... ...
@@ -5838,12 +5856,12 @@ get_passing_counts (__m128i *counts, int ncounts, int readlength, Bamline_T *bam
5838 5856
       block = _mm_loadu_si128((__m128i *) buffer);
5839 5857
       cmp8 = _mm_add_epi8(_mm_cmpgt_epi8(threshold,block),ones);
5840 5858
       cmp16 = _mm_cvtepi8_epi16(cmp8);
5841
-      counts[b] = _mm_add_epi16(counts[b],cmp16); b++;
5842
-      r += 8;
5859
+      passing_counts[b] = _mm_add_epi16(passing_counts[b],cmp16);
5843 5860
 
5844
-      if (r < readlength) {
5861
+      if (r + 8 < readlength) {
5862
+	b++;
5845 5863
 	cmp16 = _mm_cvtepi8_epi16(_mm_srli_si128(cmp8,8));
5846
-	counts[b] = _mm_add_epi16(counts[b],cmp16); /* b++; */
5864
+	passing_counts[b] = _mm_add_epi16(passing_counts[b],cmp16); /* b++; */
5847 5865
       }
5848 5866
     }
5849 5867
 
... ...
@@ -5855,9 +5873,9 @@ get_passing_counts (__m128i *counts, int ncounts, int readlength, Bamline_T *bam
5855 5873
       }
5856 5874
       printf("\n");
5857 5875
 
5858
-      foo = counts[b++];
5876
+      foo = passing_counts[b++];
5859 5877
       print_vector_16_dec(foo);
5860
-      foo = counts[b++];
5878
+      foo = passing_counts[b++];
5861 5879
       print_vector_16_dec(foo);
5862 5880
       printf("\n");
5863 5881
 
... ...
@@ -5872,11 +5890,11 @@ get_passing_counts (__m128i *counts, int ncounts, int readlength, Bamline_T *bam
5872 5890
       }
5873 5891
       printf("\n");
5874 5892
 
5875
-      foo = counts[b++];
5893
+      foo = passing_counts[b++];
5876 5894
       print_vector_16_dec(foo);
5877 5895
       r += 8;
5878 5896
       if (r < readlength) {
5879
-	foo = counts[b++];
5897
+	foo = passing_counts[b++];
5880 5898
 	print_vector_16_dec(foo);
5881 5899
       }
5882 5900
       printf("\n");
... ...
@@ -5923,8 +5941,8 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
5923 5941
   unsigned int linei = 0, linei_start, linei_end;
5924 5942
   int nlines;
5925 5943
   int nreps;
5926
-  __m128i *counts, foo;
5927
-  int ncounts;
5944
+  __m128i *passing_counts, foo;
5945
+  int nvectors;
5928 5946
   int i;
5929 5947
 
5930 5948
 
... ...
@@ -6042,18 +6060,18 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
6042 6060
 			Bamline_cigar_querylength(bamline_rep),rsequence,Bamline_nm(bamline_rep),
6043 6061
 			Bamline_splice_strand(bamline_rep),Bamline_terminalp(bamline_rep),alloc_low,
6044 6062
 			genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p,
6045
-			max_softclip,linei+linei_start,/*counts*/NULL,nreps);
6063
+			max_softclip,linei+linei_start,/*passing_counts*/NULL,nreps);
6046 6064
 	  } else {
6047
-	    ncounts = (readlength + 7)/8;
6048
-	    counts = (__m128i *) _mm_malloc(ncounts * sizeof(__m128i),16);
6049
-	    get_passing_counts(counts,ncounts,readlength,&(bamlines[linei_start]),nreps,minimum_quality_score);
6065
+	    nvectors = (readlength + 7)/8;
6066
+	    passing_counts = (__m128i *) _mm_malloc(nvectors * sizeof(__m128i),16);
6067
+	    get_passing_counts(passing_counts,readlength,&(bamlines[linei_start]),nreps,minimum_quality_score);
6050 6068
 	    revise_read(alloc_tallies,chrstart,chrend,chrpos_low,Bamline_flag(bamline_rep),
6051 6069
 			Bamline_cigar_types(bamline_rep),Bamline_cigar_npositions(bamline_rep),
6052 6070
 			Bamline_cigar_querylength(bamline_rep),rsequence,Bamline_nm(bamline_rep),
6053 6071
 			Bamline_splice_strand(bamline_rep),Bamline_terminalp(bamline_rep),alloc_low,
6054 6072
 			genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p,
6055
-			max_softclip,linei+linei_start,counts,nreps);
6056
-	    _mm_free(counts);
6073
+			max_softclip,linei+linei_start,passing_counts,nreps);
6074
+	    _mm_free(passing_counts);
6057 6075
 	  }
6058 6076
 	  goodp = true;
6059 6077
 	}
... ...
@@ -6109,7 +6127,7 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
6109 6127
 	  for (chrpos = alloc_low; chrpos < alloc_ptr; chrpos++) {
6110 6128
 	    alloci = chrpos - alloc_low;
6111 6129
 	    this = alloc_tallies[alloci];
6112
-	    if (this->nmatches > 0 || this->delcounts_plus + this->delcounts_minus > 0 ||
6130
+	    if (this->nmatches_passing > 0 || this->delcounts_plus + this->delcounts_minus > 0 ||
6113 6131
 		this->mismatches_byshift != NULL || this->insertions_byshift != NULL || this->deletions_byshift != NULL) {
6114 6132
 	      lastpos = transfer_position(&grand_total,alloc_tallies,block_tallies,&exonstart,lastpos,
6115 6133
 					  &blockptr,&blockstart,&blockend,
... ...
@@ -6136,7 +6154,7 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
6136 6154
 	  for (chrpos = alloc_low; chrpos + max_softclip < chrpos_low; chrpos++) {
6137 6155
 	    alloci = chrpos - alloc_low;
6138 6156
 	    this = alloc_tallies[alloci];
6139
-	    if (this->nmatches > 0 || this->delcounts_plus + this->delcounts_minus > 0 ||
6157
+	    if (this->nmatches_passing > 0 || this->delcounts_plus + this->delcounts_minus > 0 ||
6140 6158
 		this->mismatches_byshift != NULL || this->insertions_byshift != NULL || this->deletions_byshift != NULL) {
6141 6159
 	      lastpos = transfer_position(&grand_total,alloc_tallies,block_tallies,&exonstart,lastpos,
6142 6160
 					  &blockptr,&blockstart,&blockend,
... ...
@@ -6198,18 +6216,18 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
6198 6216
 			Bamline_cigar_querylength(bamline_rep),Bamline_read(bamline_rep),
6199 6217
 			Bamline_nm(bamline_rep),Bamline_splice_strand(bamline_rep),Bamline_terminalp(bamline_rep),alloc_low,
6200 6218
 			genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p,
6201
-			max_softclip,linei + linei_start,/*counts*/NULL,nreps);
6219
+			max_softclip,linei + linei_start,/*passing_counts*/NULL,nreps);
6202 6220
 	  } else {
6203 6221
 	    readlength = Bamline_readlength(bamline_rep);
6204
-	    ncounts = (readlength + 7)/8;
6205
-	    counts = (__m128i *) _mm_malloc(ncounts * sizeof(__m128i),16);
6206
-	    get_passing_counts(counts,ncounts,readlength,&(bamlines[linei_start]),nreps,minimum_quality_score);
6222
+	    nvectors = (readlength + 7)/8;
6223
+	    passing_counts = (__m128i *) _mm_malloc(nvectors * sizeof(__m128i),16);
6224
+	    get_passing_counts(passing_counts,readlength,&(bamlines[linei_start]),nreps,minimum_quality_score);
6207 6225
 	    revise_read(alloc_tallies,chrstart,chrend,chrpos_low,Bamline_flag(bamline_rep),
6208 6226
 			Bamline_cigar_types(bamline_rep),Bamline_cigar_npositions(bamline_rep),
6209 6227
 			Bamline_cigar_querylength(bamline_rep),Bamline_read(bamline_rep),
6210 6228
 			Bamline_nm(bamline_rep),Bamline_splice_strand(bamline_rep),Bamline_terminalp(bamline_rep),alloc_low,
6211 6229
 			genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p,
6212
-			max_softclip,linei + linei_start,counts,nreps);
6230
+			max_softclip,linei + linei_start,passing_counts,nreps);
6213 6231
 	  }
6214 6232
 	}
6215 6233
       }
... ...
@@ -6232,7 +6250,7 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
6232 6250
   for (chrpos = alloc_low; chrpos < alloc_ptr; chrpos++) {
6233 6251
     alloci = chrpos - alloc_low;
6234 6252
     this = alloc_tallies[alloci];
6235
-    if (this->nmatches > 0 || this->delcounts_plus + this->delcounts_minus > 0 ||
6253
+    if (this->nmatches_passing > 0 || this->delcounts_plus + this->delcounts_minus > 0 ||
6236 6254
 	this->mismatches_byshift != NULL || this->insertions_byshift != NULL || this->deletions_byshift != NULL) {
6237 6255
       lastpos = transfer_position(&grand_total,alloc_tallies,block_tallies,&exonstart,lastpos,
6238 6256
 				  &blockptr,&blockstart,&blockend,
... ...
@@ -6433,9 +6451,9 @@ Bamtally_run_lh (long int **tally_matches_low, long int **tally_mismatches_low,
6433 6451
 	alloci = chrpos - alloc_low;
6434 6452
 	this_low = alloc_tallies_low[alloci];
6435 6453
 	this_high = alloc_tallies_high[alloci];
6436
-	if (this_low->nmatches > 0 || this_low->delcounts_plus + this_low->delcounts_minus > 0 ||
6454
+	if (this_low->nmatches_passing > 0 || this_low->delcounts_plus + this_low->delcounts_minus > 0 ||
6437 6455
 	    this_low->mismatches_byshift != NULL || this_low->insertions_byshift != NULL || this_low->deletions_byshift != NULL ||
6438
-	    this_high->nmatches > 0 || this_high->delcounts_plus + this_high->delcounts_minus > 0 ||
6456
+	    this_high->nmatches_passing > 0 || this_high->delcounts_plus + this_high->delcounts_minus > 0 ||
6439 6457
 	    this_high->mismatches_byshift != NULL || this_high->insertions_byshift != NULL || this_high->deletions_byshift != NULL) {
6440 6458
 	  transfer_position_lh(alloc_tallies_low,alloc_tallies_high,block_tallies_low,block_tallies_high,
6441 6459
 			       &blockptr,&blockstart,&blockend,
... ...
@@ -6458,9 +6476,9 @@ Bamtally_run_lh (long int **tally_matches_low, long int **tally_mismatches_low,
6458 6476
 	alloci = chrpos - alloc_low;
6459 6477
 	this_low = alloc_tallies_low[alloci];
6460 6478
 	this_high = alloc_tallies_high[alloci];
6461
-	if (this_low->nmatches > 0 || this_low->delcounts_plus + this_low->delcounts_minus > 0 ||
6479
+	if (this_low->nmatches_passing > 0 || this_low->delcounts_plus + this_low->delcounts_minus > 0 ||
6462 6480
 	    this_low->mismatches_byshift != NULL || this_low->insertions_byshift != NULL || this_low->deletions_byshift != NULL ||
6463
-	    this_high->nmatches > 0 || this_high->delcounts_plus + this_high->delcounts_minus > 0 ||
6481
+	    this_high->nmatches_passing > 0 || this_high->delcounts_plus + this_high->delcounts_minus > 0 ||
6464 6482
 	    this_high->mismatches_byshift != NULL || this_high->insertions_byshift != NULL || this_high->deletions_byshift != NULL) {
6465 6483
 	  transfer_position_lh(alloc_tallies_low,alloc_tallies_high,block_tallies_low,block_tallies_high,
6466 6484
 			       &blockptr,&blockstart,&blockend,
... ...
@@ -6536,9 +6554,9 @@ Bamtally_run_lh (long int **tally_matches_low, long int **tally_mismatches_low,
6536 6554
     alloci = chrpos - alloc_low;
6537 6555
     this_low = alloc_tallies_low[alloci];
6538 6556
     this_high = alloc_tallies_high[alloci];
6539
-    if (this_low->nmatches > 0 || this_low->delcounts_plus + this_low->delcounts_minus > 0 ||
6557
+    if (this_low->nmatches_passing > 0 || this_low->delcounts_plus + this_low->delcounts_minus > 0 ||
6540 6558
 	this_low->mismatches_byshift != NULL || this_low->insertions_byshift != NULL || this_low->deletions_byshift != NULL ||
6541
-	this_high->nmatches > 0 || this_high->delcounts_plus + this_high->delcounts_minus > 0 ||
6559
+	this_high->nmatches_passing > 0 || this_high->delcounts_plus + this_high->delcounts_minus > 0 ||
6542 6560
 	this_high->mismatches_byshift != NULL || this_high->insertions_byshift != NULL || this_high->deletions_byshift != NULL) {
6543 6561
       transfer_position_lh(alloc_tallies_low,alloc_tallies_high,block_tallies_low,block_tallies_high,
6544 6562
 			   &blockptr,&blockstart,&blockend,
... ...
@@ -1,4 +1,4 @@
1
-static char rcsid[] = "$Id: tally.c 161999 2015-03-26 00:05:19Z twu $";
1
+static char rcsid[] = "$Id: tally.c 178965 2015-11-16 19:55:45Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
... ...
@@ -443,7 +443,10 @@ Tally_new () {
443 443
   T new = (T) MALLOC(sizeof(*new));
444 444
 
445 445
   new->refnt = ' ';
446
-  new->nmatches = 0;
446
+  new->nmatches_passing = 0;
447
+  new->nmatches_total = 0;
448
+  new->nmismatches_passing = 0;
449
+  new->nmismatches_total = 0;
447 450
   new->delcounts_plus = 0;
448 451
   new->delcounts_minus = 0;
449 452
   new->n_fromleft_plus = 0;
... ...
@@ -502,7 +505,10 @@ Tally_clear (T this) {
502 505
 
503 506
 
504 507
   this->refnt = ' ';
505
-  this->nmatches = 0;
508
+  this->nmatches_passing = 0;
509
+  this->nmatches_total = 0;
510
+  this->nmismatches_passing = 0;
511
+  this->nmismatches_total = 0;
506 512
   this->delcounts_plus = 0;
507 513
   this->delcounts_minus = 0;
508 514
   this->n_fromleft_plus = 0;
... ...
@@ -638,7 +644,10 @@ Tally_transfer (T *dest, T *src) {
638 644
   *dest = *src;
639 645
 
640 646
   temp->refnt = ' ';
641
-  temp->nmatches = 0;
647
+  temp->nmatches_passing = 0;
648
+  temp->nmatches_total = 0;
649
+  temp->nmismatches_passing = 0;
650
+  temp->nmismatches_total = 0;
642 651
   temp->delcounts_plus = 0;
643 652
   temp->delcounts_minus = 0;
644 653
   temp->n_fromleft_plus = 0;
... ...
@@ -115,7 +115,12 @@ Readevid_cmp (const void *a, const void *b);
115 115
 typedef struct Tally_T *Tally_T;
116 116
 struct Tally_T {
117 117
   char refnt;
118
-  int nmatches;
118
+  int nmatches_passing;
119
+  int nmismatches_passing;
120
+
121
+  int nmatches_total;		/* Kept for informational purposes */
122
+  int nmismatches_total;	/* Kept for informational purposes */
123
+
119 124
   int delcounts_plus;
120 125
   int delcounts_minus;
121 126
 
... ...
@@ -139,6 +144,7 @@ struct Tally_T {
139 144
   int avail_matches_byshift_minus;
140 145
 #endif
141 146
 
147
+  /* Includes only passing quality scores */
142 148
   int n_matches_byshift_plus;
143 149
   int *matches_byshift_plus;
144 150
   int n_matches_byshift_minus;
... ...
@@ -150,6 +156,7 @@ struct Tally_T {
150 156
   int n_matches_byxs;
151 157
   int *matches_byxs;
152 158
 
159
+  /* Includes only passing quality scores */
153 160
   List_T mismatches_byshift;
154 161
   List_T mismatches_bynm;
155 162
   List_T mismatches_byxs;
... ...
@@ -6,9 +6,8 @@
6 6
 #include "iit.h"
7 7
 #include "bytestream.h"
8 8
 
9
-/*I changed the order of these, because the XS spots are optional...*/
10 9
 enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF,
11
-       COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS,
10
+       RAW_COUNT_TOTAL, COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS,
12 11
        COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS,
13 12
        READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR,
14 13
        READ_POS_VAR_REF, MDFNE, MDFNE_REF,  CODON_STRAND,
... ...
@@ -41,6 +40,7 @@ typedef struct TallyTable {
41 40
   int *n_cycles_ref;
42 41
   int *count;
43 42
   int *count_ref;
43
+  int *raw_count_total;
44 44
   int *count_total;
45 45
   int *count_plus;
46 46
   int *count_plus_ref;
... ...
@@ -84,6 +84,7 @@ static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
84 84
   SET_VECTOR_ELT(tally_R, N_CYCLES_REF, allocVector(INTSXP, n_rows));
85 85
   SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows));
86 86
   SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows));
87
+  SET_VECTOR_ELT(tally_R, RAW_COUNT_TOTAL, allocVector(INTSXP, n_rows));
87 88
   SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows));
88 89
   SET_VECTOR_ELT(tally_R, COUNT_PLUS, allocVector(INTSXP, n_rows));
89 90
   SET_VECTOR_ELT(tally_R, COUNT_PLUS_REF, allocVector(INTSXP, n_rows));
... ...
@@ -128,6 +129,7 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
128 129
   tally->n_cycles_ref = INTEGER(VECTOR_ELT(tally_R, N_CYCLES_REF));
129 130
   tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT));
130 131
   tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF));
132
+  tally->raw_count_total = INTEGER(VECTOR_ELT(tally_R, RAW_COUNT_TOTAL));
131 133
   tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL));
132 134
   tally->count_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS));
133 135
   tally->count_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS_REF));
... ...
@@ -163,7 +165,10 @@ static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
163 165
 }
164 166
 
165 167
 static void
166
-read_total_counts(unsigned char **bytes, int row, int *count_total) {
168
+read_total_counts(unsigned char **bytes, int row, int *raw_count_total,
169
+                  int *count_total)
170
+{
171
+  raw_count_total[row] = read_int(bytes);
167 172
   int count_total_plus = read_int(bytes);
168 173
   int count_total_minus = read_int(bytes);
169 174
   count_total[row] = count_total_plus + count_total_minus;
... ...
@@ -286,7 +291,8 @@ parse_indels(unsigned char *bytes, int row,
286 291
     tally->count_minus_ref[row] = read_int(&bytes);
287 292
     tally->count_ref[row] = tally->count_plus_ref[row] +
288 293
 	tally->count_minus_ref[row];
289
-    tally->count_total[row] = tally->count_ref[row] + tally->count[row];
294
+    tally->raw_count_total[row] = tally->count_ref[row] + tally->count[row];
295
+    tally->count_total[row] = tally->raw_count_total[row];
290 296
     tally->strand[row] = STRAND_NONE;
291 297
     SEXP seq_R = mkChar(read_string(&bytes));
292 298
     if (insertion) {
... ...
@@ -354,8 +360,7 @@ static int
354 360
 parse_alleles(unsigned char *bytes, int row, int ref_row,
355 361
               TallyParam param, TallyTable *tally, int strand)
356 362
 {
357
-
358
-  read_total_counts(&bytes, row, tally->count_total);
363
+  read_total_counts(&bytes, row, tally->raw_count_total, tally->count_total);
359 364
   int delcount_plus = 0, delcount_minus = 0;
360 365
   int n_alleles = read_allele_counts(&bytes, row, tally->read_R,
361 366
                                      tally->count_plus, tally->count_minus,
... ...
@@ -383,6 +388,7 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
383 388
       tally->read_pos_mean[row] = R_NaN;
384 389
       tally->read_pos_var[row] = NA_REAL;
385 390
       tally->mdfne[row] = NA_REAL;
391
+      tally->count_high_nm[row] = 0;
386 392
       if (param.xs) {
387 393
         tally->count_xs_plus[row] = 0;
388 394
         tally->count_xs_minus[row] = 0;
... ...
@@ -391,6 +397,7 @@ parse_alleles(unsigned char *bytes, int row, int ref_row,
391 397
   }
392 398
   for (int r = ref_row; r < row; r++) {
393 399
     tally->n_cycles_ref[r] = tally->n_cycles[ref_row];
400
+    tally->raw_count_total[r] = tally->raw_count_total[ref_row];
394 401
     tally->count_total[r] = tally->count_total[ref_row];
395 402
     tally->count_plus_ref[r] = tally->count_plus[ref_row];
396 403
     tally->count_minus_ref[r] = tally->count_minus[ref_row];
... ...
@@ -430,7 +437,7 @@ static int parse_indel_count(unsigned char *bytes) {
430 437
 
431 438
 static int parse_allele_count(unsigned char *bytes) {
432 439
   int n_alleles = 1; /* always have a reference */
433
-  bytes += sizeof(int) * 4 + 1; /* skip total and reference */
440
+  bytes += sizeof(int) * 5 + 1; /* skip total and reference */
434 441
   while(bytes[0] != '\0') {
435 442
       if (bytes[0] != '_')
436 443
           n_alleles++;