src/gstruct/src/bamread.c
5659f096
 static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $";
36cb64f8
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include "bamread.h"
 #include <stdlib.h>
4d5cc806
 #include "assert.h"
36cb64f8
 #include "mem.h"
 #include "complement.h"
 #include "bool.h"
 #include "list.h"
 #include "samflags.h"
 
 
 #ifdef DEBUG
 #define debug(x) x
 #else
 #define debug(x)
 #endif
 
 
 /* Bamread_next_bamline */
 #ifdef DEBUG1
 #define debug1(x) x
 #else
 #define debug1(x)
 #endif
 
 
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
 #include "bam.h"
36cb64f8
 typedef uint8_t *BAM_Sequence_T;
 typedef uint32_t *BAM_Cigar_T;
 #endif
 
 #define T Bamreader_T
 struct T {
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
   bamFile fp;
   bam_header_t *header;
 
   bam_index_t *idx;
   bam_iter_t iter;
 
   bam1_t *bam;
   bam1_core_t *core;
 #endif
 
   int region_limited_p;
   int ndivs;
   char **divnames;
   unsigned int *divlengths;
 };
 
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
 #ifndef bam_init_header_hash
 /* Not declared in bam.h */
 extern void bam_init_header_hash (bam_header_t *header);
 #endif
 #endif
 
 
 void
 Bamread_free (T *old) {
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
   bam_index_destroy((*old)->idx);
   bam_destroy1((*old)->bam);
   if ((*old)->header != NULL) {
     bam_header_destroy((*old)->header);
   }
   bam_close((*old)->fp);
 #endif
 
   FREE(*old);
 
   return;
 }
 
 
 T
 Bamread_new (char *filename) {
   T new = (T) MALLOC(sizeof(*new));
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
3a231c30
   if (Access_file_exists_p(filename) == false) {
     fprintf(stderr,"BAM file %s does not exist\n",filename);
     return (T) NULL;
   } else if ((new->fp = bam_open(filename,"rb")) == NULL) {
36cb64f8
     fprintf(stderr,"Cannot open BAM file %s\n",filename);
     return (T) NULL;
   }
 
   if ((new->header = bam_header_read(new->fp)) == NULL) {
     fprintf(stderr,"bam file has no SQ header lines\n");
   } else {
     new->ndivs = new->header->n_targets;
     new->divnames = new->header->target_name;
     new->divlengths = new->header->target_len;
   }
 
   bam_init_header_hash(new->header);
 
   if ((new->idx = bam_index_load(filename)) == NULL) {
     fprintf(stderr,"Warning: BAM file %s is not indexed\n",filename);
   }
 
   new->bam = bam_init1();
   new->core = &(new->bam->core);
 #endif
 
   new->region_limited_p = 0;
 
   return new;
 }
 
 void
 Bamread_write_header (T this) {
   int i;
 
   for (i = 0; i < this->ndivs; i++) {
     printf("@SQ\tSN:%s\tLN:%u\n",this->divnames[i],this->divlengths[i]);
   }
 
 #if 0
   if (sam_read_group_id != NULL) {
     fprintf(fp,"@RG\tID:%s",sam_read_group_id);
     if (sam_read_group_platform != NULL) {
       fprintf(fp,"\tPL:%s",sam_read_group_platform);
     }
     if (sam_read_group_library != NULL) {
       fprintf(fp,"\tLB:%s",sam_read_group_library);
     }
     fprintf(fp,"\tSM:%s",sam_read_group_name);
     fprintf(fp,"\n");
   }
 #endif
 
   return;
 }
 
 
 Genomicpos_T
 Bamread_chrlength (T this, char *chr) {
   int32_t tid;
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
   /* bam_parse_region(this->header,region,&tid,&chrstart,&chrend); */
   if ((tid = bam_get_tid(this->header,chr)) < 0) {
     fprintf(stderr,"chr %s is not in BAM file\n",chr);
     return 0U;
   } else {
     return this->divlengths[tid];
   }
 #else
   return 0U;
 #endif
 
 }
 
 
 #if 0
 void
 Bamread_reset (T this) {
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
   bam_destroy1(this->bam);
   this->bam = bam_init1();
   this->core = &(this->bam->core);
 #endif
 
   this->iter = NULL;
   this->region_limited_p = 0;
 
   return;
 }
 #endif
 
 
 /* Returns true if chromosome is found */
 bool
 Bamread_limit_region (T this, char *chr, Genomicpos_T chrstart, Genomicpos_T chrend) {
   int32_t tid;
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
   /* bam_parse_region(this->header,region,&tid,&chrstart,&chrend); */
   if ((tid = bam_get_tid(this->header,chr)) < 0) {
     fprintf(stderr,"chr %s is not in BAM file\n",chr);
     return false;
   } else {
     this->iter = bam_iter_query(this->idx,tid,(int) (chrstart - 1U),(int) chrend);
     this->region_limited_p = 1;
     return true;
   }
 #else
   return false;
 #endif
 }
 
 
 void
 Bamread_unlimit_region (T this) {
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
   if (this->region_limited_p == 1) {
     this->region_limited_p = 0;
     bam_iter_destroy(this->iter);
     this->iter = NULL;
   }
 #endif
 
   return;
 }
 
 
 int
 Bamread_nreads (int *npositions, T this, char *chr, Genomicpos_T chrpos1, Genomicpos_T chrpos2) {
   int nreads = 0;
   Genomicpos_T chrstart, chrend;
   int npositions_lowend, npositions_highend;
 
   if (chrpos1 < chrpos2) {
     chrstart = chrpos1;
     chrend = chrpos2;
   } else {
     chrstart = chrpos2;
     chrend = chrpos1;
   }
   
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
   Genomicpos_T chrpos_low, chrpos_high;
   BAM_Cigar_T cigar;
   int type;
   int i;
   unsigned int length;
   bool lowend_p;
   int max_overhang1_lowend = 0, max_overhang2_lowend = 0,
     max_overhang1_highend = 0, max_overhang2_highend = 0;
   int overhang1, overhang2;
   int max_readlength_lowend = 0, max_readlength_highend = 0;
   int readlength;
 
   Bamread_limit_region(this,chr,chrstart,chrend);
   while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
     readlength = this->core->l_qseq;
     chrpos_high = chrpos_low = this->core->pos + 1U;
     if (this->core->mtid != this->core->tid) {
       lowend_p = true;
     } else if (/*mate_chrpos_low*/this->core->mpos + 1U > chrpos_low) {
       lowend_p = true;
     } else {
       lowend_p = false;
     }
 
     cigar = bam1_cigar(this->bam);
     for (i = 0; i < this->core->n_cigar; i++) {
       type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
       length = cigar[i] >> BAM_CIGAR_SHIFT;
 
       if (type == 'S') {
 	/* Ignore */
 
       } else if (type == 'H') {
 	/* Ignore */
 
       } else if (type == 'M') {
 	chrpos_high += length;
 
       } else if (type == 'N') {
 	chrpos_high += length;
 
       } else if (type == 'P') {
 	/* Do nothing */
 
       } else if (type == 'I') {
 	/* Do nothing */
 
       } else if (type == 'D') {
 	/* CHECK */
 	chrpos_high += length;
 
       } else {
 	fprintf(stderr,"Cannot parse type %c\n",type);
 	exit(9);
       }
     }
 
     /* printf("read %d between %u and %u, at %u..%u\n",
        nreads,chrstart,chrend,chrpos_low,chrpos_high); */
     if (chrpos_low >= chrstart && chrpos_high <= chrend) {
       /* Internal to both bounds */
       nreads++;
 
     } else if (chrpos_high <= chrend) {
       /* Straddles low bound */
       if (chrpos_high > chrstart + readlength/2) {
 	/* More than half of the read is in the intron */
 	if (lowend_p == true) {
 	  if ((overhang1 = readlength - (chrpos_high - chrstart)) > max_overhang1_lowend) {
 	    max_overhang1_lowend = overhang1;
 	  }
 	  if (readlength > max_readlength_lowend) {
 	    max_readlength_lowend = readlength;
 	  }
 	} else {
 	  if ((overhang1 = readlength - (chrpos_high - chrstart)) > max_overhang1_highend) {
 	    max_overhang1_highend = overhang1;
 	  }
 	  if (readlength > max_readlength_highend) {
 	    max_readlength_highend = readlength;
 	  }
 	}
 	nreads++;
       }
 
     } else if (chrpos_low >= chrstart) {
       /* Straddles high bound */
       if (chrpos_low < chrend - readlength/2) {
 	/* More than half of the read is in the intron */
 	if (lowend_p == true) {
 	  if ((overhang2 = readlength - (chrend - chrpos_low)) > max_overhang2_lowend) {
 	    max_overhang2_lowend = overhang2;
 	  }
 	  if (readlength > max_readlength_lowend) {
 	    max_readlength_lowend = readlength;
 	  }
 	} else {
 	  if ((overhang2 = readlength - (chrend - chrpos_low)) > max_overhang2_highend) {
 	    max_overhang2_highend = overhang2;
 	  }
 	  if (readlength > max_readlength_highend) {
 	    max_readlength_highend = readlength;
 	  }
 	}
 	nreads++;
       }
 
     } else {
       /* Includes both bounds.  Don't want these reads.  */
     }
   }
   Bamread_unlimit_region(this);
 #endif
 
 
   if (max_readlength_lowend == 0) {
     npositions_lowend = 0;
   } else if ((npositions_lowend = (chrend - chrstart + 1) + max_overhang1_lowend + max_overhang2_lowend - max_readlength_lowend) < 0) {
     npositions_lowend = 0;
   }
 
   if (max_readlength_highend == 0) {
     npositions_highend = 0;
   } else if ((npositions_highend = (chrend - chrstart + 1) + max_overhang1_highend + max_overhang2_highend - max_readlength_highend) < 0) {
     npositions_highend = 0;
   }
   
   if ((*npositions = npositions_lowend + npositions_highend) == 0) {
     *npositions = 1;
   }
 
   return nreads;
 }
 
 
 static void
 parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos,
 	    char **mate_chr, Genomicpos_T *mate_chrpos, int *insert_length,
 	    Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength,
8418591e
 	    int *readlength, char **read, char **quality_string, char **hardclip, char **hardclip_quality,
 	    char **read_group, bool *terminalp) {
36cb64f8
   int type;
   int i;
   unsigned int length;
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
36cb64f8
   BAM_Sequence_T seq;
   BAM_Cigar_T cigar;
   uint8_t *ptr;
8418591e
   char *string;
36cb64f8
 
   *acc = bam1_qname(this->bam);
   *flag = this->core->flag;
   *mapq = this->core->qual;
 
   if (this->core->tid < 0) {
     *chr = (char *) NULL;
   } else if (this->core->tid >= this->ndivs) {
     fprintf(stderr,"tid %d >= ndivs %d\n",this->core->tid,this->ndivs);
     exit(9);
   } else {
     *chr = this->divnames[this->core->tid];
   }
   *chrpos = this->core->pos + 1U;
   /* printf("%s:%u\n",*chr,*chrpos); */
 
   if (this->core->mtid < 0) {
     *mate_chr = (char *) NULL;
   } else if (this->core->mtid >= this->ndivs) {
     fprintf(stderr,"mtid %d >= ndivs %d\n",this->core->mtid,this->ndivs);
     exit(9);
   } else {
     *mate_chr = this->divnames[this->core->mtid];
   }
   *mate_chrpos = this->core->mpos + 1U;
 
   *insert_length = this->core->isize;
 
   *readlength = this->core->l_qseq;
8418591e
   *read = (char *) MALLOC(((*readlength)+1) * sizeof(char));
36cb64f8
   seq = bam1_seq(this->bam);
   for (i = 0; i < *readlength; i++) {
     switch (bam1_seqi(seq,i)) {
     case 1: (*read)[i] = 'A'; break;
     case 2: (*read)[i] = 'C'; break;
     case 4: (*read)[i] = 'G'; break;
     case 8: (*read)[i] = 'T'; break;
     case 15: (*read)[i] = 'N'; break;
     default: (*read)[i] = '?'; break;
     }
   }
8418591e
   (*read)[*readlength] = '\0';
36cb64f8
 
   ptr = bam1_qual(this->bam);
   if (ptr[0] == 0xff) {
     *quality_string = (char *) NULL;
   } else {
     *quality_string = (char *) ptr;
   }
 
8418591e
   ptr = bam_aux_get(this->bam,"XH");
   if (ptr == NULL) {
     *hardclip = (char *) NULL;
   } else {
     string = bam_aux2Z(ptr);
     *hardclip = (char *) MALLOC((strlen(string)+1) * sizeof(char));
     strcpy(*hardclip,string);
   }
 
   ptr = bam_aux_get(this->bam,"XI");
   if (ptr == NULL) {
     *hardclip_quality = (char *) NULL;
   } else {
     string = bam_aux2Z(ptr);
     *hardclip_quality = (char *) MALLOC((strlen(string)+1) * sizeof(char));
     strcpy(*hardclip_quality,string);
   }
 
4d5cc806
   ptr = bam_aux_get(this->bam,"RG");
   if (ptr == NULL) {
     *read_group = (char *) NULL;
   } else {
     *read_group = bam_aux2Z(ptr);
   }
 
8418591e
   ptr = bam_aux_get(this->bam,"XG");
17db5e75
   if (ptr == NULL) {
     *terminalp = false;
   } else {
8418591e
     string = bam_aux2Z(ptr);
     if (strcmp((char *) string,"T")) {
       *terminalp = false;
     } else {
       *terminalp = true;
     }
17db5e75
   }
 
36cb64f8
   /* Cigar */
   *cigarlength = 0;
   *cigartypes = (Intlist_T) NULL;
   *cigarlengths = (Uintlist_T) NULL;
 
   cigar = bam1_cigar(this->bam);
   for (i = 0; i < this->core->n_cigar; i++) {
     length = cigar[i] >> BAM_CIGAR_SHIFT;
     *cigarlengths = Uintlist_push(*cigarlengths,length);
 
     type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
     *cigartypes = Intlist_push(*cigartypes,type);
 
     if (type == 'S' || type == 'M' || type == 'I') {
       *cigarlength += (int) length;
     } else if (type == 'H') {
       *cigarlength += (int) length;
     } else if (type == 'D' || type == 'N') {
       /* Ignore */
     } else if (type == 'P') {
       /* Ignore */
     } else {
       fprintf(stderr,"bamread.c cannot cigar int of %d\n",cigar[i] & BAM_CIGAR_MASK);
       exit(9);
     }
   }
 
   *cigartypes = Intlist_reverse(*cigartypes);
   *cigarlengths = Uintlist_reverse(*cigarlengths);
 
 #endif
 
   return;
 }
 
 
c78be696
 static bool
 has_indel_p (T this) {
   int type;
   int i;
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
c78be696
   BAM_Cigar_T cigar;
 
   /* Cigar */
   cigar = bam1_cigar(this->bam);
   for (i = 0; i < this->core->n_cigar; i++) {
     /* length = cigar[i] >> BAM_CIGAR_SHIFT; */
     type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
     if (type == 'I' || type == 'D') {
       return true;
     }
   }
 #endif
 
   return false;
 }
 
 static bool
 perfect_match_p (T this) {
   int type;
   int i;
   unsigned char *s;
   unsigned char tag1, tag2;
 
 
b0aa06af
 #ifdef HAVE_SAMTOOLS_LIB
c78be696
   BAM_Cigar_T cigar;
 
   /* Cigar */
   cigar = bam1_cigar(this->bam);
   for (i = 0; i < this->core->n_cigar; i++) {
     /* length = cigar[i] >> BAM_CIGAR_SHIFT; */
     type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
     if (type == 'S') {
       return false;
 
     } else if (type == 'H') {
       return false;
       
     } else if (type == 'M') {
       /* acceptable */
 
     } else if (type == 'N') {
       /* acceptable */
       
     } else if (type == 'P') {
       /* Do nothing */
       
     } else if (type == 'I') {
       return false;
 
     } else if (type == 'D') {
       return false;
 
     } else {
       fprintf(stderr,"Cannot parse type %c\n",type);
       exit(9);
     }
   }
 
   /* Check MD string */
   s = /*aux_start*/ bam1_aux(this->bam);
   while (s < /*aux_end*/ this->bam->data + this->bam->data_len) {
     tag1 = *s++;
     tag2 = *s++;
     type = *s++;
     if (tag1 == 'M' && tag2 == 'D') {
       /* MD string */
       while (*s) {
 	if (*s >= '0' && *s <= '9') {
 	  /* acceptable */
 	  s++;
 	} else {
 	  return false;
 	}
       }
       /* s++; */
       return true;
 
     } else {
       /* Skip this tag */
       while (*s) { s++;}
       s++;
     }
   }
 #endif
 
   /* No MD string found */
   return true;
 }
 
 
 
36cb64f8
 int
 Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos,
 		   char **mate_chr, Genomicpos_T *mate_chrpos,
 		   Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength,
8418591e
 		   int *readlength, char **read, char **quality_string, char **hardclip, char **hardclip_quality,
 		   char **read_group, bool *terminalp) {
36cb64f8
   int insert_length;
 
b0aa06af
 #ifndef HAVE_SAMTOOLS_LIB
36cb64f8
   return 0;
 #else
   if (this->region_limited_p == 1) {
     if (bam_iter_read(this->fp,this->iter,this->bam) < 0) {
       return 0;
     } else {
       parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos),
 		 &(*mate_chr),&(*mate_chrpos),&insert_length,
 		 &(*cigartypes),&(*cigarlengths),&(*cigarlength),
8418591e
 		 &(*readlength),&(*read),&(*quality_string),&(*hardclip),&(*hardclip_quality),
 		 &(*read_group),&(*terminalp));
36cb64f8
       return 1;
     }
   } else {
     if (bam_read1(this->fp,this->bam) < 0) {
       return 0;
     } else {
       parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos),
 		 &(*mate_chr),&(*mate_chrpos),&insert_length,
 		 &(*cigartypes),&(*cigarlengths),&(*cigarlength),
8418591e
 		 &(*readlength),&(*read),&(*quality_string),&(*hardclip),&(*hardclip_quality),
 		 &(*read_group),&(*terminalp));
36cb64f8
       return 1;
     }
   }
 #endif
 }
 
 
 
 struct Bamline_T {
   char *acc;
   unsigned int flag;
5659f096
   int hiti;
36cb64f8
   int nhits;
   bool good_unique_p;			/* Good above good_unique_mapq.  Dependent on second_mapq. */
   int mapq;
8418591e
   int nm;
36cb64f8
   char splice_strand;
   char *chr;
   Genomicpos_T chrpos_low;
   char *mate_chr;
   Genomicpos_T mate_chrpos_low;
   int insert_length;
   Intlist_T cigar_types;
   Uintlist_T cigar_npositions;
   int cigar_querylength;
   int readlength;
   char *read;
   char *quality_string;
8418591e
   char *hardclip;
   char *hardclip_quality;
17db5e75
   bool terminalp;
c78be696
 
   /* Can be used if we read one line at a time.  Points to bam->data, which is re-used upon each read */
36cb64f8
   unsigned char *aux_start;
   unsigned char *aux_end;
c78be696
 
   /* Used if we read more than one line at a time */
   uint8_t *aux_contents;
   size_t aux_length;
36cb64f8
 };
 
 
 char *
 Bamline_acc (Bamline_T this) {
   return this->acc;
 }
 
 unsigned int
 Bamline_flag (Bamline_T this) {
   return this->flag;
 }
 
 static bool
 concordantp (unsigned int flag) {
   if (flag & QUERY_UNMAPPED) {
     return false;
   } else if ((flag & PAIRED_READ) == 0U) {
     /* Not even a paired read, so consider it concordant */
     return true;
   } else if (flag & PAIRED_MAPPING) {
     return true;
   } else {
     return false;
   }
 }
 
 
 int
 Bamline_concordantp (Bamline_T this) {
   if (this->flag & QUERY_UNMAPPED) {
     return false;
   } else if ((this->flag & PAIRED_READ) == 0U) {
     return false;
   } else if (this->flag & PAIRED_MAPPING) {
     return true;
   } else {
     return false;
   }
 }
 
 int
 Bamline_lowend_p (Bamline_T this) {
   if (this->mate_chrpos_low > this->chrpos_low) {
     return true;
   } else {
     return false;
   }
 }
 
 
 int
 Bamline_paired_read_p (Bamline_T this) {
   if ((this->flag & PAIRED_READ) == 0) {
     return false;
   } else {
     return true;
   }
 }
 
 
 int
 Bamline_firstend_p (Bamline_T this) {
   if ((this->flag & PAIRED_READ) == 0) {
     return true;
   } else if (this->flag & FIRST_READ_P) {
     return true;
   } else if (this->flag & SECOND_READ_P) {
     return false;
   } else {
     fprintf(stderr,"Read is marked as paired (0x1), but neither first read nor second read bit is set\n");
     exit(9);
   }
 }
 
5659f096
 int
 Bamline_hiti (Bamline_T this) {
   return this->hiti;
 }
 
36cb64f8
 int
 Bamline_nhits (Bamline_T this) {
   return this->nhits;
 }
 
 bool
 Bamline_good_unique_p (Bamline_T this) {
   return this->good_unique_p;
 }
 
c78be696
 bool
 Bamline_perfect_match_p (Bamline_T this) {
   Intlist_T p;
   int type;
   unsigned char *s;
   unsigned char tag1, tag2;
 
   for (p = this->cigar_types; p != NULL; p = Intlist_next(p)) {
     type = Intlist_head(p);
     if (type == 'S') {
       return false;
 
     } else if (type == 'H') {
       return false;
       
     } else if (type == 'M') {
       /* acceptable */
 
     } else if (type == 'N') {
       /* acceptable */
       
     } else if (type == 'P') {
       /* Do nothing */
       
     } else if (type == 'I') {
       return false;
 
     } else if (type == 'D') {
       return false;
 
     } else {
       fprintf(stderr,"Cannot parse type %c\n",type);
       exit(9);
     }
   }
 
   /* Check MD string */
   s = this->aux_start;
   while (s < this->aux_end) {
     tag1 = *s++;
     tag2 = *s++;
     type = *s++;
     if (tag1 == 'M' && tag2 == 'D') {
       /* MD string */
       while (*s) {
 	if (*s >= '0' && *s <= '9') {
 	  /* acceptable */
 	  s++;
 	} else {
 	  return false;
 	}
       }
       /* s++; */
       return true;
 
     } else {
       /* Skip this tag */
       while (*s) { s++;}
       s++;
     }
   }
 
   /* No MD string found */
   return true;
 }
 
 
36cb64f8
 int
 Bamline_mapq (Bamline_T this) {
   return this->mapq;
 }
 
 char *
 Bamline_chr (Bamline_T this) {
   return this->chr;
 }
 
 Genomicpos_T
 Bamline_chrpos_low (Bamline_T this) {
   return this->chrpos_low;
 }
 
144f6bb1
 Genomicpos_T
 Bamline_chrpos_low_noclip (Bamline_T this) {
   int type;
 
   if ((type = Intlist_head(this->cigar_types)) != 'S') {
     return this->chrpos_low;
   } else {
     return this->chrpos_low - (Uintlist_head(this->cigar_npositions));
   }
 }
 
36cb64f8
 char *
 Bamline_mate_chr (Bamline_T this) {
   return this->mate_chr;
 }
 
 Genomicpos_T
 Bamline_mate_chrpos_low (Bamline_T this) {
   return this->mate_chrpos_low;
 }
 
 int
 Bamline_insert_length (Bamline_T this) {
   return this->insert_length;
 }
 
 Intlist_T
 Bamline_cigar_types (Bamline_T this) {
   return this->cigar_types;
 }
 
 
 Uintlist_T
 Bamline_cigar_npositions (Bamline_T this) {
   return this->cigar_npositions;
 }
 
52b3ca64
 Intlist_T
8418591e
 Bamline_diffcigar (int *min_overhang, Uintlist_T *npositions, Uintlist_T *chrpositions, Bamline_T this) {
52b3ca64
   Intlist_T types = NULL;
   int type;
   Genomicpos_T chrpos;
   Intlist_T p;
   Uintlist_T q;
8418591e
   int querypos = 0;
52b3ca64
 
   *npositions = (Uintlist_T) NULL;
   *chrpositions = (Uintlist_T) NULL;
8418591e
   *min_overhang = 0;
52b3ca64
 
   chrpos = this->chrpos_low;
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     if ((type = Intlist_head(p)) == 'S') {
8418591e
       querypos += Uintlist_head(q);
52b3ca64
 
     } else if (type == 'H') {
       /* Ignore */
 
     } else if (type == 'M') {
8418591e
       querypos += Uintlist_head(q);
52b3ca64
       chrpos += Uintlist_head(q);
 
     } else if (type == 'N') {
       types = Intlist_push(types,type);
       *npositions = Uintlist_push(*npositions,Uintlist_head(q));
       *chrpositions = Uintlist_push(*chrpositions,chrpos);
       chrpos += Uintlist_head(q);
 
     } else if (type == 'P') {
       /* Do nothing */
 
     } else if (type == 'I') {
8418591e
       if (querypos < this->readlength/2) {
 	if (querypos > *min_overhang) {
 	  *min_overhang = querypos;
 	}
       } else {
 	if (this->readlength - querypos > *min_overhang) {
 	  *min_overhang = this->readlength - querypos;
 	}
       }
 
52b3ca64
       types = Intlist_push(types,type);
       *npositions = Uintlist_push(*npositions,Uintlist_head(q));
       *chrpositions = Uintlist_push(*chrpositions,chrpos);
8418591e
       querypos += Uintlist_head(q);
52b3ca64
 
     } else if (type == 'D') {
8418591e
       if (querypos < this->readlength/2) {
 	if (querypos > *min_overhang) {
 	  *min_overhang = querypos;
 	}
       } else {
 	if (this->readlength - querypos > *min_overhang) {
 	  *min_overhang = this->readlength - querypos;
 	}
       }
 
52b3ca64
       types = Intlist_push(types,type);
       *npositions = Uintlist_push(*npositions,Uintlist_head(q));
       *chrpositions = Uintlist_push(*chrpositions,chrpos);
       chrpos += Uintlist_head(q);
 
     } else {
       fprintf(stderr,"Cannot parse type %c\n",type);
       exit(9);
     }
     debug(printf("  type = %c, chrpos = %u\n",type,chrpos));
   }
 
   *npositions = Uintlist_reverse(*npositions);
   *chrpositions = Uintlist_reverse(*chrpositions);
   return Intlist_reverse(types);
 }
 
 
36cb64f8
 void
95e6ece6
 Bamread_print_cigar (FILE *fp, Bamline_T this) {
36cb64f8
   Intlist_T p;
   Uintlist_T q;
 
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
95e6ece6
     fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
36cb64f8
   }
   return;
 }
 
4d5cc806
 char *
 Bamline_cigar_string (Bamline_T this) {
   char *string, number[12];
   Intlist_T p;
   Uintlist_T q;
   int string_length = 0;
 
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     sprintf(number,"%u",Uintlist_head(q));
     string_length += strlen(number) + 1;
   }
   string = (char *) CALLOC(string_length+1,sizeof(char));
 
   string_length = 0;
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     sprintf(number,"%u",Uintlist_head(q));
     sprintf(&(string[string_length]),"%u%c",Uintlist_head(q),Intlist_head(p));
     string_length += strlen(number) + 1;
   }
   
   return string;
 }
 
36cb64f8
 int
 Bamline_cigar_querylength (Bamline_T this) {
   return this->cigar_querylength;
 }
 
 int
 Bamline_readlength (Bamline_T this) {
   return this->readlength;
 }
 
 char *
 Bamline_read (Bamline_T this) {
   return this->read;
 }
 
 char *
 Bamline_quality_string (Bamline_T this) {
   return this->quality_string;
 }
 
8418591e
 char *
 Bamline_hardclip (Bamline_T this) {
   return this->hardclip;
 }
 
 char *
 Bamline_hardclip_quality (Bamline_T this) {
   return this->hardclip_quality;
 }
 
17db5e75
 bool
 Bamline_terminalp (Bamline_T this) {
   return this->terminalp;
 }
 
36cb64f8
 
 static void
 aux_print (FILE *fp, unsigned char *s, unsigned char *aux_end) {
   unsigned char tag1, tag2;
   int type;
 
   while (s < aux_end) {
     tag1 = *s++;
     tag2 = *s++;
     type = *s++;
     fprintf(fp,"\t%c%c:",tag1,tag2);
 
     if (type == 'c') {
       fprintf(fp,"i:%d",* (int8_t *) s);
       s += 1;
     } else if (type == 'C') {
       fprintf(fp,"i:%u",* (uint8_t *) s);
       s += 1;
     } else if (type == 's') {
       fprintf(fp,"i:%d",* (int16_t *) s);
       s += 2;
     } else if (type == 'S') {
       fprintf(fp,"i:%u",* (uint16_t *) s);
       s += 2;
     } else if (type == 'i') {
       fprintf(fp,"i:%d",* (int32_t *) s);
       s += 4;
     } else if (type == 'I') {
       fprintf(fp,"i:%u",* (uint32_t *) s);
       s += 4;
     } else if (type == 'A') {
       fprintf(fp,"A:%c",* (char *) s);
       s += 1;
     } else if (type == 'f') {
       fprintf(fp,"f:%f",* (float *) s);
       s += 4;
     } else if (type == 'd') {
       fprintf(fp,"d:%f",* (double *) s);
       s += 8;
     } else if (type == 'Z' || type == 'H') {
       fprintf(fp,"Z:");
       while (*s) {
 	fprintf(fp,"%c",*s++);
       }
       s++;
     } else {
       /* fprintf(stderr,"Unrecognized type %c\n",type); */
     }
   }
 
   return;
 }
 
 
144f6bb1
 static char *
 aux_print_skip_md (FILE *fp, unsigned char *s, unsigned char *aux_end) {
   char *orig_md_string = NULL;
   unsigned char tag1, tag2;
   int type;
 
   while (s < aux_end) {
     tag1 = *s++;
     tag2 = *s++;
     type = *s++;
     if (tag1 == 'M' && tag2 == 'D') {
       /* Skip MD string.  Assume type is 'Z' or 'H' */
       orig_md_string = (char *) s;
       while (*s) { s++;}
       s++;
 
     } else {
       fprintf(fp,"\t%c%c:",tag1,tag2);
 
       if (type == 'c') {
 	fprintf(fp,"i:%d",* (int8_t *) s);
 	s += 1;
       } else if (type == 'C') {
 	fprintf(fp,"i:%u",* (uint8_t *) s);
 	s += 1;
       } else if (type == 's') {
 	fprintf(fp,"i:%d",* (int16_t *) s);
 	s += 2;
       } else if (type == 'S') {
 	fprintf(fp,"i:%u",* (uint16_t *) s);
 	s += 2;
       } else if (type == 'i') {
 	fprintf(fp,"i:%d",* (int32_t *) s);
 	s += 4;
       } else if (type == 'I') {
 	fprintf(fp,"i:%u",* (uint32_t *) s);
 	s += 4;
       } else if (type == 'A') {
 	fprintf(fp,"A:%c",* (char *) s);
 	s += 1;
       } else if (type == 'f') {
 	fprintf(fp,"f:%f",* (float *) s);
 	s += 4;
       } else if (type == 'd') {
 	fprintf(fp,"d:%f",* (double *) s);
 	s += 8;
       } else if (type == 'Z' || type == 'H') {
 	fprintf(fp,"Z:");
 	while (*s) {
 	  fprintf(fp,"%c",*s++);
 	}
 	s++;
       } else {
 	/* fprintf(stderr,"Unrecognized type %c\n",type); */
       }
     }
   }
 
   return orig_md_string;
 }
 
 
36cb64f8
 void
95e6ece6
 Bamline_print (FILE *fp, Bamline_T this, unsigned int newflag, int quality_score_adj) {
36cb64f8
   Intlist_T p;
   Uintlist_T q;
95e6ece6
   char *c;
36cb64f8
 
   fprintf(fp,"%s\t",this->acc);
   fprintf(fp,"%u\t",newflag);
   if (this->chr == NULL) {
     fprintf(fp,"*\t0\t");
   } else {
     fprintf(fp,"%s\t%u\t",this->chr,this->chrpos_low);
   }
   fprintf(fp,"%d\t",this->mapq);
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
   }
   fprintf(fp,"\t");
   if (this->mate_chr == NULL) {
     fprintf(fp,"*\t0\t");
   } else if (this->chr != NULL && strcmp(this->mate_chr,this->chr) == 0) {
     fprintf(fp,"=\t%u\t",this->mate_chrpos_low);
   } else {
     fprintf(fp,"%s\t%u\t",this->mate_chr,this->mate_chrpos_low);
   }
   fprintf(fp,"%d\t",this->insert_length);
   fprintf(fp,"%s\t",this->read);
   if (this->quality_string == NULL) {
     fprintf(fp,"*");
   } else {
95e6ece6
     /* fprintf(fp,"%s",this->quality_string); */
     c = this->quality_string;
     while (*c != '\0') {
       fputc(*c + quality_score_adj,fp);
       c++;
     }
36cb64f8
   }
 
   aux_print(fp,this->aux_start,this->aux_end);
 
   fprintf(fp,"\n");
   return;
 }
 
 
144f6bb1
 void
 Bamline_print_new_cigar (FILE *fp, Bamline_T this, Genomicpos_T chrpos_low, char *new_cigar,
c78be696
 			 char *new_md_string, int quality_score_adj) {
144f6bb1
   Intlist_T p;
   Uintlist_T q;
   char *orig_md_string;
c78be696
   char *c;
144f6bb1
 
   fprintf(fp,"%s\t",this->acc);
   fprintf(fp,"%u\t",this->flag);
   fprintf(fp,"%s\t%u\t",this->chr,chrpos_low);
   fprintf(fp,"%d\t",this->mapq);
   fprintf(fp,"%s\t",new_cigar);
 
   /* TODO: Fix mate information */
   if (this->mate_chr == NULL) {
     fprintf(fp,"*\t0\t");
   } else if (this->chr != NULL && strcmp(this->mate_chr,this->chr) == 0) {
     fprintf(fp,"=\t%u\t",this->mate_chrpos_low);
   } else {
     fprintf(fp,"%s\t%u\t",this->mate_chr,this->mate_chrpos_low);
   }
   fprintf(fp,"%d\t",this->insert_length);
   fprintf(fp,"%s\t",this->read);
   if (this->quality_string == NULL) {
     fprintf(fp,"*");
   } else {
c78be696
     /* fprintf(fp,"%s",this->quality_string); */
     c = this->quality_string;
     while (*c != '\0') {
       fputc(*c + quality_score_adj,fp);
       c++;
     }
144f6bb1
   }
 
   fprintf(fp,"\tMD:Z:%s",new_md_string);
   
   orig_md_string = aux_print_skip_md(fp,this->aux_start,this->aux_end);
 
   /* Original alignment */
   fprintf(fp,"\tXX:i:%u",this->chrpos_low);
   fprintf(fp,"\tXY:Z:");
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
   }
   if (orig_md_string != NULL) {
     fprintf(fp,"\tXZ:Z:%s",orig_md_string);
   }
 
   fprintf(fp,"\n");
   return;
 }
 
 
52b3ca64
 void
 Bamline_print_new_mate (FILE *fp, Bamline_T this, char *mate_chr, Genomicpos_T mate_chrpos_low,
 			int insert_length) {
   Intlist_T p;
   Uintlist_T q;
 
   fprintf(fp,"%s\t",this->acc);
   fprintf(fp,"%u\t",this->flag);
   if (this->chr == NULL) {
     fprintf(fp,"*\t0\t");
   } else {
     fprintf(fp,"%s\t%u\t",this->chr,this->chrpos_low);
   }
   fprintf(fp,"%d\t",this->mapq);
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
   }
   fprintf(fp,"\t");
   if (mate_chr == NULL) {
     fprintf(fp,"*\t0\t");
   } else if (this->chr != NULL && strcmp(mate_chr,this->chr) == 0) {
     fprintf(fp,"=\t%u\t",mate_chrpos_low);
   } else {
     fprintf(fp,"%s\t%u\t",mate_chr,mate_chrpos_low);
   }
   assert(insert_length >= 0);
   if (this->chrpos_low < mate_chrpos_low) {
     fprintf(fp,"%d\t",insert_length);
   } else {
     fprintf(fp,"%d\t",-insert_length);
   }
   fprintf(fp,"%s\t",this->read);
   if (this->quality_string == NULL) {
     fprintf(fp,"*");
   } else {
     fprintf(fp,"%s",this->quality_string);
   }
 
   aux_print(fp,this->aux_start,this->aux_end);
 
   fprintf(fp,"\n");
   return;
 }
 
 
8418591e
 static int
 aux_nm (T this) {
 #ifndef HAVE_SAMTOOLS_LIB
   return 0;
 #else
   uint8_t *s;
 
   s = bam_aux_get(this->bam,"NM");
   if (s == NULL) {
     return 0;
   } else {
     return bam_aux2i(s);
   }
 #endif
 }
 
 int
 Bamline_nm (Bamline_T this) {
   return this->nm;
 }
 
 
36cb64f8
 static char
 aux_splice_strand (T this) {
   char strand;
 
b0aa06af
 #ifndef HAVE_SAMTOOLS_LIB
36cb64f8
   return ' ';
 #else
   uint8_t *s;
 
   s = bam_aux_get(this->bam,"XS");
   if (s == NULL) {
     return ' ';
   } else if ((strand = bam_aux2A(s)) == '?') {
     return '?';
   } else {
     return strand;
   }
 #endif
 }
 
 char
 Bamline_splice_strand (Bamline_T this) {
   return this->splice_strand;
 }
 
 
5659f096
 static int
 aux_hiti (T this) {
 #ifndef HAVE_SAMTOOLS_LIB
   return 1;
 #else
   uint8_t *s;
 
   s = bam_aux_get(this->bam,"HI");
   if (s == NULL) {
     return 1;
   } else {
     return bam_aux2i(s);
   }
 #endif
 }
 
36cb64f8
 static int
 aux_nhits (T this) {
b0aa06af
 #ifndef HAVE_SAMTOOLS_LIB
36cb64f8
   return 1;
 #else
   uint8_t *s;
 
   s = bam_aux_get(this->bam,"NH");
   if (s == NULL) {
     return 1;
   } else {
     return bam_aux2i(s);
   }
 #endif
 }
 
 
 static bool
 aux_good_unique_p (T this, int good_unique_mapq) {
b0aa06af
 #ifndef HAVE_SAMTOOLS_LIB
36cb64f8
   return true;
 #else
   uint8_t *s;
 
   s = bam_aux_get(this->bam,"X2");
   if (s != NULL) {
     if (bam_aux2i(s) < good_unique_mapq) {
       return true;
     } else {
       return false;
     }
 
   } else {
     s = bam_aux_get(this->bam,"NH");
     if (s == NULL) {
       return true;
     } else if (bam_aux2i(s) <= 1) {
       return true;
     } else {
       return false;
     }
   }
 #endif
 }
 
 
 static char complCode[128] = COMPLEMENT_LC;
 
 
 static char
 find_strand (bool *canonicalp, char *donor1, char *donor2, char *acceptor1, char *acceptor2,
 	     Genomicpos_T firstpos, Genomicpos_T secondpos, char *chr,
 	     Genome_T genome, IIT_T chromosome_iit, Bamline_T this, bool trust_sam_p) {
   Chrnum_T chrnum;
   Genomicpos_T chroffset;
   char nt1, nt2, nt3, nt4;
   char truestrand;
 
   if ((truestrand = this->splice_strand) != ' ') {
     if (trust_sam_p == true) {
       *canonicalp = true;
       *donor1 = *donor2 = *acceptor1 = *acceptor2 = ' ';
       return truestrand;
     } else {
       chrnum = IIT_find_one(chromosome_iit,chr);
       chroffset = Interval_low(IIT_interval(chromosome_iit,chrnum)) - 1U;
 
       /* Look at genome inside of firstpos and secondpos to get dinucleotides */
       nt1 = Genome_get_char(genome,chroffset+firstpos+1);
       nt2 = Genome_get_char(genome,chroffset+firstpos+2);
       nt3 = Genome_get_char(genome,chroffset+secondpos-2);
       nt4 = Genome_get_char(genome,chroffset+secondpos-1);
 
       debug(printf("Got splice from %u to %u\n",firstpos,secondpos));
       debug(printf("Dinucleotides are %c%c to %c%c\n",nt1,nt2,nt3,nt4));
 
       if (truestrand == '+') {
 	if (nt1 == 'G' && (nt2 == 'T' || nt2 == 'C') && nt3 == 'A' && nt4 == 'G') {
 	  *canonicalp = true;
 	} else if (nt1 == 'A' && nt2 == 'T' && nt3 == 'A' && nt4 == 'C') {
 	  *canonicalp = true;
 	} else {
 	  *canonicalp = false;
 	}
 	*donor1 = nt1; *donor2 = nt2; *acceptor1 = nt3; *acceptor2 = nt4;
 
       } else if (truestrand == '-') {
 	if (nt1 == 'C' && nt2 == 'T' && (nt3 == 'A' || nt3 == 'G') && nt4 == 'C') {
 	  *canonicalp = true;
 	} else if (nt1 == 'G' && nt2 == 'T' && nt3 == 'A' && nt4 == 'T') {
 	  *canonicalp = true;
 	} else {
 	  *canonicalp = false;
 	}
 	*donor1 = complCode[(int) nt4]; *donor2 = complCode[(int) nt3]; *acceptor1 = complCode[(int) nt2]; *acceptor2 = complCode[(int) nt1];
 
       } else {
 	fprintf(stderr,"Unrecognized truestrand %c\n",truestrand);
 	abort();
       }
 
       return truestrand;
     }
 
   } else if (chromosome_iit == NULL) {
     fprintf(stderr,"Strand is not present in auxinfo\n");
     fprintf(stderr,"To determine strand, need to provide index file with -d flag\n");
     exit(9);
 
   } else {
     chrnum = IIT_find_one(chromosome_iit,chr);
     chroffset = Interval_low(IIT_interval(chromosome_iit,chrnum)) - 1U;
 
     /* Look at genome inside of firstpos and secondpos to determine truestrand */
     nt1 = Genome_get_char(genome,chroffset+firstpos+1);
     nt2 = Genome_get_char(genome,chroffset+firstpos+2);
     nt3 = Genome_get_char(genome,chroffset+secondpos-2);
     nt4 = Genome_get_char(genome,chroffset+secondpos-1);
 
     debug(printf("Got splice from %u to %u\n",firstpos,secondpos));
     debug(printf("Dinucleotides are %c%c to %c%c\n",nt1,nt2,nt3,nt4));
 
     if (nt1 == 'G' && (nt2 == 'T' || nt2 == 'C') && nt3 == 'A' && nt4 == 'G') {
       *donor1 = nt1; *donor2 = nt2; *acceptor1 = nt3; *acceptor2 = nt4;
       *canonicalp = true;
       return '+';
     } else if (nt1 == 'C' && nt2 == 'T' && (nt3 == 'A' || nt3 == 'G') && nt4 == 'C') {
       *donor1 = complCode[(int) nt4]; *donor2 = complCode[(int) nt3]; *acceptor1 = complCode[(int) nt2]; *acceptor2 = complCode[(int) nt1];
       *canonicalp = true;
       return '-';
     } else if (nt1 == 'A' && nt2 == 'T' && nt3 == 'A' && nt4 == 'C') {
       *donor1 = nt1; *donor2 = nt2; *acceptor1 = nt3; *acceptor2 = nt4;
       *canonicalp = true;
       return '+';
     } else if (nt1 == 'G' && nt2 == 'T' && nt3 == 'A' && nt4 == 'T') {
       *donor1 = complCode[(int) nt4]; *donor2 = complCode[(int) nt3]; *acceptor1 = complCode[(int) nt2]; *acceptor2 = complCode[(int) nt1];
       *canonicalp = true;
       return '-';
     } else {
       /* In GSNAP, will want to output sense information in SAM output. */
 #if 0
       fprintf(stderr,"Splice %s:%u..%u is not (semi-)canonical: %c%c...%c%c.  Cannot determine sense.\n",
 	      chr,firstpos,secondpos,nt1,nt2,nt3,nt4);
 #endif
       *donor1 = nt1; *donor2 = nt2; *acceptor1 = nt3; *acceptor2 = nt4;
       *canonicalp = false;
       return ' ';
     }
   }
 }
 
 
 
 char
 Bamline_strand (Bamline_T this, Genome_T genome, IIT_T chromosome_iit) {
   char strand = ' ';
   Genomicpos_T chrpos, firstpos, secondpos;
   Intlist_T p;
   Uintlist_T q;
   int type;
   bool canonicalp;
   char donor1, donor2, acceptor1, acceptor2;
 
 
   chrpos = this->chrpos_low;
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     if ((type = Intlist_head(p)) == 'S') {
       /* Ignore */
 
     } else if (type == 'H') {
       /* Ignore */
 
     } else if (type == 'M') {
       chrpos += Uintlist_head(q);
 
     } else if (type == 'N') {
       firstpos = chrpos - 1U;
       chrpos += Uintlist_head(q);
       secondpos = chrpos;
 
       if (strand == ' ') {
 	strand = find_strand(&canonicalp,&donor1,&donor2,&acceptor1,&acceptor2,
 			     firstpos,secondpos,this->chr,genome,chromosome_iit,
 			     this,/*trust_sam_p*/true);
 	return strand;
       }
 
     } else if (type == 'P') {
       /* Do nothing */
 
     } else if (type == 'I') {
       /* Do nothing */
 
     } else if (type == 'D') {
       /* CHECK */
       chrpos += Uintlist_head(q);
 
     } else {
       fprintf(stderr,"Cannot parse type %c\n",type);
       exit(9);
     }
     debug(printf("  type = %c, chrpos = %u\n",type,chrpos));
   }
 
   return ' ';
 }
 
 
 
 
 Genomicpos_T
 Bamline_chrpos_high (Bamline_T this) {
   Intlist_T p;
   Uintlist_T q;
   Genomicpos_T chrpos_high;
   int type;
 
   chrpos_high = this->chrpos_low;
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     if ((type = Intlist_head(p)) == 'S') {
       /* Ignore */
 
     } else if (type == 'H') {
       /* Ignore */
 
     } else if (type == 'M') {
       chrpos_high += Uintlist_head(q);
 
     } else if (type == 'N') {
       chrpos_high += Uintlist_head(q);
 
     } else if (type == 'P') {
       /* Do nothing */
 
     } else if (type == 'I') {
       /* Do nothing */
 
     } else if (type == 'D') {
       /* CHECK */
       chrpos_high += Uintlist_head(q);
 
     } else {
       fprintf(stderr,"Cannot parse type %c\n",type);
       exit(9);
     }
     debug(printf("  type = %c, chrpos = %u\n",type,chrpos_high));
   }
 
   return chrpos_high - 1U;
 }
 
c78be696
 Genomicpos_T
 Bamline_chrpos_high_noclip (Bamline_T this) {
   Intlist_T p;
   Uintlist_T q;
   Genomicpos_T chrpos_high, mlength;
   int type;
36cb64f8
 
c78be696
   chrpos_high = this->chrpos_low;
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     if ((type = Intlist_head(p)) == 'S') {
       /* Ignore, but save length */
       mlength = Uintlist_head(q);
 
     } else if (type == 'H') {
       /* Ignore */
 
     } else if (type == 'M') {
       chrpos_high += Uintlist_head(q);
 
     } else if (type == 'N') {
       chrpos_high += Uintlist_head(q);
 
     } else if (type == 'P') {
       /* Do nothing */
 
     } else if (type == 'I') {
       /* Do nothing */
 
     } else if (type == 'D') {
       /* CHECK */
       chrpos_high += Uintlist_head(q);
 
     } else {
       fprintf(stderr,"Cannot parse type %c\n",type);
       exit(9);
     }
     debug(printf("  type = %c, chrpos = %u\n",type,chrpos_high));
   }
 
   /* Check last type, to extend soft clipping at high end */
   if (type == 'S') {
     chrpos_high += mlength;
   }
 
   return chrpos_high - 1U;
 }
 
 
 Genomicpos_T
 Bamline_total_ins (Bamline_T this) {
   Genomicpos_T total_ins = 0;
   Intlist_T p;
   Uintlist_T q;
 
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     if (Intlist_head(p) == 'I') {
       total_ins += Uintlist_head(q);
     }
   }
 
   return total_ins;
 }
 
 
 int
 Bamline_nmismatches (Bamline_T this) {
   int nmismatches = 0;
   unsigned char tag1, tag2, *s, *md_string = NULL;
8418591e
   /* unsigned char type; */
c78be696
   Intlist_T p;
   Uintlist_T q;
 
   s = this->aux_start;
   while (s < this->aux_end && md_string == NULL) {
     tag1 = *s++;
     tag2 = *s++;
8418591e
 #if 0
c78be696
     type = *s++;
8418591e
 #else
     s++;
 #endif
c78be696
     if (tag1 == 'M' && tag2 == 'D') {
       /* Start of MD string */
       md_string = s;
       while (*s) {
 	if (*s >= '0' && *s <= '9') {
 	  /* acceptable */
 	} else if (*s == '^') {
 	  /* acceptable */
 	} else {
 	  nmismatches += 1;
 	}
 	s++;
       }
     }
   }
 
   /* Need to subtract deletions */
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     if (Intlist_head(p) == 'D') {
       nmismatches -= (int) Uintlist_head(q);
     }
   }
 
   return nmismatches;
 }
36cb64f8
 
 static void
 Bamline_regions (Uintlist_T *chrpos_lows, Uintlist_T *chrpos_highs, Bamline_T this) {
   Intlist_T p;
   Uintlist_T q;
   Genomicpos_T position;
   int type;
 
   position = this->chrpos_low;
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     if ((type = Intlist_head(p)) == 'S') {
       /* Ignore */
 
     } else if (type == 'H') {
       /* Ignore */
 
     } else if (type == 'M') {
       *chrpos_lows = Uintlist_push(*chrpos_lows,position);
       position += Uintlist_head(q);
       *chrpos_highs = Uintlist_push(*chrpos_highs,position);
 
     } else if (type == 'N') {
       position += Uintlist_head(q);
 
     } else if (type == 'P') {
       /* Do nothing */
 
     } else if (type == 'I') {
       /* Do nothing */
 
     } else if (type == 'D') {
       /* CHECK */
       position += Uintlist_head(q);
 
     } else {
       fprintf(stderr,"Cannot parse type %c\n",type);
       exit(9);
     }
   }
 
   return;
 }
 
 
 static void
 Bamline_splices (Uintlist_T *splice_lows, Uintlist_T *splice_highs, Intlist_T *splice_signs,
 		 Bamline_T this) {
   Intlist_T p;
   Uintlist_T q;
   Genomicpos_T position;
   int type;
 
   position = this->chrpos_low;
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
     if ((type = Intlist_head(p)) == 'S') {
       /* Ignore */
 
     } else if (type == 'H') {
       /* Ignore */
 
     } else if (type == 'M') {
       position += Uintlist_head(q);
 
     } else if (type == 'N') {
       *splice_lows = Uintlist_push(*splice_lows,position);
       position += Uintlist_head(q);
       *splice_highs = Uintlist_push(*splice_highs,position);
 
       if (this->splice_strand == '+') {
 	*splice_signs = Intlist_push(*splice_signs,+1);
       } else if (this->splice_strand == '-') {
 	*splice_signs = Intlist_push(*splice_signs,-1);
       } else {
 	*splice_signs = Intlist_push(*splice_signs,0);
       }
 
     } else if (type == 'P') {
       /* Do nothing */
 
     } else if (type == 'I') {
       /* Do nothing */
 
     } else if (type == 'D') {
       /* CHECK */
       position += Uintlist_head(q);
 
     } else {
       fprintf(stderr,"Cannot parse type %c\n",type);
       exit(9);
     }
   }
 
   return;
 }
 
 
 
 
 void
 Bamline_free (Bamline_T *old) {
c78be696
 
36cb64f8
   if (*old) {
     FREE((*old)->acc);
     if ((*old)->chr != NULL) {
       FREE((*old)->chr);
     }
     if ((*old)->mate_chr != NULL) {
       FREE((*old)->mate_chr);
     }
     Intlist_free(&(*old)->cigar_types);
     Uintlist_free(&(*old)->cigar_npositions);
     FREE((*old)->read);
     if ((*old)->quality_string != NULL) {
       FREE((*old)->quality_string);
     }
8418591e
     if ((*old)->hardclip_quality != NULL) {
       FREE((*old)->hardclip_quality);
     }
     if ((*old)->hardclip != NULL) {
       FREE((*old)->hardclip);
     }
c78be696
     if ((*old)->aux_contents != NULL) {
       FREE((*old)->aux_contents);
     }
36cb64f8
     FREE(*old);
   }
 
   return;
 }
 
 
 static Bamline_T
5659f096
 Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique_p, int mapq,
8418591e
 	     int nm, char splice_strand, char *chr, Genomicpos_T chrpos_low,
36cb64f8
 	     char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length,
 	     Intlist_T cigar_types, Uintlist_T cigar_npositions, int cigar_querylength, int readlength,
8418591e
 	     char *read, char *quality_string, char *hardclip, char *hardclip_quality, bool terminalp,
c78be696
 	     unsigned char *aux_start, unsigned char *aux_end, bool copy_aux_contents_p) {
36cb64f8
   Bamline_T new = (Bamline_T) MALLOC(sizeof(*new));
 
   new->acc = (char *) CALLOC(strlen(acc)+1,sizeof(char));
   strcpy(new->acc,acc);
 
   new->flag = flag;
5659f096
   new->hiti = hiti;
36cb64f8
   new->nhits = nhits;
   new->good_unique_p = good_unique_p;
 
   new->mapq = mapq;
   
8418591e
   new->nm = nm;
36cb64f8
   new->splice_strand = splice_strand;
   if (chr == NULL) {
     new->chr = (char *) NULL;
   } else {
     new->chr = (char *) CALLOC(strlen(chr)+1,sizeof(char));
     strcpy(new->chr,chr);
   }
   new->chrpos_low = chrpos_low;
 
   if (mate_chr == NULL) {
     new->mate_chr = (char *) NULL;
   } else {
     new->mate_chr = (char *) CALLOC(strlen(mate_chr)+1,sizeof(char));
     strcpy(new->mate_chr,mate_chr);
   }
   new->mate_chrpos_low = mate_chrpos_low;
 
   new->insert_length = insert_length;
 
   new->cigar_types = cigar_types;		/* not copying */
   new->cigar_npositions = cigar_npositions;	/* not copying */
 
   new->cigar_querylength = cigar_querylength;
   new->readlength = readlength;
 
8418591e
   assert((int) strlen(read) == readlength);
   /* read should already be allocated from parse_line */
 #if 0
   if (read == NULL) {
     new->read = NULL;
   } else {
     new->read = (char *) CALLOC(readlength+1,sizeof(char));
     strcpy(new->read,read);
   }
 #else
36cb64f8
   new->read = read;
8418591e
 #endif
36cb64f8
 
   if (quality_string == NULL) {
     new->quality_string = NULL;
   } else {
     new->quality_string = (char *) CALLOC(readlength+1,sizeof(char));
     strncpy(new->quality_string,quality_string,readlength);
   }
 
8418591e
   /* No need to copy, since they are allocated already */
   new->hardclip = hardclip;
   new->hardclip_quality = hardclip_quality;
 
17db5e75
   new->terminalp = terminalp;
 
c78be696
   if (copy_aux_contents_p == true) {
     new->aux_length = aux_end - aux_start;
f51bc89d
     if (new->aux_length == 0) {
       new->aux_contents = (uint8_t *) NULL;
       new->aux_start = (uint8_t *) NULL;
       new->aux_end = (uint8_t *) NULL;
     } else {
       new->aux_contents = (uint8_t *) MALLOC(new->aux_length * sizeof(unsigned char));
       memcpy(new->aux_contents,aux_start,new->aux_length);
       new->aux_start = (unsigned char *) new->aux_contents;
       new->aux_end = new->aux_start + new->aux_length;
     }
c78be696
   } else {
     /* Point to bam->data */
     new->aux_contents = (uint8_t *) NULL;
     new->aux_start = aux_start;
     new->aux_end = aux_end;
   }
36cb64f8
 
   return new;
 }
 
 
 Bamline_T
4d5cc806
 Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
95e6ece6
 		      bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
 		      bool need_concordant_p) {
36cb64f8
   char *acc, *chr, *mate_chr, splice_strand;
8418591e
   int nm;
36cb64f8
   unsigned int flag;
5659f096
   int hiti, nhits;
36cb64f8
   bool good_unique_p;
   int mapq;
   Genomicpos_T chrpos_low, mate_chrpos_low;
   int insert_length;
   Intlist_T cigar_types;
   Uintlist_T cigarlengths;
   int cigar_querylength, readlength;
   char *read;
   char *quality_string;
8418591e
   char *hardclip;
   char *hardclip_quality;
4d5cc806
   char *read_group;
17db5e75
   bool terminalp;
36cb64f8
 
b0aa06af
 #ifndef HAVE_SAMTOOLS_LIB
36cb64f8
   return (Bamline_T) NULL;
 #else
   if (this->region_limited_p == 1) {
     debug1(fprintf(stderr,"Region limited\n"));
     while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
       debug1(fprintf(stderr,"Got line\n"));
       parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
 		 &mate_chr,&mate_chrpos_low,&insert_length,
 		 &cigar_types,&cigarlengths,&cigar_querylength,
8418591e
 		 &readlength,&read,&quality_string,&hardclip,&hardclip_quality,
 		 &read_group,&terminalp);
4d5cc806
       if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
 	debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
 	Intlist_free(&cigar_types);
 	Uintlist_free(&cigarlengths);
 	FREE(read);
       } else if (mapq < minimum_mapq) {
36cb64f8
 	debug1(fprintf(stderr,"Fails because mapq %d < minimum %d\n",mapq,minimum_mapq));
 	Intlist_free(&cigar_types);
 	Uintlist_free(&cigarlengths);
 	FREE(read);
       } else if ((nhits = aux_nhits(this)) > maximum_nhits) {
 	debug1(fprintf(stderr,"Fails because nhits %d > maximum %d\n",nhits,maximum_nhits));
 	Intlist_free(&cigar_types);
 	Uintlist_free(&cigarlengths);
 	FREE(read);
       } else if (need_unique_p == true && nhits > 1) {
 	debug1(fprintf(stderr,"Fails because need unique and nhits %d\n",nhits));
 	Intlist_free(&cigar_types);
 	Uintlist_free(&cigarlengths);
 	FREE(read);
       } else if (need_primary_p == true && (flag & NOT_PRIMARY) != 0) {
 	debug1(fprintf(stderr,"Fails because need primary and flag is %u\n",flag));
 	Intlist_free(&cigar_types);
 	Uintlist_free(&cigarlengths);
 	FREE(read);
95e6ece6
       } else if (ignore_duplicates_p == true && (flag & DUPLICATE_READ) != 0) {
 	debug1(fprintf(stderr,"Fails because ignoring duplicates and flag is %u\n",flag));
 	Intlist_free(&cigar_types);
 	Uintlist_free(&cigarlengths);
 	FREE(read);
36cb64f8
       } else if (need_concordant_p == true && concordantp(flag) == false) {
 	debug1(fprintf(stderr,"Fails because need concordant and flag is %u\n",flag));
 	Intlist_free(&cigar_types);
 	Uintlist_free(&cigarlengths);
 	FREE(read);
       } else {
 	debug1(fprintf(stderr,"Success\n"));
5659f096
 	hiti = aux_hiti(this);
8418591e
 	nm = aux_nm(this);
36cb64f8
 	splice_strand = aux_splice_strand(this);
 	good_unique_p = aux_good_unique_p(this,good_unique_mapq);
5659f096
 	return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
36cb64f8
 			   mate_chr,mate_chrpos_low,insert_length,
 			   cigar_types,cigarlengths,cigar_querylength,
8418591e
 			   readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
36cb64f8
 			   /*aux_start*/bam1_aux(this->bam),
c78be696
 			   /*aux_end*/this->bam->data + this->bam->data_len,
 			   /*copy_aux_contents_p*/false);
36cb64f8
       }
     }
     return (Bamline_T) NULL;
 
   } else {
     while (bam_read1(this->fp,this->bam) >= 0) {
       parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
 		 &mate_chr,&mate_chrpos_low,&insert_length,
 		 &cigar_types,&cigarlengths,&cigar_querylength,
8418591e
 		 &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
17db5e75
 		 &terminalp);
36cb64f8
       if (mapq >= minimum_mapq &&
 	  (nhits = aux_nhits(this)) <= maximum_nhits &&
 	  (need_unique_p == false || nhits == 1) &&
 	  (need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
 	  (need_concordant_p == false || concordantp(flag) == true)) {
5659f096
 	hiti = aux_hiti(this);
8418591e
 	nm = aux_nm(this);
36cb64f8
 	splice_strand = aux_splice_strand(this);
 	good_unique_p = aux_good_unique_p(this,good_unique_mapq);
5659f096
 	return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
36cb64f8
 			   mate_chr,mate_chrpos_low,insert_length,
 			   cigar_types,cigarlengths,cigar_querylength,
8418591e
 			   readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
36cb64f8
 			   /*aux_start*/bam1_aux(this->bam),
c78be696
 			   /*aux_end*/this->bam->data + this->bam->data_len,
 			   /*copy_aux_contents_p*/false);
36cb64f8
       } else {
 	Intlist_free(&cigar_types);
 	Uintlist_free(&cigarlengths);
 	FREE(read);
       }
     }
     return (Bamline_T) NULL;
c78be696
   }
 #endif
 }
 
 Bamline_T
 Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
 					 bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
 					 bool need_concordant_p) {
   char *acc, *chr, *mate_chr, splice_strand;
8418591e
   int nm;
c78be696
   unsigned int flag;
5659f096
   int hiti, nhits;
c78be696
   bool good_unique_p;
   int mapq;
   Genomicpos_T chrpos_low, mate_chrpos_low;
   int insert_length;
   Intlist_T cigar_types;
   Uintlist_T cigarlengths;
   int cigar_querylength, readlength;
   char *read;
   char *quality_string;
8418591e
   char *hardclip;
   char *hardclip_quality;
c78be696
   char *read_group;
   bool terminalp;
 
b0aa06af
 #ifndef HAVE_SAMTOOLS_LIB
c78be696
   return (Bamline_T) NULL;
 #else
   if (this->region_limited_p == 1) {
     debug1(fprintf(stderr,"Region limited\n"));
     while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
       if (perfect_match_p(this) == true) {
 	/* Skip */
       } else {
 	debug1(fprintf(stderr,"Got line\n"));
 	parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
 		   &mate_chr,&mate_chrpos_low,&insert_length,
 		   &cigar_types,&cigarlengths,&cigar_querylength,
8418591e
 		   &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
c78be696
 		   &terminalp);
 	if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
 	  debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (mapq < minimum_mapq) {
 	  debug1(fprintf(stderr,"Fails because mapq %d < minimum %d\n",mapq,minimum_mapq));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if ((nhits = aux_nhits(this)) > maximum_nhits) {
 	  debug1(fprintf(stderr,"Fails because nhits %d > maximum %d\n",nhits,maximum_nhits));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (need_unique_p == true && nhits > 1) {
 	  debug1(fprintf(stderr,"Fails because need unique and nhits %d\n",nhits));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (need_primary_p == true && (flag & NOT_PRIMARY) != 0) {
 	  debug1(fprintf(stderr,"Fails because need primary and flag is %u\n",flag));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (ignore_duplicates_p == true && (flag & DUPLICATE_READ) != 0) {
 	  debug1(fprintf(stderr,"Fails because ignoring duplicates and flag is %u\n",flag));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (need_concordant_p == true && concordantp(flag) == false) {
 	  debug1(fprintf(stderr,"Fails because need concordant and flag is %u\n",flag));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else {
 	  debug1(fprintf(stderr,"Success\n"));
5659f096
 	  hiti = aux_hiti(this);
8418591e
 	  nm = aux_nm(this);
c78be696
 	  splice_strand = aux_splice_strand(this);
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
5659f096
 	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
c78be696
 			     mate_chr,mate_chrpos_low,insert_length,
 			     cigar_types,cigarlengths,cigar_querylength,
8418591e
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
c78be696
 			     /*aux_start*/bam1_aux(this->bam),
 			     /*aux_end*/this->bam->data + this->bam->data_len,
 			     /*copy_aux_contents_p*/true);
 	}
       }
     }
     return (Bamline_T) NULL;
 
   } else {
     while (bam_read1(this->fp,this->bam) >= 0) {
       if (perfect_match_p(this) == true) {
 	/* Skip */
       } else {
 	parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
 		   &mate_chr,&mate_chrpos_low,&insert_length,
 		   &cigar_types,&cigarlengths,&cigar_querylength,
8418591e
 		   &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
c78be696
 		   &terminalp);
 	if (mapq >= minimum_mapq &&
 	    (nhits = aux_nhits(this)) <= maximum_nhits &&
 	    (need_unique_p == false || nhits == 1) &&
 	    (need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
 	    (need_concordant_p == false || concordantp(flag) == true)) {
5659f096
 	  hiti = aux_hiti(this);
8418591e
 	  nm = aux_nm(this);
c78be696
 	  splice_strand = aux_splice_strand(this);
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
5659f096
 	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
c78be696
 			     mate_chr,mate_chrpos_low,insert_length,
 			     cigar_types,cigarlengths,cigar_querylength,
8418591e
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
c78be696
 			     /*aux_start*/bam1_aux(this->bam),
 			     /*aux_end*/this->bam->data + this->bam->data_len,
 			     /*copy_aux_contents_p*/true);
 	} else {
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	}
       }
     }
     return (Bamline_T) NULL;
 
   }
 #endif
 
 }
 
 
 Bamline_T
 Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
 			    bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
 			    bool need_concordant_p) {
   char *acc, *chr, *mate_chr, splice_strand;
8418591e
   int nm;
c78be696
   unsigned int flag;
5659f096
   int hiti, nhits;
c78be696
   bool good_unique_p;
   int mapq;
   Genomicpos_T chrpos_low, mate_chrpos_low;
   int insert_length;
   Intlist_T cigar_types;
   Uintlist_T cigarlengths;
   int cigar_querylength, readlength;
   char *read;
   char *quality_string;
8418591e
   char *hardclip;
   char *hardclip_quality;
c78be696
   char *read_group;
   bool terminalp;
 
b0aa06af
 #ifndef HAVE_SAMTOOLS_LIB
c78be696
   return (Bamline_T) NULL;
 #else
   if (this->region_limited_p == 1) {
     debug1(fprintf(stderr,"Region limited\n"));
     while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
       if (has_indel_p(this) == false) {
 	/* Skip */
       } else {
 	debug1(fprintf(stderr,"Got line\n"));
 	parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
 		   &mate_chr,&mate_chrpos_low,&insert_length,
 		   &cigar_types,&cigarlengths,&cigar_querylength,
8418591e
 		   &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
c78be696
 		   &terminalp);
 	if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
 	  debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (mapq < minimum_mapq) {
 	  debug1(fprintf(stderr,"Fails because mapq %d < minimum %d\n",mapq,minimum_mapq));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if ((nhits = aux_nhits(this)) > maximum_nhits) {
 	  debug1(fprintf(stderr,"Fails because nhits %d > maximum %d\n",nhits,maximum_nhits));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (need_unique_p == true && nhits > 1) {
 	  debug1(fprintf(stderr,"Fails because need unique and nhits %d\n",nhits));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (need_primary_p == true && (flag & NOT_PRIMARY) != 0) {
 	  debug1(fprintf(stderr,"Fails because need primary and flag is %u\n",flag));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (ignore_duplicates_p == true && (flag & DUPLICATE_READ) != 0) {
 	  debug1(fprintf(stderr,"Fails because ignoring duplicates and flag is %u\n",flag));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else if (need_concordant_p == true && concordantp(flag) == false) {
 	  debug1(fprintf(stderr,"Fails because need concordant and flag is %u\n",flag));
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	} else {
 	  debug1(fprintf(stderr,"Success\n"));
5659f096
 	  hiti = aux_hiti(this);
8418591e
 	  nm = aux_nm(this);
c78be696
 	  splice_strand = aux_splice_strand(this);
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
5659f096
 	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
c78be696
 			     mate_chr,mate_chrpos_low,insert_length,
 			     cigar_types,cigarlengths,cigar_querylength,
8418591e
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
c78be696
 			     /*aux_start*/bam1_aux(this->bam),
 			     /*aux_end*/this->bam->data + this->bam->data_len,
 			     /*copy_aux_contents_p*/false);
 	}
       }
     }
36cb64f8
 
c78be696
     return (Bamline_T) NULL;	/* Signal end of BAM file */
 
   } else {
     while (bam_read1(this->fp,this->bam) >= 0) {
       if (has_indel_p(this) == false) {
 	/* Skip */
       } else {
 	parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
 		   &mate_chr,&mate_chrpos_low,&insert_length,
 		   &cigar_types,&cigarlengths,&cigar_querylength,
8418591e
 		   &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
c78be696
 		   &terminalp);
 	if (mapq >= minimum_mapq &&
 	    (nhits = aux_nhits(this)) <= maximum_nhits &&
 	    (need_unique_p == false || nhits == 1) &&
 	    (need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
 	    (need_concordant_p == false || concordantp(flag) == true)) {
5659f096
 	  hiti = aux_hiti(this);
8418591e
 	  nm = aux_nm(this);
c78be696
 	  splice_strand = aux_splice_strand(this);
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
5659f096
 	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
c78be696
 			     mate_chr,mate_chrpos_low,insert_length,
 			     cigar_types,cigarlengths,cigar_querylength,
8418591e
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
c78be696
 			     /*aux_start*/bam1_aux(this->bam),
 			     /*aux_end*/this->bam->data + this->bam->data_len,
 			     /*copy_aux_contents_p*/false);
 	} else {
 	  Intlist_free(&cigar_types);
 	  Uintlist_free(&cigarlengths);
 	  FREE(read);
 	}
       }
     }
 
     return (Bamline_T) NULL;	/* Signal end of BAM file */
36cb64f8
   }
 #endif
c78be696
 }
36cb64f8
 
c78be696
 
 
 
 static int
 bamline_read_cmp (const void *x, const void *y) {
   int cmp;
   Bamline_T a = * (Bamline_T *) x;
   Bamline_T b = * (Bamline_T *) y;
   Genomicpos_T a_chrpos_low_noclip, b_chrpos_low_noclip;
 
8418591e
 #if 0
c78be696
   if (Intlist_head(a->cigar_types) != 'S') {
     a_chrpos_low_noclip = a->chrpos_low;
   } else {
     a_chrpos_low_noclip = a->chrpos_low - Uintlist_head(a->cigar_npositions);
   }
 
   if (Intlist_head(b->cigar_types) != 'S') {
     b_chrpos_low_noclip = b->chrpos_low;
   } else {
     b_chrpos_low_noclip = b->chrpos_low - Uintlist_head(b->cigar_npositions);
   }
 
   if (a_chrpos_low_noclip < b_chrpos_low_noclip) {
     return -1;
   } else if (b_chrpos_low_noclip < a_chrpos_low_noclip) {
     return +1;
 
   } else if ((cmp = strcmp(a->read,b->read)) != 0) {
     return cmp;
 
   } else if (a->mate_chrpos_low < b->mate_chrpos_low) {
     return -1;
   } else if (b->mate_chrpos_low < a->mate_chrpos_low) {
     return +1;
   } else {
     return 0;
   }
8418591e
 #else
 
   return strcmp(a->read,b->read);
 
 #endif
 
c78be696
 }
 
 
 Bamline_T *
 Bamread_next_bamline_set (int *nlines, Bamline_T *prev_bamline,
 			  T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
 			  bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
 			  bool need_concordant_p) {
   Bamline_T *bamlines = NULL;
   List_T bamline_list = NULL;
   Bamline_T bamline = NULL;
   Chrpos_T set_chrpos_low, chrpos_low;
 
   if (*prev_bamline == NULL) {
     set_chrpos_low = 0;
   } else {
     bamline_list = List_push(bamline_list,(void *) *prev_bamline);
     set_chrpos_low = Bamline_chrpos_low(*prev_bamline);
   }
 
   while ((bamline = Bamread_next_imperfect_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
 							    need_unique_p,need_primary_p,ignore_duplicates_p,
 							    need_concordant_p)) != NULL) {
 #if 0
     if (Bamline_perfect_match_p(bamline) == true) {
       /* Exclude all perfect matches to the reference */
       Bamline_free(&bamline);
     }
 #endif
 
     if (set_chrpos_low == 0) {
       bamline_list = List_push(bamline_list,(void *) bamline);
       set_chrpos_low = Bamline_chrpos_low(bamline);
     } else if ((chrpos_low = Bamline_chrpos_low(bamline)) < set_chrpos_low) {
       fprintf(stderr,"BAM file not sorted by chrpos_low: %u < %u\n",chrpos_low,set_chrpos_low);
       abort();
     } else if (chrpos_low == set_chrpos_low) {
       bamline_list = List_push(bamline_list,(void *) bamline);
     } else {
       bamline_list = List_reverse(bamline_list);
       bamlines = (Bamline_T *) List_to_array_n(&(*nlines),bamline_list);
       if (*nlines > 0) {
 	qsort(bamlines,*nlines,sizeof(Bamline_T),bamline_read_cmp);
       }
       List_free(&bamline_list);
       *prev_bamline = bamline;
       return bamlines;
     }
   }
 
   bamline_list = List_reverse(bamline_list);
   bamlines = (Bamline_T *) List_to_array_n(&(*nlines),bamline_list);
   if (*nlines > 0) {
     qsort(bamlines,*nlines,sizeof(Bamline_T),bamline_read_cmp);
   }
   List_free(&bamline_list);
   *prev_bamline = bamline;
   return bamlines;
36cb64f8
 }
 
 
8418591e
 
 Bamline_T **
 Bamread_block (int **nlines, Genomicpos_T chrstart, Genomicpos_T chrend,
 	       T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
 	       bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
 	       bool need_concordant_p) {
   Bamline_T **block;
   List_T *block_lists;
   Bamline_T *bamlines = NULL;
   Bamline_T bamline = NULL;
   Chrpos_T chrpos_low_noclip, chrpos;
   int n;
 
 
   block = (Bamline_T **) CALLOC(chrend - chrstart + 1,sizeof(Bamline_T *));
   block_lists = (List_T *) CALLOC(chrend - chrstart + 1,sizeof(List_T));
   *nlines = (int *) CALLOC(chrend - chrstart + 1,sizeof(int));
 
   /* Excludes all perfect matches to the reference */
   while ((bamline = Bamread_next_imperfect_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
 							    need_unique_p,need_primary_p,ignore_duplicates_p,
 							    need_concordant_p)) != NULL) {
     if (Intlist_head(bamline->cigar_types) != 'S') {
       chrpos_low_noclip = bamline->chrpos_low;
     } else {
       chrpos_low_noclip = bamline->chrpos_low - Uintlist_head(bamline->cigar_npositions);
     }
 
     if (chrpos_low_noclip < chrstart) {
       abort();
     } else {
       block_lists[chrpos_low_noclip - chrstart] = List_push(block_lists[chrpos_low_noclip - chrstart],(void *) bamline);
     }
   }
 
   for (chrpos = chrstart; chrpos <= chrend; chrpos++) {
     if (block_lists[chrpos - chrstart] != NULL) {
       bamlines = (Bamline_T *) List_to_array_n(&n,block_lists[chrpos - chrstart]);
       qsort(bamlines,n,sizeof(Bamline_T),bamline_read_cmp);
       block[chrpos - chrstart] = bamlines;
       (*nlines)[chrpos - chrstart] = n;
     }
 
     List_free(&(block_lists[chrpos - chrstart]));
   }
 
   FREE(block_lists);
   return block;
 }
 
 
 
95e6ece6
 Bamline_T
 Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *desired_acc) {
   char *acc, *chr, *mate_chr, splice_strand;
8418591e
   int nm;
95e6ece6
   unsigned int flag;
5659f096
   int hiti, nhits;
95e6ece6
   int mapq;
   Genomicpos_T chrpos_low, mate_chrpos_low;
   int insert_length;
   Intlist_T cigar_types;
   Uintlist_T cigarlengths;
   int cigar_querylength, readlength;
   char *read;
   char *quality_string;
8418591e
   char *hardclip;
   char *hardclip_quality;
4d5cc806
   char *read_group;
17db5e75
   bool terminalp;
95e6ece6
 
   Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos);
   while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
     debug1(fprintf(stderr,"Got line\n"));
     parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
 	       &mate_chr,&mate_chrpos_low,&insert_length,
 	       &cigar_types,&cigarlengths,&cigar_querylength,
8418591e
 	       &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
17db5e75
 	       &terminalp);
5659f096
     hiti = aux_hiti(this);
8418591e
     nm = aux_nm(this);
95e6ece6
     splice_strand = aux_splice_strand(this);
     if (!strcmp(acc,desired_acc) && chrpos_low == desired_chrpos) {
       Bamread_unlimit_region(this);
5659f096
       return Bamline_new(acc,flag,hiti,nhits,/*good_unique_p*/true,mapq,nm,splice_strand,chr,chrpos_low,
95e6ece6
 			 mate_chr,mate_chrpos_low,insert_length,
 			 cigar_types,cigarlengths,cigar_querylength,
8418591e
 			 readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
95e6ece6
 			 /*aux_start*/bam1_aux(this->bam),
c78be696
 			 /*aux_end*/this->bam->data + this->bam->data_len,
 			 /*copy_aux_contents_p*/false);
95e6ece6
     }
   }
 
   Bamread_unlimit_region(this);
   return (Bamline_T) NULL;
 }
 
 
 
36cb64f8
 /************************************************************************
  *   Bamstore
  ************************************************************************/
 
 struct Bamstore_T {
   Genomicpos_T chrpos;
   List_T bamlines;
 };
 
 void
 Bamstore_free (Bamstore_T *old) {
   List_T p;
   Bamline_T bamline;
 
   for (p = (*old)->bamlines; p != NULL; p = List_next(p)) {
     bamline = (Bamline_T) List_head(p);
     Bamline_free(&bamline);
   }
   List_free(&(*old)->bamlines);
 
   FREE(*old);
   return;
 }
 
c78be696
 void
 Bamstore_free_nolines (Bamstore_T *old) {
   List_free(&(*old)->bamlines);
   FREE(*old);
   return;
 }
 
36cb64f8
 
 Bamstore_T
 Bamstore_new (Genomicpos_T chrpos) {
   Bamstore_T new = (Bamstore_T) MALLOC(sizeof(*new));
 
   new->chrpos = chrpos;
   new->bamlines = (List_T) NULL;
 
   return new;
 }
 
 
 
 Bamline_T
 Bamstore_get (Table_T bamstore_chrtable, char *chr, Genomicpos_T low, char *acc,
5659f096
 	      Genomicpos_T mate_low, int hiti) {
36cb64f8
   List_T p, list = NULL;
   Bamline_T wanted = NULL, bamline;
   Bamstore_T bamstore;
   Uinttable_T bamstore_table;
   Chrom_T chrom;
 
   chrom = Chrom_from_string(chr,/*mitochondrial_string*/NULL,/*order*/0U);
   if ((bamstore_table = (Uinttable_T) Table_get(bamstore_chrtable,(void *) chrom)) == NULL) {
95e6ece6
     /* Can happen if we read high end and not low end */
     /* fprintf(stderr,"Unexpected error.  No bamstore_table for chr %s\n",chr); */
36cb64f8
     Chrom_free(&chrom);
     return (Bamline_T) NULL;
   } else {
     Chrom_free(&chrom);
   }
 
   if ((bamstore = (Bamstore_T) Uinttable_get(bamstore_table,low)) == NULL) {
     /* May have been excluded for other reasons */
     return (Bamline_T) NULL;
   } else {
     for (p = bamstore->bamlines; p != NULL; p = List_next(p)) {
       bamline = (Bamline_T) List_head(p);
5659f096
       if (strcmp(Bamline_acc(bamline),acc) == 0 && Bamline_mate_chrpos_low(bamline) == mate_low && bamline->hiti == hiti) {
36cb64f8
 	wanted = bamline;
       } else {
 	list = List_push(list,(void *) bamline);
       }
     }
 
     List_free(&bamstore->bamlines);
     bamstore->bamlines = list;
     if (list == NULL) {
       Uinttable_remove(bamstore_table,low);
       Bamstore_free(&bamstore);
     }
 
     return wanted;
   }
 }
   
 
 void
 Bamstore_add_at_low (Table_T bamstore_chrtable, char *chr, Genomicpos_T low,
 		     Bamline_T bamline) {
   Bamstore_T bamstore;
   Uinttable_T bamstore_table;
   Chrom_T chrom;
 
   chrom = Chrom_from_string(chr,/*mitochondrial_string*/NULL,/*order*/0U);
   if ((bamstore_table = (Uinttable_T) Table_get(bamstore_chrtable,(void *) chrom)) == NULL) {
     bamstore_table = Uinttable_new(65522); /* estimate 65522 splice sites per chromosome */
     Table_put(bamstore_chrtable,(void *) chrom,(void *) bamstore_table);
   } else {
     Chrom_free(&chrom);
   }
 
   if ((bamstore = (Bamstore_T) Uinttable_get(bamstore_table,low)) == NULL) {
     bamstore = Bamstore_new(low);
     Uinttable_put(bamstore_table,low,(void *) bamstore);
   }
 
   bamstore->bamlines = List_push(bamstore->bamlines,(void *) bamline);
   return;
 }
 
 
 void
 Bamstore_table_free (Uinttable_T *bamstore_table) {
   Genomicpos_T *keys;
   int n, i;
   Bamstore_T bamstore;
 
   if ((n = Uinttable_length(*bamstore_table)) > 0) {
     keys = (Genomicpos_T *) Uinttable_keys(*bamstore_table,/*sortp*/false);
     for (i = 0; i < n; i++) {
       bamstore = Uinttable_get(*bamstore_table,keys[i]);
       if (bamstore == NULL) {
 	fprintf(stderr,"key is %u, value is NULL\n",keys[i]);
 	abort();
       } else {
 	Bamstore_free(&bamstore);
       }
     }
     FREE(keys);
   }
 
   return;
 }
 
 
 /************************************************************************
  *   Retrieving and assigning levels to all reads in a region
  ************************************************************************/
 
 struct Bampair_T {
   Bamline_T bamline_low;
   Bamline_T bamline_high;
   Genomicpos_T chrpos_low;
   Genomicpos_T chrpos_high;
5659f096
   Genomicpos_T chrpos_low_noclip;
   Genomicpos_T chrpos_high_noclip;
36cb64f8
   int level;
5659f096
   bool plusp;			/* Based on first read */
36cb64f8
 };
 
8418591e
 char *
 Bampair_acc (Bampair_T this) {
   if (this->bamline_low != NULL) {
     return this->bamline_low->acc;
   } else if (this->bamline_high != NULL) {
     return this->bamline_high->acc;
   } else {
     return (char *) NULL;
   }
 }
 
95e6ece6
 Bamline_T
 Bampair_bamline_low (Bampair_T this) {
   return this->bamline_low;
 }
 
 Bamline_T
 Bampair_bamline_high (Bampair_T this) {
   return this->bamline_high;
 }
 
36cb64f8
 Genomicpos_T
 Bampair_chrpos_low (Bampair_T this) {
   return this->chrpos_low;
 }
 
 Genomicpos_T
 Bampair_chrpos_high (Bampair_T this) {
   return this->chrpos_high;
 }
 
5659f096
 Genomicpos_T
 Bampair_chrpos_low_noclip (Bampair_T this) {
   return this->chrpos_low_noclip;
 }
 
 Genomicpos_T
 Bampair_chrpos_high_noclip (Bampair_T this) {
   return this->chrpos_high_noclip;
 }
 
36cb64f8
 int
 Bampair_level (Bampair_T this) {
   return this->level;
 }
 
5659f096
 bool
 Bampair_plusp (Bampair_T this) {
   return this->plusp;
 }
 
36cb64f8
 bool
 Bampair_good_unique_p (Bampair_T this) {
   if (this->bamline_low != NULL && this->bamline_low->good_unique_p == false) {
     return false;
   }
   if (this->bamline_high != NULL && this->bamline_high->good_unique_p == false) {
     return false;
   }
   return true;
 }
 
 bool
 Bampair_uniquep (Bampair_T this) {
   if (this->bamline_low != NULL && this->bamline_low->nhits > 1) {
     return false;
   }
   if (this->bamline_high != NULL && this->bamline_high->nhits > 1) {
     return false;
   }
   return true;
 }
 
 bool
 Bampair_primaryp (Bampair_T this) {
   if (this->bamline_low != NULL && (this->bamline_low->flag & NOT_PRIMARY) != 0) {
     return false;
   }
   if (this->bamline_high != NULL && (this->bamline_high->flag & NOT_PRIMARY) != 0) {
     return false;
   }
   return true;
 }
 
 
 
 static Bampair_T
 Bampair_new (Bamline_T bamline_low, Bamline_T bamline_high) {
   Bampair_T new = (Bampair_T) MALLOC(sizeof(*new));
 
   new->bamline_low = bamline_low;
   new->bamline_high = bamline_high;
   if (bamline_low == NULL) {
     new->chrpos_low = bamline_high->chrpos_low;
     new->chrpos_high = Bamline_chrpos_high(bamline_high);
5659f096
     new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_high);
     new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_high);
 
     if (Bamline_firstend_p(bamline_high) == true) {
       if (bamline_high->flag & QUERY_MINUSP) {
 	new->plusp = false;
       } else {
 	new->plusp = true;
       }
     } else {
       if (bamline_high->flag & QUERY_MINUSP) {
 	new->plusp = true;
       } else {
 	new->plusp = false;
       }
     }
 
36cb64f8
   } else if (bamline_high == NULL) {
     new->chrpos_low = bamline_low->chrpos_low;
     new->chrpos_high = Bamline_chrpos_high(bamline_low);
5659f096
     new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_low);
     new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_low);
 
     if (Bamline_firstend_p(bamline_low) == true) {
       if (bamline_low->flag & QUERY_MINUSP) {
 	new->plusp = false;
       } else {
 	new->plusp = true;
       }
     } else {
       if (bamline_low->flag & QUERY_MINUSP) {
 	new->plusp = true;
       } else {
 	new->plusp = false;
       }
     }
 
36cb64f8
   } else {
     new->chrpos_low = bamline_low->chrpos_low;
     new->chrpos_high = Bamline_chrpos_high(bamline_high);
5659f096
     new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_low);
     new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_high);
 
     if (Bamline_firstend_p(bamline_low) == true && Bamline_firstend_p(bamline_high) == false) {
       if (bamline_low->flag & QUERY_MINUSP) {
 	new->plusp = false;
       } else {
 	new->plusp = true;
       }
 
     } else if (Bamline_firstend_p(bamline_low) == false && Bamline_firstend_p(bamline_high) == true) {
       if (bamline_high->flag & QUERY_MINUSP) {
 	new->plusp = false;
       } else {
 	new->plusp = true;
       }
 
     } else if (Bamline_firstend_p(bamline_low) == true && Bamline_firstend_p(bamline_high) == true) {
       fprintf(stderr,"For bampair %s, both ends are first ends.  Flags are %d and %d\n",
 	      bamline_low->acc,bamline_low->flag,bamline_high->flag);
       new->plusp = false;
 
     } else {
       fprintf(stderr,"For bampair %s , both ends are second ends.  Flags are %d and %d\n",
 	      bamline_low->acc,bamline_low->flag,bamline_high->flag);
       new->plusp = false;
     }
36cb64f8
   }
5659f096
 
36cb64f8
   new->level = -1;
5659f096
 
36cb64f8
   return new;
 }
 
 
 void
 Bampair_free (Bampair_T *old) {
   if (*old) {
     Bamline_free(&(*old)->bamline_low);
     Bamline_free(&(*old)->bamline_high);
     FREE(*old);
   }
   return;
 }
 
 
 void
95e6ece6
 Bampair_print (FILE *fp, Bampair_T this, int quality_score_adj) {
36cb64f8
   if (this->bamline_low != NULL) {
95e6ece6
     Bamline_print(fp,this->bamline_low,this->bamline_low->flag,quality_score_adj);
36cb64f8
   }
   if (this->bamline_high != NULL) {
95e6ece6
     Bamline_print(fp,this->bamline_high,this->bamline_high->flag,quality_score_adj);
36cb64f8
   }
   return;
 }
 
 
 void
5659f096
 Bampair_details (Uintlist_T *chrpos_first_lows, Uintlist_T *chrpos_first_highs,
 		 Uintlist_T *chrpos_second_lows, Uintlist_T *chrpos_second_highs,
 		 Uintlist_T *chrpos_overlap_lows, Uintlist_T *chrpos_overlap_highs,
36cb64f8
 		 Uintlist_T *splice_lows, Uintlist_T *splice_highs, Intlist_T *splice_signs,
 		 Bampair_T this) {
5659f096
   Uintlist_T p1, p2, q1, q2;
   Chrpos_T low1, high1, low2, high2;
 
   *chrpos_first_lows = (Uintlist_T) NULL;
   *chrpos_first_highs = (Uintlist_T) NULL;
   *chrpos_second_lows = (Uintlist_T) NULL;
   *chrpos_second_highs = (Uintlist_T) NULL;
   *chrpos_overlap_lows = (Uintlist_T) NULL;
   *chrpos_overlap_highs = (Uintlist_T) NULL;
36cb64f8
   *splice_lows = (Uintlist_T) NULL;
   *splice_highs = (Uintlist_T) NULL;
   *splice_signs = (Intlist_T) NULL;
 
   if (this->bamline_low != NULL) {
5659f096
     if (1 || Bamline_firstend_p(this->bamline_low) == true) {
       Bamline_regions(&(*chrpos_first_lows),&(*chrpos_first_highs),this->bamline_low);
     } else {
       Bamline_regions(&(*chrpos_second_lows),&(*chrpos_second_highs),this->bamline_low);
     }
36cb64f8
     Bamline_splices(&(*splice_lows),&(*splice_highs),&(*splice_signs),this->bamline_low);
   }
 
   if (this->bamline_high != NULL) {
5659f096
     if (0 && Bamline_firstend_p(this->bamline_high) == true) {
       Bamline_regions(&(*chrpos_first_lows),&(*chrpos_first_highs),this->bamline_high);
     } else {
       Bamline_regions(&(*chrpos_second_lows),&(*chrpos_second_highs),this->bamline_high);
     }
36cb64f8
     Bamline_splices(&(*splice_lows),&(*splice_highs),&(*splice_signs),this->bamline_high);
   }
 
5659f096
   *chrpos_first_lows = Uintlist_reverse(*chrpos_first_lows);
   *chrpos_first_highs = Uintlist_reverse(*chrpos_first_highs);
   *chrpos_second_lows = Uintlist_reverse(*chrpos_second_lows);
   *chrpos_second_highs = Uintlist_reverse(*chrpos_second_highs);
36cb64f8
   *splice_lows = Uintlist_reverse(*splice_lows);
   *splice_highs = Uintlist_reverse(*splice_highs);
   *splice_signs = Intlist_reverse(*splice_signs);
 
5659f096
   if (this->bamline_low != NULL && this->bamline_high != NULL) {
     p1 = *chrpos_first_lows;
     q1 = *chrpos_first_highs;
     p2 = *chrpos_second_lows;
     q2 = *chrpos_second_highs;
 
     while (p1 != NULL && p2 != NULL) {
       low1 = Uintlist_head(p1);
       high1 = Uintlist_head(q1);
       low2 = Uintlist_head(p2);
       high2 = Uintlist_head(q2);
 
       if (low2 >= high1) {
 	p1 = Uintlist_next(p1);	q1 = Uintlist_next(q1); /* Advance first read */
       } else if (low1 >= high2) {
 	p2 = Uintlist_next(p2);	q2 = Uintlist_next(q2); /* Advance second read */
       } else if (low1 <= low2) {
 	*chrpos_overlap_lows = Uintlist_push(*chrpos_overlap_lows,low2);
 	if (high2 <= high1) {
 	  *chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high2);
 	  p2 = Uintlist_next(p2); q2 = Uintlist_next(q2); /* Advance second read */
 	} else {
 	  *chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high1);
 	  p1 = Uintlist_next(p1); q1 = Uintlist_next(q1); /* Advance first read */
 	}
       } else {
 	*chrpos_overlap_lows = Uintlist_push(*chrpos_overlap_lows,low1);
 	if (high1 <= high2) {
 	  *chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high1);
 	  p1 = Uintlist_next(p1); q1 = Uintlist_next(q1); /* Advance first read */
 	} else {
 	  *chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high2);
 	  p2 = Uintlist_next(p2); q2 = Uintlist_next(q2); /* Advance second read */
 	}
       }
     }
 
     *chrpos_overlap_lows = Uintlist_reverse(*chrpos_overlap_lows);
     *chrpos_overlap_highs = Uintlist_reverse(*chrpos_overlap_highs);
   }
 
 
36cb64f8
   return;
 }
 
 
 List_T
4d5cc806
 Bamread_all_pairs (T bamreader, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
95e6ece6
 		   bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
 		   bool need_concordant_p) {
   List_T lines = NULL, p;
36cb64f8
   Bamline_T bamline_low, bamline;
 
   Table_T bamstore_chrtable;
   Uinttable_T bamstore_table;
95e6ece6
   Bamstore_T bamstore;
36cb64f8
   Chrom_T *chroms, chrom;
95e6ece6
   unsigned int *keys;
   int nkeys, j;
36cb64f8
   int n, i;
 
 
   bamstore_chrtable = Table_new(100,Chrom_compare_table,Chrom_hash_table);
 
4d5cc806
   while ((bamline = Bamread_next_bamline(bamreader,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
95e6ece6
 					 need_unique_p,need_primary_p,ignore_duplicates_p,
 					 need_concordant_p)) != NULL) {
36cb64f8
     if (Bamline_concordantp(bamline) == false) {
       /* Handle now */
       if (Bamline_firstend_p(bamline) == true) {
 	lines = List_push(lines,Bampair_new(/*bamline_low*/bamline,/*bamline_high*/NULL));
       } else {
 	lines = List_push(lines,Bampair_new(/*bamline_low*/NULL,/*bamline_high*/bamline));
       }
 
     } else if (Bamline_lowend_p(bamline) == true) {
       /* Wait for high end */
       Bamstore_add_at_low(bamstore_chrtable,Bamline_chr(bamline),Bamline_chrpos_low(bamline),
 			  bamline);
 
     } else {
       /* This is the high end */
       bamline_low = Bamstore_get(bamstore_chrtable,Bamline_chr(bamline),Bamline_mate_chrpos_low(bamline),
5659f096
 				 Bamline_acc(bamline),Bamline_chrpos_low(bamline),bamline->hiti);
36cb64f8
       if (bamline_low == NULL) {
8418591e
 #if 0
36cb64f8
 	fprintf(stderr,"Hmm...low end not found for %s at %s:%u\n",
 		Bamline_acc(bamline),Bamline_chr(bamline),Bamline_chrpos_low(bamline));
8418591e
 #endif
95e6ece6
 	lines = List_push(lines,Bampair_new(/*bamline_low*/NULL,/*bamline_high*/bamline));
36cb64f8
       } else {
 	lines = List_push(lines,Bampair_new(bamline_low,/*bamline_high*/bamline));
       }
     }
   }
 
95e6ece6
 
36cb64f8
   if ((n = Table_length(bamstore_chrtable)) > 0) {
     chroms = (Chrom_T *) Table_keys(bamstore_chrtable,NULL);
     for (i = 0; i < n; i++) {
       chrom = chroms[i];
       bamstore_table = (Uinttable_T) Table_get(bamstore_chrtable,(void *) chrom);
95e6ece6
 
       /* Handle low ends without high ends */
       nkeys = Uinttable_length(bamstore_table);
       keys = Uinttable_keys_by_timeindex(bamstore_table);
       for (j = 0; j < nkeys; j++) {
 	if ((bamstore = (Bamstore_T) Uinttable_get(bamstore_table,keys[j])) == NULL) {
 	  /* No bamlines at this chrpos_low */
 	} else {
 	  for (p = bamstore->bamlines; p != NULL; p = List_next(p)) {
 	    bamline = (Bamline_T) List_head(p);
8418591e
 #if 0
95e6ece6
 	    fprintf(stderr,"Hmm...high end not found for %s at %s:%u\n",
 		    Bamline_acc(bamline),Bamline_chr(bamline),Bamline_chrpos_low(bamline));
8418591e
 #endif
95e6ece6
 	    lines = List_push(lines,Bampair_new(/*bamline_low*/bamline,/*bamline_high*/NULL));
 	  }
 	  /* Bamstore_free(&bamstore); -- This causes errors */
c78be696
 	  Bamstore_free_nolines(&bamstore);
95e6ece6
 	}
       }
       FREE(keys);
 
       /* Bamstore_table_free(&bamstore_table); */
36cb64f8
       Uinttable_free(&bamstore_table);
     }
     for (i = 0; i < n; i++) {
       Chrom_free(&(chroms[i]));
     }
     FREE(chroms);
   }
   Table_free(&bamstore_chrtable);
 
 
   return List_reverse(lines);
 }
 
 
 static int
 level_cmp (const void *x, const void *y) {
   Bampair_T a = * (Bampair_T *) x;
   Bampair_T b = * (Bampair_T *) y;
 
   if (a->chrpos_low < b->chrpos_low) {
     return -1;
   } else if (a->chrpos_low > b->chrpos_low) {
     return +1;
   } else if (a->chrpos_high < b->chrpos_high) {
     return -1;
   } else if (a->chrpos_high > b->chrpos_high) {
     return +1;
 #if 0
   } else if (a->sign > b->sign) {
     return -1;
   } else if (a->sign < b->sign) {
     return +1;
 #endif
 
   } else {
     return 0;
   }
 }
 
 
 
 int
 Bampair_compute_levels (List_T bampairs, Genomicpos_T mincoord,
 			Genomicpos_T maxcoord, int max_allowed_levels,
4d5cc806
 			double xfactor, Genomicpos_T min_pairlength, bool only_internal_p) {
36cb64f8
   int nbampairs, i;
   int maxlevel = -1, level;
   bool donep;
   Bampair_T *array, bampair;
   double *rightmost, xlow;
 
   if ((nbampairs = List_length(bampairs)) > 0) {
     array = (Bampair_T *) List_to_array(bampairs,NULL);
     qsort(array,nbampairs,sizeof(T),level_cmp);
 
     rightmost = (double *) CALLOC(max_allowed_levels,sizeof(double));
     for (i = 0; i < max_allowed_levels; i++) {
       rightmost[i] = 0.0;
     }
 
     for (i = 0; i < nbampairs; i++) {
       bampair = array[i];
       if (bampair->chrpos_high - bampair->chrpos_low < min_pairlength) {
 	bampair->level = -1;
4d5cc806
       } else if (only_internal_p == true && bampair->chrpos_low < mincoord && bampair->chrpos_high > maxcoord) {
 	bampair->level = -1;
36cb64f8
       } else {
 	/* Find appropriate level */
 	level = 0;
 	donep = false;
 	while (level < max_allowed_levels && !donep) {
 	  xlow = xfactor * bampair->chrpos_low;
 	  if (level > maxlevel) {
 	    donep = true;
 	    maxlevel = level;
 	  } else if (rightmost[level] < xlow) {
 	    donep = true;
 	  } else {
 	    level++;
 	  }
 	}
 
 	if (level < max_allowed_levels) {
 	  rightmost[level] = xfactor * (bampair->chrpos_high + 10);
 
 #if 0
 	  if (bampair->chrpos_high < mincoord || bampair->chrpos_low > maxcoord) {
 	    /* Skip printing if both ends outside of region */
 	  } else if (bampair->chrpos_low < mincoord || bampair->chrpos_high > maxcoord) {
 	    /* Skip printing if either end outside of region */
 	  } else {
 	    bampair->level = level;
 	  }
 #else
 	  bampair->level = level;
 #endif
 	}
       }
     }
 
     FREE(rightmost);
     FREE(array);
   }
 
   return maxlevel + 1;
 }