5659f096 |
static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $";
|
36cb64f8 |
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "bamread.h"
#include <stdlib.h>
|
4d5cc806 |
#include "assert.h"
|
36cb64f8 |
#include "mem.h"
#include "complement.h"
#include "bool.h"
#include "list.h"
#include "samflags.h"
#ifdef DEBUG
#define debug(x) x
#else
#define debug(x)
#endif
/* Bamread_next_bamline */
#ifdef DEBUG1
#define debug1(x) x
#else
#define debug1(x)
#endif
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
#include "bam.h"
|
36cb64f8 |
typedef uint8_t *BAM_Sequence_T;
typedef uint32_t *BAM_Cigar_T;
#endif
#define T Bamreader_T
struct T {
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
bamFile fp;
bam_header_t *header;
bam_index_t *idx;
bam_iter_t iter;
bam1_t *bam;
bam1_core_t *core;
#endif
int region_limited_p;
int ndivs;
char **divnames;
unsigned int *divlengths;
};
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
#ifndef bam_init_header_hash
/* Not declared in bam.h */
extern void bam_init_header_hash (bam_header_t *header);
#endif
#endif
void
Bamread_free (T *old) {
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
bam_index_destroy((*old)->idx);
bam_destroy1((*old)->bam);
if ((*old)->header != NULL) {
bam_header_destroy((*old)->header);
}
bam_close((*old)->fp);
#endif
FREE(*old);
return;
}
T
Bamread_new (char *filename) {
T new = (T) MALLOC(sizeof(*new));
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
3a231c30 |
if (Access_file_exists_p(filename) == false) {
fprintf(stderr,"BAM file %s does not exist\n",filename);
return (T) NULL;
} else if ((new->fp = bam_open(filename,"rb")) == NULL) {
|
36cb64f8 |
fprintf(stderr,"Cannot open BAM file %s\n",filename);
return (T) NULL;
}
if ((new->header = bam_header_read(new->fp)) == NULL) {
fprintf(stderr,"bam file has no SQ header lines\n");
} else {
new->ndivs = new->header->n_targets;
new->divnames = new->header->target_name;
new->divlengths = new->header->target_len;
}
bam_init_header_hash(new->header);
if ((new->idx = bam_index_load(filename)) == NULL) {
fprintf(stderr,"Warning: BAM file %s is not indexed\n",filename);
}
new->bam = bam_init1();
new->core = &(new->bam->core);
#endif
new->region_limited_p = 0;
return new;
}
void
Bamread_write_header (T this) {
int i;
for (i = 0; i < this->ndivs; i++) {
printf("@SQ\tSN:%s\tLN:%u\n",this->divnames[i],this->divlengths[i]);
}
#if 0
if (sam_read_group_id != NULL) {
fprintf(fp,"@RG\tID:%s",sam_read_group_id);
if (sam_read_group_platform != NULL) {
fprintf(fp,"\tPL:%s",sam_read_group_platform);
}
if (sam_read_group_library != NULL) {
fprintf(fp,"\tLB:%s",sam_read_group_library);
}
fprintf(fp,"\tSM:%s",sam_read_group_name);
fprintf(fp,"\n");
}
#endif
return;
}
Genomicpos_T
Bamread_chrlength (T this, char *chr) {
int32_t tid;
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
/* bam_parse_region(this->header,region,&tid,&chrstart,&chrend); */
if ((tid = bam_get_tid(this->header,chr)) < 0) {
fprintf(stderr,"chr %s is not in BAM file\n",chr);
return 0U;
} else {
return this->divlengths[tid];
}
#else
return 0U;
#endif
}
#if 0
void
Bamread_reset (T this) {
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
bam_destroy1(this->bam);
this->bam = bam_init1();
this->core = &(this->bam->core);
#endif
this->iter = NULL;
this->region_limited_p = 0;
return;
}
#endif
/* Returns true if chromosome is found */
bool
Bamread_limit_region (T this, char *chr, Genomicpos_T chrstart, Genomicpos_T chrend) {
int32_t tid;
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
/* bam_parse_region(this->header,region,&tid,&chrstart,&chrend); */
if ((tid = bam_get_tid(this->header,chr)) < 0) {
fprintf(stderr,"chr %s is not in BAM file\n",chr);
return false;
} else {
this->iter = bam_iter_query(this->idx,tid,(int) (chrstart - 1U),(int) chrend);
this->region_limited_p = 1;
return true;
}
#else
return false;
#endif
}
void
Bamread_unlimit_region (T this) {
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
if (this->region_limited_p == 1) {
this->region_limited_p = 0;
bam_iter_destroy(this->iter);
this->iter = NULL;
}
#endif
return;
}
int
Bamread_nreads (int *npositions, T this, char *chr, Genomicpos_T chrpos1, Genomicpos_T chrpos2) {
int nreads = 0;
Genomicpos_T chrstart, chrend;
int npositions_lowend, npositions_highend;
if (chrpos1 < chrpos2) {
chrstart = chrpos1;
chrend = chrpos2;
} else {
chrstart = chrpos2;
chrend = chrpos1;
}
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
Genomicpos_T chrpos_low, chrpos_high;
BAM_Cigar_T cigar;
int type;
int i;
unsigned int length;
bool lowend_p;
int max_overhang1_lowend = 0, max_overhang2_lowend = 0,
max_overhang1_highend = 0, max_overhang2_highend = 0;
int overhang1, overhang2;
int max_readlength_lowend = 0, max_readlength_highend = 0;
int readlength;
Bamread_limit_region(this,chr,chrstart,chrend);
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
readlength = this->core->l_qseq;
chrpos_high = chrpos_low = this->core->pos + 1U;
if (this->core->mtid != this->core->tid) {
lowend_p = true;
} else if (/*mate_chrpos_low*/this->core->mpos + 1U > chrpos_low) {
lowend_p = true;
} else {
lowend_p = false;
}
cigar = bam1_cigar(this->bam);
for (i = 0; i < this->core->n_cigar; i++) {
type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
length = cigar[i] >> BAM_CIGAR_SHIFT;
if (type == 'S') {
/* Ignore */
} else if (type == 'H') {
/* Ignore */
} else if (type == 'M') {
chrpos_high += length;
} else if (type == 'N') {
chrpos_high += length;
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
/* Do nothing */
} else if (type == 'D') {
/* CHECK */
chrpos_high += length;
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
}
/* printf("read %d between %u and %u, at %u..%u\n",
nreads,chrstart,chrend,chrpos_low,chrpos_high); */
if (chrpos_low >= chrstart && chrpos_high <= chrend) {
/* Internal to both bounds */
nreads++;
} else if (chrpos_high <= chrend) {
/* Straddles low bound */
if (chrpos_high > chrstart + readlength/2) {
/* More than half of the read is in the intron */
if (lowend_p == true) {
if ((overhang1 = readlength - (chrpos_high - chrstart)) > max_overhang1_lowend) {
max_overhang1_lowend = overhang1;
}
if (readlength > max_readlength_lowend) {
max_readlength_lowend = readlength;
}
} else {
if ((overhang1 = readlength - (chrpos_high - chrstart)) > max_overhang1_highend) {
max_overhang1_highend = overhang1;
}
if (readlength > max_readlength_highend) {
max_readlength_highend = readlength;
}
}
nreads++;
}
} else if (chrpos_low >= chrstart) {
/* Straddles high bound */
if (chrpos_low < chrend - readlength/2) {
/* More than half of the read is in the intron */
if (lowend_p == true) {
if ((overhang2 = readlength - (chrend - chrpos_low)) > max_overhang2_lowend) {
max_overhang2_lowend = overhang2;
}
if (readlength > max_readlength_lowend) {
max_readlength_lowend = readlength;
}
} else {
if ((overhang2 = readlength - (chrend - chrpos_low)) > max_overhang2_highend) {
max_overhang2_highend = overhang2;
}
if (readlength > max_readlength_highend) {
max_readlength_highend = readlength;
}
}
nreads++;
}
} else {
/* Includes both bounds. Don't want these reads. */
}
}
Bamread_unlimit_region(this);
#endif
if (max_readlength_lowend == 0) {
npositions_lowend = 0;
} else if ((npositions_lowend = (chrend - chrstart + 1) + max_overhang1_lowend + max_overhang2_lowend - max_readlength_lowend) < 0) {
npositions_lowend = 0;
}
if (max_readlength_highend == 0) {
npositions_highend = 0;
} else if ((npositions_highend = (chrend - chrstart + 1) + max_overhang1_highend + max_overhang2_highend - max_readlength_highend) < 0) {
npositions_highend = 0;
}
if ((*npositions = npositions_lowend + npositions_highend) == 0) {
*npositions = 1;
}
return nreads;
}
static void
parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos,
char **mate_chr, Genomicpos_T *mate_chrpos, int *insert_length,
Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength,
|
8418591e |
int *readlength, char **read, char **quality_string, char **hardclip, char **hardclip_quality,
char **read_group, bool *terminalp) {
|
36cb64f8 |
int type;
int i;
unsigned int length;
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
BAM_Sequence_T seq;
BAM_Cigar_T cigar;
uint8_t *ptr;
|
8418591e |
char *string;
|
36cb64f8 |
*acc = bam1_qname(this->bam);
*flag = this->core->flag;
*mapq = this->core->qual;
if (this->core->tid < 0) {
*chr = (char *) NULL;
} else if (this->core->tid >= this->ndivs) {
fprintf(stderr,"tid %d >= ndivs %d\n",this->core->tid,this->ndivs);
exit(9);
} else {
*chr = this->divnames[this->core->tid];
}
*chrpos = this->core->pos + 1U;
/* printf("%s:%u\n",*chr,*chrpos); */
if (this->core->mtid < 0) {
*mate_chr = (char *) NULL;
} else if (this->core->mtid >= this->ndivs) {
fprintf(stderr,"mtid %d >= ndivs %d\n",this->core->mtid,this->ndivs);
exit(9);
} else {
*mate_chr = this->divnames[this->core->mtid];
}
*mate_chrpos = this->core->mpos + 1U;
*insert_length = this->core->isize;
*readlength = this->core->l_qseq;
|
8418591e |
*read = (char *) MALLOC(((*readlength)+1) * sizeof(char));
|
36cb64f8 |
seq = bam1_seq(this->bam);
for (i = 0; i < *readlength; i++) {
switch (bam1_seqi(seq,i)) {
case 1: (*read)[i] = 'A'; break;
case 2: (*read)[i] = 'C'; break;
case 4: (*read)[i] = 'G'; break;
case 8: (*read)[i] = 'T'; break;
case 15: (*read)[i] = 'N'; break;
default: (*read)[i] = '?'; break;
}
}
|
8418591e |
(*read)[*readlength] = '\0';
|
36cb64f8 |
ptr = bam1_qual(this->bam);
if (ptr[0] == 0xff) {
*quality_string = (char *) NULL;
} else {
*quality_string = (char *) ptr;
}
|
8418591e |
ptr = bam_aux_get(this->bam,"XH");
if (ptr == NULL) {
*hardclip = (char *) NULL;
} else {
string = bam_aux2Z(ptr);
*hardclip = (char *) MALLOC((strlen(string)+1) * sizeof(char));
strcpy(*hardclip,string);
}
ptr = bam_aux_get(this->bam,"XI");
if (ptr == NULL) {
*hardclip_quality = (char *) NULL;
} else {
string = bam_aux2Z(ptr);
*hardclip_quality = (char *) MALLOC((strlen(string)+1) * sizeof(char));
strcpy(*hardclip_quality,string);
}
|
4d5cc806 |
ptr = bam_aux_get(this->bam,"RG");
if (ptr == NULL) {
*read_group = (char *) NULL;
} else {
*read_group = bam_aux2Z(ptr);
}
|
8418591e |
ptr = bam_aux_get(this->bam,"XG");
|
17db5e75 |
if (ptr == NULL) {
*terminalp = false;
} else {
|
8418591e |
string = bam_aux2Z(ptr);
if (strcmp((char *) string,"T")) {
*terminalp = false;
} else {
*terminalp = true;
}
|
17db5e75 |
}
|
36cb64f8 |
/* Cigar */
*cigarlength = 0;
*cigartypes = (Intlist_T) NULL;
*cigarlengths = (Uintlist_T) NULL;
cigar = bam1_cigar(this->bam);
for (i = 0; i < this->core->n_cigar; i++) {
length = cigar[i] >> BAM_CIGAR_SHIFT;
*cigarlengths = Uintlist_push(*cigarlengths,length);
type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
*cigartypes = Intlist_push(*cigartypes,type);
if (type == 'S' || type == 'M' || type == 'I') {
*cigarlength += (int) length;
} else if (type == 'H') {
*cigarlength += (int) length;
} else if (type == 'D' || type == 'N') {
/* Ignore */
} else if (type == 'P') {
/* Ignore */
} else {
fprintf(stderr,"bamread.c cannot cigar int of %d\n",cigar[i] & BAM_CIGAR_MASK);
exit(9);
}
}
*cigartypes = Intlist_reverse(*cigartypes);
*cigarlengths = Uintlist_reverse(*cigarlengths);
#endif
return;
}
|
c78be696 |
static bool
has_indel_p (T this) {
int type;
int i;
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
c78be696 |
BAM_Cigar_T cigar;
/* Cigar */
cigar = bam1_cigar(this->bam);
for (i = 0; i < this->core->n_cigar; i++) {
/* length = cigar[i] >> BAM_CIGAR_SHIFT; */
type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
if (type == 'I' || type == 'D') {
return true;
}
}
#endif
return false;
}
static bool
perfect_match_p (T this) {
int type;
int i;
unsigned char *s;
unsigned char tag1, tag2;
|
b0aa06af |
#ifdef HAVE_SAMTOOLS_LIB
|
c78be696 |
BAM_Cigar_T cigar;
/* Cigar */
cigar = bam1_cigar(this->bam);
for (i = 0; i < this->core->n_cigar; i++) {
/* length = cigar[i] >> BAM_CIGAR_SHIFT; */
type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
if (type == 'S') {
return false;
} else if (type == 'H') {
return false;
} else if (type == 'M') {
/* acceptable */
} else if (type == 'N') {
/* acceptable */
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
return false;
} else if (type == 'D') {
return false;
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
}
/* Check MD string */
s = /*aux_start*/ bam1_aux(this->bam);
while (s < /*aux_end*/ this->bam->data + this->bam->data_len) {
tag1 = *s++;
tag2 = *s++;
type = *s++;
if (tag1 == 'M' && tag2 == 'D') {
/* MD string */
while (*s) {
if (*s >= '0' && *s <= '9') {
/* acceptable */
s++;
} else {
return false;
}
}
/* s++; */
return true;
} else {
/* Skip this tag */
while (*s) { s++;}
s++;
}
}
#endif
/* No MD string found */
return true;
}
|
36cb64f8 |
int
Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos,
char **mate_chr, Genomicpos_T *mate_chrpos,
Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength,
|
8418591e |
int *readlength, char **read, char **quality_string, char **hardclip, char **hardclip_quality,
char **read_group, bool *terminalp) {
|
36cb64f8 |
int insert_length;
|
b0aa06af |
#ifndef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
return 0;
#else
if (this->region_limited_p == 1) {
if (bam_iter_read(this->fp,this->iter,this->bam) < 0) {
return 0;
} else {
parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos),
&(*mate_chr),&(*mate_chrpos),&insert_length,
&(*cigartypes),&(*cigarlengths),&(*cigarlength),
|
8418591e |
&(*readlength),&(*read),&(*quality_string),&(*hardclip),&(*hardclip_quality),
&(*read_group),&(*terminalp));
|
36cb64f8 |
return 1;
}
} else {
if (bam_read1(this->fp,this->bam) < 0) {
return 0;
} else {
parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos),
&(*mate_chr),&(*mate_chrpos),&insert_length,
&(*cigartypes),&(*cigarlengths),&(*cigarlength),
|
8418591e |
&(*readlength),&(*read),&(*quality_string),&(*hardclip),&(*hardclip_quality),
&(*read_group),&(*terminalp));
|
36cb64f8 |
return 1;
}
}
#endif
}
struct Bamline_T {
char *acc;
unsigned int flag;
|
5659f096 |
int hiti;
|
36cb64f8 |
int nhits;
bool good_unique_p; /* Good above good_unique_mapq. Dependent on second_mapq. */
int mapq;
|
8418591e |
int nm;
|
36cb64f8 |
char splice_strand;
char *chr;
Genomicpos_T chrpos_low;
char *mate_chr;
Genomicpos_T mate_chrpos_low;
int insert_length;
Intlist_T cigar_types;
Uintlist_T cigar_npositions;
int cigar_querylength;
int readlength;
char *read;
char *quality_string;
|
8418591e |
char *hardclip;
char *hardclip_quality;
|
17db5e75 |
bool terminalp;
|
c78be696 |
/* Can be used if we read one line at a time. Points to bam->data, which is re-used upon each read */
|
36cb64f8 |
unsigned char *aux_start;
unsigned char *aux_end;
|
c78be696 |
/* Used if we read more than one line at a time */
uint8_t *aux_contents;
size_t aux_length;
|
36cb64f8 |
};
char *
Bamline_acc (Bamline_T this) {
return this->acc;
}
unsigned int
Bamline_flag (Bamline_T this) {
return this->flag;
}
static bool
concordantp (unsigned int flag) {
if (flag & QUERY_UNMAPPED) {
return false;
} else if ((flag & PAIRED_READ) == 0U) {
/* Not even a paired read, so consider it concordant */
return true;
} else if (flag & PAIRED_MAPPING) {
return true;
} else {
return false;
}
}
int
Bamline_concordantp (Bamline_T this) {
if (this->flag & QUERY_UNMAPPED) {
return false;
} else if ((this->flag & PAIRED_READ) == 0U) {
return false;
} else if (this->flag & PAIRED_MAPPING) {
return true;
} else {
return false;
}
}
int
Bamline_lowend_p (Bamline_T this) {
if (this->mate_chrpos_low > this->chrpos_low) {
return true;
} else {
return false;
}
}
int
Bamline_paired_read_p (Bamline_T this) {
if ((this->flag & PAIRED_READ) == 0) {
return false;
} else {
return true;
}
}
int
Bamline_firstend_p (Bamline_T this) {
if ((this->flag & PAIRED_READ) == 0) {
return true;
} else if (this->flag & FIRST_READ_P) {
return true;
} else if (this->flag & SECOND_READ_P) {
return false;
} else {
fprintf(stderr,"Read is marked as paired (0x1), but neither first read nor second read bit is set\n");
exit(9);
}
}
|
5659f096 |
int
Bamline_hiti (Bamline_T this) {
return this->hiti;
}
|
36cb64f8 |
int
Bamline_nhits (Bamline_T this) {
return this->nhits;
}
bool
Bamline_good_unique_p (Bamline_T this) {
return this->good_unique_p;
}
|
c78be696 |
bool
Bamline_perfect_match_p (Bamline_T this) {
Intlist_T p;
int type;
unsigned char *s;
unsigned char tag1, tag2;
for (p = this->cigar_types; p != NULL; p = Intlist_next(p)) {
type = Intlist_head(p);
if (type == 'S') {
return false;
} else if (type == 'H') {
return false;
} else if (type == 'M') {
/* acceptable */
} else if (type == 'N') {
/* acceptable */
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
return false;
} else if (type == 'D') {
return false;
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
}
/* Check MD string */
s = this->aux_start;
while (s < this->aux_end) {
tag1 = *s++;
tag2 = *s++;
type = *s++;
if (tag1 == 'M' && tag2 == 'D') {
/* MD string */
while (*s) {
if (*s >= '0' && *s <= '9') {
/* acceptable */
s++;
} else {
return false;
}
}
/* s++; */
return true;
} else {
/* Skip this tag */
while (*s) { s++;}
s++;
}
}
/* No MD string found */
return true;
}
|
36cb64f8 |
int
Bamline_mapq (Bamline_T this) {
return this->mapq;
}
char *
Bamline_chr (Bamline_T this) {
return this->chr;
}
Genomicpos_T
Bamline_chrpos_low (Bamline_T this) {
return this->chrpos_low;
}
|
144f6bb1 |
Genomicpos_T
Bamline_chrpos_low_noclip (Bamline_T this) {
int type;
if ((type = Intlist_head(this->cigar_types)) != 'S') {
return this->chrpos_low;
} else {
return this->chrpos_low - (Uintlist_head(this->cigar_npositions));
}
}
|
36cb64f8 |
char *
Bamline_mate_chr (Bamline_T this) {
return this->mate_chr;
}
Genomicpos_T
Bamline_mate_chrpos_low (Bamline_T this) {
return this->mate_chrpos_low;
}
int
Bamline_insert_length (Bamline_T this) {
return this->insert_length;
}
Intlist_T
Bamline_cigar_types (Bamline_T this) {
return this->cigar_types;
}
Uintlist_T
Bamline_cigar_npositions (Bamline_T this) {
return this->cigar_npositions;
}
|
52b3ca64 |
Intlist_T
|
8418591e |
Bamline_diffcigar (int *min_overhang, Uintlist_T *npositions, Uintlist_T *chrpositions, Bamline_T this) {
|
52b3ca64 |
Intlist_T types = NULL;
int type;
Genomicpos_T chrpos;
Intlist_T p;
Uintlist_T q;
|
8418591e |
int querypos = 0;
|
52b3ca64 |
*npositions = (Uintlist_T) NULL;
*chrpositions = (Uintlist_T) NULL;
|
8418591e |
*min_overhang = 0;
|
52b3ca64 |
chrpos = this->chrpos_low;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
if ((type = Intlist_head(p)) == 'S') {
|
8418591e |
querypos += Uintlist_head(q);
|
52b3ca64 |
} else if (type == 'H') {
/* Ignore */
} else if (type == 'M') {
|
8418591e |
querypos += Uintlist_head(q);
|
52b3ca64 |
chrpos += Uintlist_head(q);
} else if (type == 'N') {
types = Intlist_push(types,type);
*npositions = Uintlist_push(*npositions,Uintlist_head(q));
*chrpositions = Uintlist_push(*chrpositions,chrpos);
chrpos += Uintlist_head(q);
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
|
8418591e |
if (querypos < this->readlength/2) {
if (querypos > *min_overhang) {
*min_overhang = querypos;
}
} else {
if (this->readlength - querypos > *min_overhang) {
*min_overhang = this->readlength - querypos;
}
}
|
52b3ca64 |
types = Intlist_push(types,type);
*npositions = Uintlist_push(*npositions,Uintlist_head(q));
*chrpositions = Uintlist_push(*chrpositions,chrpos);
|
8418591e |
querypos += Uintlist_head(q);
|
52b3ca64 |
} else if (type == 'D') {
|
8418591e |
if (querypos < this->readlength/2) {
if (querypos > *min_overhang) {
*min_overhang = querypos;
}
} else {
if (this->readlength - querypos > *min_overhang) {
*min_overhang = this->readlength - querypos;
}
}
|
52b3ca64 |
types = Intlist_push(types,type);
*npositions = Uintlist_push(*npositions,Uintlist_head(q));
*chrpositions = Uintlist_push(*chrpositions,chrpos);
chrpos += Uintlist_head(q);
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
debug(printf(" type = %c, chrpos = %u\n",type,chrpos));
}
*npositions = Uintlist_reverse(*npositions);
*chrpositions = Uintlist_reverse(*chrpositions);
return Intlist_reverse(types);
}
|
36cb64f8 |
void
|
95e6ece6 |
Bamread_print_cigar (FILE *fp, Bamline_T this) {
|
36cb64f8 |
Intlist_T p;
Uintlist_T q;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
|
95e6ece6 |
fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
|
36cb64f8 |
}
return;
}
|
4d5cc806 |
char *
Bamline_cigar_string (Bamline_T this) {
char *string, number[12];
Intlist_T p;
Uintlist_T q;
int string_length = 0;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
sprintf(number,"%u",Uintlist_head(q));
string_length += strlen(number) + 1;
}
string = (char *) CALLOC(string_length+1,sizeof(char));
string_length = 0;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
sprintf(number,"%u",Uintlist_head(q));
sprintf(&(string[string_length]),"%u%c",Uintlist_head(q),Intlist_head(p));
string_length += strlen(number) + 1;
}
return string;
}
|
36cb64f8 |
int
Bamline_cigar_querylength (Bamline_T this) {
return this->cigar_querylength;
}
int
Bamline_readlength (Bamline_T this) {
return this->readlength;
}
char *
Bamline_read (Bamline_T this) {
return this->read;
}
char *
Bamline_quality_string (Bamline_T this) {
return this->quality_string;
}
|
8418591e |
char *
Bamline_hardclip (Bamline_T this) {
return this->hardclip;
}
char *
Bamline_hardclip_quality (Bamline_T this) {
return this->hardclip_quality;
}
|
17db5e75 |
bool
Bamline_terminalp (Bamline_T this) {
return this->terminalp;
}
|
36cb64f8 |
static void
aux_print (FILE *fp, unsigned char *s, unsigned char *aux_end) {
unsigned char tag1, tag2;
int type;
while (s < aux_end) {
tag1 = *s++;
tag2 = *s++;
type = *s++;
fprintf(fp,"\t%c%c:",tag1,tag2);
if (type == 'c') {
fprintf(fp,"i:%d",* (int8_t *) s);
s += 1;
} else if (type == 'C') {
fprintf(fp,"i:%u",* (uint8_t *) s);
s += 1;
} else if (type == 's') {
fprintf(fp,"i:%d",* (int16_t *) s);
s += 2;
} else if (type == 'S') {
fprintf(fp,"i:%u",* (uint16_t *) s);
s += 2;
} else if (type == 'i') {
fprintf(fp,"i:%d",* (int32_t *) s);
s += 4;
} else if (type == 'I') {
fprintf(fp,"i:%u",* (uint32_t *) s);
s += 4;
} else if (type == 'A') {
fprintf(fp,"A:%c",* (char *) s);
s += 1;
} else if (type == 'f') {
fprintf(fp,"f:%f",* (float *) s);
s += 4;
} else if (type == 'd') {
fprintf(fp,"d:%f",* (double *) s);
s += 8;
} else if (type == 'Z' || type == 'H') {
fprintf(fp,"Z:");
while (*s) {
fprintf(fp,"%c",*s++);
}
s++;
} else {
/* fprintf(stderr,"Unrecognized type %c\n",type); */
}
}
return;
}
|
144f6bb1 |
static char *
aux_print_skip_md (FILE *fp, unsigned char *s, unsigned char *aux_end) {
char *orig_md_string = NULL;
unsigned char tag1, tag2;
int type;
while (s < aux_end) {
tag1 = *s++;
tag2 = *s++;
type = *s++;
if (tag1 == 'M' && tag2 == 'D') {
/* Skip MD string. Assume type is 'Z' or 'H' */
orig_md_string = (char *) s;
while (*s) { s++;}
s++;
} else {
fprintf(fp,"\t%c%c:",tag1,tag2);
if (type == 'c') {
fprintf(fp,"i:%d",* (int8_t *) s);
s += 1;
} else if (type == 'C') {
fprintf(fp,"i:%u",* (uint8_t *) s);
s += 1;
} else if (type == 's') {
fprintf(fp,"i:%d",* (int16_t *) s);
s += 2;
} else if (type == 'S') {
fprintf(fp,"i:%u",* (uint16_t *) s);
s += 2;
} else if (type == 'i') {
fprintf(fp,"i:%d",* (int32_t *) s);
s += 4;
} else if (type == 'I') {
fprintf(fp,"i:%u",* (uint32_t *) s);
s += 4;
} else if (type == 'A') {
fprintf(fp,"A:%c",* (char *) s);
s += 1;
} else if (type == 'f') {
fprintf(fp,"f:%f",* (float *) s);
s += 4;
} else if (type == 'd') {
fprintf(fp,"d:%f",* (double *) s);
s += 8;
} else if (type == 'Z' || type == 'H') {
fprintf(fp,"Z:");
while (*s) {
fprintf(fp,"%c",*s++);
}
s++;
} else {
/* fprintf(stderr,"Unrecognized type %c\n",type); */
}
}
}
return orig_md_string;
}
|
36cb64f8 |
void
|
95e6ece6 |
Bamline_print (FILE *fp, Bamline_T this, unsigned int newflag, int quality_score_adj) {
|
36cb64f8 |
Intlist_T p;
Uintlist_T q;
|
95e6ece6 |
char *c;
|
36cb64f8 |
fprintf(fp,"%s\t",this->acc);
fprintf(fp,"%u\t",newflag);
if (this->chr == NULL) {
fprintf(fp,"*\t0\t");
} else {
fprintf(fp,"%s\t%u\t",this->chr,this->chrpos_low);
}
fprintf(fp,"%d\t",this->mapq);
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
}
fprintf(fp,"\t");
if (this->mate_chr == NULL) {
fprintf(fp,"*\t0\t");
} else if (this->chr != NULL && strcmp(this->mate_chr,this->chr) == 0) {
fprintf(fp,"=\t%u\t",this->mate_chrpos_low);
} else {
fprintf(fp,"%s\t%u\t",this->mate_chr,this->mate_chrpos_low);
}
fprintf(fp,"%d\t",this->insert_length);
fprintf(fp,"%s\t",this->read);
if (this->quality_string == NULL) {
fprintf(fp,"*");
} else {
|
95e6ece6 |
/* fprintf(fp,"%s",this->quality_string); */
c = this->quality_string;
while (*c != '\0') {
fputc(*c + quality_score_adj,fp);
c++;
}
|
36cb64f8 |
}
aux_print(fp,this->aux_start,this->aux_end);
fprintf(fp,"\n");
return;
}
|
144f6bb1 |
void
Bamline_print_new_cigar (FILE *fp, Bamline_T this, Genomicpos_T chrpos_low, char *new_cigar,
|
c78be696 |
char *new_md_string, int quality_score_adj) {
|
144f6bb1 |
Intlist_T p;
Uintlist_T q;
char *orig_md_string;
|
c78be696 |
char *c;
|
144f6bb1 |
fprintf(fp,"%s\t",this->acc);
fprintf(fp,"%u\t",this->flag);
fprintf(fp,"%s\t%u\t",this->chr,chrpos_low);
fprintf(fp,"%d\t",this->mapq);
fprintf(fp,"%s\t",new_cigar);
/* TODO: Fix mate information */
if (this->mate_chr == NULL) {
fprintf(fp,"*\t0\t");
} else if (this->chr != NULL && strcmp(this->mate_chr,this->chr) == 0) {
fprintf(fp,"=\t%u\t",this->mate_chrpos_low);
} else {
fprintf(fp,"%s\t%u\t",this->mate_chr,this->mate_chrpos_low);
}
fprintf(fp,"%d\t",this->insert_length);
fprintf(fp,"%s\t",this->read);
if (this->quality_string == NULL) {
fprintf(fp,"*");
} else {
|
c78be696 |
/* fprintf(fp,"%s",this->quality_string); */
c = this->quality_string;
while (*c != '\0') {
fputc(*c + quality_score_adj,fp);
c++;
}
|
144f6bb1 |
}
fprintf(fp,"\tMD:Z:%s",new_md_string);
orig_md_string = aux_print_skip_md(fp,this->aux_start,this->aux_end);
/* Original alignment */
fprintf(fp,"\tXX:i:%u",this->chrpos_low);
fprintf(fp,"\tXY:Z:");
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
}
if (orig_md_string != NULL) {
fprintf(fp,"\tXZ:Z:%s",orig_md_string);
}
fprintf(fp,"\n");
return;
}
|
52b3ca64 |
void
Bamline_print_new_mate (FILE *fp, Bamline_T this, char *mate_chr, Genomicpos_T mate_chrpos_low,
int insert_length) {
Intlist_T p;
Uintlist_T q;
fprintf(fp,"%s\t",this->acc);
fprintf(fp,"%u\t",this->flag);
if (this->chr == NULL) {
fprintf(fp,"*\t0\t");
} else {
fprintf(fp,"%s\t%u\t",this->chr,this->chrpos_low);
}
fprintf(fp,"%d\t",this->mapq);
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
}
fprintf(fp,"\t");
if (mate_chr == NULL) {
fprintf(fp,"*\t0\t");
} else if (this->chr != NULL && strcmp(mate_chr,this->chr) == 0) {
fprintf(fp,"=\t%u\t",mate_chrpos_low);
} else {
fprintf(fp,"%s\t%u\t",mate_chr,mate_chrpos_low);
}
assert(insert_length >= 0);
if (this->chrpos_low < mate_chrpos_low) {
fprintf(fp,"%d\t",insert_length);
} else {
fprintf(fp,"%d\t",-insert_length);
}
fprintf(fp,"%s\t",this->read);
if (this->quality_string == NULL) {
fprintf(fp,"*");
} else {
fprintf(fp,"%s",this->quality_string);
}
aux_print(fp,this->aux_start,this->aux_end);
fprintf(fp,"\n");
return;
}
|
8418591e |
static int
aux_nm (T this) {
#ifndef HAVE_SAMTOOLS_LIB
return 0;
#else
uint8_t *s;
s = bam_aux_get(this->bam,"NM");
if (s == NULL) {
return 0;
} else {
return bam_aux2i(s);
}
#endif
}
int
Bamline_nm (Bamline_T this) {
return this->nm;
}
|
36cb64f8 |
static char
aux_splice_strand (T this) {
char strand;
|
b0aa06af |
#ifndef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
return ' ';
#else
uint8_t *s;
s = bam_aux_get(this->bam,"XS");
if (s == NULL) {
return ' ';
} else if ((strand = bam_aux2A(s)) == '?') {
return '?';
} else {
return strand;
}
#endif
}
char
Bamline_splice_strand (Bamline_T this) {
return this->splice_strand;
}
|
5659f096 |
static int
aux_hiti (T this) {
#ifndef HAVE_SAMTOOLS_LIB
return 1;
#else
uint8_t *s;
s = bam_aux_get(this->bam,"HI");
if (s == NULL) {
return 1;
} else {
return bam_aux2i(s);
}
#endif
}
|
36cb64f8 |
static int
aux_nhits (T this) {
|
b0aa06af |
#ifndef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
return 1;
#else
uint8_t *s;
s = bam_aux_get(this->bam,"NH");
if (s == NULL) {
return 1;
} else {
return bam_aux2i(s);
}
#endif
}
static bool
aux_good_unique_p (T this, int good_unique_mapq) {
|
b0aa06af |
#ifndef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
return true;
#else
uint8_t *s;
s = bam_aux_get(this->bam,"X2");
if (s != NULL) {
if (bam_aux2i(s) < good_unique_mapq) {
return true;
} else {
return false;
}
} else {
s = bam_aux_get(this->bam,"NH");
if (s == NULL) {
return true;
} else if (bam_aux2i(s) <= 1) {
return true;
} else {
return false;
}
}
#endif
}
static char complCode[128] = COMPLEMENT_LC;
static char
find_strand (bool *canonicalp, char *donor1, char *donor2, char *acceptor1, char *acceptor2,
Genomicpos_T firstpos, Genomicpos_T secondpos, char *chr,
Genome_T genome, IIT_T chromosome_iit, Bamline_T this, bool trust_sam_p) {
Chrnum_T chrnum;
Genomicpos_T chroffset;
char nt1, nt2, nt3, nt4;
char truestrand;
if ((truestrand = this->splice_strand) != ' ') {
if (trust_sam_p == true) {
*canonicalp = true;
*donor1 = *donor2 = *acceptor1 = *acceptor2 = ' ';
return truestrand;
} else {
chrnum = IIT_find_one(chromosome_iit,chr);
chroffset = Interval_low(IIT_interval(chromosome_iit,chrnum)) - 1U;
/* Look at genome inside of firstpos and secondpos to get dinucleotides */
nt1 = Genome_get_char(genome,chroffset+firstpos+1);
nt2 = Genome_get_char(genome,chroffset+firstpos+2);
nt3 = Genome_get_char(genome,chroffset+secondpos-2);
nt4 = Genome_get_char(genome,chroffset+secondpos-1);
debug(printf("Got splice from %u to %u\n",firstpos,secondpos));
debug(printf("Dinucleotides are %c%c to %c%c\n",nt1,nt2,nt3,nt4));
if (truestrand == '+') {
if (nt1 == 'G' && (nt2 == 'T' || nt2 == 'C') && nt3 == 'A' && nt4 == 'G') {
*canonicalp = true;
} else if (nt1 == 'A' && nt2 == 'T' && nt3 == 'A' && nt4 == 'C') {
*canonicalp = true;
} else {
*canonicalp = false;
}
*donor1 = nt1; *donor2 = nt2; *acceptor1 = nt3; *acceptor2 = nt4;
} else if (truestrand == '-') {
if (nt1 == 'C' && nt2 == 'T' && (nt3 == 'A' || nt3 == 'G') && nt4 == 'C') {
*canonicalp = true;
} else if (nt1 == 'G' && nt2 == 'T' && nt3 == 'A' && nt4 == 'T') {
*canonicalp = true;
} else {
*canonicalp = false;
}
*donor1 = complCode[(int) nt4]; *donor2 = complCode[(int) nt3]; *acceptor1 = complCode[(int) nt2]; *acceptor2 = complCode[(int) nt1];
} else {
fprintf(stderr,"Unrecognized truestrand %c\n",truestrand);
abort();
}
return truestrand;
}
} else if (chromosome_iit == NULL) {
fprintf(stderr,"Strand is not present in auxinfo\n");
fprintf(stderr,"To determine strand, need to provide index file with -d flag\n");
exit(9);
} else {
chrnum = IIT_find_one(chromosome_iit,chr);
chroffset = Interval_low(IIT_interval(chromosome_iit,chrnum)) - 1U;
/* Look at genome inside of firstpos and secondpos to determine truestrand */
nt1 = Genome_get_char(genome,chroffset+firstpos+1);
nt2 = Genome_get_char(genome,chroffset+firstpos+2);
nt3 = Genome_get_char(genome,chroffset+secondpos-2);
nt4 = Genome_get_char(genome,chroffset+secondpos-1);
debug(printf("Got splice from %u to %u\n",firstpos,secondpos));
debug(printf("Dinucleotides are %c%c to %c%c\n",nt1,nt2,nt3,nt4));
if (nt1 == 'G' && (nt2 == 'T' || nt2 == 'C') && nt3 == 'A' && nt4 == 'G') {
*donor1 = nt1; *donor2 = nt2; *acceptor1 = nt3; *acceptor2 = nt4;
*canonicalp = true;
return '+';
} else if (nt1 == 'C' && nt2 == 'T' && (nt3 == 'A' || nt3 == 'G') && nt4 == 'C') {
*donor1 = complCode[(int) nt4]; *donor2 = complCode[(int) nt3]; *acceptor1 = complCode[(int) nt2]; *acceptor2 = complCode[(int) nt1];
*canonicalp = true;
return '-';
} else if (nt1 == 'A' && nt2 == 'T' && nt3 == 'A' && nt4 == 'C') {
*donor1 = nt1; *donor2 = nt2; *acceptor1 = nt3; *acceptor2 = nt4;
*canonicalp = true;
return '+';
} else if (nt1 == 'G' && nt2 == 'T' && nt3 == 'A' && nt4 == 'T') {
*donor1 = complCode[(int) nt4]; *donor2 = complCode[(int) nt3]; *acceptor1 = complCode[(int) nt2]; *acceptor2 = complCode[(int) nt1];
*canonicalp = true;
return '-';
} else {
/* In GSNAP, will want to output sense information in SAM output. */
#if 0
fprintf(stderr,"Splice %s:%u..%u is not (semi-)canonical: %c%c...%c%c. Cannot determine sense.\n",
chr,firstpos,secondpos,nt1,nt2,nt3,nt4);
#endif
*donor1 = nt1; *donor2 = nt2; *acceptor1 = nt3; *acceptor2 = nt4;
*canonicalp = false;
return ' ';
}
}
}
char
Bamline_strand (Bamline_T this, Genome_T genome, IIT_T chromosome_iit) {
char strand = ' ';
Genomicpos_T chrpos, firstpos, secondpos;
Intlist_T p;
Uintlist_T q;
int type;
bool canonicalp;
char donor1, donor2, acceptor1, acceptor2;
chrpos = this->chrpos_low;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
if ((type = Intlist_head(p)) == 'S') {
/* Ignore */
} else if (type == 'H') {
/* Ignore */
} else if (type == 'M') {
chrpos += Uintlist_head(q);
} else if (type == 'N') {
firstpos = chrpos - 1U;
chrpos += Uintlist_head(q);
secondpos = chrpos;
if (strand == ' ') {
strand = find_strand(&canonicalp,&donor1,&donor2,&acceptor1,&acceptor2,
firstpos,secondpos,this->chr,genome,chromosome_iit,
this,/*trust_sam_p*/true);
return strand;
}
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
/* Do nothing */
} else if (type == 'D') {
/* CHECK */
chrpos += Uintlist_head(q);
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
debug(printf(" type = %c, chrpos = %u\n",type,chrpos));
}
return ' ';
}
Genomicpos_T
Bamline_chrpos_high (Bamline_T this) {
Intlist_T p;
Uintlist_T q;
Genomicpos_T chrpos_high;
int type;
chrpos_high = this->chrpos_low;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
if ((type = Intlist_head(p)) == 'S') {
/* Ignore */
} else if (type == 'H') {
/* Ignore */
} else if (type == 'M') {
chrpos_high += Uintlist_head(q);
} else if (type == 'N') {
chrpos_high += Uintlist_head(q);
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
/* Do nothing */
} else if (type == 'D') {
/* CHECK */
chrpos_high += Uintlist_head(q);
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
debug(printf(" type = %c, chrpos = %u\n",type,chrpos_high));
}
return chrpos_high - 1U;
}
|
c78be696 |
Genomicpos_T
Bamline_chrpos_high_noclip (Bamline_T this) {
Intlist_T p;
Uintlist_T q;
Genomicpos_T chrpos_high, mlength;
int type;
|
36cb64f8 |
|
c78be696 |
chrpos_high = this->chrpos_low;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
if ((type = Intlist_head(p)) == 'S') {
/* Ignore, but save length */
mlength = Uintlist_head(q);
} else if (type == 'H') {
/* Ignore */
} else if (type == 'M') {
chrpos_high += Uintlist_head(q);
} else if (type == 'N') {
chrpos_high += Uintlist_head(q);
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
/* Do nothing */
} else if (type == 'D') {
/* CHECK */
chrpos_high += Uintlist_head(q);
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
debug(printf(" type = %c, chrpos = %u\n",type,chrpos_high));
}
/* Check last type, to extend soft clipping at high end */
if (type == 'S') {
chrpos_high += mlength;
}
return chrpos_high - 1U;
}
Genomicpos_T
Bamline_total_ins (Bamline_T this) {
Genomicpos_T total_ins = 0;
Intlist_T p;
Uintlist_T q;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
if (Intlist_head(p) == 'I') {
total_ins += Uintlist_head(q);
}
}
return total_ins;
}
int
Bamline_nmismatches (Bamline_T this) {
int nmismatches = 0;
unsigned char tag1, tag2, *s, *md_string = NULL;
|
8418591e |
/* unsigned char type; */
|
c78be696 |
Intlist_T p;
Uintlist_T q;
s = this->aux_start;
while (s < this->aux_end && md_string == NULL) {
tag1 = *s++;
tag2 = *s++;
|
8418591e |
#if 0
|
c78be696 |
type = *s++;
|
8418591e |
#else
s++;
#endif
|
c78be696 |
if (tag1 == 'M' && tag2 == 'D') {
/* Start of MD string */
md_string = s;
while (*s) {
if (*s >= '0' && *s <= '9') {
/* acceptable */
} else if (*s == '^') {
/* acceptable */
} else {
nmismatches += 1;
}
s++;
}
}
}
/* Need to subtract deletions */
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
if (Intlist_head(p) == 'D') {
nmismatches -= (int) Uintlist_head(q);
}
}
return nmismatches;
}
|
36cb64f8 |
static void
Bamline_regions (Uintlist_T *chrpos_lows, Uintlist_T *chrpos_highs, Bamline_T this) {
Intlist_T p;
Uintlist_T q;
Genomicpos_T position;
int type;
position = this->chrpos_low;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
if ((type = Intlist_head(p)) == 'S') {
/* Ignore */
} else if (type == 'H') {
/* Ignore */
} else if (type == 'M') {
*chrpos_lows = Uintlist_push(*chrpos_lows,position);
position += Uintlist_head(q);
*chrpos_highs = Uintlist_push(*chrpos_highs,position);
} else if (type == 'N') {
position += Uintlist_head(q);
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
/* Do nothing */
} else if (type == 'D') {
/* CHECK */
position += Uintlist_head(q);
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
}
return;
}
static void
Bamline_splices (Uintlist_T *splice_lows, Uintlist_T *splice_highs, Intlist_T *splice_signs,
Bamline_T this) {
Intlist_T p;
Uintlist_T q;
Genomicpos_T position;
int type;
position = this->chrpos_low;
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
if ((type = Intlist_head(p)) == 'S') {
/* Ignore */
} else if (type == 'H') {
/* Ignore */
} else if (type == 'M') {
position += Uintlist_head(q);
} else if (type == 'N') {
*splice_lows = Uintlist_push(*splice_lows,position);
position += Uintlist_head(q);
*splice_highs = Uintlist_push(*splice_highs,position);
if (this->splice_strand == '+') {
*splice_signs = Intlist_push(*splice_signs,+1);
} else if (this->splice_strand == '-') {
*splice_signs = Intlist_push(*splice_signs,-1);
} else {
*splice_signs = Intlist_push(*splice_signs,0);
}
} else if (type == 'P') {
/* Do nothing */
} else if (type == 'I') {
/* Do nothing */
} else if (type == 'D') {
/* CHECK */
position += Uintlist_head(q);
} else {
fprintf(stderr,"Cannot parse type %c\n",type);
exit(9);
}
}
return;
}
void
Bamline_free (Bamline_T *old) {
|
c78be696 |
|
36cb64f8 |
if (*old) {
FREE((*old)->acc);
if ((*old)->chr != NULL) {
FREE((*old)->chr);
}
if ((*old)->mate_chr != NULL) {
FREE((*old)->mate_chr);
}
Intlist_free(&(*old)->cigar_types);
Uintlist_free(&(*old)->cigar_npositions);
FREE((*old)->read);
if ((*old)->quality_string != NULL) {
FREE((*old)->quality_string);
}
|
8418591e |
if ((*old)->hardclip_quality != NULL) {
FREE((*old)->hardclip_quality);
}
if ((*old)->hardclip != NULL) {
FREE((*old)->hardclip);
}
|
c78be696 |
if ((*old)->aux_contents != NULL) {
FREE((*old)->aux_contents);
}
|
36cb64f8 |
FREE(*old);
}
return;
}
static Bamline_T
|
5659f096 |
Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique_p, int mapq,
|
8418591e |
int nm, char splice_strand, char *chr, Genomicpos_T chrpos_low,
|
36cb64f8 |
char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length,
Intlist_T cigar_types, Uintlist_T cigar_npositions, int cigar_querylength, int readlength,
|
8418591e |
char *read, char *quality_string, char *hardclip, char *hardclip_quality, bool terminalp,
|
c78be696 |
unsigned char *aux_start, unsigned char *aux_end, bool copy_aux_contents_p) {
|
36cb64f8 |
Bamline_T new = (Bamline_T) MALLOC(sizeof(*new));
new->acc = (char *) CALLOC(strlen(acc)+1,sizeof(char));
strcpy(new->acc,acc);
new->flag = flag;
|
5659f096 |
new->hiti = hiti;
|
36cb64f8 |
new->nhits = nhits;
new->good_unique_p = good_unique_p;
new->mapq = mapq;
|
8418591e |
new->nm = nm;
|
36cb64f8 |
new->splice_strand = splice_strand;
if (chr == NULL) {
new->chr = (char *) NULL;
} else {
new->chr = (char *) CALLOC(strlen(chr)+1,sizeof(char));
strcpy(new->chr,chr);
}
new->chrpos_low = chrpos_low;
if (mate_chr == NULL) {
new->mate_chr = (char *) NULL;
} else {
new->mate_chr = (char *) CALLOC(strlen(mate_chr)+1,sizeof(char));
strcpy(new->mate_chr,mate_chr);
}
new->mate_chrpos_low = mate_chrpos_low;
new->insert_length = insert_length;
new->cigar_types = cigar_types; /* not copying */
new->cigar_npositions = cigar_npositions; /* not copying */
new->cigar_querylength = cigar_querylength;
new->readlength = readlength;
|
8418591e |
assert((int) strlen(read) == readlength);
/* read should already be allocated from parse_line */
#if 0
if (read == NULL) {
new->read = NULL;
} else {
new->read = (char *) CALLOC(readlength+1,sizeof(char));
strcpy(new->read,read);
}
#else
|
36cb64f8 |
new->read = read;
|
8418591e |
#endif
|
36cb64f8 |
if (quality_string == NULL) {
new->quality_string = NULL;
} else {
new->quality_string = (char *) CALLOC(readlength+1,sizeof(char));
strncpy(new->quality_string,quality_string,readlength);
}
|
8418591e |
/* No need to copy, since they are allocated already */
new->hardclip = hardclip;
new->hardclip_quality = hardclip_quality;
|
17db5e75 |
new->terminalp = terminalp;
|
c78be696 |
if (copy_aux_contents_p == true) {
new->aux_length = aux_end - aux_start;
|
f51bc89d |
if (new->aux_length == 0) {
new->aux_contents = (uint8_t *) NULL;
new->aux_start = (uint8_t *) NULL;
new->aux_end = (uint8_t *) NULL;
} else {
new->aux_contents = (uint8_t *) MALLOC(new->aux_length * sizeof(unsigned char));
memcpy(new->aux_contents,aux_start,new->aux_length);
new->aux_start = (unsigned char *) new->aux_contents;
new->aux_end = new->aux_start + new->aux_length;
}
|
c78be696 |
} else {
/* Point to bam->data */
new->aux_contents = (uint8_t *) NULL;
new->aux_start = aux_start;
new->aux_end = aux_end;
}
|
36cb64f8 |
return new;
}
Bamline_T
|
4d5cc806 |
Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
|
95e6ece6 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
bool need_concordant_p) {
|
36cb64f8 |
char *acc, *chr, *mate_chr, splice_strand;
|
8418591e |
int nm;
|
36cb64f8 |
unsigned int flag;
|
5659f096 |
int hiti, nhits;
|
36cb64f8 |
bool good_unique_p;
int mapq;
Genomicpos_T chrpos_low, mate_chrpos_low;
int insert_length;
Intlist_T cigar_types;
Uintlist_T cigarlengths;
int cigar_querylength, readlength;
char *read;
char *quality_string;
|
8418591e |
char *hardclip;
char *hardclip_quality;
|
4d5cc806 |
char *read_group;
|
17db5e75 |
bool terminalp;
|
36cb64f8 |
|
b0aa06af |
#ifndef HAVE_SAMTOOLS_LIB
|
36cb64f8 |
return (Bamline_T) NULL;
#else
if (this->region_limited_p == 1) {
debug1(fprintf(stderr,"Region limited\n"));
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
debug1(fprintf(stderr,"Got line\n"));
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
&mate_chr,&mate_chrpos_low,&insert_length,
&cigar_types,&cigarlengths,&cigar_querylength,
|
8418591e |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,
&read_group,&terminalp);
|
4d5cc806 |
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (mapq < minimum_mapq) {
|
36cb64f8 |
debug1(fprintf(stderr,"Fails because mapq %d < minimum %d\n",mapq,minimum_mapq));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if ((nhits = aux_nhits(this)) > maximum_nhits) {
debug1(fprintf(stderr,"Fails because nhits %d > maximum %d\n",nhits,maximum_nhits));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (need_unique_p == true && nhits > 1) {
debug1(fprintf(stderr,"Fails because need unique and nhits %d\n",nhits));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (need_primary_p == true && (flag & NOT_PRIMARY) != 0) {
debug1(fprintf(stderr,"Fails because need primary and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
|
95e6ece6 |
} else if (ignore_duplicates_p == true && (flag & DUPLICATE_READ) != 0) {
debug1(fprintf(stderr,"Fails because ignoring duplicates and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
|
36cb64f8 |
} else if (need_concordant_p == true && concordantp(flag) == false) {
debug1(fprintf(stderr,"Fails because need concordant and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else {
debug1(fprintf(stderr,"Success\n"));
|
5659f096 |
hiti = aux_hiti(this);
|
8418591e |
nm = aux_nm(this);
|
36cb64f8 |
splice_strand = aux_splice_strand(this);
good_unique_p = aux_good_unique_p(this,good_unique_mapq);
|
5659f096 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
|
36cb64f8 |
mate_chr,mate_chrpos_low,insert_length,
cigar_types,cigarlengths,cigar_querylength,
|
8418591e |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
|
36cb64f8 |
/*aux_start*/bam1_aux(this->bam),
|
c78be696 |
/*aux_end*/this->bam->data + this->bam->data_len,
/*copy_aux_contents_p*/false);
|
36cb64f8 |
}
}
return (Bamline_T) NULL;
} else {
while (bam_read1(this->fp,this->bam) >= 0) {
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
&mate_chr,&mate_chrpos_low,&insert_length,
&cigar_types,&cigarlengths,&cigar_querylength,
|
8418591e |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
|
17db5e75 |
&terminalp);
|
36cb64f8 |
if (mapq >= minimum_mapq &&
(nhits = aux_nhits(this)) <= maximum_nhits &&
(need_unique_p == false || nhits == 1) &&
(need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
(need_concordant_p == false || concordantp(flag) == true)) {
|
5659f096 |
hiti = aux_hiti(this);
|
8418591e |
nm = aux_nm(this);
|
36cb64f8 |
splice_strand = aux_splice_strand(this);
good_unique_p = aux_good_unique_p(this,good_unique_mapq);
|
5659f096 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
|
36cb64f8 |
mate_chr,mate_chrpos_low,insert_length,
cigar_types,cigarlengths,cigar_querylength,
|
8418591e |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
|
36cb64f8 |
/*aux_start*/bam1_aux(this->bam),
|
c78be696 |
/*aux_end*/this->bam->data + this->bam->data_len,
/*copy_aux_contents_p*/false);
|
36cb64f8 |
} else {
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
}
}
return (Bamline_T) NULL;
|
c78be696 |
}
#endif
}
Bamline_T
Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
bool need_concordant_p) {
char *acc, *chr, *mate_chr, splice_strand;
|
8418591e |
int nm;
|
c78be696 |
unsigned int flag;
|
5659f096 |
int hiti, nhits;
|
c78be696 |
bool good_unique_p;
int mapq;
Genomicpos_T chrpos_low, mate_chrpos_low;
int insert_length;
Intlist_T cigar_types;
Uintlist_T cigarlengths;
int cigar_querylength, readlength;
char *read;
char *quality_string;
|
8418591e |
char *hardclip;
char *hardclip_quality;
|
c78be696 |
char *read_group;
bool terminalp;
|
b0aa06af |
#ifndef HAVE_SAMTOOLS_LIB
|
c78be696 |
return (Bamline_T) NULL;
#else
if (this->region_limited_p == 1) {
debug1(fprintf(stderr,"Region limited\n"));
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
if (perfect_match_p(this) == true) {
/* Skip */
} else {
debug1(fprintf(stderr,"Got line\n"));
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
&mate_chr,&mate_chrpos_low,&insert_length,
&cigar_types,&cigarlengths,&cigar_querylength,
|
8418591e |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
|
c78be696 |
&terminalp);
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (mapq < minimum_mapq) {
debug1(fprintf(stderr,"Fails because mapq %d < minimum %d\n",mapq,minimum_mapq));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if ((nhits = aux_nhits(this)) > maximum_nhits) {
debug1(fprintf(stderr,"Fails because nhits %d > maximum %d\n",nhits,maximum_nhits));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (need_unique_p == true && nhits > 1) {
debug1(fprintf(stderr,"Fails because need unique and nhits %d\n",nhits));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (need_primary_p == true && (flag & NOT_PRIMARY) != 0) {
debug1(fprintf(stderr,"Fails because need primary and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (ignore_duplicates_p == true && (flag & DUPLICATE_READ) != 0) {
debug1(fprintf(stderr,"Fails because ignoring duplicates and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (need_concordant_p == true && concordantp(flag) == false) {
debug1(fprintf(stderr,"Fails because need concordant and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else {
debug1(fprintf(stderr,"Success\n"));
|
5659f096 |
hiti = aux_hiti(this);
|
8418591e |
nm = aux_nm(this);
|
c78be696 |
splice_strand = aux_splice_strand(this);
good_unique_p = aux_good_unique_p(this,good_unique_mapq);
|
5659f096 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
|
c78be696 |
mate_chr,mate_chrpos_low,insert_length,
cigar_types,cigarlengths,cigar_querylength,
|
8418591e |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
|
c78be696 |
/*aux_start*/bam1_aux(this->bam),
/*aux_end*/this->bam->data + this->bam->data_len,
/*copy_aux_contents_p*/true);
}
}
}
return (Bamline_T) NULL;
} else {
while (bam_read1(this->fp,this->bam) >= 0) {
if (perfect_match_p(this) == true) {
/* Skip */
} else {
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
&mate_chr,&mate_chrpos_low,&insert_length,
&cigar_types,&cigarlengths,&cigar_querylength,
|
8418591e |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
|
c78be696 |
&terminalp);
if (mapq >= minimum_mapq &&
(nhits = aux_nhits(this)) <= maximum_nhits &&
(need_unique_p == false || nhits == 1) &&
(need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
(need_concordant_p == false || concordantp(flag) == true)) {
|
5659f096 |
hiti = aux_hiti(this);
|
8418591e |
nm = aux_nm(this);
|
c78be696 |
splice_strand = aux_splice_strand(this);
good_unique_p = aux_good_unique_p(this,good_unique_mapq);
|
5659f096 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
|
c78be696 |
mate_chr,mate_chrpos_low,insert_length,
cigar_types,cigarlengths,cigar_querylength,
|
8418591e |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
|
c78be696 |
/*aux_start*/bam1_aux(this->bam),
/*aux_end*/this->bam->data + this->bam->data_len,
/*copy_aux_contents_p*/true);
} else {
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
}
}
}
return (Bamline_T) NULL;
}
#endif
}
Bamline_T
Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
bool need_concordant_p) {
char *acc, *chr, *mate_chr, splice_strand;
|
8418591e |
int nm;
|
c78be696 |
unsigned int flag;
|
5659f096 |
int hiti, nhits;
|
c78be696 |
bool good_unique_p;
int mapq;
Genomicpos_T chrpos_low, mate_chrpos_low;
int insert_length;
Intlist_T cigar_types;
Uintlist_T cigarlengths;
int cigar_querylength, readlength;
char *read;
char *quality_string;
|
8418591e |
char *hardclip;
char *hardclip_quality;
|
c78be696 |
char *read_group;
bool terminalp;
|
b0aa06af |
#ifndef HAVE_SAMTOOLS_LIB
|
c78be696 |
return (Bamline_T) NULL;
#else
if (this->region_limited_p == 1) {
debug1(fprintf(stderr,"Region limited\n"));
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
if (has_indel_p(this) == false) {
/* Skip */
} else {
debug1(fprintf(stderr,"Got line\n"));
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
&mate_chr,&mate_chrpos_low,&insert_length,
&cigar_types,&cigarlengths,&cigar_querylength,
|
8418591e |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
|
c78be696 |
&terminalp);
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (mapq < minimum_mapq) {
debug1(fprintf(stderr,"Fails because mapq %d < minimum %d\n",mapq,minimum_mapq));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if ((nhits = aux_nhits(this)) > maximum_nhits) {
debug1(fprintf(stderr,"Fails because nhits %d > maximum %d\n",nhits,maximum_nhits));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (need_unique_p == true && nhits > 1) {
debug1(fprintf(stderr,"Fails because need unique and nhits %d\n",nhits));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (need_primary_p == true && (flag & NOT_PRIMARY) != 0) {
debug1(fprintf(stderr,"Fails because need primary and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (ignore_duplicates_p == true && (flag & DUPLICATE_READ) != 0) {
debug1(fprintf(stderr,"Fails because ignoring duplicates and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else if (need_concordant_p == true && concordantp(flag) == false) {
debug1(fprintf(stderr,"Fails because need concordant and flag is %u\n",flag));
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
} else {
debug1(fprintf(stderr,"Success\n"));
|
5659f096 |
hiti = aux_hiti(this);
|
8418591e |
nm = aux_nm(this);
|
c78be696 |
splice_strand = aux_splice_strand(this);
good_unique_p = aux_good_unique_p(this,good_unique_mapq);
|
5659f096 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
|
c78be696 |
mate_chr,mate_chrpos_low,insert_length,
cigar_types,cigarlengths,cigar_querylength,
|
8418591e |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
|
c78be696 |
/*aux_start*/bam1_aux(this->bam),
/*aux_end*/this->bam->data + this->bam->data_len,
/*copy_aux_contents_p*/false);
}
}
}
|
36cb64f8 |
|
c78be696 |
return (Bamline_T) NULL; /* Signal end of BAM file */
} else {
while (bam_read1(this->fp,this->bam) >= 0) {
if (has_indel_p(this) == false) {
/* Skip */
} else {
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
&mate_chr,&mate_chrpos_low,&insert_length,
&cigar_types,&cigarlengths,&cigar_querylength,
|
8418591e |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
|
c78be696 |
&terminalp);
if (mapq >= minimum_mapq &&
(nhits = aux_nhits(this)) <= maximum_nhits &&
(need_unique_p == false || nhits == 1) &&
(need_primary_p == false || (flag & NOT_PRIMARY) == 0) &&
(need_concordant_p == false || concordantp(flag) == true)) {
|
5659f096 |
hiti = aux_hiti(this);
|
8418591e |
nm = aux_nm(this);
|
c78be696 |
splice_strand = aux_splice_strand(this);
good_unique_p = aux_good_unique_p(this,good_unique_mapq);
|
5659f096 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
|
c78be696 |
mate_chr,mate_chrpos_low,insert_length,
cigar_types,cigarlengths,cigar_querylength,
|
8418591e |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
|
c78be696 |
/*aux_start*/bam1_aux(this->bam),
/*aux_end*/this->bam->data + this->bam->data_len,
/*copy_aux_contents_p*/false);
} else {
Intlist_free(&cigar_types);
Uintlist_free(&cigarlengths);
FREE(read);
}
}
}
return (Bamline_T) NULL; /* Signal end of BAM file */
|
36cb64f8 |
}
#endif
|
c78be696 |
}
|
36cb64f8 |
|
c78be696 |
static int
bamline_read_cmp (const void *x, const void *y) {
int cmp;
Bamline_T a = * (Bamline_T *) x;
Bamline_T b = * (Bamline_T *) y;
Genomicpos_T a_chrpos_low_noclip, b_chrpos_low_noclip;
|
8418591e |
#if 0
|
c78be696 |
if (Intlist_head(a->cigar_types) != 'S') {
a_chrpos_low_noclip = a->chrpos_low;
} else {
a_chrpos_low_noclip = a->chrpos_low - Uintlist_head(a->cigar_npositions);
}
if (Intlist_head(b->cigar_types) != 'S') {
b_chrpos_low_noclip = b->chrpos_low;
} else {
b_chrpos_low_noclip = b->chrpos_low - Uintlist_head(b->cigar_npositions);
}
if (a_chrpos_low_noclip < b_chrpos_low_noclip) {
return -1;
} else if (b_chrpos_low_noclip < a_chrpos_low_noclip) {
return +1;
} else if ((cmp = strcmp(a->read,b->read)) != 0) {
return cmp;
} else if (a->mate_chrpos_low < b->mate_chrpos_low) {
return -1;
} else if (b->mate_chrpos_low < a->mate_chrpos_low) {
return +1;
} else {
return 0;
}
|
8418591e |
#else
return strcmp(a->read,b->read);
#endif
|
c78be696 |
}
Bamline_T *
Bamread_next_bamline_set (int *nlines, Bamline_T *prev_bamline,
T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
bool need_concordant_p) {
Bamline_T *bamlines = NULL;
List_T bamline_list = NULL;
Bamline_T bamline = NULL;
Chrpos_T set_chrpos_low, chrpos_low;
if (*prev_bamline == NULL) {
set_chrpos_low = 0;
} else {
bamline_list = List_push(bamline_list,(void *) *prev_bamline);
set_chrpos_low = Bamline_chrpos_low(*prev_bamline);
}
while ((bamline = Bamread_next_imperfect_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
need_unique_p,need_primary_p,ignore_duplicates_p,
need_concordant_p)) != NULL) {
#if 0
if (Bamline_perfect_match_p(bamline) == true) {
/* Exclude all perfect matches to the reference */
Bamline_free(&bamline);
}
#endif
if (set_chrpos_low == 0) {
bamline_list = List_push(bamline_list,(void *) bamline);
set_chrpos_low = Bamline_chrpos_low(bamline);
} else if ((chrpos_low = Bamline_chrpos_low(bamline)) < set_chrpos_low) {
fprintf(stderr,"BAM file not sorted by chrpos_low: %u < %u\n",chrpos_low,set_chrpos_low);
abort();
} else if (chrpos_low == set_chrpos_low) {
bamline_list = List_push(bamline_list,(void *) bamline);
} else {
bamline_list = List_reverse(bamline_list);
bamlines = (Bamline_T *) List_to_array_n(&(*nlines),bamline_list);
if (*nlines > 0) {
qsort(bamlines,*nlines,sizeof(Bamline_T),bamline_read_cmp);
}
List_free(&bamline_list);
*prev_bamline = bamline;
return bamlines;
}
}
bamline_list = List_reverse(bamline_list);
bamlines = (Bamline_T *) List_to_array_n(&(*nlines),bamline_list);
if (*nlines > 0) {
qsort(bamlines,*nlines,sizeof(Bamline_T),bamline_read_cmp);
}
List_free(&bamline_list);
*prev_bamline = bamline;
return bamlines;
|
36cb64f8 |
}
|
8418591e |
Bamline_T **
Bamread_block (int **nlines, Genomicpos_T chrstart, Genomicpos_T chrend,
T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
bool need_concordant_p) {
Bamline_T **block;
List_T *block_lists;
Bamline_T *bamlines = NULL;
Bamline_T bamline = NULL;
Chrpos_T chrpos_low_noclip, chrpos;
int n;
block = (Bamline_T **) CALLOC(chrend - chrstart + 1,sizeof(Bamline_T *));
block_lists = (List_T *) CALLOC(chrend - chrstart + 1,sizeof(List_T));
*nlines = (int *) CALLOC(chrend - chrstart + 1,sizeof(int));
/* Excludes all perfect matches to the reference */
while ((bamline = Bamread_next_imperfect_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
need_unique_p,need_primary_p,ignore_duplicates_p,
need_concordant_p)) != NULL) {
if (Intlist_head(bamline->cigar_types) != 'S') {
chrpos_low_noclip = bamline->chrpos_low;
} else {
chrpos_low_noclip = bamline->chrpos_low - Uintlist_head(bamline->cigar_npositions);
}
if (chrpos_low_noclip < chrstart) {
abort();
} else {
block_lists[chrpos_low_noclip - chrstart] = List_push(block_lists[chrpos_low_noclip - chrstart],(void *) bamline);
}
}
for (chrpos = chrstart; chrpos <= chrend; chrpos++) {
if (block_lists[chrpos - chrstart] != NULL) {
bamlines = (Bamline_T *) List_to_array_n(&n,block_lists[chrpos - chrstart]);
qsort(bamlines,n,sizeof(Bamline_T),bamline_read_cmp);
block[chrpos - chrstart] = bamlines;
(*nlines)[chrpos - chrstart] = n;
}
List_free(&(block_lists[chrpos - chrstart]));
}
FREE(block_lists);
return block;
}
|
95e6ece6 |
Bamline_T
Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *desired_acc) {
char *acc, *chr, *mate_chr, splice_strand;
|
8418591e |
int nm;
|
95e6ece6 |
unsigned int flag;
|
5659f096 |
int hiti, nhits;
|
95e6ece6 |
int mapq;
Genomicpos_T chrpos_low, mate_chrpos_low;
int insert_length;
Intlist_T cigar_types;
Uintlist_T cigarlengths;
int cigar_querylength, readlength;
char *read;
char *quality_string;
|
8418591e |
char *hardclip;
char *hardclip_quality;
|
4d5cc806 |
char *read_group;
|
17db5e75 |
bool terminalp;
|
95e6ece6 |
Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos);
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
debug1(fprintf(stderr,"Got line\n"));
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
&mate_chr,&mate_chrpos_low,&insert_length,
&cigar_types,&cigarlengths,&cigar_querylength,
|
8418591e |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
|
17db5e75 |
&terminalp);
|
5659f096 |
hiti = aux_hiti(this);
|
8418591e |
nm = aux_nm(this);
|
95e6ece6 |
splice_strand = aux_splice_strand(this);
if (!strcmp(acc,desired_acc) && chrpos_low == desired_chrpos) {
Bamread_unlimit_region(this);
|
5659f096 |
return Bamline_new(acc,flag,hiti,nhits,/*good_unique_p*/true,mapq,nm,splice_strand,chr,chrpos_low,
|
95e6ece6 |
mate_chr,mate_chrpos_low,insert_length,
cigar_types,cigarlengths,cigar_querylength,
|
8418591e |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
|
95e6ece6 |
/*aux_start*/bam1_aux(this->bam),
|
c78be696 |
/*aux_end*/this->bam->data + this->bam->data_len,
/*copy_aux_contents_p*/false);
|
95e6ece6 |
}
}
Bamread_unlimit_region(this);
return (Bamline_T) NULL;
}
|
36cb64f8 |
/************************************************************************
* Bamstore
************************************************************************/
struct Bamstore_T {
Genomicpos_T chrpos;
List_T bamlines;
};
void
Bamstore_free (Bamstore_T *old) {
List_T p;
Bamline_T bamline;
for (p = (*old)->bamlines; p != NULL; p = List_next(p)) {
bamline = (Bamline_T) List_head(p);
Bamline_free(&bamline);
}
List_free(&(*old)->bamlines);
FREE(*old);
return;
}
|
c78be696 |
void
Bamstore_free_nolines (Bamstore_T *old) {
List_free(&(*old)->bamlines);
FREE(*old);
return;
}
|
36cb64f8 |
Bamstore_T
Bamstore_new (Genomicpos_T chrpos) {
Bamstore_T new = (Bamstore_T) MALLOC(sizeof(*new));
new->chrpos = chrpos;
new->bamlines = (List_T) NULL;
return new;
}
Bamline_T
Bamstore_get (Table_T bamstore_chrtable, char *chr, Genomicpos_T low, char *acc,
|
5659f096 |
Genomicpos_T mate_low, int hiti) {
|
36cb64f8 |
List_T p, list = NULL;
Bamline_T wanted = NULL, bamline;
Bamstore_T bamstore;
Uinttable_T bamstore_table;
Chrom_T chrom;
chrom = Chrom_from_string(chr,/*mitochondrial_string*/NULL,/*order*/0U);
if ((bamstore_table = (Uinttable_T) Table_get(bamstore_chrtable,(void *) chrom)) == NULL) {
|
95e6ece6 |
/* Can happen if we read high end and not low end */
/* fprintf(stderr,"Unexpected error. No bamstore_table for chr %s\n",chr); */
|
36cb64f8 |
Chrom_free(&chrom);
return (Bamline_T) NULL;
} else {
Chrom_free(&chrom);
}
if ((bamstore = (Bamstore_T) Uinttable_get(bamstore_table,low)) == NULL) {
/* May have been excluded for other reasons */
return (Bamline_T) NULL;
} else {
for (p = bamstore->bamlines; p != NULL; p = List_next(p)) {
bamline = (Bamline_T) List_head(p);
|
5659f096 |
if (strcmp(Bamline_acc(bamline),acc) == 0 && Bamline_mate_chrpos_low(bamline) == mate_low && bamline->hiti == hiti) {
|
36cb64f8 |
wanted = bamline;
} else {
list = List_push(list,(void *) bamline);
}
}
List_free(&bamstore->bamlines);
bamstore->bamlines = list;
if (list == NULL) {
Uinttable_remove(bamstore_table,low);
Bamstore_free(&bamstore);
}
return wanted;
}
}
void
Bamstore_add_at_low (Table_T bamstore_chrtable, char *chr, Genomicpos_T low,
Bamline_T bamline) {
Bamstore_T bamstore;
Uinttable_T bamstore_table;
Chrom_T chrom;
chrom = Chrom_from_string(chr,/*mitochondrial_string*/NULL,/*order*/0U);
if ((bamstore_table = (Uinttable_T) Table_get(bamstore_chrtable,(void *) chrom)) == NULL) {
bamstore_table = Uinttable_new(65522); /* estimate 65522 splice sites per chromosome */
Table_put(bamstore_chrtable,(void *) chrom,(void *) bamstore_table);
} else {
Chrom_free(&chrom);
}
if ((bamstore = (Bamstore_T) Uinttable_get(bamstore_table,low)) == NULL) {
bamstore = Bamstore_new(low);
Uinttable_put(bamstore_table,low,(void *) bamstore);
}
bamstore->bamlines = List_push(bamstore->bamlines,(void *) bamline);
return;
}
void
Bamstore_table_free (Uinttable_T *bamstore_table) {
Genomicpos_T *keys;
int n, i;
Bamstore_T bamstore;
if ((n = Uinttable_length(*bamstore_table)) > 0) {
keys = (Genomicpos_T *) Uinttable_keys(*bamstore_table,/*sortp*/false);
for (i = 0; i < n; i++) {
bamstore = Uinttable_get(*bamstore_table,keys[i]);
if (bamstore == NULL) {
fprintf(stderr,"key is %u, value is NULL\n",keys[i]);
abort();
} else {
Bamstore_free(&bamstore);
}
}
FREE(keys);
}
return;
}
/************************************************************************
* Retrieving and assigning levels to all reads in a region
************************************************************************/
struct Bampair_T {
Bamline_T bamline_low;
Bamline_T bamline_high;
Genomicpos_T chrpos_low;
Genomicpos_T chrpos_high;
|
5659f096 |
Genomicpos_T chrpos_low_noclip;
Genomicpos_T chrpos_high_noclip;
|
36cb64f8 |
int level;
|
5659f096 |
bool plusp; /* Based on first read */
|
36cb64f8 |
};
|
8418591e |
char *
Bampair_acc (Bampair_T this) {
if (this->bamline_low != NULL) {
return this->bamline_low->acc;
} else if (this->bamline_high != NULL) {
return this->bamline_high->acc;
} else {
return (char *) NULL;
}
}
|
95e6ece6 |
Bamline_T
Bampair_bamline_low (Bampair_T this) {
return this->bamline_low;
}
Bamline_T
Bampair_bamline_high (Bampair_T this) {
return this->bamline_high;
}
|
36cb64f8 |
Genomicpos_T
Bampair_chrpos_low (Bampair_T this) {
return this->chrpos_low;
}
Genomicpos_T
Bampair_chrpos_high (Bampair_T this) {
return this->chrpos_high;
}
|
5659f096 |
Genomicpos_T
Bampair_chrpos_low_noclip (Bampair_T this) {
return this->chrpos_low_noclip;
}
Genomicpos_T
Bampair_chrpos_high_noclip (Bampair_T this) {
return this->chrpos_high_noclip;
}
|
36cb64f8 |
int
Bampair_level (Bampair_T this) {
return this->level;
}
|
5659f096 |
bool
Bampair_plusp (Bampair_T this) {
return this->plusp;
}
|
36cb64f8 |
bool
Bampair_good_unique_p (Bampair_T this) {
if (this->bamline_low != NULL && this->bamline_low->good_unique_p == false) {
return false;
}
if (this->bamline_high != NULL && this->bamline_high->good_unique_p == false) {
return false;
}
return true;
}
bool
Bampair_uniquep (Bampair_T this) {
if (this->bamline_low != NULL && this->bamline_low->nhits > 1) {
return false;
}
if (this->bamline_high != NULL && this->bamline_high->nhits > 1) {
return false;
}
return true;
}
bool
Bampair_primaryp (Bampair_T this) {
if (this->bamline_low != NULL && (this->bamline_low->flag & NOT_PRIMARY) != 0) {
return false;
}
if (this->bamline_high != NULL && (this->bamline_high->flag & NOT_PRIMARY) != 0) {
return false;
}
return true;
}
static Bampair_T
Bampair_new (Bamline_T bamline_low, Bamline_T bamline_high) {
Bampair_T new = (Bampair_T) MALLOC(sizeof(*new));
new->bamline_low = bamline_low;
new->bamline_high = bamline_high;
if (bamline_low == NULL) {
new->chrpos_low = bamline_high->chrpos_low;
new->chrpos_high = Bamline_chrpos_high(bamline_high);
|
5659f096 |
new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_high);
new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_high);
if (Bamline_firstend_p(bamline_high) == true) {
if (bamline_high->flag & QUERY_MINUSP) {
new->plusp = false;
} else {
new->plusp = true;
}
} else {
if (bamline_high->flag & QUERY_MINUSP) {
new->plusp = true;
} else {
new->plusp = false;
}
}
|
36cb64f8 |
} else if (bamline_high == NULL) {
new->chrpos_low = bamline_low->chrpos_low;
new->chrpos_high = Bamline_chrpos_high(bamline_low);
|
5659f096 |
new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_low);
new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_low);
if (Bamline_firstend_p(bamline_low) == true) {
if (bamline_low->flag & QUERY_MINUSP) {
new->plusp = false;
} else {
new->plusp = true;
}
} else {
if (bamline_low->flag & QUERY_MINUSP) {
new->plusp = true;
} else {
new->plusp = false;
}
}
|
36cb64f8 |
} else {
new->chrpos_low = bamline_low->chrpos_low;
new->chrpos_high = Bamline_chrpos_high(bamline_high);
|
5659f096 |
new->chrpos_low_noclip = Bamline_chrpos_low_noclip(bamline_low);
new->chrpos_high_noclip = Bamline_chrpos_high_noclip(bamline_high);
if (Bamline_firstend_p(bamline_low) == true && Bamline_firstend_p(bamline_high) == false) {
if (bamline_low->flag & QUERY_MINUSP) {
new->plusp = false;
} else {
new->plusp = true;
}
} else if (Bamline_firstend_p(bamline_low) == false && Bamline_firstend_p(bamline_high) == true) {
if (bamline_high->flag & QUERY_MINUSP) {
new->plusp = false;
} else {
new->plusp = true;
}
} else if (Bamline_firstend_p(bamline_low) == true && Bamline_firstend_p(bamline_high) == true) {
fprintf(stderr,"For bampair %s, both ends are first ends. Flags are %d and %d\n",
bamline_low->acc,bamline_low->flag,bamline_high->flag);
new->plusp = false;
} else {
fprintf(stderr,"For bampair %s , both ends are second ends. Flags are %d and %d\n",
bamline_low->acc,bamline_low->flag,bamline_high->flag);
new->plusp = false;
}
|
36cb64f8 |
}
|
5659f096 |
|
36cb64f8 |
new->level = -1;
|
5659f096 |
|
36cb64f8 |
return new;
}
void
Bampair_free (Bampair_T *old) {
if (*old) {
Bamline_free(&(*old)->bamline_low);
Bamline_free(&(*old)->bamline_high);
FREE(*old);
}
return;
}
void
|
95e6ece6 |
Bampair_print (FILE *fp, Bampair_T this, int quality_score_adj) {
|
36cb64f8 |
if (this->bamline_low != NULL) {
|
95e6ece6 |
Bamline_print(fp,this->bamline_low,this->bamline_low->flag,quality_score_adj);
|
36cb64f8 |
}
if (this->bamline_high != NULL) {
|
95e6ece6 |
Bamline_print(fp,this->bamline_high,this->bamline_high->flag,quality_score_adj);
|
36cb64f8 |
}
return;
}
void
|
5659f096 |
Bampair_details (Uintlist_T *chrpos_first_lows, Uintlist_T *chrpos_first_highs,
Uintlist_T *chrpos_second_lows, Uintlist_T *chrpos_second_highs,
Uintlist_T *chrpos_overlap_lows, Uintlist_T *chrpos_overlap_highs,
|
36cb64f8 |
Uintlist_T *splice_lows, Uintlist_T *splice_highs, Intlist_T *splice_signs,
Bampair_T this) {
|
5659f096 |
Uintlist_T p1, p2, q1, q2;
Chrpos_T low1, high1, low2, high2;
*chrpos_first_lows = (Uintlist_T) NULL;
*chrpos_first_highs = (Uintlist_T) NULL;
*chrpos_second_lows = (Uintlist_T) NULL;
*chrpos_second_highs = (Uintlist_T) NULL;
*chrpos_overlap_lows = (Uintlist_T) NULL;
*chrpos_overlap_highs = (Uintlist_T) NULL;
|
36cb64f8 |
*splice_lows = (Uintlist_T) NULL;
*splice_highs = (Uintlist_T) NULL;
*splice_signs = (Intlist_T) NULL;
if (this->bamline_low != NULL) {
|
5659f096 |
if (1 || Bamline_firstend_p(this->bamline_low) == true) {
Bamline_regions(&(*chrpos_first_lows),&(*chrpos_first_highs),this->bamline_low);
} else {
Bamline_regions(&(*chrpos_second_lows),&(*chrpos_second_highs),this->bamline_low);
}
|
36cb64f8 |
Bamline_splices(&(*splice_lows),&(*splice_highs),&(*splice_signs),this->bamline_low);
}
if (this->bamline_high != NULL) {
|
5659f096 |
if (0 && Bamline_firstend_p(this->bamline_high) == true) {
Bamline_regions(&(*chrpos_first_lows),&(*chrpos_first_highs),this->bamline_high);
} else {
Bamline_regions(&(*chrpos_second_lows),&(*chrpos_second_highs),this->bamline_high);
}
|
36cb64f8 |
Bamline_splices(&(*splice_lows),&(*splice_highs),&(*splice_signs),this->bamline_high);
}
|
5659f096 |
*chrpos_first_lows = Uintlist_reverse(*chrpos_first_lows);
*chrpos_first_highs = Uintlist_reverse(*chrpos_first_highs);
*chrpos_second_lows = Uintlist_reverse(*chrpos_second_lows);
*chrpos_second_highs = Uintlist_reverse(*chrpos_second_highs);
|
36cb64f8 |
*splice_lows = Uintlist_reverse(*splice_lows);
*splice_highs = Uintlist_reverse(*splice_highs);
*splice_signs = Intlist_reverse(*splice_signs);
|
5659f096 |
if (this->bamline_low != NULL && this->bamline_high != NULL) {
p1 = *chrpos_first_lows;
q1 = *chrpos_first_highs;
p2 = *chrpos_second_lows;
q2 = *chrpos_second_highs;
while (p1 != NULL && p2 != NULL) {
low1 = Uintlist_head(p1);
high1 = Uintlist_head(q1);
low2 = Uintlist_head(p2);
high2 = Uintlist_head(q2);
if (low2 >= high1) {
p1 = Uintlist_next(p1); q1 = Uintlist_next(q1); /* Advance first read */
} else if (low1 >= high2) {
p2 = Uintlist_next(p2); q2 = Uintlist_next(q2); /* Advance second read */
} else if (low1 <= low2) {
*chrpos_overlap_lows = Uintlist_push(*chrpos_overlap_lows,low2);
if (high2 <= high1) {
*chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high2);
p2 = Uintlist_next(p2); q2 = Uintlist_next(q2); /* Advance second read */
} else {
*chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high1);
p1 = Uintlist_next(p1); q1 = Uintlist_next(q1); /* Advance first read */
}
} else {
*chrpos_overlap_lows = Uintlist_push(*chrpos_overlap_lows,low1);
if (high1 <= high2) {
*chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high1);
p1 = Uintlist_next(p1); q1 = Uintlist_next(q1); /* Advance first read */
} else {
*chrpos_overlap_highs = Uintlist_push(*chrpos_overlap_highs,high2);
p2 = Uintlist_next(p2); q2 = Uintlist_next(q2); /* Advance second read */
}
}
}
*chrpos_overlap_lows = Uintlist_reverse(*chrpos_overlap_lows);
*chrpos_overlap_highs = Uintlist_reverse(*chrpos_overlap_highs);
}
|
36cb64f8 |
return;
}
List_T
|
4d5cc806 |
Bamread_all_pairs (T bamreader, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
|
95e6ece6 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
bool need_concordant_p) {
List_T lines = NULL, p;
|
36cb64f8 |
Bamline_T bamline_low, bamline;
Table_T bamstore_chrtable;
Uinttable_T bamstore_table;
|
95e6ece6 |
Bamstore_T bamstore;
|
36cb64f8 |
Chrom_T *chroms, chrom;
|
95e6ece6 |
unsigned int *keys;
int nkeys, j;
|
36cb64f8 |
int n, i;
bamstore_chrtable = Table_new(100,Chrom_compare_table,Chrom_hash_table);
|
4d5cc806 |
while ((bamline = Bamread_next_bamline(bamreader,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
|
95e6ece6 |
need_unique_p,need_primary_p,ignore_duplicates_p,
need_concordant_p)) != NULL) {
|
36cb64f8 |
if (Bamline_concordantp(bamline) == false) {
/* Handle now */
if (Bamline_firstend_p(bamline) == true) {
lines = List_push(lines,Bampair_new(/*bamline_low*/bamline,/*bamline_high*/NULL));
} else {
lines = List_push(lines,Bampair_new(/*bamline_low*/NULL,/*bamline_high*/bamline));
}
} else if (Bamline_lowend_p(bamline) == true) {
/* Wait for high end */
Bamstore_add_at_low(bamstore_chrtable,Bamline_chr(bamline),Bamline_chrpos_low(bamline),
bamline);
} else {
/* This is the high end */
bamline_low = Bamstore_get(bamstore_chrtable,Bamline_chr(bamline),Bamline_mate_chrpos_low(bamline),
|
5659f096 |
Bamline_acc(bamline),Bamline_chrpos_low(bamline),bamline->hiti);
|
36cb64f8 |
if (bamline_low == NULL) {
|
8418591e |
#if 0
|
36cb64f8 |
fprintf(stderr,"Hmm...low end not found for %s at %s:%u\n",
Bamline_acc(bamline),Bamline_chr(bamline),Bamline_chrpos_low(bamline));
|
8418591e |
#endif
|
95e6ece6 |
lines = List_push(lines,Bampair_new(/*bamline_low*/NULL,/*bamline_high*/bamline));
|
36cb64f8 |
} else {
lines = List_push(lines,Bampair_new(bamline_low,/*bamline_high*/bamline));
}
}
}
|
95e6ece6 |
|
36cb64f8 |
if ((n = Table_length(bamstore_chrtable)) > 0) {
chroms = (Chrom_T *) Table_keys(bamstore_chrtable,NULL);
for (i = 0; i < n; i++) {
chrom = chroms[i];
bamstore_table = (Uinttable_T) Table_get(bamstore_chrtable,(void *) chrom);
|
95e6ece6 |
/* Handle low ends without high ends */
nkeys = Uinttable_length(bamstore_table);
keys = Uinttable_keys_by_timeindex(bamstore_table);
for (j = 0; j < nkeys; j++) {
if ((bamstore = (Bamstore_T) Uinttable_get(bamstore_table,keys[j])) == NULL) {
/* No bamlines at this chrpos_low */
} else {
for (p = bamstore->bamlines; p != NULL; p = List_next(p)) {
bamline = (Bamline_T) List_head(p);
|
8418591e |
#if 0
|
95e6ece6 |
fprintf(stderr,"Hmm...high end not found for %s at %s:%u\n",
Bamline_acc(bamline),Bamline_chr(bamline),Bamline_chrpos_low(bamline));
|
8418591e |
#endif
|
95e6ece6 |
lines = List_push(lines,Bampair_new(/*bamline_low*/bamline,/*bamline_high*/NULL));
}
/* Bamstore_free(&bamstore); -- This causes errors */
|
c78be696 |
Bamstore_free_nolines(&bamstore);
|
95e6ece6 |
}
}
FREE(keys);
/* Bamstore_table_free(&bamstore_table); */
|
36cb64f8 |
Uinttable_free(&bamstore_table);
}
for (i = 0; i < n; i++) {
Chrom_free(&(chroms[i]));
}
FREE(chroms);
}
Table_free(&bamstore_chrtable);
return List_reverse(lines);
}
static int
level_cmp (const void *x, const void *y) {
Bampair_T a = * (Bampair_T *) x;
Bampair_T b = * (Bampair_T *) y;
if (a->chrpos_low < b->chrpos_low) {
return -1;
} else if (a->chrpos_low > b->chrpos_low) {
return +1;
} else if (a->chrpos_high < b->chrpos_high) {
return -1;
} else if (a->chrpos_high > b->chrpos_high) {
return +1;
#if 0
} else if (a->sign > b->sign) {
return -1;
} else if (a->sign < b->sign) {
return +1;
#endif
} else {
return 0;
}
}
int
Bampair_compute_levels (List_T bampairs, Genomicpos_T mincoord,
Genomicpos_T maxcoord, int max_allowed_levels,
|
4d5cc806 |
double xfactor, Genomicpos_T min_pairlength, bool only_internal_p) {
|
36cb64f8 |
int nbampairs, i;
int maxlevel = -1, level;
bool donep;
Bampair_T *array, bampair;
double *rightmost, xlow;
if ((nbampairs = List_length(bampairs)) > 0) {
array = (Bampair_T *) List_to_array(bampairs,NULL);
qsort(array,nbampairs,sizeof(T),level_cmp);
rightmost = (double *) CALLOC(max_allowed_levels,sizeof(double));
for (i = 0; i < max_allowed_levels; i++) {
rightmost[i] = 0.0;
}
for (i = 0; i < nbampairs; i++) {
bampair = array[i];
if (bampair->chrpos_high - bampair->chrpos_low < min_pairlength) {
bampair->level = -1;
|
4d5cc806 |
} else if (only_internal_p == true && bampair->chrpos_low < mincoord && bampair->chrpos_high > maxcoord) {
bampair->level = -1;
|
36cb64f8 |
} else {
/* Find appropriate level */
level = 0;
donep = false;
while (level < max_allowed_levels && !donep) {
xlow = xfactor * bampair->chrpos_low;
if (level > maxlevel) {
donep = true;
maxlevel = level;
} else if (rightmost[level] < xlow) {
donep = true;
} else {
level++;
}
}
if (level < max_allowed_levels) {
rightmost[level] = xfactor * (bampair->chrpos_high + 10);
#if 0
if (bampair->chrpos_high < mincoord || bampair->chrpos_low > maxcoord) {
/* Skip printing if both ends outside of region */
} else if (bampair->chrpos_low < mincoord || bampair->chrpos_high > maxcoord) {
/* Skip printing if either end outside of region */
} else {
bampair->level = level;
}
#else
bampair->level = level;
#endif
}
}
}
FREE(rightmost);
FREE(array);
}
return maxlevel + 1;
}
|