... | ... |
@@ -1,10 +1,12 @@ |
1 |
-static char rcsid[] = "$Id: bamread.c 198589 2016-10-01 04:22:06Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bamread.c 219284 2019-05-21 01:02:18Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
5 | 5 |
|
6 | 6 |
#include "bamread.h" |
7 |
+#include <stdio.h> |
|
7 | 8 |
#include <stdlib.h> |
9 |
+#include <string.h> /* For strlen, strcpy, strncpy */ |
|
8 | 10 |
#include "assert.h" |
9 | 11 |
#include "mem.h" |
10 | 12 |
#include "complement.h" |
... | ... |
@@ -342,8 +344,6 @@ Bamread_nreads (int *npositions, T this, char *chr, Genomicpos_T chrpos1, Genomi |
342 | 344 |
} |
343 | 345 |
} |
344 | 346 |
Bamread_unlimit_region(this); |
345 |
-#endif |
|
346 |
- |
|
347 | 347 |
|
348 | 348 |
if (max_readlength_lowend == 0) { |
349 | 349 |
npositions_lowend = 0; |
... | ... |
@@ -360,6 +360,7 @@ Bamread_nreads (int *npositions, T this, char *chr, Genomicpos_T chrpos1, Genomi |
360 | 360 |
if ((*npositions = npositions_lowend + npositions_highend) == 0) { |
361 | 361 |
*npositions = 1; |
362 | 362 |
} |
363 |
+#endif |
|
363 | 364 |
|
364 | 365 |
return nreads; |
365 | 366 |
} |
... | ... |
@@ -663,6 +664,7 @@ struct Bamline_T { |
663 | 664 |
char *mate_chr; |
664 | 665 |
Genomicpos_T mate_chrpos_low; |
665 | 666 |
int insert_length; |
667 |
+ int nmult; /* XX flag */ |
|
666 | 668 |
|
667 | 669 |
char *cigar_string; |
668 | 670 |
Intlist_T cigar_types; |
... | ... |
@@ -681,8 +683,10 @@ struct Bamline_T { |
681 | 683 |
unsigned char *aux_end; |
682 | 684 |
|
683 | 685 |
/* Used if we read more than one line at a time */ |
686 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
684 | 687 |
uint8_t *aux_contents; |
685 | 688 |
size_t aux_length; |
689 |
+#endif |
|
686 | 690 |
}; |
687 | 691 |
|
688 | 692 |
|
... | ... |
@@ -909,6 +913,27 @@ Bamline_insert_length (Bamline_T this) { |
909 | 913 |
return this->insert_length; |
910 | 914 |
} |
911 | 915 |
|
916 |
+static int |
|
917 |
+aux_nmult (T this) { |
|
918 |
+#ifndef HAVE_SAMTOOLS_LIB |
|
919 |
+ return 0; |
|
920 |
+#else |
|
921 |
+ uint8_t *s; |
|
922 |
+ |
|
923 |
+ s = bam_aux_get(this->bam,"XX"); |
|
924 |
+ if (s == NULL) { |
|
925 |
+ return 0; |
|
926 |
+ } else { |
|
927 |
+ return bam_aux2i(s); |
|
928 |
+ } |
|
929 |
+#endif |
|
930 |
+} |
|
931 |
+ |
|
932 |
+int |
|
933 |
+Bamline_nmult (Bamline_T this) { |
|
934 |
+ return this->nmult; |
|
935 |
+} |
|
936 |
+ |
|
912 | 937 |
char * |
913 | 938 |
Bamline_cigar_string (Bamline_T this) { |
914 | 939 |
return this->cigar_string; |
... | ... |
@@ -1046,6 +1071,43 @@ Bamline_cigar_querylength (Bamline_T this) { |
1046 | 1071 |
return this->cigar_querylength; |
1047 | 1072 |
} |
1048 | 1073 |
|
1074 |
+int |
|
1075 |
+Bamline_cigar_outer_softclip_length (Bamline_T this) { |
|
1076 |
+ unsigned int outer_softclip_length; |
|
1077 |
+ Intlist_T p; |
|
1078 |
+ Uintlist_T q; |
|
1079 |
+ |
|
1080 |
+ if ((this->flag & PAIRED_READ) == 0U) { |
|
1081 |
+ /* For single-end reads, consider either end and take the larger value */ |
|
1082 |
+ outer_softclip_length = 0; |
|
1083 |
+ for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
|
1084 |
+ if (Intlist_head(p) == 'S') { |
|
1085 |
+ if (Uintlist_head(q) > outer_softclip_length) { |
|
1086 |
+ outer_softclip_length = Uintlist_head(q); |
|
1087 |
+ } |
|
1088 |
+ } |
|
1089 |
+ } |
|
1090 |
+ return (int) outer_softclip_length; |
|
1091 |
+ |
|
1092 |
+ } else if (this->flag & QUERY_MINUSP) { |
|
1093 |
+ /* For paired-end reads, it depends on QUERY_MINUS */ |
|
1094 |
+ if (Intlist_last_value(this->cigar_types) == 'S') { |
|
1095 |
+ return (int) Uintlist_last_value(this->cigar_npositions); |
|
1096 |
+ } else { |
|
1097 |
+ return 0; |
|
1098 |
+ } |
|
1099 |
+ |
|
1100 |
+ } else { |
|
1101 |
+ /* For paired-end reads, it depends on QUERY_MINUS */ |
|
1102 |
+ if (Intlist_head(this->cigar_types) == 'S') { |
|
1103 |
+ return (int) Uintlist_head(this->cigar_npositions); |
|
1104 |
+ } else { |
|
1105 |
+ return 0; |
|
1106 |
+ } |
|
1107 |
+ } |
|
1108 |
+} |
|
1109 |
+ |
|
1110 |
+ |
|
1049 | 1111 |
int |
1050 | 1112 |
Bamline_readlength (Bamline_T this) { |
1051 | 1113 |
return this->readlength; |
... | ... |
@@ -1077,6 +1139,7 @@ Bamline_terminalp (Bamline_T this) { |
1077 | 1139 |
} |
1078 | 1140 |
|
1079 | 1141 |
|
1142 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
1080 | 1143 |
static void |
1081 | 1144 |
aux_print (FILE *fp, unsigned char *s, unsigned char *aux_end) { |
1082 | 1145 |
unsigned char tag1, tag2; |
... | ... |
@@ -1128,8 +1191,10 @@ aux_print (FILE *fp, unsigned char *s, unsigned char *aux_end) { |
1128 | 1191 |
|
1129 | 1192 |
return; |
1130 | 1193 |
} |
1194 |
+#endif |
|
1131 | 1195 |
|
1132 | 1196 |
|
1197 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
1133 | 1198 |
static char * |
1134 | 1199 |
aux_print_skip_md (FILE *fp, unsigned char *s, unsigned char *aux_end) { |
1135 | 1200 |
char *orig_md_string = NULL; |
... | ... |
@@ -1190,6 +1255,55 @@ aux_print_skip_md (FILE *fp, unsigned char *s, unsigned char *aux_end) { |
1190 | 1255 |
|
1191 | 1256 |
return orig_md_string; |
1192 | 1257 |
} |
1258 |
+#endif |
|
1259 |
+ |
|
1260 |
+ |
|
1261 |
+bool |
|
1262 |
+Bamline_exon_overlap_p (Bamline_T this, Genomicpos_T chrstart, Genomicpos_T chrend) { |
|
1263 |
+ Intlist_T p; |
|
1264 |
+ Uintlist_T q; |
|
1265 |
+ Genomicpos_T pos, chrpos_high; |
|
1266 |
+ int type; |
|
1267 |
+ |
|
1268 |
+ chrpos_high = this->chrpos_low; |
|
1269 |
+ for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
|
1270 |
+ pos = chrpos_high; |
|
1271 |
+ if ((type = Intlist_head(p)) == 'S') { |
|
1272 |
+ /* Ignore */ |
|
1273 |
+ |
|
1274 |
+ } else if (type == 'H') { |
|
1275 |
+ /* Ignore */ |
|
1276 |
+ |
|
1277 |
+ } else if (type == 'M') { |
|
1278 |
+ chrpos_high += Uintlist_head(q); |
|
1279 |
+ if (pos <= chrend && chrpos_high - 1 >= chrstart) { |
|
1280 |
+ return true; |
|
1281 |
+ } |
|
1282 |
+ |
|
1283 |
+ } else if (type == 'X') { |
|
1284 |
+ chrpos_high += Uintlist_head(q); |
|
1285 |
+ |
|
1286 |
+ } else if (type == 'N') { |
|
1287 |
+ chrpos_high += Uintlist_head(q); |
|
1288 |
+ |
|
1289 |
+ } else if (type == 'P') { |
|
1290 |
+ /* Do nothing */ |
|
1291 |
+ |
|
1292 |
+ } else if (type == 'I') { |
|
1293 |
+ /* Do nothing */ |
|
1294 |
+ |
|
1295 |
+ } else if (type == 'D') { |
|
1296 |
+ chrpos_high += Uintlist_head(q); |
|
1297 |
+ |
|
1298 |
+ } else { |
|
1299 |
+ fprintf(stderr,"Cannot parse type %c\n",type); |
|
1300 |
+ exit(9); |
|
1301 |
+ } |
|
1302 |
+ debug(printf(" type = %c, chrpos = %u\n",type,chrpos_high)); |
|
1303 |
+ } |
|
1304 |
+ |
|
1305 |
+ return false; |
|
1306 |
+} |
|
1193 | 1307 |
|
1194 | 1308 |
|
1195 | 1309 |
void |
... | ... |
@@ -1230,7 +1344,9 @@ Bamline_print (FILE *fp, Bamline_T this, unsigned int newflag, int quality_score |
1230 | 1344 |
} |
1231 | 1345 |
} |
1232 | 1346 |
|
1347 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
1233 | 1348 |
aux_print(fp,this->aux_start,this->aux_end); |
1349 |
+#endif |
|
1234 | 1350 |
|
1235 | 1351 |
fprintf(fp,"\n"); |
1236 | 1352 |
return; |
... | ... |
@@ -1274,16 +1390,18 @@ Bamline_print_new_cigar (FILE *fp, Bamline_T this, Genomicpos_T chrpos_low, char |
1274 | 1390 |
|
1275 | 1391 |
fprintf(fp,"\tMD:Z:%s",new_md_string); |
1276 | 1392 |
|
1393 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
1277 | 1394 |
orig_md_string = aux_print_skip_md(fp,this->aux_start,this->aux_end); |
1395 |
+#endif |
|
1278 | 1396 |
|
1279 | 1397 |
/* Original alignment */ |
1280 |
- fprintf(fp,"\tXX:i:%u",this->chrpos_low); |
|
1281 |
- fprintf(fp,"\tXY:Z:"); |
|
1398 |
+ fprintf(fp,"\tXP:i:%u",this->chrpos_low); |
|
1399 |
+ fprintf(fp,"\tXQ:Z:"); |
|
1282 | 1400 |
for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
1283 | 1401 |
fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p)); |
1284 | 1402 |
} |
1285 | 1403 |
if (orig_md_string != NULL) { |
1286 |
- fprintf(fp,"\tXZ:Z:%s",orig_md_string); |
|
1404 |
+ fprintf(fp,"\tXR:Z:%s",orig_md_string); |
|
1287 | 1405 |
} |
1288 | 1406 |
|
1289 | 1407 |
fprintf(fp,"\n"); |
... | ... |
@@ -1329,7 +1447,9 @@ Bamline_print_new_mate (FILE *fp, Bamline_T this, char *mate_chr, Genomicpos_T m |
1329 | 1447 |
fprintf(fp,"%s",this->quality_string); |
1330 | 1448 |
} |
1331 | 1449 |
|
1450 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
1332 | 1451 |
aux_print(fp,this->aux_start,this->aux_end); |
1452 |
+#endif |
|
1333 | 1453 |
|
1334 | 1454 |
fprintf(fp,"\n"); |
1335 | 1455 |
return; |
... | ... |
@@ -1896,9 +2016,11 @@ Bamline_free (Bamline_T *old) { |
1896 | 2016 |
if ((*old)->hardclip != NULL) { |
1897 | 2017 |
FREE((*old)->hardclip); |
1898 | 2018 |
} |
2019 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
1899 | 2020 |
if ((*old)->aux_contents != NULL) { |
1900 | 2021 |
FREE((*old)->aux_contents); |
1901 | 2022 |
} |
2023 |
+#endif |
|
1902 | 2024 |
FREE(*old); |
1903 | 2025 |
} |
1904 | 2026 |
|
... | ... |
@@ -1909,7 +2031,7 @@ Bamline_free (Bamline_T *old) { |
1909 | 2031 |
static Bamline_T |
1910 | 2032 |
Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique_p, int mapq, |
1911 | 2033 |
int nm, char splice_strand, char *chr, Genomicpos_T chrpos_low, |
1912 |
- char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length, |
|
2034 |
+ char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length, int nmult, |
|
1913 | 2035 |
Intlist_T cigar_types, Uintlist_T cigar_npositions, int cigar_querylength, int readlength, |
1914 | 2036 |
char *read, char *quality_string, char *hardclip, char *hardclip_quality, bool terminalp, |
1915 | 2037 |
unsigned char *aux_start, unsigned char *aux_end, bool copy_aux_contents_p) { |
... | ... |
@@ -1944,6 +2066,7 @@ Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique |
1944 | 2066 |
new->mate_chrpos_low = mate_chrpos_low; |
1945 | 2067 |
|
1946 | 2068 |
new->insert_length = insert_length; |
2069 |
+ new->nmult = nmult; |
|
1947 | 2070 |
|
1948 | 2071 |
new->cigar_string = compute_cigar_string(cigar_types,cigar_npositions); |
1949 | 2072 |
new->cigar_types = cigar_types; /* not copying */ |
... | ... |
@@ -1978,6 +2101,7 @@ Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique |
1978 | 2101 |
|
1979 | 2102 |
new->terminalp = terminalp; |
1980 | 2103 |
|
2104 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
1981 | 2105 |
if (copy_aux_contents_p == true) { |
1982 | 2106 |
new->aux_length = aux_end - aux_start; |
1983 | 2107 |
if (new->aux_length == 0) { |
... | ... |
@@ -1996,6 +2120,7 @@ Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique |
1996 | 2120 |
new->aux_start = aux_start; |
1997 | 2121 |
new->aux_end = aux_end; |
1998 | 2122 |
} |
2123 |
+#endif |
|
1999 | 2124 |
|
2000 | 2125 |
return new; |
2001 | 2126 |
} |
... | ... |
@@ -2006,7 +2131,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
2006 | 2131 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
2007 | 2132 |
bool need_concordant_p) { |
2008 | 2133 |
char *acc, *chr, *mate_chr, splice_strand; |
2009 |
- int nm; |
|
2134 |
+ int nm, nmult; |
|
2010 | 2135 |
unsigned int flag; |
2011 | 2136 |
int hiti, nhits; |
2012 | 2137 |
bool good_unique_p; |
... | ... |
@@ -2034,7 +2159,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
2034 | 2159 |
&cigar_types,&cigarlengths,&cigar_querylength, |
2035 | 2160 |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality, |
2036 | 2161 |
&read_group,&terminalp); |
2037 |
- debug1(fprintf(stderr,"Got line %u\n",chrpos_low)); |
|
2162 |
+ debug1(fprintf(stderr,"Got line %s at %u\n",acc,chrpos_low)); |
|
2038 | 2163 |
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) { |
2039 | 2164 |
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group)); |
2040 | 2165 |
Intlist_free(&cigar_types); |
... | ... |
@@ -2074,10 +2199,11 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
2074 | 2199 |
debug1(fprintf(stderr,"Success\n")); |
2075 | 2200 |
hiti = aux_hiti(this); |
2076 | 2201 |
nm = aux_nm(this); |
2202 |
+ nmult = aux_nmult(this); |
|
2077 | 2203 |
splice_strand = aux_splice_strand(this); |
2078 | 2204 |
good_unique_p = aux_good_unique_p(this,good_unique_mapq); |
2079 | 2205 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low, |
2080 |
- mate_chr,mate_chrpos_low,insert_length, |
|
2206 |
+ mate_chr,mate_chrpos_low,insert_length,nmult, |
|
2081 | 2207 |
cigar_types,cigarlengths,cigar_querylength, |
2082 | 2208 |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp, |
2083 | 2209 |
/*aux_start*/bam1_aux(this->bam), |
... | ... |
@@ -2101,10 +2227,11 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
2101 | 2227 |
(need_concordant_p == false || concordantp(flag) == true)) { |
2102 | 2228 |
hiti = aux_hiti(this); |
2103 | 2229 |
nm = aux_nm(this); |
2230 |
+ nmult = aux_nmult(this); |
|
2104 | 2231 |
splice_strand = aux_splice_strand(this); |
2105 | 2232 |
good_unique_p = aux_good_unique_p(this,good_unique_mapq); |
2106 | 2233 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low, |
2107 |
- mate_chr,mate_chrpos_low,insert_length, |
|
2234 |
+ mate_chr,mate_chrpos_low,insert_length,nmult, |
|
2108 | 2235 |
cigar_types,cigarlengths,cigar_querylength, |
2109 | 2236 |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp, |
2110 | 2237 |
/*aux_start*/bam1_aux(this->bam), |
... | ... |
@@ -2126,7 +2253,7 @@ Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_map |
2126 | 2253 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
2127 | 2254 |
bool need_concordant_p, bool exclude_perfect_p) { |
2128 | 2255 |
char *acc, *chr, *mate_chr, splice_strand; |
2129 |
- int nm; |
|
2256 |
+ int nm, nmult; |
|
2130 | 2257 |
unsigned int flag; |
2131 | 2258 |
int hiti, nhits; |
2132 | 2259 |
bool good_unique_p; |
... | ... |
@@ -2158,7 +2285,7 @@ Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_map |
2158 | 2285 |
&cigar_types,&cigarlengths,&cigar_querylength, |
2159 | 2286 |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group, |
2160 | 2287 |
&terminalp); |
2161 |
- debug1(fprintf(stderr,"Got line %u\n",chrpos_low)); |
|
2288 |
+ debug1(fprintf(stderr,"Got line %s at %u\n",acc,chrpos_low)); |
|
2162 | 2289 |
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) { |
2163 | 2290 |
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group)); |
2164 | 2291 |
Intlist_free(&cigar_types); |
... | ... |
@@ -2198,10 +2325,11 @@ Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_map |
2198 | 2325 |
debug1(fprintf(stderr,"Success\n")); |
2199 | 2326 |
hiti = aux_hiti(this); |
2200 | 2327 |
nm = aux_nm(this); |
2328 |
+ nmult = aux_nmult(this); |
|
2201 | 2329 |
splice_strand = aux_splice_strand(this); |
2202 | 2330 |
good_unique_p = aux_good_unique_p(this,good_unique_mapq); |
2203 | 2331 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low, |
2204 |
- mate_chr,mate_chrpos_low,insert_length, |
|
2332 |
+ mate_chr,mate_chrpos_low,insert_length,nmult, |
|
2205 | 2333 |
cigar_types,cigarlengths,cigar_querylength, |
2206 | 2334 |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp, |
2207 | 2335 |
/*aux_start*/bam1_aux(this->bam), |
... | ... |
@@ -2229,10 +2357,11 @@ Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_map |
2229 | 2357 |
(need_concordant_p == false || concordantp(flag) == true)) { |
2230 | 2358 |
hiti = aux_hiti(this); |
2231 | 2359 |
nm = aux_nm(this); |
2360 |
+ nmult = aux_nmult(this); |
|
2232 | 2361 |
splice_strand = aux_splice_strand(this); |
2233 | 2362 |
good_unique_p = aux_good_unique_p(this,good_unique_mapq); |
2234 | 2363 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low, |
2235 |
- mate_chr,mate_chrpos_low,insert_length, |
|
2364 |
+ mate_chr,mate_chrpos_low,insert_length,nmult, |
|
2236 | 2365 |
cigar_types,cigarlengths,cigar_querylength, |
2237 | 2366 |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp, |
2238 | 2367 |
/*aux_start*/bam1_aux(this->bam), |
... | ... |
@@ -2258,7 +2387,7 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq, |
2258 | 2387 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
2259 | 2388 |
bool need_concordant_p) { |
2260 | 2389 |
char *acc, *chr, *mate_chr, splice_strand; |
2261 |
- int nm; |
|
2390 |
+ int nm, nmult; |
|
2262 | 2391 |
unsigned int flag; |
2263 | 2392 |
int hiti, nhits; |
2264 | 2393 |
bool good_unique_p; |
... | ... |
@@ -2289,7 +2418,7 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq, |
2289 | 2418 |
&cigar_types,&cigarlengths,&cigar_querylength, |
2290 | 2419 |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group, |
2291 | 2420 |
&terminalp); |
2292 |
- debug1(fprintf(stderr,"Got line %u\n",chrpos_low)); |
|
2421 |
+ debug1(fprintf(stderr,"Got line %s at %u\n",acc,chrpos_low)); |
|
2293 | 2422 |
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) { |
2294 | 2423 |
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group)); |
2295 | 2424 |
Intlist_free(&cigar_types); |
... | ... |
@@ -2329,10 +2458,11 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq, |
2329 | 2458 |
debug1(fprintf(stderr,"Success\n")); |
2330 | 2459 |
hiti = aux_hiti(this); |
2331 | 2460 |
nm = aux_nm(this); |
2461 |
+ nmult = aux_nmult(this); |
|
2332 | 2462 |
splice_strand = aux_splice_strand(this); |
2333 | 2463 |
good_unique_p = aux_good_unique_p(this,good_unique_mapq); |
2334 | 2464 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low, |
2335 |
- mate_chr,mate_chrpos_low,insert_length, |
|
2465 |
+ mate_chr,mate_chrpos_low,insert_length,nmult, |
|
2336 | 2466 |
cigar_types,cigarlengths,cigar_querylength, |
2337 | 2467 |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp, |
2338 | 2468 |
/*aux_start*/bam1_aux(this->bam), |
... | ... |
@@ -2361,10 +2491,11 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq, |
2361 | 2491 |
(need_concordant_p == false || concordantp(flag) == true)) { |
2362 | 2492 |
hiti = aux_hiti(this); |
2363 | 2493 |
nm = aux_nm(this); |
2494 |
+ nmult = aux_nmult(this); |
|
2364 | 2495 |
splice_strand = aux_splice_strand(this); |
2365 | 2496 |
good_unique_p = aux_good_unique_p(this,good_unique_mapq); |
2366 | 2497 |
return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low, |
2367 |
- mate_chr,mate_chrpos_low,insert_length, |
|
2498 |
+ mate_chr,mate_chrpos_low,insert_length,nmult, |
|
2368 | 2499 |
cigar_types,cigarlengths,cigar_querylength, |
2369 | 2500 |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp, |
2370 | 2501 |
/*aux_start*/bam1_aux(this->bam), |
... | ... |
@@ -2548,7 +2679,7 @@ Bamread_block (int **nlines, Genomicpos_T chrstart, Genomicpos_T chrend, |
2548 | 2679 |
Bamline_T |
2549 | 2680 |
Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *desired_acc) { |
2550 | 2681 |
char *acc, *chr, *mate_chr, splice_strand; |
2551 |
- int nm; |
|
2682 |
+ int nm, nmult; |
|
2552 | 2683 |
unsigned int flag; |
2553 | 2684 |
int hiti, nhits; |
2554 | 2685 |
int mapq; |
... | ... |
@@ -2564,6 +2695,7 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d |
2564 | 2695 |
char *read_group; |
2565 | 2696 |
bool terminalp; |
2566 | 2697 |
|
2698 |
+#ifdef HAVE_SAMTOOLS_LIB |
|
2567 | 2699 |
Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos); |
2568 | 2700 |
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) { |
2569 | 2701 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
... | ... |
@@ -2571,14 +2703,15 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d |
2571 | 2703 |
&cigar_types,&cigarlengths,&cigar_querylength, |
2572 | 2704 |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group, |
2573 | 2705 |
&terminalp); |
2574 |
- debug1(fprintf(stderr,"Got line %u\n",chrpos_low)); |
|
2706 |
+ debug1(fprintf(stderr,"Got line %s at %u\n",acc,chrpos_low)); |
|
2575 | 2707 |
hiti = aux_hiti(this); |
2576 | 2708 |
nm = aux_nm(this); |
2709 |
+ nmult = aux_nmult(this); |
|
2577 | 2710 |
splice_strand = aux_splice_strand(this); |
2578 | 2711 |
if (!strcmp(acc,desired_acc) && chrpos_low == desired_chrpos) { |
2579 | 2712 |
Bamread_unlimit_region(this); |
2580 | 2713 |
return Bamline_new(acc,flag,hiti,nhits,/*good_unique_p*/true,mapq,nm,splice_strand,chr,chrpos_low, |
2581 |
- mate_chr,mate_chrpos_low,insert_length, |
|
2714 |
+ mate_chr,mate_chrpos_low,insert_length,nmult, |
|
2582 | 2715 |
cigar_types,cigarlengths,cigar_querylength, |
2583 | 2716 |
readlength,read,quality_string,hardclip,hardclip_quality,terminalp, |
2584 | 2717 |
/*aux_start*/bam1_aux(this->bam), |
... | ... |
@@ -2588,6 +2721,8 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d |
2588 | 2721 |
} |
2589 | 2722 |
|
2590 | 2723 |
Bamread_unlimit_region(this); |
2724 |
+#endif |
|
2725 |
+ |
|
2591 | 2726 |
return (Bamline_T) NULL; |
2592 | 2727 |
} |
2593 | 2728 |
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@123972 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bamread.c 198589 2016-10-01 04:22:06Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
... | ... |
@@ -28,7 +28,6 @@ static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $"; |
28 | 28 |
#endif |
29 | 29 |
|
30 | 30 |
|
31 |
- |
|
32 | 31 |
#ifdef HAVE_SAMTOOLS_LIB |
33 | 32 |
#include "bam.h" |
34 | 33 |
typedef uint8_t *BAM_Sequence_T; |
... | ... |
@@ -257,7 +256,7 @@ Bamread_nreads (int *npositions, T this, char *chr, Genomicpos_T chrpos1, Genomi |
257 | 256 |
|
258 | 257 |
cigar = bam1_cigar(this->bam); |
259 | 258 |
for (i = 0; i < this->core->n_cigar; i++) { |
260 |
- type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]); |
|
259 |
+ type = (int) ("MIDNSHP=X"[cigar[i] & BAM_CIGAR_MASK]); |
|
261 | 260 |
length = cigar[i] >> BAM_CIGAR_SHIFT; |
262 | 261 |
|
263 | 262 |
if (type == 'S') { |
... | ... |
@@ -456,6 +455,8 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom |
456 | 455 |
*read_group = bam_aux2Z(ptr); |
457 | 456 |
} |
458 | 457 |
|
458 |
+#if 0 |
|
459 |
+ /* XG is used as XG:i in other software */ |
|
459 | 460 |
ptr = bam_aux_get(this->bam,"XG"); |
460 | 461 |
if (ptr == NULL) { |
461 | 462 |
*terminalp = false; |
... | ... |
@@ -467,6 +468,10 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom |
467 | 468 |
*terminalp = true; |
468 | 469 |
} |
469 | 470 |
} |
471 |
+#else |
|
472 |
+ *terminalp = false; |
|
473 |
+#endif |
|
474 |
+ |
|
470 | 475 |
|
471 | 476 |
/* Cigar */ |
472 | 477 |
*cigarlength = 0; |
... | ... |
@@ -478,10 +483,10 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom |
478 | 483 |
length = cigar[i] >> BAM_CIGAR_SHIFT; |
479 | 484 |
*cigarlengths = Uintlist_push(*cigarlengths,length); |
480 | 485 |
|
481 |
- type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]); |
|
486 |
+ type = (int) ("MIDNSHP=X"[cigar[i] & BAM_CIGAR_MASK]); |
|
482 | 487 |
*cigartypes = Intlist_push(*cigartypes,type); |
483 | 488 |
|
484 |
- if (type == 'S' || type == 'M' || type == 'I') { |
|
489 |
+ if (type == 'S' || type == 'M' || type == 'I' || type == 'X') { |
|
485 | 490 |
*cigarlength += (int) length; |
486 | 491 |
} else if (type == 'H') { |
487 | 492 |
*cigarlength += (int) length; |
... | ... |
@@ -516,7 +521,7 @@ has_indel_p (T this) { |
516 | 521 |
cigar = bam1_cigar(this->bam); |
517 | 522 |
for (i = 0; i < this->core->n_cigar; i++) { |
518 | 523 |
/* length = cigar[i] >> BAM_CIGAR_SHIFT; */ |
519 |
- type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]); |
|
524 |
+ type = (int) ("MIDNSHP=X"[cigar[i] & BAM_CIGAR_MASK]); |
|
520 | 525 |
if (type == 'I' || type == 'D') { |
521 | 526 |
return true; |
522 | 527 |
} |
... | ... |
@@ -541,7 +546,7 @@ perfect_match_p (T this) { |
541 | 546 |
cigar = bam1_cigar(this->bam); |
542 | 547 |
for (i = 0; i < this->core->n_cigar; i++) { |
543 | 548 |
/* length = cigar[i] >> BAM_CIGAR_SHIFT; */ |
544 |
- type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]); |
|
549 |
+ type = (int) ("MIDNSHP=X"[cigar[i] & BAM_CIGAR_MASK]); |
|
545 | 550 |
if (type == 'S') { |
546 | 551 |
return false; |
547 | 552 |
|
... | ... |
@@ -551,6 +556,9 @@ perfect_match_p (T this) { |
551 | 556 |
} else if (type == 'M') { |
552 | 557 |
/* acceptable */ |
553 | 558 |
|
559 |
+ } else if (type == 'X') { |
|
560 |
+ /* acceptable */ |
|
561 |
+ |
|
554 | 562 |
} else if (type == 'N') { |
555 | 563 |
/* acceptable */ |
556 | 564 |
|
... | ... |
@@ -655,9 +663,12 @@ struct Bamline_T { |
655 | 663 |
char *mate_chr; |
656 | 664 |
Genomicpos_T mate_chrpos_low; |
657 | 665 |
int insert_length; |
666 |
+ |
|
667 |
+ char *cigar_string; |
|
658 | 668 |
Intlist_T cigar_types; |
659 | 669 |
Uintlist_T cigar_npositions; |
660 | 670 |
int cigar_querylength; |
671 |
+ |
|
661 | 672 |
int readlength; |
662 | 673 |
char *read; |
663 | 674 |
char *quality_string; |
... | ... |
@@ -685,6 +696,16 @@ Bamline_flag (Bamline_T this) { |
685 | 696 |
return this->flag; |
686 | 697 |
} |
687 | 698 |
|
699 |
+bool |
|
700 |
+Bamline_plusp (Bamline_T this) { |
|
701 |
+ if (this->flag & QUERY_MINUSP) { |
|
702 |
+ return false; |
|
703 |
+ } else { |
|
704 |
+ return true; |
|
705 |
+ } |
|
706 |
+} |
|
707 |
+ |
|
708 |
+ |
|
688 | 709 |
static bool |
689 | 710 |
concordantp (unsigned int flag) { |
690 | 711 |
if (flag & QUERY_UNMAPPED) { |
... | ... |
@@ -780,6 +801,9 @@ Bamline_perfect_match_p (Bamline_T this) { |
780 | 801 |
} else if (type == 'M') { |
781 | 802 |
/* acceptable */ |
782 | 803 |
|
804 |
+ } else if (type == 'X') { |
|
805 |
+ /* acceptable */ |
|
806 |
+ |
|
783 | 807 |
} else if (type == 'N') { |
784 | 808 |
/* acceptable */ |
785 | 809 |
|
... | ... |
@@ -829,6 +853,21 @@ Bamline_perfect_match_p (Bamline_T this) { |
829 | 853 |
} |
830 | 854 |
|
831 | 855 |
|
856 |
+bool |
|
857 |
+Bamline_microinv_p (Bamline_T this) { |
|
858 |
+ Intlist_T p; |
|
859 |
+ int type; |
|
860 |
+ |
|
861 |
+ for (p = this->cigar_types; p != NULL; p = Intlist_next(p)) { |
|
862 |
+ type = Intlist_head(p); |
|
863 |
+ if (type == 'X') { |
|
864 |
+ return true; |
|
865 |
+ } |
|
866 |
+ } |
|
867 |
+ |
|
868 |
+ return false; |
|
869 |
+} |
|
870 |
+ |
|
832 | 871 |
int |
833 | 872 |
Bamline_mapq (Bamline_T this) { |
834 | 873 |
return this->mapq; |
... | ... |
@@ -870,12 +909,16 @@ Bamline_insert_length (Bamline_T this) { |
870 | 909 |
return this->insert_length; |
871 | 910 |
} |
872 | 911 |
|
912 |
+char * |
|
913 |
+Bamline_cigar_string (Bamline_T this) { |
|
914 |
+ return this->cigar_string; |
|
915 |
+} |
|
916 |
+ |
|
873 | 917 |
Intlist_T |
874 | 918 |
Bamline_cigar_types (Bamline_T this) { |
875 | 919 |
return this->cigar_types; |
876 | 920 |
} |
877 | 921 |
|
878 |
- |
|
879 | 922 |
Uintlist_T |
880 | 923 |
Bamline_cigar_npositions (Bamline_T this) { |
881 | 924 |
return this->cigar_npositions; |
... | ... |
@@ -906,6 +949,10 @@ Bamline_diffcigar (int *min_overhang, Uintlist_T *npositions, Uintlist_T *chrpos |
906 | 949 |
querypos += Uintlist_head(q); |
907 | 950 |
chrpos += Uintlist_head(q); |
908 | 951 |
|
952 |
+ } else if (type == 'X') { |
|
953 |
+ querypos += Uintlist_head(q); |
|
954 |
+ chrpos += Uintlist_head(q); |
|
955 |
+ |
|
909 | 956 |
} else if (type == 'N') { |
910 | 957 |
types = Intlist_push(types,type); |
911 | 958 |
*npositions = Uintlist_push(*npositions,Uintlist_head(q)); |
... | ... |
@@ -971,21 +1018,21 @@ Bamread_print_cigar (FILE *fp, Bamline_T this) { |
971 | 1018 |
return; |
972 | 1019 |
} |
973 | 1020 |
|
974 |
-char * |
|
975 |
-Bamline_cigar_string (Bamline_T this) { |
|
1021 |
+static char * |
|
1022 |
+compute_cigar_string (Intlist_T cigar_types, Uintlist_T cigar_npositions) { |
|
976 | 1023 |
char *string, number[12]; |
977 | 1024 |
Intlist_T p; |
978 | 1025 |
Uintlist_T q; |
979 | 1026 |
int string_length = 0; |
980 | 1027 |
|
981 |
- for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
|
1028 |
+ for (p = cigar_types, q = cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
|
982 | 1029 |
sprintf(number,"%u",Uintlist_head(q)); |
983 | 1030 |
string_length += strlen(number) + 1; |
984 | 1031 |
} |
985 | 1032 |
string = (char *) CALLOC(string_length+1,sizeof(char)); |
986 | 1033 |
|
987 | 1034 |
string_length = 0; |
988 |
- for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
|
1035 |
+ for (p = cigar_types, q = cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
|
989 | 1036 |
sprintf(number,"%u",Uintlist_head(q)); |
990 | 1037 |
sprintf(&(string[string_length]),"%u%c",Uintlist_head(q),Intlist_head(p)); |
991 | 1038 |
string_length += strlen(number) + 1; |
... | ... |
@@ -1528,6 +1575,9 @@ Bamline_strand (Bamline_T this, Genome_T genome, IIT_T chromosome_iit) { |
1528 | 1575 |
} else if (type == 'M') { |
1529 | 1576 |
chrpos += Uintlist_head(q); |
1530 | 1577 |
|
1578 |
+ } else if (type == 'X') { |
|
1579 |
+ chrpos += Uintlist_head(q); |
|
1580 |
+ |
|
1531 | 1581 |
} else if (type == 'N') { |
1532 | 1582 |
firstpos = chrpos - 1U; |
1533 | 1583 |
chrpos += Uintlist_head(q); |
... | ... |
@@ -1581,6 +1631,9 @@ Bamline_chrpos_high (Bamline_T this) { |
1581 | 1631 |
} else if (type == 'M') { |
1582 | 1632 |
chrpos_high += Uintlist_head(q); |
1583 | 1633 |
|
1634 |
+ } else if (type == 'X') { |
|
1635 |
+ chrpos_high += Uintlist_head(q); |
|
1636 |
+ |
|
1584 | 1637 |
} else if (type == 'N') { |
1585 | 1638 |
chrpos_high += Uintlist_head(q); |
1586 | 1639 |
|
... | ... |
@@ -1623,6 +1676,9 @@ Bamline_chrpos_high_noclip (Bamline_T this) { |
1623 | 1676 |
} else if (type == 'M') { |
1624 | 1677 |
chrpos_high += Uintlist_head(q); |
1625 | 1678 |
|
1679 |
+ } else if (type == 'X') { |
|
1680 |
+ chrpos_high += Uintlist_head(q); |
|
1681 |
+ |
|
1626 | 1682 |
} else if (type == 'N') { |
1627 | 1683 |
chrpos_high += Uintlist_head(q); |
1628 | 1684 |
|
... | ... |
@@ -1731,6 +1787,11 @@ Bamline_regions (Uintlist_T *chrpos_lows, Uintlist_T *chrpos_highs, Bamline_T th |
1731 | 1787 |
position += Uintlist_head(q); |
1732 | 1788 |
*chrpos_highs = Uintlist_push(*chrpos_highs,position); |
1733 | 1789 |
|
1790 |
+ } else if (type == 'X') { |
|
1791 |
+ *chrpos_lows = Uintlist_push(*chrpos_lows,position); |
|
1792 |
+ position += Uintlist_head(q); |
|
1793 |
+ *chrpos_highs = Uintlist_push(*chrpos_highs,position); |
|
1794 |
+ |
|
1734 | 1795 |
} else if (type == 'N') { |
1735 | 1796 |
position += Uintlist_head(q); |
1736 | 1797 |
|
... | ... |
@@ -1773,6 +1834,9 @@ Bamline_splices (Uintlist_T *splice_lows, Uintlist_T *splice_highs, Intlist_T *s |
1773 | 1834 |
} else if (type == 'M') { |
1774 | 1835 |
position += Uintlist_head(q); |
1775 | 1836 |
|
1837 |
+ } else if (type == 'X') { |
|
1838 |
+ position += Uintlist_head(q); |
|
1839 |
+ |
|
1776 | 1840 |
} else if (type == 'N') { |
1777 | 1841 |
*splice_lows = Uintlist_push(*splice_lows,position); |
1778 | 1842 |
position += Uintlist_head(q); |
... | ... |
@@ -1819,6 +1883,7 @@ Bamline_free (Bamline_T *old) { |
1819 | 1883 |
if ((*old)->mate_chr != NULL) { |
1820 | 1884 |
FREE((*old)->mate_chr); |
1821 | 1885 |
} |
1886 |
+ FREE((*old)->cigar_string); |
|
1822 | 1887 |
Intlist_free(&(*old)->cigar_types); |
1823 | 1888 |
Uintlist_free(&(*old)->cigar_npositions); |
1824 | 1889 |
FREE((*old)->read); |
... | ... |
@@ -1880,6 +1945,7 @@ Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique |
1880 | 1945 |
|
1881 | 1946 |
new->insert_length = insert_length; |
1882 | 1947 |
|
1948 |
+ new->cigar_string = compute_cigar_string(cigar_types,cigar_npositions); |
|
1883 | 1949 |
new->cigar_types = cigar_types; /* not copying */ |
1884 | 1950 |
new->cigar_npositions = cigar_npositions; /* not copying */ |
1885 | 1951 |
|
... | ... |
@@ -1961,14 +2027,14 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
1961 | 2027 |
return (Bamline_T) NULL; |
1962 | 2028 |
#else |
1963 | 2029 |
if (this->region_limited_p == 1) { |
1964 |
- debug1(fprintf(stderr,"Region limited\n")); |
|
2030 |
+ /* debug1(fprintf(stderr,"Region limited\n")); */ |
|
1965 | 2031 |
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) { |
1966 |
- debug1(fprintf(stderr,"Got line\n")); |
|
1967 | 2032 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
1968 | 2033 |
&mate_chr,&mate_chrpos_low,&insert_length, |
1969 | 2034 |
&cigar_types,&cigarlengths,&cigar_querylength, |
1970 | 2035 |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality, |
1971 | 2036 |
&read_group,&terminalp); |
2037 |
+ debug1(fprintf(stderr,"Got line %u\n",chrpos_low)); |
|
1972 | 2038 |
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) { |
1973 | 2039 |
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group)); |
1974 | 2040 |
Intlist_free(&cigar_types); |
... | ... |
@@ -2055,10 +2121,10 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
2055 | 2121 |
#endif |
2056 | 2122 |
} |
2057 | 2123 |
|
2058 |
-Bamline_T |
|
2059 |
-Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
2060 |
- bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
|
2061 |
- bool need_concordant_p) { |
|
2124 |
+static Bamline_T |
|
2125 |
+Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
2126 |
+ bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
|
2127 |
+ bool need_concordant_p, bool exclude_perfect_p) { |
|
2062 | 2128 |
char *acc, *chr, *mate_chr, splice_strand; |
2063 | 2129 |
int nm; |
2064 | 2130 |
unsigned int flag; |
... | ... |
@@ -2081,17 +2147,18 @@ Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int m |
2081 | 2147 |
return (Bamline_T) NULL; |
2082 | 2148 |
#else |
2083 | 2149 |
if (this->region_limited_p == 1) { |
2084 |
- debug1(fprintf(stderr,"Region limited\n")); |
|
2150 |
+ /* debug1(fprintf(stderr,"Region limited\n")); */ |
|
2085 | 2151 |
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) { |
2086 |
- if (perfect_match_p(this) == true) { |
|
2152 |
+ if (exclude_perfect_p == true && perfect_match_p(this) == true) { |
|
2087 | 2153 |
/* Skip */ |
2154 |
+ debug1(fprintf(stderr,"Skipping line\n")); |
|
2088 | 2155 |
} else { |
2089 |
- debug1(fprintf(stderr,"Got line\n")); |
|
2090 | 2156 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
2091 | 2157 |
&mate_chr,&mate_chrpos_low,&insert_length, |
2092 | 2158 |
&cigar_types,&cigarlengths,&cigar_querylength, |
2093 | 2159 |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group, |
2094 | 2160 |
&terminalp); |
2161 |
+ debug1(fprintf(stderr,"Got line %u\n",chrpos_low)); |
|
2095 | 2162 |
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) { |
2096 | 2163 |
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group)); |
2097 | 2164 |
Intlist_free(&cigar_types); |
... | ... |
@@ -2147,7 +2214,7 @@ Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int m |
2147 | 2214 |
|
2148 | 2215 |
} else { |
2149 | 2216 |
while (bam_read1(this->fp,this->bam) >= 0) { |
2150 |
- if (perfect_match_p(this) == true) { |
|
2217 |
+ if (exclude_perfect_p == true && perfect_match_p(this) == true) { |
|
2151 | 2218 |
/* Skip */ |
2152 | 2219 |
} else { |
2153 | 2220 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
... | ... |
@@ -2212,17 +2279,17 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq, |
2212 | 2279 |
return (Bamline_T) NULL; |
2213 | 2280 |
#else |
2214 | 2281 |
if (this->region_limited_p == 1) { |
2215 |
- debug1(fprintf(stderr,"Region limited\n")); |
|
2282 |
+ /* debug1(fprintf(stderr,"Region limited\n")); */ |
|
2216 | 2283 |
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) { |
2217 | 2284 |
if (has_indel_p(this) == false) { |
2218 | 2285 |
/* Skip */ |
2219 | 2286 |
} else { |
2220 |
- debug1(fprintf(stderr,"Got line\n")); |
|
2221 | 2287 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
2222 | 2288 |
&mate_chr,&mate_chrpos_low,&insert_length, |
2223 | 2289 |
&cigar_types,&cigarlengths,&cigar_querylength, |
2224 | 2290 |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group, |
2225 | 2291 |
&terminalp); |
2292 |
+ debug1(fprintf(stderr,"Got line %u\n",chrpos_low)); |
|
2226 | 2293 |
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) { |
2227 | 2294 |
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group)); |
2228 | 2295 |
Intlist_free(&cigar_types); |
... | ... |
@@ -2356,7 +2423,11 @@ bamline_read_cmp (const void *x, const void *y) { |
2356 | 2423 |
} |
2357 | 2424 |
#else |
2358 | 2425 |
|
2359 |
- return strcmp(a->read,b->read); |
|
2426 |
+ if ((cmp = strcmp(a->read,b->read)) != 0) { |
|
2427 |
+ return cmp; |
|
2428 |
+ } else { |
|
2429 |
+ return strcmp(a->cigar_string,b->cigar_string); |
|
2430 |
+ } |
|
2360 | 2431 |
|
2361 | 2432 |
#endif |
2362 | 2433 |
|
... | ... |
@@ -2380,9 +2451,9 @@ Bamread_next_bamline_set (int *nlines, Bamline_T *prev_bamline, |
2380 | 2451 |
set_chrpos_low = Bamline_chrpos_low(*prev_bamline); |
2381 | 2452 |
} |
2382 | 2453 |
|
2383 |
- while ((bamline = Bamread_next_imperfect_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits, |
|
2384 |
- need_unique_p,need_primary_p,ignore_duplicates_p, |
|
2385 |
- need_concordant_p)) != NULL) { |
|
2454 |
+ while ((bamline = Bamread_next_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits, |
|
2455 |
+ need_unique_p,need_primary_p,ignore_duplicates_p, |
|
2456 |
+ need_concordant_p,/*exclude_perfect_p*/false)) != NULL) { |
|
2386 | 2457 |
#if 0 |
2387 | 2458 |
if (Bamline_perfect_match_p(bamline) == true) { |
2388 | 2459 |
/* Exclude all perfect matches to the reference */ |
... | ... |
@@ -2416,12 +2487,13 @@ Bamread_next_bamline_set (int *nlines, Bamline_T *prev_bamline, |
2416 | 2487 |
qsort(bamlines,*nlines,sizeof(Bamline_T),bamline_read_cmp); |
2417 | 2488 |
} |
2418 | 2489 |
List_free(&bamline_list); |
2419 |
- *prev_bamline = bamline; |
|
2490 |
+ *prev_bamline = bamline; /* Should be NULL */ |
|
2491 |
+ |
|
2420 | 2492 |
return bamlines; |
2421 | 2493 |
} |
2422 | 2494 |
|
2423 | 2495 |
|
2424 |
- |
|
2496 |
+/* Called by indelfix.c */ |
|
2425 | 2497 |
Bamline_T ** |
2426 | 2498 |
Bamread_block (int **nlines, Genomicpos_T chrstart, Genomicpos_T chrend, |
2427 | 2499 |
T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
... | ... |
@@ -2440,9 +2512,9 @@ Bamread_block (int **nlines, Genomicpos_T chrstart, Genomicpos_T chrend, |
2440 | 2512 |
*nlines = (int *) CALLOC(chrend - chrstart + 1,sizeof(int)); |
2441 | 2513 |
|
2442 | 2514 |
/* Excludes all perfect matches to the reference */ |
2443 |
- while ((bamline = Bamread_next_imperfect_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits, |
|
2444 |
- need_unique_p,need_primary_p,ignore_duplicates_p, |
|
2445 |
- need_concordant_p)) != NULL) { |
|
2515 |
+ while ((bamline = Bamread_next_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits, |
|
2516 |
+ need_unique_p,need_primary_p,ignore_duplicates_p, |
|
2517 |
+ need_concordant_p,/*exclude_perfect_p*/true)) != NULL) { |
|
2446 | 2518 |
if (Intlist_head(bamline->cigar_types) != 'S') { |
2447 | 2519 |
chrpos_low_noclip = bamline->chrpos_low; |
2448 | 2520 |
} else { |
... | ... |
@@ -2494,12 +2566,12 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d |
2494 | 2566 |
|
2495 | 2567 |
Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos); |
2496 | 2568 |
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) { |
2497 |
- debug1(fprintf(stderr,"Got line\n")); |
|
2498 | 2569 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
2499 | 2570 |
&mate_chr,&mate_chrpos_low,&insert_length, |
2500 | 2571 |
&cigar_types,&cigarlengths,&cigar_querylength, |
2501 | 2572 |
&readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group, |
2502 | 2573 |
&terminalp); |
2574 |
+ debug1(fprintf(stderr,"Got line %u\n",chrpos_low)); |
|
2503 | 2575 |
hiti = aux_hiti(this); |
2504 | 2576 |
nm = aux_nm(this); |
2505 | 2577 |
splice_strand = aux_splice_strand(this); |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110891 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-static char rcsid[] = "$Id: bamread.c 163197 2015-04-13 21:57:42Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
... | ... |
@@ -644,6 +644,7 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr |
644 | 644 |
struct Bamline_T { |
645 | 645 |
char *acc; |
646 | 646 |
unsigned int flag; |
647 |
+ int hiti; |
|
647 | 648 |
int nhits; |
648 | 649 |
bool good_unique_p; /* Good above good_unique_mapq. Dependent on second_mapq. */ |
649 | 650 |
int mapq; |
... | ... |
@@ -746,6 +747,11 @@ Bamline_firstend_p (Bamline_T this) { |
746 | 747 |
} |
747 | 748 |
} |
748 | 749 |
|
750 |
+int |
|
751 |
+Bamline_hiti (Bamline_T this) { |
|
752 |
+ return this->hiti; |
|
753 |
+} |
|
754 |
+ |
|
749 | 755 |
int |
750 | 756 |
Bamline_nhits (Bamline_T this) { |
751 | 757 |
return this->nhits; |
... | ... |
@@ -1331,6 +1337,22 @@ Bamline_splice_strand (Bamline_T this) { |
1331 | 1337 |
} |
1332 | 1338 |
|
1333 | 1339 |
|
1340 |
+static int |
|
1341 |
+aux_hiti (T this) { |
|
1342 |
+#ifndef HAVE_SAMTOOLS_LIB |
|
1343 |
+ return 1; |
|
1344 |
+#else |
|
1345 |
+ uint8_t *s; |
|
1346 |
+ |
|
1347 |
+ s = bam_aux_get(this->bam,"HI"); |
|
1348 |
+ if (s == NULL) { |
|