Browse code

update GSTRUCT source code

Michael Lawrence authored on 22/05/2019 19:39:45
Showing 1 changed files
... ...
@@ -1,10 +1,12 @@
1
-static char rcsid[] = "$Id: bamread.c 198589 2016-10-01 04:22:06Z twu $";
1
+static char rcsid[] = "$Id: bamread.c 219284 2019-05-21 01:02:18Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
5 5
 
6 6
 #include "bamread.h"
7
+#include <stdio.h>
7 8
 #include <stdlib.h>
9
+#include <string.h>		/* For strlen, strcpy, strncpy */
8 10
 #include "assert.h"
9 11
 #include "mem.h"
10 12
 #include "complement.h"
... ...
@@ -342,8 +344,6 @@ Bamread_nreads (int *npositions, T this, char *chr, Genomicpos_T chrpos1, Genomi
342 344
     }
343 345
   }
344 346
   Bamread_unlimit_region(this);
345
-#endif
346
-
347 347
 
348 348
   if (max_readlength_lowend == 0) {
349 349
     npositions_lowend = 0;
... ...
@@ -360,6 +360,7 @@ Bamread_nreads (int *npositions, T this, char *chr, Genomicpos_T chrpos1, Genomi
360 360
   if ((*npositions = npositions_lowend + npositions_highend) == 0) {
361 361
     *npositions = 1;
362 362
   }
363
+#endif
363 364
 
364 365
   return nreads;
365 366
 }
... ...
@@ -663,6 +664,7 @@ struct Bamline_T {
663 664
   char *mate_chr;
664 665
   Genomicpos_T mate_chrpos_low;
665 666
   int insert_length;
667
+  int nmult;			/* XX flag */
666 668
 
667 669
   char *cigar_string;
668 670
   Intlist_T cigar_types;
... ...
@@ -681,8 +683,10 @@ struct Bamline_T {
681 683
   unsigned char *aux_end;
682 684
 
683 685
   /* Used if we read more than one line at a time */
686
+#ifdef HAVE_SAMTOOLS_LIB
684 687
   uint8_t *aux_contents;
685 688
   size_t aux_length;
689
+#endif
686 690
 };
687 691
 
688 692
 
... ...
@@ -909,6 +913,27 @@ Bamline_insert_length (Bamline_T this) {
909 913
   return this->insert_length;
910 914
 }
911 915
 
916
+static int
917
+aux_nmult (T this) {
918
+#ifndef HAVE_SAMTOOLS_LIB
919
+  return 0;
920
+#else
921
+  uint8_t *s;
922
+
923
+  s = bam_aux_get(this->bam,"XX");
924
+  if (s == NULL) {
925
+    return 0;
926
+  } else {
927
+    return bam_aux2i(s);
928
+  }
929
+#endif
930
+}
931
+
932
+int
933
+Bamline_nmult (Bamline_T this) {
934
+  return this->nmult;
935
+}
936
+
912 937
 char *
913 938
 Bamline_cigar_string (Bamline_T this) {
914 939
   return this->cigar_string;
... ...
@@ -1046,6 +1071,43 @@ Bamline_cigar_querylength (Bamline_T this) {
1046 1071
   return this->cigar_querylength;
1047 1072
 }
1048 1073
 
1074
+int
1075
+Bamline_cigar_outer_softclip_length (Bamline_T this) {
1076
+  unsigned int outer_softclip_length;
1077
+  Intlist_T p;
1078
+  Uintlist_T q;
1079
+
1080
+  if ((this->flag & PAIRED_READ) == 0U) {
1081
+    /* For single-end reads, consider either end and take the larger value */
1082
+    outer_softclip_length = 0;
1083
+    for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
1084
+      if (Intlist_head(p) == 'S') {
1085
+	if (Uintlist_head(q) > outer_softclip_length) {
1086
+	  outer_softclip_length = Uintlist_head(q);
1087
+	}
1088
+      }
1089
+    }
1090
+    return (int) outer_softclip_length;
1091
+
1092
+  } else if (this->flag & QUERY_MINUSP) {
1093
+    /* For paired-end reads, it depends on QUERY_MINUS */
1094
+    if (Intlist_last_value(this->cigar_types) == 'S') {
1095
+      return (int) Uintlist_last_value(this->cigar_npositions);
1096
+    } else {
1097
+      return 0;
1098
+    }
1099
+
1100
+  } else {
1101
+    /* For paired-end reads, it depends on QUERY_MINUS */
1102
+    if (Intlist_head(this->cigar_types) == 'S') {
1103
+      return (int) Uintlist_head(this->cigar_npositions);
1104
+    } else {
1105
+      return 0;
1106
+    }
1107
+  }
1108
+}
1109
+
1110
+
1049 1111
 int
1050 1112
 Bamline_readlength (Bamline_T this) {
1051 1113
   return this->readlength;
... ...
@@ -1077,6 +1139,7 @@ Bamline_terminalp (Bamline_T this) {
1077 1139
 }
1078 1140
 
1079 1141
 
1142
+#ifdef HAVE_SAMTOOLS_LIB
1080 1143
 static void
1081 1144
 aux_print (FILE *fp, unsigned char *s, unsigned char *aux_end) {
1082 1145
   unsigned char tag1, tag2;
... ...
@@ -1128,8 +1191,10 @@ aux_print (FILE *fp, unsigned char *s, unsigned char *aux_end) {
1128 1191
 
1129 1192
   return;
1130 1193
 }
1194
+#endif
1131 1195
 
1132 1196
 
1197
+#ifdef HAVE_SAMTOOLS_LIB
1133 1198
 static char *
1134 1199
 aux_print_skip_md (FILE *fp, unsigned char *s, unsigned char *aux_end) {
1135 1200
   char *orig_md_string = NULL;
... ...
@@ -1190,6 +1255,55 @@ aux_print_skip_md (FILE *fp, unsigned char *s, unsigned char *aux_end) {
1190 1255
 
1191 1256
   return orig_md_string;
1192 1257
 }
1258
+#endif
1259
+
1260
+
1261
+bool
1262
+Bamline_exon_overlap_p (Bamline_T this, Genomicpos_T chrstart, Genomicpos_T chrend) {
1263
+  Intlist_T p;
1264
+  Uintlist_T q;
1265
+  Genomicpos_T pos, chrpos_high;
1266
+  int type;
1267
+
1268
+  chrpos_high = this->chrpos_low;
1269
+  for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
1270
+    pos = chrpos_high;
1271
+    if ((type = Intlist_head(p)) == 'S') {
1272
+      /* Ignore */
1273
+
1274
+    } else if (type == 'H') {
1275
+      /* Ignore */
1276
+
1277
+    } else if (type == 'M') {
1278
+      chrpos_high += Uintlist_head(q);
1279
+      if (pos <= chrend && chrpos_high - 1 >= chrstart) {
1280
+	return true;
1281
+      }
1282
+
1283
+    } else if (type == 'X') {
1284
+      chrpos_high += Uintlist_head(q);
1285
+
1286
+    } else if (type == 'N') {
1287
+      chrpos_high += Uintlist_head(q);
1288
+
1289
+    } else if (type == 'P') {
1290
+      /* Do nothing */
1291
+
1292
+    } else if (type == 'I') {
1293
+      /* Do nothing */
1294
+
1295
+    } else if (type == 'D') {
1296
+      chrpos_high += Uintlist_head(q);
1297
+
1298
+    } else {
1299
+      fprintf(stderr,"Cannot parse type %c\n",type);
1300
+      exit(9);
1301
+    }
1302
+    debug(printf("  type = %c, chrpos = %u\n",type,chrpos_high));
1303
+  }
1304
+
1305
+  return false;
1306
+}
1193 1307
 
1194 1308
 
1195 1309
 void
... ...
@@ -1230,7 +1344,9 @@ Bamline_print (FILE *fp, Bamline_T this, unsigned int newflag, int quality_score
1230 1344
     }
1231 1345
   }
1232 1346
 
1347
+#ifdef HAVE_SAMTOOLS_LIB
1233 1348
   aux_print(fp,this->aux_start,this->aux_end);
1349
+#endif
1234 1350
 
1235 1351
   fprintf(fp,"\n");
1236 1352
   return;
... ...
@@ -1274,16 +1390,18 @@ Bamline_print_new_cigar (FILE *fp, Bamline_T this, Genomicpos_T chrpos_low, char
1274 1390
 
1275 1391
   fprintf(fp,"\tMD:Z:%s",new_md_string);
1276 1392
   
1393
+#ifdef HAVE_SAMTOOLS_LIB
1277 1394
   orig_md_string = aux_print_skip_md(fp,this->aux_start,this->aux_end);
1395
+#endif
1278 1396
 
1279 1397
   /* Original alignment */
1280
-  fprintf(fp,"\tXX:i:%u",this->chrpos_low);
1281
-  fprintf(fp,"\tXY:Z:");
1398
+  fprintf(fp,"\tXP:i:%u",this->chrpos_low);
1399
+  fprintf(fp,"\tXQ:Z:");
1282 1400
   for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
1283 1401
     fprintf(fp,"%u%c",Uintlist_head(q),Intlist_head(p));
1284 1402
   }
1285 1403
   if (orig_md_string != NULL) {
1286
-    fprintf(fp,"\tXZ:Z:%s",orig_md_string);
1404
+    fprintf(fp,"\tXR:Z:%s",orig_md_string);
1287 1405
   }
1288 1406
 
1289 1407
   fprintf(fp,"\n");
... ...
@@ -1329,7 +1447,9 @@ Bamline_print_new_mate (FILE *fp, Bamline_T this, char *mate_chr, Genomicpos_T m
1329 1447
     fprintf(fp,"%s",this->quality_string);
1330 1448
   }
1331 1449
 
1450
+#ifdef HAVE_SAMTOOLS_LIB
1332 1451
   aux_print(fp,this->aux_start,this->aux_end);
1452
+#endif
1333 1453
 
1334 1454
   fprintf(fp,"\n");
1335 1455
   return;
... ...
@@ -1896,9 +2016,11 @@ Bamline_free (Bamline_T *old) {
1896 2016
     if ((*old)->hardclip != NULL) {
1897 2017
       FREE((*old)->hardclip);
1898 2018
     }
2019
+#ifdef HAVE_SAMTOOLS_LIB
1899 2020
     if ((*old)->aux_contents != NULL) {
1900 2021
       FREE((*old)->aux_contents);
1901 2022
     }
2023
+#endif
1902 2024
     FREE(*old);
1903 2025
   }
1904 2026
 
... ...
@@ -1909,7 +2031,7 @@ Bamline_free (Bamline_T *old) {
1909 2031
 static Bamline_T
1910 2032
 Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique_p, int mapq,
1911 2033
 	     int nm, char splice_strand, char *chr, Genomicpos_T chrpos_low,
1912
-	     char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length,
2034
+	     char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length, int nmult,
1913 2035
 	     Intlist_T cigar_types, Uintlist_T cigar_npositions, int cigar_querylength, int readlength,
1914 2036
 	     char *read, char *quality_string, char *hardclip, char *hardclip_quality, bool terminalp,
1915 2037
 	     unsigned char *aux_start, unsigned char *aux_end, bool copy_aux_contents_p) {
... ...
@@ -1944,6 +2066,7 @@ Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique
1944 2066
   new->mate_chrpos_low = mate_chrpos_low;
1945 2067
 
1946 2068
   new->insert_length = insert_length;
2069
+  new->nmult = nmult;
1947 2070
 
1948 2071
   new->cigar_string = compute_cigar_string(cigar_types,cigar_npositions);
1949 2072
   new->cigar_types = cigar_types;		/* not copying */
... ...
@@ -1978,6 +2101,7 @@ Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique
1978 2101
 
1979 2102
   new->terminalp = terminalp;
1980 2103
 
2104
+#ifdef HAVE_SAMTOOLS_LIB
1981 2105
   if (copy_aux_contents_p == true) {
1982 2106
     new->aux_length = aux_end - aux_start;
1983 2107
     if (new->aux_length == 0) {
... ...
@@ -1996,6 +2120,7 @@ Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique
1996 2120
     new->aux_start = aux_start;
1997 2121
     new->aux_end = aux_end;
1998 2122
   }
2123
+#endif
1999 2124
 
2000 2125
   return new;
2001 2126
 }
... ...
@@ -2006,7 +2131,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
2006 2131
 		      bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
2007 2132
 		      bool need_concordant_p) {
2008 2133
   char *acc, *chr, *mate_chr, splice_strand;
2009
-  int nm;
2134
+  int nm, nmult;
2010 2135
   unsigned int flag;
2011 2136
   int hiti, nhits;
2012 2137
   bool good_unique_p;
... ...
@@ -2034,7 +2159,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
2034 2159
 		 &cigar_types,&cigarlengths,&cigar_querylength,
2035 2160
 		 &readlength,&read,&quality_string,&hardclip,&hardclip_quality,
2036 2161
 		 &read_group,&terminalp);
2037
-      debug1(fprintf(stderr,"Got line %u\n",chrpos_low));
2162
+      debug1(fprintf(stderr,"Got line %s at %u\n",acc,chrpos_low));
2038 2163
       if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
2039 2164
 	debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
2040 2165
 	Intlist_free(&cigar_types);
... ...
@@ -2074,10 +2199,11 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
2074 2199
 	debug1(fprintf(stderr,"Success\n"));
2075 2200
 	hiti = aux_hiti(this);
2076 2201
 	nm = aux_nm(this);
2202
+	nmult = aux_nmult(this);
2077 2203
 	splice_strand = aux_splice_strand(this);
2078 2204
 	good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2079 2205
 	return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2080
-			   mate_chr,mate_chrpos_low,insert_length,
2206
+			   mate_chr,mate_chrpos_low,insert_length,nmult,
2081 2207
 			   cigar_types,cigarlengths,cigar_querylength,
2082 2208
 			   readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
2083 2209
 			   /*aux_start*/bam1_aux(this->bam),
... ...
@@ -2101,10 +2227,11 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
2101 2227
 	  (need_concordant_p == false || concordantp(flag) == true)) {
2102 2228
 	hiti = aux_hiti(this);
2103 2229
 	nm = aux_nm(this);
2230
+	nmult = aux_nmult(this);
2104 2231
 	splice_strand = aux_splice_strand(this);
2105 2232
 	good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2106 2233
 	return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2107
-			   mate_chr,mate_chrpos_low,insert_length,
2234
+			   mate_chr,mate_chrpos_low,insert_length,nmult,
2108 2235
 			   cigar_types,cigarlengths,cigar_querylength,
2109 2236
 			   readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
2110 2237
 			   /*aux_start*/bam1_aux(this->bam),
... ...
@@ -2126,7 +2253,7 @@ Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_map
2126 2253
 			       bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
2127 2254
 			       bool need_concordant_p, bool exclude_perfect_p) {
2128 2255
   char *acc, *chr, *mate_chr, splice_strand;
2129
-  int nm;
2256
+  int nm, nmult;
2130 2257
   unsigned int flag;
2131 2258
   int hiti, nhits;
2132 2259
   bool good_unique_p;
... ...
@@ -2158,7 +2285,7 @@ Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_map
2158 2285
 		   &cigar_types,&cigarlengths,&cigar_querylength,
2159 2286
 		   &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
2160 2287
 		   &terminalp);
2161
-	debug1(fprintf(stderr,"Got line %u\n",chrpos_low));
2288
+	debug1(fprintf(stderr,"Got line %s at %u\n",acc,chrpos_low));
2162 2289
 	if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
2163 2290
 	  debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
2164 2291
 	  Intlist_free(&cigar_types);
... ...
@@ -2198,10 +2325,11 @@ Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_map
2198 2325
 	  debug1(fprintf(stderr,"Success\n"));
2199 2326
 	  hiti = aux_hiti(this);
2200 2327
 	  nm = aux_nm(this);
2328
+	  nmult = aux_nmult(this);
2201 2329
 	  splice_strand = aux_splice_strand(this);
2202 2330
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2203 2331
 	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2204
-			     mate_chr,mate_chrpos_low,insert_length,
2332
+			     mate_chr,mate_chrpos_low,insert_length,nmult,
2205 2333
 			     cigar_types,cigarlengths,cigar_querylength,
2206 2334
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
2207 2335
 			     /*aux_start*/bam1_aux(this->bam),
... ...
@@ -2229,10 +2357,11 @@ Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_map
2229 2357
 	    (need_concordant_p == false || concordantp(flag) == true)) {
2230 2358
 	  hiti = aux_hiti(this);
2231 2359
 	  nm = aux_nm(this);
2360
+	  nmult = aux_nmult(this);
2232 2361
 	  splice_strand = aux_splice_strand(this);
2233 2362
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2234 2363
 	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2235
-			     mate_chr,mate_chrpos_low,insert_length,
2364
+			     mate_chr,mate_chrpos_low,insert_length,nmult,
2236 2365
 			     cigar_types,cigarlengths,cigar_querylength,
2237 2366
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
2238 2367
 			     /*aux_start*/bam1_aux(this->bam),
... ...
@@ -2258,7 +2387,7 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq,
2258 2387
 			    bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
2259 2388
 			    bool need_concordant_p) {
2260 2389
   char *acc, *chr, *mate_chr, splice_strand;
2261
-  int nm;
2390
+  int nm, nmult;
2262 2391
   unsigned int flag;
2263 2392
   int hiti, nhits;
2264 2393
   bool good_unique_p;
... ...
@@ -2289,7 +2418,7 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq,
2289 2418
 		   &cigar_types,&cigarlengths,&cigar_querylength,
2290 2419
 		   &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
2291 2420
 		   &terminalp);
2292
-	debug1(fprintf(stderr,"Got line %u\n",chrpos_low));
2421
+	debug1(fprintf(stderr,"Got line %s at %u\n",acc,chrpos_low));
2293 2422
 	if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
2294 2423
 	  debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
2295 2424
 	  Intlist_free(&cigar_types);
... ...
@@ -2329,10 +2458,11 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq,
2329 2458
 	  debug1(fprintf(stderr,"Success\n"));
2330 2459
 	  hiti = aux_hiti(this);
2331 2460
 	  nm = aux_nm(this);
2461
+	  nmult = aux_nmult(this);
2332 2462
 	  splice_strand = aux_splice_strand(this);
2333 2463
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2334 2464
 	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2335
-			     mate_chr,mate_chrpos_low,insert_length,
2465
+			     mate_chr,mate_chrpos_low,insert_length,nmult,
2336 2466
 			     cigar_types,cigarlengths,cigar_querylength,
2337 2467
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
2338 2468
 			     /*aux_start*/bam1_aux(this->bam),
... ...
@@ -2361,10 +2491,11 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq,
2361 2491
 	    (need_concordant_p == false || concordantp(flag) == true)) {
2362 2492
 	  hiti = aux_hiti(this);
2363 2493
 	  nm = aux_nm(this);
2494
+	  nmult = aux_nmult(this);
2364 2495
 	  splice_strand = aux_splice_strand(this);
2365 2496
 	  good_unique_p = aux_good_unique_p(this,good_unique_mapq);
2366 2497
 	  return Bamline_new(acc,flag,hiti,nhits,good_unique_p,mapq,nm,splice_strand,chr,chrpos_low,
2367
-			     mate_chr,mate_chrpos_low,insert_length,
2498
+			     mate_chr,mate_chrpos_low,insert_length,nmult,
2368 2499
 			     cigar_types,cigarlengths,cigar_querylength,
2369 2500
 			     readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
2370 2501
 			     /*aux_start*/bam1_aux(this->bam),
... ...
@@ -2548,7 +2679,7 @@ Bamread_block (int **nlines, Genomicpos_T chrstart, Genomicpos_T chrend,
2548 2679
 Bamline_T
2549 2680
 Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *desired_acc) {
2550 2681
   char *acc, *chr, *mate_chr, splice_strand;
2551
-  int nm;
2682
+  int nm, nmult;
2552 2683
   unsigned int flag;
2553 2684
   int hiti, nhits;
2554 2685
   int mapq;
... ...
@@ -2564,6 +2695,7 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d
2564 2695
   char *read_group;
2565 2696
   bool terminalp;
2566 2697
 
2698
+#ifdef HAVE_SAMTOOLS_LIB
2567 2699
   Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos);
2568 2700
   while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
2569 2701
     parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
... ...
@@ -2571,14 +2703,15 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d
2571 2703
 	       &cigar_types,&cigarlengths,&cigar_querylength,
2572 2704
 	       &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
2573 2705
 	       &terminalp);
2574
-    debug1(fprintf(stderr,"Got line %u\n",chrpos_low));
2706
+    debug1(fprintf(stderr,"Got line %s at %u\n",acc,chrpos_low));
2575 2707
     hiti = aux_hiti(this);
2576 2708
     nm = aux_nm(this);
2709
+    nmult = aux_nmult(this);
2577 2710
     splice_strand = aux_splice_strand(this);
2578 2711
     if (!strcmp(acc,desired_acc) && chrpos_low == desired_chrpos) {
2579 2712
       Bamread_unlimit_region(this);
2580 2713
       return Bamline_new(acc,flag,hiti,nhits,/*good_unique_p*/true,mapq,nm,splice_strand,chr,chrpos_low,
2581
-			 mate_chr,mate_chrpos_low,insert_length,
2714
+			 mate_chr,mate_chrpos_low,insert_length,nmult,
2582 2715
 			 cigar_types,cigarlengths,cigar_querylength,
2583 2716
 			 readlength,read,quality_string,hardclip,hardclip_quality,terminalp,
2584 2717
 			 /*aux_start*/bam1_aux(this->bam),
... ...
@@ -2588,6 +2721,8 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d
2588 2721
   }
2589 2722
 
2590 2723
   Bamread_unlimit_region(this);
2724
+#endif
2725
+
2591 2726
   return (Bamline_T) NULL;
2592 2727
 }
2593 2728
 
Browse code

update to latest GSTRUCT

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@123972 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/11/2016 21:54:30
Showing 1 changed files
... ...
@@ -1,4 +1,4 @@
1
-static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $";
1
+static char rcsid[] = "$Id: bamread.c 198589 2016-10-01 04:22:06Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
... ...
@@ -28,7 +28,6 @@ static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $";
28 28
 #endif
29 29
 
30 30
 
31
-
32 31
 #ifdef HAVE_SAMTOOLS_LIB
33 32
 #include "bam.h"
34 33
 typedef uint8_t *BAM_Sequence_T;
... ...
@@ -257,7 +256,7 @@ Bamread_nreads (int *npositions, T this, char *chr, Genomicpos_T chrpos1, Genomi
257 256
 
258 257
     cigar = bam1_cigar(this->bam);
259 258
     for (i = 0; i < this->core->n_cigar; i++) {
260
-      type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
259
+      type = (int) ("MIDNSHP=X"[cigar[i] & BAM_CIGAR_MASK]);
261 260
       length = cigar[i] >> BAM_CIGAR_SHIFT;
262 261
 
263 262
       if (type == 'S') {
... ...
@@ -456,6 +455,8 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom
456 455
     *read_group = bam_aux2Z(ptr);
457 456
   }
458 457
 
458
+#if 0
459
+  /* XG is used as XG:i in other software */
459 460
   ptr = bam_aux_get(this->bam,"XG");
460 461
   if (ptr == NULL) {
461 462
     *terminalp = false;
... ...
@@ -467,6 +468,10 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom
467 468
       *terminalp = true;
468 469
     }
469 470
   }
471
+#else
472
+  *terminalp = false;
473
+#endif
474
+
470 475
 
471 476
   /* Cigar */
472 477
   *cigarlength = 0;
... ...
@@ -478,10 +483,10 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom
478 483
     length = cigar[i] >> BAM_CIGAR_SHIFT;
479 484
     *cigarlengths = Uintlist_push(*cigarlengths,length);
480 485
 
481
-    type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
486
+    type = (int) ("MIDNSHP=X"[cigar[i] & BAM_CIGAR_MASK]);
482 487
     *cigartypes = Intlist_push(*cigartypes,type);
483 488
 
484
-    if (type == 'S' || type == 'M' || type == 'I') {
489
+    if (type == 'S' || type == 'M' || type == 'I' || type == 'X') {
485 490
       *cigarlength += (int) length;
486 491
     } else if (type == 'H') {
487 492
       *cigarlength += (int) length;
... ...
@@ -516,7 +521,7 @@ has_indel_p (T this) {
516 521
   cigar = bam1_cigar(this->bam);
517 522
   for (i = 0; i < this->core->n_cigar; i++) {
518 523
     /* length = cigar[i] >> BAM_CIGAR_SHIFT; */
519
-    type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
524
+    type = (int) ("MIDNSHP=X"[cigar[i] & BAM_CIGAR_MASK]);
520 525
     if (type == 'I' || type == 'D') {
521 526
       return true;
522 527
     }
... ...
@@ -541,7 +546,7 @@ perfect_match_p (T this) {
541 546
   cigar = bam1_cigar(this->bam);
542 547
   for (i = 0; i < this->core->n_cigar; i++) {
543 548
     /* length = cigar[i] >> BAM_CIGAR_SHIFT; */
544
-    type = (int) ("MIDNSHP"[cigar[i] & BAM_CIGAR_MASK]);
549
+    type = (int) ("MIDNSHP=X"[cigar[i] & BAM_CIGAR_MASK]);
545 550
     if (type == 'S') {
546 551
       return false;
547 552
 
... ...
@@ -551,6 +556,9 @@ perfect_match_p (T this) {
551 556
     } else if (type == 'M') {
552 557
       /* acceptable */
553 558
 
559
+    } else if (type == 'X') {
560
+      /* acceptable */
561
+
554 562
     } else if (type == 'N') {
555 563
       /* acceptable */
556 564
       
... ...
@@ -655,9 +663,12 @@ struct Bamline_T {
655 663
   char *mate_chr;
656 664
   Genomicpos_T mate_chrpos_low;
657 665
   int insert_length;
666
+
667
+  char *cigar_string;
658 668
   Intlist_T cigar_types;
659 669
   Uintlist_T cigar_npositions;
660 670
   int cigar_querylength;
671
+
661 672
   int readlength;
662 673
   char *read;
663 674
   char *quality_string;
... ...
@@ -685,6 +696,16 @@ Bamline_flag (Bamline_T this) {
685 696
   return this->flag;
686 697
 }
687 698
 
699
+bool
700
+Bamline_plusp (Bamline_T this) {
701
+  if (this->flag & QUERY_MINUSP) {
702
+    return false;
703
+  } else {
704
+    return true;
705
+  }
706
+}
707
+
708
+
688 709
 static bool
689 710
 concordantp (unsigned int flag) {
690 711
   if (flag & QUERY_UNMAPPED) {
... ...
@@ -780,6 +801,9 @@ Bamline_perfect_match_p (Bamline_T this) {
780 801
     } else if (type == 'M') {
781 802
       /* acceptable */
782 803
 
804
+    } else if (type == 'X') {
805
+      /* acceptable */
806
+
783 807
     } else if (type == 'N') {
784 808
       /* acceptable */
785 809
       
... ...
@@ -829,6 +853,21 @@ Bamline_perfect_match_p (Bamline_T this) {
829 853
 }
830 854
 
831 855
 
856
+bool
857
+Bamline_microinv_p (Bamline_T this) {
858
+  Intlist_T p;
859
+  int type;
860
+
861
+  for (p = this->cigar_types; p != NULL; p = Intlist_next(p)) {
862
+    type = Intlist_head(p);
863
+    if (type == 'X') {
864
+      return true;
865
+    }
866
+  }
867
+
868
+  return false;
869
+}
870
+
832 871
 int
833 872
 Bamline_mapq (Bamline_T this) {
834 873
   return this->mapq;
... ...
@@ -870,12 +909,16 @@ Bamline_insert_length (Bamline_T this) {
870 909
   return this->insert_length;
871 910
 }
872 911
 
912
+char *
913
+Bamline_cigar_string (Bamline_T this) {
914
+  return this->cigar_string;
915
+}
916
+
873 917
 Intlist_T
874 918
 Bamline_cigar_types (Bamline_T this) {
875 919
   return this->cigar_types;
876 920
 }
877 921
 
878
-
879 922
 Uintlist_T
880 923
 Bamline_cigar_npositions (Bamline_T this) {
881 924
   return this->cigar_npositions;
... ...
@@ -906,6 +949,10 @@ Bamline_diffcigar (int *min_overhang, Uintlist_T *npositions, Uintlist_T *chrpos
906 949
       querypos += Uintlist_head(q);
907 950
       chrpos += Uintlist_head(q);
908 951
 
952
+    } else if (type == 'X') {
953
+      querypos += Uintlist_head(q);
954
+      chrpos += Uintlist_head(q);
955
+
909 956
     } else if (type == 'N') {
910 957
       types = Intlist_push(types,type);
911 958
       *npositions = Uintlist_push(*npositions,Uintlist_head(q));
... ...
@@ -971,21 +1018,21 @@ Bamread_print_cigar (FILE *fp, Bamline_T this) {
971 1018
   return;
972 1019
 }
973 1020
 
974
-char *
975
-Bamline_cigar_string (Bamline_T this) {
1021
+static char *
1022
+compute_cigar_string (Intlist_T cigar_types, Uintlist_T cigar_npositions) {
976 1023
   char *string, number[12];
977 1024
   Intlist_T p;
978 1025
   Uintlist_T q;
979 1026
   int string_length = 0;
980 1027
 
981
-  for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
1028
+  for (p = cigar_types, q = cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
982 1029
     sprintf(number,"%u",Uintlist_head(q));
983 1030
     string_length += strlen(number) + 1;
984 1031
   }
985 1032
   string = (char *) CALLOC(string_length+1,sizeof(char));
986 1033
 
987 1034
   string_length = 0;
988
-  for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
1035
+  for (p = cigar_types, q = cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) {
989 1036
     sprintf(number,"%u",Uintlist_head(q));
990 1037
     sprintf(&(string[string_length]),"%u%c",Uintlist_head(q),Intlist_head(p));
991 1038
     string_length += strlen(number) + 1;
... ...
@@ -1528,6 +1575,9 @@ Bamline_strand (Bamline_T this, Genome_T genome, IIT_T chromosome_iit) {
1528 1575
     } else if (type == 'M') {
1529 1576
       chrpos += Uintlist_head(q);
1530 1577
 
1578
+    } else if (type == 'X') {
1579
+      chrpos += Uintlist_head(q);
1580
+
1531 1581
     } else if (type == 'N') {
1532 1582
       firstpos = chrpos - 1U;
1533 1583
       chrpos += Uintlist_head(q);
... ...
@@ -1581,6 +1631,9 @@ Bamline_chrpos_high (Bamline_T this) {
1581 1631
     } else if (type == 'M') {
1582 1632
       chrpos_high += Uintlist_head(q);
1583 1633
 
1634
+    } else if (type == 'X') {
1635
+      chrpos_high += Uintlist_head(q);
1636
+
1584 1637
     } else if (type == 'N') {
1585 1638
       chrpos_high += Uintlist_head(q);
1586 1639
 
... ...
@@ -1623,6 +1676,9 @@ Bamline_chrpos_high_noclip (Bamline_T this) {
1623 1676
     } else if (type == 'M') {
1624 1677
       chrpos_high += Uintlist_head(q);
1625 1678
 
1679
+    } else if (type == 'X') {
1680
+      chrpos_high += Uintlist_head(q);
1681
+
1626 1682
     } else if (type == 'N') {
1627 1683
       chrpos_high += Uintlist_head(q);
1628 1684
 
... ...
@@ -1731,6 +1787,11 @@ Bamline_regions (Uintlist_T *chrpos_lows, Uintlist_T *chrpos_highs, Bamline_T th
1731 1787
       position += Uintlist_head(q);
1732 1788
       *chrpos_highs = Uintlist_push(*chrpos_highs,position);
1733 1789
 
1790
+    } else if (type == 'X') {
1791
+      *chrpos_lows = Uintlist_push(*chrpos_lows,position);
1792
+      position += Uintlist_head(q);
1793
+      *chrpos_highs = Uintlist_push(*chrpos_highs,position);
1794
+
1734 1795
     } else if (type == 'N') {
1735 1796
       position += Uintlist_head(q);
1736 1797
 
... ...
@@ -1773,6 +1834,9 @@ Bamline_splices (Uintlist_T *splice_lows, Uintlist_T *splice_highs, Intlist_T *s
1773 1834
     } else if (type == 'M') {
1774 1835
       position += Uintlist_head(q);
1775 1836
 
1837
+    } else if (type == 'X') {
1838
+      position += Uintlist_head(q);
1839
+
1776 1840
     } else if (type == 'N') {
1777 1841
       *splice_lows = Uintlist_push(*splice_lows,position);
1778 1842
       position += Uintlist_head(q);
... ...
@@ -1819,6 +1883,7 @@ Bamline_free (Bamline_T *old) {
1819 1883
     if ((*old)->mate_chr != NULL) {
1820 1884
       FREE((*old)->mate_chr);
1821 1885
     }
1886
+    FREE((*old)->cigar_string);
1822 1887
     Intlist_free(&(*old)->cigar_types);
1823 1888
     Uintlist_free(&(*old)->cigar_npositions);
1824 1889
     FREE((*old)->read);
... ...
@@ -1880,6 +1945,7 @@ Bamline_new (char *acc, unsigned int flag, int hiti, int nhits, bool good_unique
1880 1945
 
1881 1946
   new->insert_length = insert_length;
1882 1947
 
1948
+  new->cigar_string = compute_cigar_string(cigar_types,cigar_npositions);
1883 1949
   new->cigar_types = cigar_types;		/* not copying */
1884 1950
   new->cigar_npositions = cigar_npositions;	/* not copying */
1885 1951
 
... ...
@@ -1961,14 +2027,14 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
1961 2027
   return (Bamline_T) NULL;
1962 2028
 #else
1963 2029
   if (this->region_limited_p == 1) {
1964
-    debug1(fprintf(stderr,"Region limited\n"));
2030
+    /* debug1(fprintf(stderr,"Region limited\n")); */
1965 2031
     while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
1966
-      debug1(fprintf(stderr,"Got line\n"));
1967 2032
       parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
1968 2033
 		 &mate_chr,&mate_chrpos_low,&insert_length,
1969 2034
 		 &cigar_types,&cigarlengths,&cigar_querylength,
1970 2035
 		 &readlength,&read,&quality_string,&hardclip,&hardclip_quality,
1971 2036
 		 &read_group,&terminalp);
2037
+      debug1(fprintf(stderr,"Got line %u\n",chrpos_low));
1972 2038
       if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
1973 2039
 	debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
1974 2040
 	Intlist_free(&cigar_types);
... ...
@@ -2055,10 +2121,10 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
2055 2121
 #endif
2056 2122
 }
2057 2123
 
2058
-Bamline_T
2059
-Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
2060
-					 bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
2061
-					 bool need_concordant_p) {
2124
+static Bamline_T
2125
+Bamread_next_bamline_copy_aux (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
2126
+			       bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
2127
+			       bool need_concordant_p, bool exclude_perfect_p) {
2062 2128
   char *acc, *chr, *mate_chr, splice_strand;
2063 2129
   int nm;
2064 2130
   unsigned int flag;
... ...
@@ -2081,17 +2147,18 @@ Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int m
2081 2147
   return (Bamline_T) NULL;
2082 2148
 #else
2083 2149
   if (this->region_limited_p == 1) {
2084
-    debug1(fprintf(stderr,"Region limited\n"));
2150
+    /* debug1(fprintf(stderr,"Region limited\n")); */
2085 2151
     while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
2086
-      if (perfect_match_p(this) == true) {
2152
+      if (exclude_perfect_p == true && perfect_match_p(this) == true) {
2087 2153
 	/* Skip */
2154
+	debug1(fprintf(stderr,"Skipping line\n"));
2088 2155
       } else {
2089
-	debug1(fprintf(stderr,"Got line\n"));
2090 2156
 	parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
2091 2157
 		   &mate_chr,&mate_chrpos_low,&insert_length,
2092 2158
 		   &cigar_types,&cigarlengths,&cigar_querylength,
2093 2159
 		   &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
2094 2160
 		   &terminalp);
2161
+	debug1(fprintf(stderr,"Got line %u\n",chrpos_low));
2095 2162
 	if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
2096 2163
 	  debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
2097 2164
 	  Intlist_free(&cigar_types);
... ...
@@ -2147,7 +2214,7 @@ Bamread_next_imperfect_bamline_copy_aux (T this, char *desired_read_group, int m
2147 2214
 
2148 2215
   } else {
2149 2216
     while (bam_read1(this->fp,this->bam) >= 0) {
2150
-      if (perfect_match_p(this) == true) {
2217
+      if (exclude_perfect_p == true && perfect_match_p(this) == true) {
2151 2218
 	/* Skip */
2152 2219
       } else {
2153 2220
 	parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
... ...
@@ -2212,17 +2279,17 @@ Bamread_next_indel_bamline (T this, char *desired_read_group, int minimum_mapq,
2212 2279
   return (Bamline_T) NULL;
2213 2280
 #else
2214 2281
   if (this->region_limited_p == 1) {
2215
-    debug1(fprintf(stderr,"Region limited\n"));
2282
+    /* debug1(fprintf(stderr,"Region limited\n")); */
2216 2283
     while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
2217 2284
       if (has_indel_p(this) == false) {
2218 2285
 	/* Skip */
2219 2286
       } else {
2220
-	debug1(fprintf(stderr,"Got line\n"));
2221 2287
 	parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
2222 2288
 		   &mate_chr,&mate_chrpos_low,&insert_length,
2223 2289
 		   &cigar_types,&cigarlengths,&cigar_querylength,
2224 2290
 		   &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
2225 2291
 		   &terminalp);
2292
+	debug1(fprintf(stderr,"Got line %u\n",chrpos_low));
2226 2293
 	if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
2227 2294
 	  debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
2228 2295
 	  Intlist_free(&cigar_types);
... ...
@@ -2356,7 +2423,11 @@ bamline_read_cmp (const void *x, const void *y) {
2356 2423
   }
2357 2424
 #else
2358 2425
 
2359
-  return strcmp(a->read,b->read);
2426
+  if ((cmp = strcmp(a->read,b->read)) != 0) {
2427
+    return cmp;
2428
+  } else {
2429
+    return strcmp(a->cigar_string,b->cigar_string);
2430
+  }
2360 2431
 
2361 2432
 #endif
2362 2433
 
... ...
@@ -2380,9 +2451,9 @@ Bamread_next_bamline_set (int *nlines, Bamline_T *prev_bamline,
2380 2451
     set_chrpos_low = Bamline_chrpos_low(*prev_bamline);
2381 2452
   }
2382 2453
 
2383
-  while ((bamline = Bamread_next_imperfect_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
2384
-							    need_unique_p,need_primary_p,ignore_duplicates_p,
2385
-							    need_concordant_p)) != NULL) {
2454
+  while ((bamline = Bamread_next_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
2455
+						  need_unique_p,need_primary_p,ignore_duplicates_p,
2456
+						  need_concordant_p,/*exclude_perfect_p*/false)) != NULL) {
2386 2457
 #if 0
2387 2458
     if (Bamline_perfect_match_p(bamline) == true) {
2388 2459
       /* Exclude all perfect matches to the reference */
... ...
@@ -2416,12 +2487,13 @@ Bamread_next_bamline_set (int *nlines, Bamline_T *prev_bamline,
2416 2487
     qsort(bamlines,*nlines,sizeof(Bamline_T),bamline_read_cmp);
2417 2488
   }
2418 2489
   List_free(&bamline_list);
2419
-  *prev_bamline = bamline;
2490
+  *prev_bamline = bamline;	/* Should be NULL */
2491
+
2420 2492
   return bamlines;
2421 2493
 }
2422 2494
 
2423 2495
 
2424
-
2496
+/* Called by indelfix.c */
2425 2497
 Bamline_T **
2426 2498
 Bamread_block (int **nlines, Genomicpos_T chrstart, Genomicpos_T chrend,
2427 2499
 	       T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
... ...
@@ -2440,9 +2512,9 @@ Bamread_block (int **nlines, Genomicpos_T chrstart, Genomicpos_T chrend,
2440 2512
   *nlines = (int *) CALLOC(chrend - chrstart + 1,sizeof(int));
2441 2513
 
2442 2514
   /* Excludes all perfect matches to the reference */
2443
-  while ((bamline = Bamread_next_imperfect_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
2444
-							    need_unique_p,need_primary_p,ignore_duplicates_p,
2445
-							    need_concordant_p)) != NULL) {
2515
+  while ((bamline = Bamread_next_bamline_copy_aux(this,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
2516
+						  need_unique_p,need_primary_p,ignore_duplicates_p,
2517
+						  need_concordant_p,/*exclude_perfect_p*/true)) != NULL) {
2446 2518
     if (Intlist_head(bamline->cigar_types) != 'S') {
2447 2519
       chrpos_low_noclip = bamline->chrpos_low;
2448 2520
     } else {
... ...
@@ -2494,12 +2566,12 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d
2494 2566
 
2495 2567
   Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos);
2496 2568
   while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
2497
-    debug1(fprintf(stderr,"Got line\n"));
2498 2569
     parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
2499 2570
 	       &mate_chr,&mate_chrpos_low,&insert_length,
2500 2571
 	       &cigar_types,&cigarlengths,&cigar_querylength,
2501 2572
 	       &readlength,&read,&quality_string,&hardclip,&hardclip_quality,&read_group,
2502 2573
 	       &terminalp);
2574
+    debug1(fprintf(stderr,"Got line %u\n",chrpos_low));
2503 2575
     hiti = aux_hiti(this);
2504 2576
     nm = aux_nm(this);
2505 2577
     splice_strand = aux_splice_strand(this);
Browse code

add raw total count statistic to tallies (everything else now qual filtered)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110891 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 24/11/2015 22:19:59
Showing 1 changed files
... ...
@@ -1,4 +1,4 @@
1
-static char rcsid[] = "$Id: bamread.c 163197 2015-04-13 21:57:42Z twu $";
1
+static char rcsid[] = "$Id: bamread.c 178960 2015-11-16 19:52:26Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
... ...
@@ -644,6 +644,7 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr
644 644
 struct Bamline_T {
645 645
   char *acc;
646 646
   unsigned int flag;
647
+  int hiti;
647 648
   int nhits;
648 649
   bool good_unique_p;			/* Good above good_unique_mapq.  Dependent on second_mapq. */
649 650
   int mapq;
... ...
@@ -746,6 +747,11 @@ Bamline_firstend_p (Bamline_T this) {
746 747
   }
747 748
 }
748 749
 
750
+int
751
+Bamline_hiti (Bamline_T this) {
752
+  return this->hiti;
753
+}
754
+
749 755
 int
750 756
 Bamline_nhits (Bamline_T this) {
751 757
   return this->nhits;
... ...
@@ -1331,6 +1337,22 @@ Bamline_splice_strand (Bamline_T this) {
1331 1337
 }
1332 1338
 
1333 1339
 
1340
+static int
1341
+aux_hiti (T this) {
1342
+#ifndef HAVE_SAMTOOLS_LIB
1343
+  return 1;
1344
+#else
1345
+  uint8_t *s;
1346
+
1347
+  s = bam_aux_get(this->bam,"HI");
1348
+  if (s == NULL) {