Browse code

update GSTRUCT (bam_tally); add include_soft_clip parameter for counting over soft clips of a given max length (more accurate allele frequency)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88343 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 02/04/2014 22:03:40
Showing 23 changed files

... ...
@@ -10,7 +10,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read
10 10
         methods to work with GMAP and GSNAP from within R. In addition,
11 11
         it provides methods to tally alignment results on a
12 12
         per-nucleotide basis using the bam_tally tool.
13
-Version: 1.5.13
13
+Version: 1.5.14
14 14
 Depends: R (>= 2.15.0), methods, GenomicRanges
15 15
 Imports: IRanges, Rsamtools (>= 1.7.4), rtracklayer (>= 1.17.15),
16 16
         GenomicFeatures, Biostrings, VariantAnnotation (>= 1.9.4),
... ...
@@ -17,7 +17,8 @@ setClass("BamTallyParam",
17 17
                         min_depth = "integer",
18 18
                         variant_strand = "integer",
19 19
                         ignore_query_Ns = "logical",
20
-                        indels = "logical"))
20
+                        indels = "logical",
21
+                        include_soft_clips = "integer"))
21 22
 
22 23
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
23 24
 ### Constructor
... ...
@@ -38,7 +39,7 @@ BamTallyParam <- function(genome, which = GRanges(),
38 39
                           primary_only = FALSE, ignore_duplicates = FALSE,
39 40
                           min_depth = 0L, variant_strand = 0L,
40 41
                           ignore_query_Ns = FALSE,
41
-                          indels = FALSE)
42
+                          indels = FALSE, include_soft_clips = 0L)
42 43
 {
43 44
   if (!is.null(desired_read_group) && !isSingleString(desired_read_group))
44 45
     stop("'desired_read_group' must be NULL or a single, non-NA string")
... ...
@@ -60,11 +61,14 @@ BamTallyParam <- function(genome, which = GRanges(),
60 61
     stop("ignore_query_Ns must be TRUE or FALSE")
61 62
   if (!isTRUEorFALSE(indels))
62 63
     stop("indels must be TRUE or FALSE")
64
+  if (include_soft_clips < 0)
65
+    stop("include_soft_clips must be non-negative")
63 66
   args <- names(formals(sys.function()))
64 67
   params <- mget(args, environment())
65 68
   params$genome <- as(genome, "GmapGenome")
66 69
   params$which <- normArgWhich(which, params$genome)
67
-  integer_params <- c("minimum_mapq", "min_depth", "variant_strand")
70
+  integer_params <- c("minimum_mapq", "min_depth", "variant_strand",
71
+                      "include_soft_clips")
68 72
   params[integer_params] <- lapply(params[integer_params], as.integer)
69 73
   do.call(new, c("BamTallyParam", params))  
70 74
 }
... ...
@@ -184,7 +184,8 @@ normArgTRUEorFALSE <- function(x) {
184 184
                          min_depth = 0L, variant_strand = 0L,
185 185
                          ignore_query_Ns = FALSE,
186 186
                          indels = FALSE,
187
-                         blocksize = 1000L, verbosep = FALSE)
187
+                         blocksize = 1000L, verbosep = FALSE,
188
+                         include_soft_clips = 0L)
188 189
 {
189 190
   if (!is(bamreader, "GmapBamReader"))
190 191
     stop("'bamreader' must be a GmapBamReader")
... ...
@@ -221,7 +222,8 @@ normArgTRUEorFALSE <- function(x) {
221 222
         normArgTRUEorFALSE(ignore_query_Ns),
222 223
         normArgTRUEorFALSE(indels),
223 224
         normArgSingleInteger(blocksize),
224
-        normArgTRUEorFALSE(verbosep))
225
+        normArgTRUEorFALSE(verbosep),
226
+        normArgSingleInteger(include_soft_clips))
225 227
 }
226 228
 
227 229
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
... ...
@@ -21,7 +21,7 @@
21 21
                 primary_only = FALSE, ignore_duplicates = FALSE,
22 22
                 min_depth = 0L, variant_strand = 0L,
23 23
                 ignore_query_Ns = FALSE,
24
-                indels = FALSE)
24
+                indels = FALSE, include_soft_clips = 0L)
25 25
 }
26 26
 \arguments{
27 27
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -57,6 +57,13 @@
57 57
     always spans the sequence in \code{ref}; so e.g. a deletion extends
58 58
     one nt upstream of the actual deleted sequence.
59 59
   }
60
+  \item{include_soft_clips}{Maximum length of soft clips that are
61
+    considered for counting. Soft-clipping is often useful (for GSNAP at
62
+    least) during alignment, and it should be preserved in the
63
+    output. However, soft clipping can preferentially occur in regions
64
+    of discordance with the reference, and if those clipped regions are
65
+    ignored during counting, the allele fraction is misestimated.
66
+  }
60 67
 }
61 68
 
62 69
 \seealso{
... ...
@@ -7,7 +7,7 @@
7 7
 static const R_CallMethodDef callMethods[] = {
8 8
 
9 9
   /* bamtally.c */
10
-  CALLMETHOD_DEF(R_Bamtally_iit, 19),
10
+  CALLMETHOD_DEF(R_Bamtally_iit, 20),
11 11
   CALLMETHOD_DEF(R_tally_iit_parse, 5),
12 12
   
13 13
   /* bamreader.c */
... ...
@@ -23,7 +23,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
23 23
                 SEXP ignore_query_Ns_p_R,
24 24
                 SEXP print_indels_p_R,
25 25
                 SEXP blocksize_R, 
26
-                SEXP verbosep_R)
26
+                SEXP verbosep_R, SEXP max_softclip_R)
27 27
 {
28 28
   Bamreader_T bamreader = (Bamreader_T) R_ExternalPtrAddr(bamreader_R);
29 29
   const char *genome_dir =
... ...
@@ -46,6 +46,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
46 46
   bool print_indels_p = asLogical(print_indels_p_R);
47 47
   int blocksize = asInteger(blocksize_R);
48 48
   int verbosep = asLogical(verbosep_R);
49
+  int max_softclip = asInteger(max_softclip_R);
49 50
 
50 51
   Genome_T genome = createGenome(genome_dir, db);
51 52
   IIT_T chromosome_iit = readChromosomeIIT(genome_dir, db);
... ...
@@ -71,7 +72,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
71 72
                                  ignore_duplicates_p,
72 73
                                  min_depth, variant_strands, ignore_query_Ns_p,
73 74
                                  print_indels_p, blocksize, verbosep,
74
-                                 /*readlevel_p*/false);
75
+                                 /*readlevel_p*/false, max_softclip);
75 76
   IIT_free(&chromosome_iit);
76 77
   Genome_free(&genome);
77 78
 
... ...
@@ -20,7 +20,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
20 20
                 SEXP ignore_query_Ns_p,
21 21
                 SEXP print_indels_p_R,
22 22
                 SEXP blocksize_R, 
23
-                SEXP verbosep_R);
23
+                SEXP verbosep_R, SEXP max_softclip_R);
24 24
 
25 25
 SEXP
26 26
 R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R,
... ...
@@ -12,8 +12,8 @@ without warranty of any kind.
12 12
 Basic Installation
13 13
 ==================
14 14
 
15
-   Briefly, the shell commands `./configure; make; make install' should
16
-configure, build, and install this package.  The following
15
+   Briefly, the shell command `./configure && make && make install'
16
+should configure, build, and install this package.  The following
17 17
 more-detailed instructions are generic; see the `README' file for
18 18
 instructions specific to this package.  Some packages provide this
19 19
 `INSTALL' file but do not implement all of the features documented
... ...
@@ -1,4 +1,4 @@
1
-# Makefile.in generated by automake 1.14 from Makefile.am.
1
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
2 2
 # @configure_input@
3 3
 
4 4
 # Copyright (C) 1994-2013 Free Software Foundation, Inc.
... ...
@@ -79,13 +79,13 @@ build_triplet = @build@
79 79
 host_triplet = @host@
80 80
 target_triplet = @target@
81 81
 subdir = .
82
-DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \
82
+DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog COPYING \
83 83
 	$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
84 84
 	$(top_srcdir)/configure $(am__configure_deps) \
85
-	$(top_srcdir)/lib/gstruct.pc.in COPYING config/compile \
86
-	config/config.guess config/config.sub config/depcomp \
87
-	config/install-sh config/missing config/ltmain.sh \
88
-	$(top_srcdir)/config/compile $(top_srcdir)/config/config.guess \
85
+	$(top_srcdir)/lib/gstruct.pc.in config/compile \
86
+	config/config.guess config/config.sub config/install-sh \
87
+	config/missing config/ltmain.sh $(top_srcdir)/config/compile \
88
+	$(top_srcdir)/config/config.guess \
89 89
 	$(top_srcdir)/config/config.sub \
90 90
 	$(top_srcdir)/config/install-sh $(top_srcdir)/config/ltmain.sh \
91 91
 	$(top_srcdir)/config/missing
... ...
@@ -259,7 +259,6 @@ LIBTOOL = @LIBTOOL@
259 259
 LIPO = @LIPO@
260 260
 LN_S = @LN_S@
261 261
 LTLIBOBJS = @LTLIBOBJS@
262
-MAINT = @MAINT@
263 262
 MAKEINFO = @MAKEINFO@
264 263
 MANIFEST_TOOL = @MANIFEST_TOOL@
265 264
 MKDIR_P = @MKDIR_P@
... ...
@@ -357,7 +356,7 @@ all: all-recursive
357 356
 .SUFFIXES:
358 357
 am--refresh: Makefile
359 358
 	@:
360
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
359
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
361 360
 	@for dep in $?; do \
362 361
 	  case '$(am__configure_deps)' in \
363 362
 	    *$$dep*) \
... ...
@@ -384,9 +383,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
384 383
 $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
385 384
 	$(SHELL) ./config.status --recheck
386 385
 
387
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
386
+$(top_srcdir)/configure:  $(am__configure_deps)
388 387
 	$(am__cd) $(srcdir) && $(AUTOCONF)
389
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
388
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
390 389
 	$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
391 390
 $(am__aclocal_m4_deps):
392 391
 lib/gstruct-${LIBGSTRUCT_API_VERSION}.pc: $(top_builddir)/config.status $(top_srcdir)/lib/gstruct.pc.in
... ...
@@ -642,9 +641,10 @@ distcheck: dist
642 641
 	  && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
643 642
 	  && am__cwd=`pwd` \
644 643
 	  && $(am__cd) $(distdir)/_build \
645
-	  && ../configure --srcdir=.. --prefix="$$dc_install_base" \
644
+	  && ../configure \
646 645
 	    $(AM_DISTCHECK_CONFIGURE_FLAGS) \
647 646
 	    $(DISTCHECK_CONFIGURE_FLAGS) \
647
+	    --srcdir=.. --prefix="$$dc_install_base" \
648 648
 	  && $(MAKE) $(AM_MAKEFLAGS) \
649 649
 	  && $(MAKE) $(AM_MAKEFLAGS) dvi \
650 650
 	  && $(MAKE) $(AM_MAKEFLAGS) check \
... ...
@@ -1,4 +1,4 @@
1
-# generated automatically by aclocal 1.14 -*- Autoconf -*-
1
+# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
2 2
 
3 3
 # Copyright (C) 1996-2013 Free Software Foundation, Inc.
4 4
 
... ...
@@ -35,7 +35,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
35 35
 [am__api_version='1.14'
36 36
 dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
37 37
 dnl require some minimum version.  Point them to the right macro.
38
-m4_if([$1], [1.14], [],
38
+m4_if([$1], [1.14.1], [],
39 39
       [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
40 40
 ])
41 41
 
... ...
@@ -51,7 +51,7 @@ m4_define([_AM_AUTOCONF_VERSION], [])
51 51
 # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
52 52
 # This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
53 53
 AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
54
-[AM_AUTOMAKE_VERSION([1.14])dnl
54
+[AM_AUTOMAKE_VERSION([1.14.1])dnl
55 55
 m4_ifndef([AC_AUTOCONF_VERSION],
56 56
   [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
57 57
 _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
... ...
@@ -642,42 +642,6 @@ fi
642 642
 rmdir .tst 2>/dev/null
643 643
 AC_SUBST([am__leading_dot])])
644 644
 
645
-# Add --enable-maintainer-mode option to configure.         -*- Autoconf -*-
646
-# From Jim Meyering
647
-
648
-# Copyright (C) 1996-2013 Free Software Foundation, Inc.
649
-#
650
-# This file is free software; the Free Software Foundation
651
-# gives unlimited permission to copy and/or distribute it,
652
-# with or without modifications, as long as this notice is preserved.
653
-
654
-# AM_MAINTAINER_MODE([DEFAULT-MODE])
655
-# ----------------------------------
656
-# Control maintainer-specific portions of Makefiles.
657
-# Default is to disable them, unless 'enable' is passed literally.
658
-# For symmetry, 'disable' may be passed as well.  Anyway, the user
659
-# can override the default with the --enable/--disable switch.
660
-AC_DEFUN([AM_MAINTAINER_MODE],
661
-[m4_case(m4_default([$1], [disable]),
662
-       [enable], [m4_define([am_maintainer_other], [disable])],
663
-       [disable], [m4_define([am_maintainer_other], [enable])],
664
-       [m4_define([am_maintainer_other], [enable])
665
-        m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
666
-AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
667
-  dnl maintainer-mode's default is 'disable' unless 'enable' is passed
668
-  AC_ARG_ENABLE([maintainer-mode],
669
-    [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode],
670
-      am_maintainer_other[ make rules and dependencies not useful
671
-      (and sometimes confusing) to the casual installer])],
672
-    [USE_MAINTAINER_MODE=$enableval],
673
-    [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes]))
674
-  AC_MSG_RESULT([$USE_MAINTAINER_MODE])
675
-  AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes])
676
-  MAINT=$MAINTAINER_MODE_TRUE
677
-  AC_SUBST([MAINT])dnl
678
-]
679
-)
680
-
681 645
 # Check to see how 'make' treats includes.	            -*- Autoconf -*-
682 646
 
683 647
 # Copyright (C) 2001-2013 Free Software Foundation, Inc.
... ...
@@ -1,8 +1,8 @@
1 1
 #! /bin/sh
2 2
 # Attempt to guess a canonical system name.
3
-#   Copyright 1992-2013 Free Software Foundation, Inc.
3
+#   Copyright 1992-2014 Free Software Foundation, Inc.
4 4
 
5
-timestamp='2013-05-16'
5
+timestamp='2014-02-12'
6 6
 
7 7
 # This file is free software; you can redistribute it and/or modify it
8 8
 # under the terms of the GNU General Public License as published by
... ...
@@ -50,7 +50,7 @@ version="\
50 50
 GNU config.guess ($timestamp)
51 51
 
52 52
 Originally written by Per Bothner.
53
-Copyright 1992-2013 Free Software Foundation, Inc.
53
+Copyright 1992-2014 Free Software Foundation, Inc.
54 54
 
55 55
 This is free software; see the source for copying conditions.  There is NO
56 56
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
... ...
@@ -149,7 +149,7 @@ Linux|GNU|GNU/*)
149 149
 	LIBC=gnu
150 150
 	#endif
151 151
 	EOF
152
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
152
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
153 153
 	;;
154 154
 esac
155 155
 
... ...
@@ -995,6 +995,12 @@ EOF
995 995
     ppc:Linux:*:*)
996 996
 	echo powerpc-unknown-linux-${LIBC}
997 997
 	exit ;;
998
+    ppc64le:Linux:*:*)
999
+	echo powerpc64le-unknown-linux-${LIBC}
1000
+	exit ;;
1001
+    ppcle:Linux:*:*)
1002
+	echo powerpcle-unknown-linux-${LIBC}
1003
+	exit ;;
998 1004
     s390:Linux:*:* | s390x:Linux:*:*)
999 1005
 	echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
1000 1006
 	exit ;;
... ...
@@ -1014,7 +1020,18 @@ EOF
1014 1020
 	echo ${UNAME_MACHINE}-dec-linux-${LIBC}
1015 1021
 	exit ;;
1016 1022
     x86_64:Linux:*:*)
1017
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
1023
+	eval $set_cc_for_build
1024
+	X86_64_ABI=
1025
+	# If there is a compiler, see if it is configured for 32-bit objects.
1026
+	if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
1027
+	    if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \
1028
+		(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
1029
+		grep IS_X32 >/dev/null
1030
+	    then
1031
+		X86_64_ABI=x32
1032
+	    fi
1033
+	fi
1034
+	echo x86_64-unknown-linux-gnu${X86_64_ABI}
1018 1035
 	exit ;;
1019 1036
     xtensa*:Linux:*:*)
1020 1037
 	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
... ...
@@ -1254,16 +1271,26 @@ EOF
1254 1271
 	if test "$UNAME_PROCESSOR" = unknown ; then
1255 1272
 	    UNAME_PROCESSOR=powerpc
1256 1273
 	fi
1257
-	if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
1258
-	    if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
1259
-		(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
1260
-		grep IS_64BIT_ARCH >/dev/null
1261
-	    then
1262
-		case $UNAME_PROCESSOR in
1263
-		    i386) UNAME_PROCESSOR=x86_64 ;;
1264
-		    powerpc) UNAME_PROCESSOR=powerpc64 ;;
1265
-		esac
1274
+	if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
1275
+	    if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
1276
+		if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
1277
+		    (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
1278
+		    grep IS_64BIT_ARCH >/dev/null
1279
+		then
1280
+		    case $UNAME_PROCESSOR in
1281
+			i386) UNAME_PROCESSOR=x86_64 ;;
1282
+			powerpc) UNAME_PROCESSOR=powerpc64 ;;
1283
+		    esac
1284
+		fi
1266 1285
 	    fi
1286
+	elif test "$UNAME_PROCESSOR" = i386 ; then
1287
+	    # Avoid executing cc on OS X 10.9, as it ships with a stub
1288
+	    # that puts up a graphical alert prompting to install
1289
+	    # developer tools.  Any system running Mac OS X 10.7 or
1290
+	    # later (Darwin 11 and later) is required to have a 64-bit
1291
+	    # processor. This is not true of the ARM version of Darwin
1292
+	    # that Apple uses in portable devices.
1293
+	    UNAME_PROCESSOR=x86_64
1267 1294
 	fi
1268 1295
 	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
1269 1296
 	exit ;;
... ...
@@ -1355,154 +1382,6 @@ EOF
1355 1382
 	exit ;;
1356 1383
 esac
1357 1384
 
1358
-eval $set_cc_for_build
1359
-cat >$dummy.c <<EOF
1360
-#ifdef _SEQUENT_
1361
-# include <sys/types.h>
1362
-# include <sys/utsname.h>
1363
-#endif
1364
-main ()
1365
-{
1366
-#if defined (sony)
1367
-#if defined (MIPSEB)
1368
-  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
1369
-     I don't know....  */
1370
-  printf ("mips-sony-bsd\n"); exit (0);
1371
-#else
1372
-#include <sys/param.h>
1373
-  printf ("m68k-sony-newsos%s\n",
1374
-#ifdef NEWSOS4
1375
-	"4"
1376
-#else
1377
-	""
1378
-#endif
1379
-	); exit (0);
1380
-#endif
1381
-#endif
1382
-
1383
-#if defined (__arm) && defined (__acorn) && defined (__unix)
1384
-  printf ("arm-acorn-riscix\n"); exit (0);
1385
-#endif
1386
-
1387
-#if defined (hp300) && !defined (hpux)
1388
-  printf ("m68k-hp-bsd\n"); exit (0);
1389
-#endif
1390
-
1391
-#if defined (NeXT)
1392
-#if !defined (__ARCHITECTURE__)
1393
-#define __ARCHITECTURE__ "m68k"
1394
-#endif
1395
-  int version;
1396
-  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
1397
-  if (version < 4)
1398
-    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
1399
-  else
1400
-    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
1401
-  exit (0);
1402
-#endif
1403
-
1404
-#if defined (MULTIMAX) || defined (n16)
1405
-#if defined (UMAXV)
1406
-  printf ("ns32k-encore-sysv\n"); exit (0);
1407
-#else
1408
-#if defined (CMU)
1409
-  printf ("ns32k-encore-mach\n"); exit (0);
1410
-#else
1411
-  printf ("ns32k-encore-bsd\n"); exit (0);
1412
-#endif
1413
-#endif
1414
-#endif
1415
-
1416
-#if defined (__386BSD__)
1417
-  printf ("i386-pc-bsd\n"); exit (0);
1418
-#endif
1419
-
1420
-#if defined (sequent)
1421
-#if defined (i386)
1422
-  printf ("i386-sequent-dynix\n"); exit (0);
1423
-#endif
1424
-#if defined (ns32000)
1425
-  printf ("ns32k-sequent-dynix\n"); exit (0);
1426
-#endif
1427
-#endif
1428
-
1429
-#if defined (_SEQUENT_)
1430
-    struct utsname un;
1431
-
1432
-    uname(&un);
1433
-
1434
-    if (strncmp(un.version, "V2", 2) == 0) {
1435
-	printf ("i386-sequent-ptx2\n"); exit (0);
1436
-    }
1437
-    if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
1438
-	printf ("i386-sequent-ptx1\n"); exit (0);
1439
-    }
1440
-    printf ("i386-sequent-ptx\n"); exit (0);
1441
-
1442
-#endif
1443
-
1444
-#if defined (vax)
1445
-# if !defined (ultrix)
1446
-#  include <sys/param.h>
1447
-#  if defined (BSD)
1448
-#   if BSD == 43
1449
-      printf ("vax-dec-bsd4.3\n"); exit (0);
1450
-#   else
1451
-#    if BSD == 199006
1452
-      printf ("vax-dec-bsd4.3reno\n"); exit (0);
1453
-#    else
1454
-      printf ("vax-dec-bsd\n"); exit (0);
1455
-#    endif
1456
-#   endif
1457
-#  else
1458
-    printf ("vax-dec-bsd\n"); exit (0);
1459
-#  endif
1460
-# else
1461
-    printf ("vax-dec-ultrix\n"); exit (0);
1462
-# endif
1463
-#endif
1464
-
1465
-#if defined (alliant) && defined (i860)
1466
-  printf ("i860-alliant-bsd\n"); exit (0);
1467
-#endif
1468
-
1469
-  exit (1);
1470
-}
1471
-EOF
1472
-
1473
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
1474
-	{ echo "$SYSTEM_NAME"; exit; }
1475
-
1476
-# Apollos put the system type in the environment.
1477
-
1478
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
1479
-
1480
-# Convex versions that predate uname can use getsysinfo(1)
1481
-
1482
-if [ -x /usr/convex/getsysinfo ]
1483
-then
1484
-    case `getsysinfo -f cpu_type` in
1485
-    c1*)
1486
-	echo c1-convex-bsd
1487
-	exit ;;
1488
-    c2*)
1489
-	if getsysinfo -f scalar_acc
1490
-	then echo c32-convex-bsd
1491
-	else echo c2-convex-bsd
1492
-	fi
1493
-	exit ;;
1494
-    c34*)
1495
-	echo c34-convex-bsd
1496
-	exit ;;
1497
-    c38*)
1498
-	echo c38-convex-bsd
1499
-	exit ;;
1500
-    c4*)
1501
-	echo c4-convex-bsd
1502
-	exit ;;
1503
-    esac
1504
-fi
1505
-
1506 1385
 cat >&2 <<EOF
1507 1386
 $0: unable to guess system type
1508 1387
 
... ...
@@ -1,8 +1,8 @@
1 1
 #! /bin/sh
2 2
 # Configuration validation subroutine script.
3
-#   Copyright 1992-2013 Free Software Foundation, Inc.
3
+#   Copyright 1992-2014 Free Software Foundation, Inc.
4 4
 
5
-timestamp='2013-04-24'
5
+timestamp='2014-01-01'
6 6
 
7 7
 # This file is free software; you can redistribute it and/or modify it
8 8
 # under the terms of the GNU General Public License as published by
... ...
@@ -68,7 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>."
68 68
 version="\
69 69
 GNU config.sub ($timestamp)
70 70
 
71
-Copyright 1992-2013 Free Software Foundation, Inc.
71
+Copyright 1992-2014 Free Software Foundation, Inc.
72 72
 
73 73
 This is free software; see the source for copying conditions.  There is NO
74 74
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
... ...
@@ -257,7 +257,7 @@ case $basic_machine in
257 257
 	| avr | avr32 \
258 258
 	| be32 | be64 \
259 259
 	| bfin \
260
-	| c4x | clipper \
260
+	| c4x | c8051 | clipper \
261 261
 	| d10v | d30v | dlx | dsp16xx | dvp \
262 262
 	| epiphany \
263 263
 	| fido | fr30 | frv \
... ...
@@ -265,6 +265,7 @@ case $basic_machine in
265 265
 	| hexagon \
266 266
 	| i370 | i860 | i960 | ia64 \
267 267
 	| ip2k | iq2000 \
268
+	| k1om \
268 269
 	| le32 | le64 \
269 270
 	| lm32 \
270 271
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
... ...
@@ -324,7 +325,7 @@ case $basic_machine in
324 325
 	c6x)
325 326
 		basic_machine=tic6x-unknown
326 327
 		;;
327
-	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
328
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
328 329
 		basic_machine=$basic_machine-unknown
329 330
 		os=-none
330 331
 		;;
... ...
@@ -372,7 +373,7 @@ case $basic_machine in
372 373
 	| be32-* | be64-* \
373 374
 	| bfin-* | bs2000-* \
374 375
 	| c[123]* | c30-* | [cjt]90-* | c4x-* \
375
-	| clipper-* | craynv-* | cydra-* \
376
+	| c8051-* | clipper-* | craynv-* | cydra-* \
376 377
 	| d10v-* | d30v-* | dlx-* \
377 378
 	| elxsi-* \
378 379
 	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
... ...
@@ -381,6 +382,7 @@ case $basic_machine in
381 382
 	| hexagon-* \
382 383
 	| i*86-* | i860-* | i960-* | ia64-* \
383 384
 	| ip2k-* | iq2000-* \
385
+	| k1om-* \
384 386
 	| le32-* | le64-* \
385 387
 	| lm32-* \
386 388
 	| m32c-* | m32r-* | m32rle-* \
... ...
@@ -794,7 +796,7 @@ case $basic_machine in
794 796
 		os=-mingw64
795 797
 		;;
796 798
 	mingw32)
797
-		basic_machine=i386-pc
799
+		basic_machine=i686-pc
798 800
 		os=-mingw32
799 801
 		;;
800 802
 	mingw32ce)
... ...
@@ -848,7 +850,7 @@ case $basic_machine in
848 850
 		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
849 851
 		;;
850 852
 	msys)
851
-		basic_machine=i386-pc
853
+		basic_machine=i686-pc
852 854
 		os=-msys
853 855
 		;;
854 856
 	mvs)
... ...
@@ -1564,6 +1566,9 @@ case $basic_machine in
1564 1566
 	c4x-* | tic4x-*)
1565 1567
 		os=-coff
1566 1568
 		;;
1569
+	c8051-*)
1570
+		os=-elf
1571
+		;;
1567 1572
 	hexagon-*)
1568 1573
 		os=-elf
1569 1574
 		;;
... ...
@@ -1,7 +1,7 @@
1 1
 #! /bin/sh
2 2
 # Common wrapper for a few potentially missing GNU programs.
3 3
 
4
-scriptversion=2012-06-26.16; # UTC
4
+scriptversion=2013-10-28.13; # UTC
5 5
 
6 6
 # Copyright (C) 1996-2013 Free Software Foundation, Inc.
7 7
 # Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
... ...
@@ -160,7 +160,7 @@ give_advice ()
160 160
       ;;
161 161
    autom4te*)
162 162
       echo "You might have modified some maintainer files that require"
163
-      echo "the 'automa4te' program to be rebuilt."
163
+      echo "the 'autom4te' program to be rebuilt."
164 164
       program_details 'autom4te'
165 165
       ;;
166 166
     bison*|yacc*)
... ...
@@ -684,9 +684,6 @@ MAINTAINER_FALSE
684 684
 MAINTAINER_TRUE
685 685
 FULLDIST_FALSE
686 686
 FULLDIST_TRUE
687
-MAINT
688
-MAINTAINER_MODE_FALSE
689
-MAINTAINER_MODE_TRUE
690 687
 AM_BACKSLASH
691 688
 AM_DEFAULT_VERBOSITY
692 689
 AM_DEFAULT_V
... ...
@@ -789,7 +786,6 @@ enable_option_checking
789 786
 enable_largefile
790 787
 enable_dependency_tracking
791 788
 enable_silent_rules
792
-enable_maintainer_mode
793 789
 enable_fulldist
794 790
 enable_maintainer
795 791
 enable_static_linking
... ...
@@ -1441,9 +1437,6 @@ Optional Features:
1441 1437
                           speeds up one-time build
1442 1438
   --enable-silent-rules   less verbose build output (undo: "make V=1")
1443 1439
   --disable-silent-rules  verbose build output (undo: "make V=0")
1444
-  --disable-maintainer-mode
1445
-                          disable make rules and dependencies not useful (and
1446
-                          sometimes confusing) to the casual installer
1447 1440
   --enable-fulldist       For use by program maintainer
1448 1441
   --enable-maintainer     For use by program maintainer
1449 1442
   --enable-static-linking Link binaries statically (default=no)
... ...
@@ -4538,29 +4531,6 @@ END
4538 4531
   fi
4539 4532
 fi
4540 4533
 
4541
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5
4542
-$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; }
4543
-    # Check whether --enable-maintainer-mode was given.
4544
-if test "${enable_maintainer_mode+set}" = set; then :
4545
-  enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval
4546
-else
4547
-  USE_MAINTAINER_MODE=yes
4548
-fi
4549
-
4550
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5
4551
-$as_echo "$USE_MAINTAINER_MODE" >&6; }
4552
-   if test $USE_MAINTAINER_MODE = yes; then
4553
-  MAINTAINER_MODE_TRUE=
4554
-  MAINTAINER_MODE_FALSE='#'
4555
-else
4556
-  MAINTAINER_MODE_TRUE='#'
4557
-  MAINTAINER_MODE_FALSE=
4558
-fi
4559
-
4560
-  MAINT=$MAINTAINER_MODE_TRUE
4561
-
4562
-
4563
-
4564 4534
  if test "x$enable_fulldist" = xyes; then
4565 4535
   FULLDIST_TRUE=
4566 4536
   FULLDIST_FALSE='#'
... ...
@@ -15497,10 +15467,6 @@ else
15497 15467
   am__EXEEXT_FALSE=
15498 15468
 fi
15499 15469
 
15500
-if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
15501
-  as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined.
15502
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
15503
-fi
15504 15470
 if test -z "${FULLDIST_TRUE}" && test -z "${FULLDIST_FALSE}"; then
15505 15471
   as_fn_error $? "conditional \"FULLDIST\" was never defined.
15506 15472
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
... ...
@@ -64,7 +64,6 @@ AC_ARG_PROGRAM
64 64
 
65 65
 #AM_INIT_AUTOMAKE([no-dependencies])
66 66
 AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
67
-AM_MAINTAINER_MODE([enable])
68 67
 
69 68
 AM_CONDITIONAL(FULLDIST,test "x$enable_fulldist" = xyes)
70 69
 AC_ARG_ENABLE([fulldist],
... ...
@@ -1,4 +1,4 @@
1
-# Makefile.in generated by automake 1.14 from Makefile.am.
1
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
2 2
 # @configure_input@
3 3
 
4 4
 # Copyright (C) 1994-2013 Free Software Foundation, Inc.
... ...
@@ -388,7 +388,6 @@ LIBTOOL = @LIBTOOL@
388 388
 LIPO = @LIPO@
389 389
 LN_S = @LN_S@
390 390
 LTLIBOBJS = @LTLIBOBJS@
391
-MAINT = @MAINT@
392 391
 MAKEINFO = @MAKEINFO@
393 392
 MANIFEST_TOOL = @MANIFEST_TOOL@
394 393
 MKDIR_P = @MKDIR_P@
... ...
@@ -733,7 +732,7 @@ all: config.h
733 732
 
734 733
 .SUFFIXES:
735 734
 .SUFFIXES: .c .lo .o .obj
736
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
735
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
737 736
 	@for dep in $?; do \
738 737
 	  case '$(am__configure_deps)' in \
739 738
 	    *$$dep*) \
... ...
@@ -758,9 +757,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
758 757
 $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
759 758
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
760 759
 
761
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
760
+$(top_srcdir)/configure:  $(am__configure_deps)
762 761
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
763
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
762
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
764 763
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
765 764
 $(am__aclocal_m4_deps):
766 765
 
... ...
@@ -771,7 +770,7 @@ config.h: stamp-h1
771 770
 stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
772 771
 	@rm -f stamp-h1
773 772
 	cd $(top_builddir) && $(SHELL) ./config.status src/config.h
774
-$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) 
773
+$(srcdir)/config.h.in:  $(am__configure_deps) 
775 774
 	($(am__cd) $(top_srcdir) && $(AUTOHEADER))
776 775
 	rm -f stamp-h1
777 776
 	touch $@
... ...
@@ -1,4 +1,4 @@
1
-static char rcsid[] = "$Id: bamread.c 114330 2013-11-07 19:55:05Z twu $";
1
+static char rcsid[] = "$Id: bamread.c 129099 2014-03-04 18:44:44Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
... ...
@@ -370,7 +370,7 @@ static void
370 370
 parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos,
371 371
 	    char **mate_chr, Genomicpos_T *mate_chrpos, int *insert_length,
372 372
 	    Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength,
373
-	    int *readlength, char **read, char **quality_string, char **read_group) {
373
+	    int *readlength, char **read, char **quality_string, char **read_group, bool *terminalp) {
374 374
   int type;
375 375
   int i;
376 376
   unsigned int length;
... ...
@@ -435,6 +435,15 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom
435 435
     *read_group = bam_aux2Z(ptr);
436 436
   }
437 437
 
438
+  ptr = bam_aux_get(this->bam,"PG");
439
+  if (ptr == NULL) {
440
+    *terminalp = false;
441
+  } else if (strcmp((char *) ptr,"T")) {
442
+    *terminalp = false;
443
+  } else {
444
+    *terminalp = true;
445
+  }
446
+
438 447
   /* Cigar */
439 448
   *cigarlength = 0;
440 449
   *cigartypes = (Intlist_T) NULL;
... ...
@@ -475,7 +484,8 @@ int
475 484
 Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos,
476 485
 		   char **mate_chr, Genomicpos_T *mate_chrpos,
477 486
 		   Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength,
478
-		   int *readlength, char **read, char **quality_string, char **read_group) {
487
+		   int *readlength, char **read, char **quality_string, char **read_group,
488
+		   bool *terminalp) {
479 489
   int insert_length;
480 490
 
481 491
 #ifndef HAVE_SAMTOOLS
... ...
@@ -488,7 +498,8 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr
488 498
       parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos),
489 499
 		 &(*mate_chr),&(*mate_chrpos),&insert_length,
490 500
 		 &(*cigartypes),&(*cigarlengths),&(*cigarlength),
491
-		 &(*readlength),&(*read),&(*quality_string),&(*read_group));
501
+		 &(*readlength),&(*read),&(*quality_string),&(*read_group),
502
+		 &(*terminalp));
492 503
       return 1;
493 504
     }
494 505
   } else {
... ...
@@ -498,7 +509,8 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr
498 509
       parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos),
499 510
 		 &(*mate_chr),&(*mate_chrpos),&insert_length,
500 511
 		 &(*cigartypes),&(*cigarlengths),&(*cigarlength),
501
-		 &(*readlength),&(*read),&(*quality_string),&(*read_group));
512
+		 &(*readlength),&(*read),&(*quality_string),&(*read_group),
513
+		 &(*terminalp));
502 514
       return 1;
503 515
     }
504 516
   }
... ...
@@ -525,6 +537,7 @@ struct Bamline_T {
525 537
   int readlength;
526 538
   char *read;
527 539
   char *quality_string;
540
+  bool terminalp;
528 541
   unsigned char *aux_start;
529 542
   unsigned char *aux_end;
530 543
 };
... ...
@@ -707,6 +720,11 @@ Bamline_quality_string (Bamline_T this) {
707 720
   return this->quality_string;
708 721
 }
709 722
 
723
+bool
724
+Bamline_terminalp (Bamline_T this) {
725
+  return this->terminalp;
726
+}
727
+
710 728
 
711 729
 static void
712 730
 aux_print (FILE *fp, unsigned char *s, unsigned char *aux_end) {
... ...
@@ -1211,7 +1229,8 @@ Bamline_new (char *acc, unsigned int flag, int nhits, bool good_unique_p, int ma
1211 1229
 	     char splice_strand, char *chr, Genomicpos_T chrpos_low,
1212 1230
 	     char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length,
1213 1231
 	     Intlist_T cigar_types, Uintlist_T cigar_npositions, int cigar_querylength, int readlength,
1214
-	     char *read, char *quality_string, unsigned char *aux_start, unsigned char *aux_end) {
1232
+	     char *read, char *quality_string, bool terminalp,
1233
+	     unsigned char *aux_start, unsigned char *aux_end) {
1215 1234
   Bamline_T new = (Bamline_T) MALLOC(sizeof(*new));
1216 1235
 
1217 1236
   new->acc = (char *) CALLOC(strlen(acc)+1,sizeof(char));
... ...
@@ -1258,6 +1277,8 @@ Bamline_new (char *acc, unsigned int flag, int nhits, bool good_unique_p, int ma
1258 1277
     strncpy(new->quality_string,quality_string,readlength);
1259 1278
   }
1260 1279
 
1280
+  new->terminalp = terminalp;
1281
+
1261 1282
   new->aux_start = aux_start;
1262 1283
   new->aux_end = aux_end;
1263 1284
 
... ...
@@ -1282,6 +1303,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
1282 1303
   char *read;
1283 1304
   char *quality_string;
1284 1305
   char *read_group;
1306
+  bool terminalp;
1285 1307
 
1286 1308
 #ifndef HAVE_SAMTOOLS
1287 1309
   return (Bamline_T) NULL;
... ...
@@ -1293,7 +1315,8 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
1293 1315
       parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
1294 1316
 		 &mate_chr,&mate_chrpos_low,&insert_length,
1295 1317
 		 &cigar_types,&cigarlengths,&cigar_querylength,
1296
-		 &readlength,&read,&quality_string,&read_group);
1318
+		 &readlength,&read,&quality_string,&read_group,
1319
+		 &terminalp);
1297 1320
       if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) {
1298 1321
 	debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group));
1299 1322
 	Intlist_free(&cigar_types);
... ...
@@ -1336,7 +1359,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
1336 1359
 	return Bamline_new(acc,flag,nhits,good_unique_p,mapq,splice_strand,chr,chrpos_low,
1337 1360
 			   mate_chr,mate_chrpos_low,insert_length,
1338 1361
 			   cigar_types,cigarlengths,cigar_querylength,
1339
-			   readlength,read,quality_string,
1362
+			   readlength,read,quality_string,terminalp,
1340 1363
 			   /*aux_start*/bam1_aux(this->bam),
1341 1364
 			   /*aux_end*/this->bam->data + this->bam->data_len);
1342 1365
       }
... ...
@@ -1348,7 +1371,8 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
1348 1371
       parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
1349 1372
 		 &mate_chr,&mate_chrpos_low,&insert_length,
1350 1373
 		 &cigar_types,&cigarlengths,&cigar_querylength,
1351
-		 &readlength,&read,&quality_string,&read_group);
1374
+		 &readlength,&read,&quality_string,&read_group,
1375
+		 &terminalp);
1352 1376
       if (mapq >= minimum_mapq &&
1353 1377
 	  (nhits = aux_nhits(this)) <= maximum_nhits &&
1354 1378
 	  (need_unique_p == false || nhits == 1) &&
... ...
@@ -1359,7 +1383,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go
1359 1383
 	return Bamline_new(acc,flag,nhits,good_unique_p,mapq,splice_strand,chr,chrpos_low,
1360 1384
 			   mate_chr,mate_chrpos_low,insert_length,
1361 1385
 			   cigar_types,cigarlengths,cigar_querylength,
1362
-			   readlength,read,quality_string,
1386
+			   readlength,read,quality_string,terminalp,
1363 1387
 			   /*aux_start*/bam1_aux(this->bam),
1364 1388
 			   /*aux_end*/this->bam->data + this->bam->data_len);
1365 1389
       } else {
... ...
@@ -1390,6 +1414,7 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d
1390 1414
   char *read;
1391 1415
   char *quality_string;
1392 1416
   char *read_group;
1417
+  bool terminalp;
1393 1418
 
1394 1419
   Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos);
1395 1420
   while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) {
... ...
@@ -1397,14 +1422,15 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d
1397 1422
     parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low,
1398 1423
 	       &mate_chr,&mate_chrpos_low,&insert_length,
1399 1424
 	       &cigar_types,&cigarlengths,&cigar_querylength,
1400
-	       &readlength,&read,&quality_string,&read_group);
1425
+	       &readlength,&read,&quality_string,&read_group,
1426
+	       &terminalp);
1401 1427
     splice_strand = aux_splice_strand(this);
1402 1428
     if (!strcmp(acc,desired_acc) && chrpos_low == desired_chrpos) {
1403 1429
       Bamread_unlimit_region(this);
1404 1430
       return Bamline_new(acc,flag,nhits,/*good_unique_p*/true,mapq,splice_strand,chr,chrpos_low,
1405 1431
 			 mate_chr,mate_chrpos_low,insert_length,
1406 1432
 			 cigar_types,cigarlengths,cigar_querylength,
1407
-			 readlength,read,quality_string,
1433
+			 readlength,read,quality_string,terminalp,
1408 1434
 			 /*aux_start*/bam1_aux(this->bam),
1409 1435
 			 /*aux_end*/this->bam->data + this->bam->data_len);
1410 1436
     }
... ...
@@ -1,4 +1,4 @@
1
-/* $Id: bamread.h 108654 2013-09-19 23:11:00Z twu $ */
1
+/* $Id: bamread.h 129099 2014-03-04 18:44:44Z twu $ */
2 2
 #ifndef BAMREAD_INCLUDED
3 3
 #define BAMREAD_INCLUDED
4 4
 /* Cannot use bool, since it appears to conflict with samtools */
... ...
@@ -43,7 +43,8 @@ extern int
43 43
 Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos,
44 44
 		   char **mate_chr, Genomicpos_T *mate_chrpos,
45 45
 		   Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength,
46
-		   int *readlength, char **read, char **quality_string, char **read_group);
46
+		   int *readlength, char **read, char **quality_string, char **read_group,
47
+		   bool *terminalp);
47 48
 
48 49
 typedef struct Bamline_T *Bamline_T;
49 50
 
... ...
@@ -91,6 +92,8 @@ extern char *
91 92
 Bamline_read (Bamline_T this);
92 93
 extern char *
93 94
 Bamline_quality_string (Bamline_T this);
95
+extern bool
96
+Bamline_terminalp (Bamline_T this);
94 97
 extern char *
95 98
 Bamline_read_group (Bamline_T this);
96 99
 extern void
... ...
@@ -1,4 +1,4 @@
1
-static char rcsid[] = "$Id: bamtally.c 123611 2014-01-15 21:22:04Z twu $";
1
+static char rcsid[] = "$Id: bamtally.c 129100 2014-03-04 18:45:02Z twu $";
2 2
 #ifdef HAVE_CONFIG_H
3 3
 #include <config.h>
4 4
 #endif
... ...
@@ -69,11 +69,11 @@ static bool totals_only_p = false;
69 69
 #endif
70 70
 
71 71
 
72
+
72 73
 #define T Tally_T
73 74
 typedef struct T *T;
74 75
 
75 76
 
76
-
77 77
 typedef enum {FIRST, SECOND} Pairend_T;
78 78
 
79 79
 
... ...
@@ -1525,6 +1525,7 @@ make_mismatches_unique_signed (List_T mismatches) {
1525 1525
   for (ptr = mismatches; ptr != NULL; ptr = List_next(ptr)) {
1526 1526
     mismatch = (Mismatch_T) List_head(ptr);
1527 1527
     if ((mismatch0 = find_mismatch_nt(unique_mismatches,mismatch->nt)) != NULL) {
1528
+      mismatch0->count += mismatch->count;
1528 1529
       if (mismatch->shift > 0) {
1529 1530
 	mismatch0->count_plus += mismatch->count;
1530 1531
       } else {
... ...
@@ -1538,6 +1539,7 @@ make_mismatches_unique_signed (List_T mismatches) {
1538 1539
       mismatch0->shift += 1; /* Used here as nshifts */
1539 1540
     } else {
1540 1541
       mismatch0 = Mismatch_new(mismatch->nt,/*shift, used here as nshifts*/1,/*mapq*/0,' ');
1542
+      mismatch0->count = mismatch->count;
1541 1543
       if (mismatch->shift > 0) {
1542 1544
 	mismatch0->count_plus = mismatch->count;
1543 1545
 	mismatch0->count_minus = 0;
... ...
@@ -1791,7 +1793,6 @@ print_block (T *block_tallies, Genomicpos_T blockstart, Genomicpos_T blockptr,
1791 1793
 
1792 1794
 
1793 1795
       /* Handle mismatches */
1794
-
1795 1796
       if (pass_variant_filter_p(this->nmatches,this->mismatches_byshift,
1796 1797
 				min_depth,variant_strands) == false) {
1797 1798
 	if (blockp == true) {
... ...
@@ -1983,7 +1984,7 @@ print_block (T *block_tallies, Genomicpos_T blockstart, Genomicpos_T blockptr,
1983 1984
 	  for (i = 0; i < List_length(unique_mismatches_byshift); i++) {
1984 1985
 	    mismatch0 = mm_array[i];
1985 1986
 	    if (signed_counts_p == false) {
1986
-	      printf(" %c%ld",mismatch0->nt,mismatch0->count_plus+mismatch0->count_minus);
1987
+	      printf(" %c%ld",mismatch0->nt,mismatch0->count);
1987 1988
 	    } else {
1988 1989
 	      printf(" %c%ld|%ld",mismatch0->nt,mismatch0->count_plus,mismatch0->count_minus);
1989 1990
 	    }
... ...
@@ -3009,6 +3010,7 @@ revise_position (char querynt, char genomicnt, int mapq, int quality, int signed
3009 3010
     /* Skip query N's */
3010 3011
 
3011 3012
   } else {
3013
+
3012 3014
     if ((mismatch = find_mismatch_byshift(this->mismatches_byshift,toupper(querynt),signed_shift)) != NULL) {
3013 3015
       mismatch->count += 1;
3014 3016
     } else {
... ...
@@ -3083,10 +3085,10 @@ average (char *quality_scores, int n) {
3083 3085
 static void
3084 3086
 revise_read (T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend, Genomicpos_T chrpos_low,
3085 3087
 	     unsigned int flag, Intlist_T types, Uintlist_T npositions, int cigar_querylength,
3086
-	     char *shortread, int mapq, char *quality_string, Genomicpos_T alloc_low,
3087
-	     int *quality_counts_match, int *quality_counts_mismatch,
3088
+	     char *shortread, int mapq, char *quality_string, bool terminalp,
3089
+	     Genomicpos_T alloc_low, int *quality_counts_match, int *quality_counts_mismatch,
3088 3090
 	     Genome_T genome, Genomicpos_T chroffset, bool ignore_query_Ns_p,
3089
-	     bool print_indels_p, bool readlevel_p) {
3091
+	     bool print_indels_p, bool readlevel_p, int max_softclip) {
3090 3092
   T this, right;
3091 3093
   int alloci;
3092 3094
   int shift, signed_shift;
... ...
@@ -3113,12 +3115,43 @@ revise_read (T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend, Genom
3113 3115
 
3114 3116
   pos = chrpos_low - 1U;		/* Bamread reports chrpos as 1-based */
3115 3117
 
3118
+  if (terminalp == false) {
3119
+    /* Don't handle soft clip for terminal alignments */
3120
+    max_softclip = 0;
3121
+  }
3122
+
3116 3123
   p = shortread;
3117 3124
   r = quality_string;
3125
+  if (max_softclip > 0 && types != NULL) {
3126
+    if (Intlist_head(types) == 'S') {
3127
+      /* Revise pos, so we handle the initial soft clip */
3128
+      mlength = Uintlist_head(npositions);
3129
+      if (pos < mlength) {
3130
+	/* Make sure initial soft clip does not extend past beginning of chromosome */
3131
+	pos = 0U;
3132
+	p += (mlength - pos);
3133
+	r += (mlength - pos);
3134
+	Uintlist_head_set(npositions,pos);
3135
+      } else {
3136
+	pos -= mlength;
3137
+      }
3138
+
3139
+      mlength = Uintlist_head(npositions);
3140
+      if (mlength > max_softclip) {
3141
+	/* Make sure initial soft clip does not extend past max_softclip */
3142
+	fprintf(stderr,"Read has initial soft clip %d greater than max_softclip %d\n",mlength,max_softclip);
3143
+	pos += (mlength - max_softclip);
3144
+	p += (mlength - max_softclip);
3145
+	r += (mlength - max_softclip);
3146
+	Uintlist_head_set(npositions,max_softclip);
3147
+      }
3148
+    }
3149
+  }
3150
+
3118 3151
   for (a = types, b = npositions; a != NULL; a = Intlist_next(a), b = Uintlist_next(b)) {
3119 3152
     type = Intlist_head(a);
3120 3153
     mlength = Uintlist_head(b);
3121
-    if (type == 'S') {
3154
+    if (type == 'S' && max_softclip == 0) {
3122 3155
       /* pos += mlength; -- SAM assumes genome coordinates are of clipped region */
3123 3156
       p += mlength;
3124 3157
       r += mlength;
... ...
@@ -3187,7 +3220,7 @@ revise_read (T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend, Genom
3187 3220
 
3188 3221
       pos += mlength;
3189 3222
 
3190
-    } else if (type == 'M') {
3223
+    } else if (type == 'M' || (type == 'S' && max_softclip > 0)) {
3191 3224
       if (0 /* mlength < min_mlength */) {
3192 3225
 	p += mlength;
3193 3226
 	r += mlength;
... ...
@@ -3195,6 +3228,12 @@ revise_read (T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend, Genom
3195 3228
 	shift += ((strand == '+') ? +mlength : -mlength);
3196 3229
 
3197 3230
       } else {
3231
+	if (type == 'S' && mlength > max_softclip) {
3232
+	  /* Must be final softclip, because we handled initial one already */
3233
+	  fprintf(stderr,"Read has final soft clip %d greater than max_softclip %d\n",mlength,max_softclip);
3234
+	  mlength = max_softclip;
3235
+	}
3236
+
3198 3237
 	debug1(printf("Genomic pos is %u + %u = %u\n",chroffset,pos,chroffset+pos));
3199 3238
 	if (genome == NULL) {
3200 3239
 	  q = &(*p);
... ...
@@ -3287,10 +3326,10 @@ static void
3287 3326
 revise_read_lh (T *alloc_tallies_low, T *alloc_tallies_high, Genomicpos_T chrstart, Genomicpos_T chrend,
3288 3327
 		bool lowend_p, Genomicpos_T chrpos_low,
3289 3328
 		unsigned int flag, Intlist_T types, Uintlist_T npositions, int cigar_querylength,
3290
-		char *shortread, int mapq, char *quality_string, Genomicpos_T alloc_low,
3291
-		int *quality_counts_match, int *quality_counts_mismatch,
3329
+		char *shortread, int mapq, char *quality_string, bool terminalp,
3330
+		Genomicpos_T alloc_low, int *quality_counts_match, int *quality_counts_mismatch,
3292 3331
 		Genome_T genome, Genomicpos_T chroffset, bool ignore_query_Ns_p, bool print_indels_p,
3293
-		bool readlevel_p) {
3332
+		bool readlevel_p, int max_softclip) {
3294 3333
   T this, right;
3295 3334
   int alloci;
3296 3335
   int shift, signed_shift;
... ...
@@ -3317,12 +3356,43 @@ revise_read_lh (T *alloc_tallies_low, T *alloc_tallies_high, Genomicpos_T chrsta
3317 3356
 
3318 3357
   pos = chrpos_low - 1U;		/* Bamread reports chrpos as 1-based */
3319 3358
 
3359
+  if (terminalp == false) {
3360
+    /* Don't handle soft clip for terminal alignments */
3361
+    max_softclip = 0;
3362
+  }
3363
+
3320 3364
   p = shortread;
3321 3365
   r = quality_string;
3366
+  if (max_softclip > 0 && types != NULL) {
3367
+    if (Intlist_head(types) == 'S') {
3368
+      /* Revise pos, so we handle the initial soft clip */
3369
+      mlength = Uintlist_head(npositions);
3370
+      if (pos < mlength) {
3371
+	/* Make sure initial soft clip does not extend past beginning of chromosome */
3372
+	pos = 0U;
3373
+	p += (mlength - pos);
3374
+	r += (mlength - pos);
3375
+	Uintlist_head_set(npositions,pos);
3376
+      } else {
3377
+	pos -= mlength;
3378
+      }
3379
+
3380
+      mlength = Uintlist_head(npositions);
3381
+      if (mlength > max_softclip) {
3382
+	/* Make sure initial soft clip does not extend past max_softclip */
3383
+	fprintf(stderr,"Read has initial soft clip %d greater than max_softclip %d\n",mlength,max_softclip);
3384
+	pos += (mlength - max_softclip);
3385
+	p += (mlength - max_softclip);
3386
+	r += (mlength - max_softclip);
3387
+	Uintlist_head_set(npositions,max_softclip);
3388
+      }
3389
+    }
3390
+  }
3391
+
3322 3392
   for (a = types, b = npositions; a != NULL; a = Intlist_next(a), b = Uintlist_next(b)) {
3323 3393
     type = Intlist_head(a);
3324 3394
     mlength = Uintlist_head(b);
3325
-    if (type == 'S') {
3395
+    if (type == 'S' && max_softclip == 0) {
3326 3396
       /* pos += mlength; -- SAM assumes genome coordinates are of clipped region */
3327 3397
       p += mlength;
3328 3398
       r += mlength;
... ...
@@ -3389,7 +3459,7 @@ revise_read_lh (T *alloc_tallies_low, T *alloc_tallies_high, Genomicpos_T chrsta
3389 3459
 
3390 3460
       pos += mlength;
3391 3461
 
3392
-    } else if (type == 'M') {
3462
+    } else if (type == 'M' || (type == 'S' && max_softclip > 0)) {
3393 3463
       if (0 /* mlength < min_mlength */) {
3394 3464
 	p += mlength;
3395 3465
 	r += mlength;
... ...
@@ -3397,6 +3467,12 @@ revise_read_lh (T *alloc_tallies_low, T *alloc_tallies_high, Genomicpos_T chrsta
3397 3467
 	shift += ((strand == '+') ? +mlength : -mlength);
3398 3468
 
3399 3469
       } else {
3470
+	if (type == 'S' && mlength > max_softclip) {
3471
+	  /* Must be final softclip, because we handled initial one already */
3472
+	  fprintf(stderr,"Read has final soft clip %d greater than max_softclip %d\n",mlength,max_softclip);
3473
+	  mlength = max_softclip;
3474
+	}
3475
+
3400 3476
 	debug1(printf("Genomic pos is %u + %u = %u\n",chroffset,pos,chroffset+pos));
3401 3477
 	if (genome == NULL) {
3402 3478
 	  q = &(*p);
... ...
@@ -3511,7 +3587,7 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3511 3587
 	      bool genomic_diff_p, bool signed_counts_p, bool ignore_query_Ns_p,
3512 3588
 	      bool print_indels_p, bool print_totals_p, bool print_cycles_p,
3513 3589
 	      bool print_quality_scores_p, bool print_mapq_scores_p, bool want_genotypes_p,
3514
-	      bool verbosep, bool readlevel_p) {
3590
+	      bool verbosep, bool readlevel_p, int max_softclip) {
3515 3591
   long int grand_total = 0;
3516 3592
   T this;
3517 3593
   Genomicpos_T alloc_ptr, alloc_low, alloc_high, chrpos_low, chrpos_high, chrpos;
... ...
@@ -3527,8 +3603,8 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3527 3603
   
3528 3604
 
3529 3605
   /* Create tally at position N to store n_fromleft */
3530
-  alloc_tallies_alloc = (T *) CALLOC(alloclength+1,sizeof(T));
3531
-  for (i = 0; i < alloclength+1; i++) {
3606
+  alloc_tallies_alloc = (T *) CALLOC(alloclength + 2*max_softclip + 1,sizeof(T));
3607
+  for (i = 0; i < alloclength + 2*max_softclip + 1; i++) {
3532 3608
     alloc_tallies_alloc[i] = Tally_new();
3533 3609
   }
3534 3610
   alloc_tallies = &(alloc_tallies_alloc[0]);
... ...
@@ -3552,15 +3628,20 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3552 3628
   
3553 3629
   alloc_ptr = 0U;
3554 3630
   alloc_low = 0U;
3555
-  alloc_high = alloc_low + alloclength;
3631
+  alloc_high = alloc_low + alloclength + 2*max_softclip;
3556 3632
   goodp = false;
3557 3633
 
3634
+  blockstart = 0U;
3635
+  blockend = 0U;
3636
+  blockptr = 0U;
3637
+
3558 3638
   while (goodp == false &&
3559 3639
 	 (bamline = Bamread_next_bamline(bamreader,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits,
3560 3640
 					 need_unique_p,need_primary_p,ignore_duplicates_p,
3561 3641
 					 need_concordant_p)) != NULL) {
3562 3642
     /* chrpos_high = Samread_chrpos_high(types,npositions,chrpos_low); */
3563
-    debug0(printf(">%s:%u..%u ",chr,chrpos_low,chrpos_high));
3643
+    debug0(printf("\n"));
3644
+    debug0(printf(">%s:%u..%u ",printchr,Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)));
3564 3645
     debug0(Bamread_print_cigar(stdout,bamline));
3565 3646
     debug0(printf("\n"));
3566 3647
 
... ...
@@ -3573,34 +3654,31 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3573 3654
       chrpos_low = Bamline_chrpos_low(bamline);
3574 3655
       chrpos_high = Bamline_chrpos_high(bamline);
3575 3656
 
3576
-      alloc_low = chrpos_low;
3577
-      alloc_high = alloc_low + alloclength;
3578
-      alloc_ptr = chrpos_low;
3579
-
3580
-      blockstart = chrpos_low;
3581
-      blockend = blockstart + blocksize;
3582
-      blockptr = chrpos_low;
3657
+      alloc_low = (chrpos_low < max_softclip) ? 0U : (chrpos_low - max_softclip);
3658
+      alloc_high = alloc_low + alloclength + 2*max_softclip;
3659
+      alloc_ptr = alloc_low;
3583 3660
 
3584 3661
       debug0(printf("    initialize alloc_low %u, alloc_high = %u, blockstart = %u, blockend = %u\n",
3585 3662
 		   alloc_low,alloc_high,blockstart,blockend));
3586 3663
 
3587
-      if (chrpos_high >= alloc_high) {
3664
+      if (chrpos_high + max_softclip >= alloc_high) {
3588 3665
 	if (verbosep == true) {
3589 3666
 	  fprintf(stderr,"read %s at %s:%u..%u is longer than allocated buffer ending at %u => skipping\n",
3590 3667
 		  Bamline_acc(bamline),printchr,chrpos_low,chrpos_high,alloclength);
3591 3668
 	}
3592 3669
       } else {
3593
-	if (chrpos_high + 1U > alloc_ptr) {
3594
-	  alloc_ptr = chrpos_high + 1U;
3670
+	if (chrpos_high + max_softclip + 1U > alloc_ptr) {
3671
+	  alloc_ptr = chrpos_high + max_softclip + 1U;
3595 3672
 	  debug0(printf("    revising alloc_ptr to be %u\n",alloc_ptr));
3596 3673
 	}
3597 3674
 
3598 3675
 	revise_read(alloc_tallies,chrstart,chrend,chrpos_low,Bamline_flag(bamline),
3599 3676
 		    Bamline_cigar_types(bamline),Bamline_cigar_npositions(bamline),
3600 3677
 		    Bamline_cigar_querylength(bamline),Bamline_read(bamline),
3601
-		    Bamline_mapq(bamline),Bamline_quality_string(bamline),alloc_low,
3602
-		    quality_counts_match,quality_counts_mismatch,
3603
-		    genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p);
3678
+		    Bamline_mapq(bamline),Bamline_quality_string(bamline),
3679
+		    Bamline_terminalp(bamline),alloc_low,quality_counts_match,quality_counts_mismatch,
3680
+		    genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p,
3681
+		    max_softclip);
3604 3682
 
3605 3683
 	goodp = true;
3606 3684
       }
... ...
@@ -3616,7 +3694,8 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3616 3694
 
3617 3695
     debug0(printf("*  alloc: %u..%u..%u  block: %u..%u..%u  ",
3618 3696
 		  alloc_low,alloc_ptr,alloc_high,blockstart,blockptr,blockend));
3619
-    debug0(printf("%s:%u..%u ",printchr,chrpos_low,chrpos_high));
3697
+    debug0(printf("\n"));
3698
+    debug0(printf(">%s:%u..%u ",printchr,Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)));
3620 3699
     debug0(Bamread_print_cigar(stdout,bamline));
3621 3700
     debug0(printf("\n"));
3622 3701
     
... ...
@@ -3633,8 +3712,8 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3633 3712
       chrpos_low = Bamline_chrpos_low(bamline);
3634 3713
       chrpos_high = Bamline_chrpos_high(bamline);
3635 3714
 
3636
-      if (chrpos_low > alloc_ptr) {
3637
-	debug0(printf("Case 1: chrpos_low %u > alloc_ptr %u\n",chrpos_low,alloc_ptr));
3715
+      if (chrpos_low > alloc_ptr + max_softclip) {
3716
+	debug0(printf("Case 1: chrpos_low %u > alloc_ptr %u + max_softclip %d\n",chrpos_low,alloc_ptr,max_softclip));
3638 3717
 	debug0(printf("    transfer from alloc_low %u to alloc_ptr %u\n",alloc_low,alloc_ptr));
3639 3718
 	for (chrpos = alloc_low; chrpos < alloc_ptr; chrpos++) {
3640 3719
 	  alloci = chrpos - alloc_low;
... ...
@@ -3654,16 +3733,16 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3654 3733
 	  }
3655 3734
 	}
3656 3735
 
3657
-	debug0(printf("    reset alloc_low = chrpos_low\n"));
3658
-	alloc_low = chrpos_low;
3659
-	alloc_high = alloc_low + alloclength;
3660
-	alloc_tallies[alloclength]->n_fromleft_plus = 0;
3661
-	alloc_tallies[alloclength]->n_fromleft_minus = 0;
3736
+	debug0(printf("    reset alloc_low = chrpos_low - max_softclip\n"));
3737
+	alloc_low = (chrpos_low < max_softclip) ? 0U : chrpos_low - max_softclip;
3738
+	alloc_high = alloc_low + alloclength + 2*max_softclip;
3739
+	alloc_tallies[alloclength + 2*max_softclip]->n_fromleft_plus = 0;
3740
+	alloc_tallies[alloclength + 2*max_softclip]->n_fromleft_minus = 0;
3662 3741
 	
3663
-      } else if (chrpos_low > alloc_low) {
3664
-	debug0(printf("Case 2: chrpos_low %u > alloc_low %u\n",chrpos_low,alloc_low));
3742
+      } else if (chrpos_low > alloc_low + max_softclip) {
3743
+	debug0(printf("Case 2: chrpos_low %u > alloc_low %u + max_softclip %d\n",chrpos_low,alloc_low,max_softclip));
3665 3744
 	debug0(printf("    transfer from alloc_low %u up to chrpos_low %u\n",alloc_low,chrpos_low));
3666
-	for (chrpos = alloc_low; chrpos < chrpos_low; chrpos++) {
3745
+	for (chrpos = alloc_low; chrpos + max_softclip < chrpos_low; chrpos++) {
3667 3746
 	  alloci = chrpos - alloc_low;
3668 3747
 	  this = alloc_tallies[alloci];
3669 3748
 	  if (this->nmatches > 0 || this->mismatches_byshift != NULL ||
... ...
@@ -3681,8 +3760,12 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3681 3760
 	  }
3682 3761
 	}
3683 3762
 
3684
-	delta = chrpos_low - alloc_low;
3685
-	debug0(printf("    shift alloc downward by %d so alloc_low = chrpos_low\n",delta));
3763
+	if (chrpos_low < max_softclip) {
3764
+	  delta = chrpos_low /* - alloc_low (should be 0) */;
3765
+	} else {
3766
+	  delta = chrpos_low - (alloc_low + max_softclip);
3767
+	}
3768
+	debug0(printf("    shift alloc upward by %d so alloc_low = chrpos_low - max_softclip\n",delta,max_softclip));
3686 3769
 	for (alloci = 0; alloci < (int) (alloc_ptr - alloc_low - delta); alloci++) {
3687 3770
 	  Tally_transfer(&(alloc_tallies[alloci]),&(alloc_tallies[alloci+delta]));
3688 3771
 	}
... ...
@@ -3692,35 +3775,36 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3692 3775
 	  debug1(printf("Resetting alloci %d\n",alloci));
3693 3776
 	  Tally_clear(alloc_tallies[alloci]);
3694 3777
 	}
3695
-	alloc_low = chrpos_low;
3696
-	alloc_high = alloc_low + alloclength;
3778
+	alloc_low = (chrpos_low < max_softclip) ? 0U : (chrpos_low - max_softclip);
3779
+	alloc_high = alloc_low + alloclength + 2*max_softclip;
3697 3780
 	
3698
-      } else if (chrpos_low == alloc_low) {
3699
-	debug0(printf("Case 3: chrpos_low %u == alloc_low %u\n",chrpos_low,alloc_low));
3781
+      } else if (chrpos_low == alloc_low + max_softclip) {
3782
+	debug0(printf("Case 3: chrpos_low %u == alloc_low %u + max_softclip %d\n",chrpos_low,alloc_low,max_softclip));
3700 3783
 
3701 3784
       } else {
3702
-	fprintf(stderr,"Sequences are not in order.  Got chrpos_low %u < alloc_low %u\n",
3703
-		chrpos_low,alloc_low);
3785
+	fprintf(stderr,"Sequences are not in order.  Got chrpos_low %u < alloc_low %u + max_softclip %d\n",
3786
+		chrpos_low,alloc_low,max_softclip);
3704 3787
 	abort();
3705 3788
       }
3706 3789
 
3707
-      if (chrpos_high >= alloc_high) {
3790
+      if (chrpos_high + max_softclip >= alloc_high) {
3708 3791
 	if (verbosep == true) {
3709 3792
 	  fprintf(stderr,"read %s at %s:%u..%u is longer than allocated buffer ending at %u => skipping\n",
3710 3793
 		  Bamline_acc(bamline),printchr,chrpos_low,chrpos_high,alloc_high);
3711 3794
 	}
3712 3795
       } else {
3713
-	if (chrpos_high + 1U > alloc_ptr) {
3714
-	  alloc_ptr = chrpos_high + 1U;
3796
+	if (chrpos_high + max_softclip + 1U > alloc_ptr) {
3797
+	  alloc_ptr = chrpos_high + max_softclip + 1U;
3715 3798
 	  debug0(printf("    revising alloc_ptr to be %u\n",alloc_ptr));
3716 3799
 	}
3717 3800
 
3718 3801
 	revise_read(alloc_tallies,chrstart,chrend,chrpos_low,Bamline_flag(bamline),
3719 3802
 		    Bamline_cigar_types(bamline),Bamline_cigar_npositions(bamline),
3720 3803
 		    Bamline_cigar_querylength(bamline),Bamline_read(bamline),
3721
-		    Bamline_mapq(bamline),Bamline_quality_string(bamline),alloc_low,
3722
-		    quality_counts_match,quality_counts_mismatch,
3723
-		    genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p);
3804
+		    Bamline_mapq(bamline),Bamline_quality_string(bamline),
3805
+		    Bamline_terminalp(bamline),alloc_low,quality_counts_match,quality_counts_mismatch,
3806
+		    genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p,
3807
+		    max_softclip);
3724 3808
       }
3725 3809
     }
3726 3810
 
... ...
@@ -3772,7 +3856,7 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3772 3856
   }
3773 3857
 
3774 3858
 
3775
-  for (i = 0; i < alloclength+1; i++) {
3859
+  for (i = 0; i < alloclength + 2*max_softclip + 1; i++) {
3776 3860
     Tally_free(&(alloc_tallies_alloc[i]));
3777 3861
   }
3778 3862
   FREE(alloc_tallies_alloc);
... ...
@@ -3788,15 +3872,16 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches,
3788 3872
 
3789 3873
 /* Assumes output_type is OUTPUT_TALLY */
3790 3874
 void
3791
-Bamtally_run_lh (long int *tally_matches_low, long int *tally_mismatches_low,
3792
-		 long int *tally_matches_high, long int *tally_mismatches_high,
3875
+Bamtally_run_lh (long int **tally_matches_low, long int **tally_mismatches_low,
3876
+		 long int **tally_matches_high, long int **tally_mismatches_high,
3793 3877
 		 int *quality_counts_match, int *quality_counts_mismatch,
3794 3878
 		 Bamreader_T bamreader, Genome_T genome, char *printchr,
3795 3879
 		 Genomicpos_T chroffset, Genomicpos_T chrstart, Genomicpos_T chrend, int alloclength,
3796 3880
 		 char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits,
3797 3881
 		 bool need_concordant_p, bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p,
3798 3882
 		 int blocksize, int quality_score_adj, int min_depth, int variant_strands,
3799
-		 bool genomic_diff_p, bool ignore_query_Ns_p, bool verbosep, bool readlevel_p) {
3883
+		 bool genomic_diff_p, bool ignore_query_Ns_p, bool verbosep, bool readlevel_p,
3884
+		 int max_softclip) {
3800 3885
   T this_low, this_high;
3801 3886
   Genomicpos_T alloc_ptr, alloc_low, alloc_high, chrpos_low, chrpos_high, chrpos;
3802 3887
   Genomicpos_T blockptr = 0U, blockstart = 0U, blockend = 0U;
... ...
@@ -3809,8 +3894,8 @@ Bamtally_run_lh (long int *tally_matches_low, long int *tally_mismatches_low,
3809 3894
   T *alloc_tallies_low_alloc, *alloc_tallies_high_alloc, *block_tallies_low_alloc, *block_tallies_high_alloc;
3810 3895
 
3811 3896
 
3812
-  alloc_tallies_low_alloc = (T *) CALLOC(alloclength+1,sizeof(T));
3813
-  for (i = 0; i < alloclength+1; i++) {
3897
+  alloc_tallies_low_alloc = (T *) CALLOC(alloclength + 2*max_softclip + 1,sizeof(T));
3898
+  for (i = 0; i < alloclength + 2*max_softclip + 1; i++) {
3814 3899
     alloc_tallies_low_alloc[i] = Tally_new();
3815 3900
   }
3816 3901
   alloc_tallies_low = &(alloc_tallies_low_alloc[0]);
...