git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88343 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -10,7 +10,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read |
10 | 10 |
methods to work with GMAP and GSNAP from within R. In addition, |
11 | 11 |
it provides methods to tally alignment results on a |
12 | 12 |
per-nucleotide basis using the bam_tally tool. |
13 |
-Version: 1.5.13 |
|
13 |
+Version: 1.5.14 |
|
14 | 14 |
Depends: R (>= 2.15.0), methods, GenomicRanges |
15 | 15 |
Imports: IRanges, Rsamtools (>= 1.7.4), rtracklayer (>= 1.17.15), |
16 | 16 |
GenomicFeatures, Biostrings, VariantAnnotation (>= 1.9.4), |
... | ... |
@@ -17,7 +17,8 @@ setClass("BamTallyParam", |
17 | 17 |
min_depth = "integer", |
18 | 18 |
variant_strand = "integer", |
19 | 19 |
ignore_query_Ns = "logical", |
20 |
- indels = "logical")) |
|
20 |
+ indels = "logical", |
|
21 |
+ include_soft_clips = "integer")) |
|
21 | 22 |
|
22 | 23 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
23 | 24 |
### Constructor |
... | ... |
@@ -38,7 +39,7 @@ BamTallyParam <- function(genome, which = GRanges(), |
38 | 39 |
primary_only = FALSE, ignore_duplicates = FALSE, |
39 | 40 |
min_depth = 0L, variant_strand = 0L, |
40 | 41 |
ignore_query_Ns = FALSE, |
41 |
- indels = FALSE) |
|
42 |
+ indels = FALSE, include_soft_clips = 0L) |
|
42 | 43 |
{ |
43 | 44 |
if (!is.null(desired_read_group) && !isSingleString(desired_read_group)) |
44 | 45 |
stop("'desired_read_group' must be NULL or a single, non-NA string") |
... | ... |
@@ -60,11 +61,14 @@ BamTallyParam <- function(genome, which = GRanges(), |
60 | 61 |
stop("ignore_query_Ns must be TRUE or FALSE") |
61 | 62 |
if (!isTRUEorFALSE(indels)) |
62 | 63 |
stop("indels must be TRUE or FALSE") |
64 |
+ if (include_soft_clips < 0) |
|
65 |
+ stop("include_soft_clips must be non-negative") |
|
63 | 66 |
args <- names(formals(sys.function())) |
64 | 67 |
params <- mget(args, environment()) |
65 | 68 |
params$genome <- as(genome, "GmapGenome") |
66 | 69 |
params$which <- normArgWhich(which, params$genome) |
67 |
- integer_params <- c("minimum_mapq", "min_depth", "variant_strand") |
|
70 |
+ integer_params <- c("minimum_mapq", "min_depth", "variant_strand", |
|
71 |
+ "include_soft_clips") |
|
68 | 72 |
params[integer_params] <- lapply(params[integer_params], as.integer) |
69 | 73 |
do.call(new, c("BamTallyParam", params)) |
70 | 74 |
} |
... | ... |
@@ -184,7 +184,8 @@ normArgTRUEorFALSE <- function(x) { |
184 | 184 |
min_depth = 0L, variant_strand = 0L, |
185 | 185 |
ignore_query_Ns = FALSE, |
186 | 186 |
indels = FALSE, |
187 |
- blocksize = 1000L, verbosep = FALSE) |
|
187 |
+ blocksize = 1000L, verbosep = FALSE, |
|
188 |
+ include_soft_clips = 0L) |
|
188 | 189 |
{ |
189 | 190 |
if (!is(bamreader, "GmapBamReader")) |
190 | 191 |
stop("'bamreader' must be a GmapBamReader") |
... | ... |
@@ -221,7 +222,8 @@ normArgTRUEorFALSE <- function(x) { |
221 | 222 |
normArgTRUEorFALSE(ignore_query_Ns), |
222 | 223 |
normArgTRUEorFALSE(indels), |
223 | 224 |
normArgSingleInteger(blocksize), |
224 |
- normArgTRUEorFALSE(verbosep)) |
|
225 |
+ normArgTRUEorFALSE(verbosep), |
|
226 |
+ normArgSingleInteger(include_soft_clips)) |
|
225 | 227 |
} |
226 | 228 |
|
227 | 229 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
... | ... |
@@ -21,7 +21,7 @@ |
21 | 21 |
primary_only = FALSE, ignore_duplicates = FALSE, |
22 | 22 |
min_depth = 0L, variant_strand = 0L, |
23 | 23 |
ignore_query_Ns = FALSE, |
24 |
- indels = FALSE) |
|
24 |
+ indels = FALSE, include_soft_clips = 0L) |
|
25 | 25 |
} |
26 | 26 |
\arguments{ |
27 | 27 |
\item{genome}{A \code{GmapGenome} object, or something coercible to one.} |
... | ... |
@@ -57,6 +57,13 @@ |
57 | 57 |
always spans the sequence in \code{ref}; so e.g. a deletion extends |
58 | 58 |
one nt upstream of the actual deleted sequence. |
59 | 59 |
} |
60 |
+ \item{include_soft_clips}{Maximum length of soft clips that are |
|
61 |
+ considered for counting. Soft-clipping is often useful (for GSNAP at |
|
62 |
+ least) during alignment, and it should be preserved in the |
|
63 |
+ output. However, soft clipping can preferentially occur in regions |
|
64 |
+ of discordance with the reference, and if those clipped regions are |
|
65 |
+ ignored during counting, the allele fraction is misestimated. |
|
66 |
+ } |
|
60 | 67 |
} |
61 | 68 |
|
62 | 69 |
\seealso{ |
... | ... |
@@ -23,7 +23,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
23 | 23 |
SEXP ignore_query_Ns_p_R, |
24 | 24 |
SEXP print_indels_p_R, |
25 | 25 |
SEXP blocksize_R, |
26 |
- SEXP verbosep_R) |
|
26 |
+ SEXP verbosep_R, SEXP max_softclip_R) |
|
27 | 27 |
{ |
28 | 28 |
Bamreader_T bamreader = (Bamreader_T) R_ExternalPtrAddr(bamreader_R); |
29 | 29 |
const char *genome_dir = |
... | ... |
@@ -46,6 +46,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
46 | 46 |
bool print_indels_p = asLogical(print_indels_p_R); |
47 | 47 |
int blocksize = asInteger(blocksize_R); |
48 | 48 |
int verbosep = asLogical(verbosep_R); |
49 |
+ int max_softclip = asInteger(max_softclip_R); |
|
49 | 50 |
|
50 | 51 |
Genome_T genome = createGenome(genome_dir, db); |
51 | 52 |
IIT_T chromosome_iit = readChromosomeIIT(genome_dir, db); |
... | ... |
@@ -71,7 +72,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
71 | 72 |
ignore_duplicates_p, |
72 | 73 |
min_depth, variant_strands, ignore_query_Ns_p, |
73 | 74 |
print_indels_p, blocksize, verbosep, |
74 |
- /*readlevel_p*/false); |
|
75 |
+ /*readlevel_p*/false, max_softclip); |
|
75 | 76 |
IIT_free(&chromosome_iit); |
76 | 77 |
Genome_free(&genome); |
77 | 78 |
|
... | ... |
@@ -20,7 +20,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
20 | 20 |
SEXP ignore_query_Ns_p, |
21 | 21 |
SEXP print_indels_p_R, |
22 | 22 |
SEXP blocksize_R, |
23 |
- SEXP verbosep_R); |
|
23 |
+ SEXP verbosep_R, SEXP max_softclip_R); |
|
24 | 24 |
|
25 | 25 |
SEXP |
26 | 26 |
R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R, |
... | ... |
@@ -12,8 +12,8 @@ without warranty of any kind. |
12 | 12 |
Basic Installation |
13 | 13 |
================== |
14 | 14 |
|
15 |
- Briefly, the shell commands `./configure; make; make install' should |
|
16 |
-configure, build, and install this package. The following |
|
15 |
+ Briefly, the shell command `./configure && make && make install' |
|
16 |
+should configure, build, and install this package. The following |
|
17 | 17 |
more-detailed instructions are generic; see the `README' file for |
18 | 18 |
instructions specific to this package. Some packages provide this |
19 | 19 |
`INSTALL' file but do not implement all of the features documented |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-# Makefile.in generated by automake 1.14 from Makefile.am. |
|
1 |
+# Makefile.in generated by automake 1.14.1 from Makefile.am. |
|
2 | 2 |
# @configure_input@ |
3 | 3 |
|
4 | 4 |
# Copyright (C) 1994-2013 Free Software Foundation, Inc. |
... | ... |
@@ -79,13 +79,13 @@ build_triplet = @build@ |
79 | 79 |
host_triplet = @host@ |
80 | 80 |
target_triplet = @target@ |
81 | 81 |
subdir = . |
82 |
-DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \ |
|
82 |
+DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog COPYING \ |
|
83 | 83 |
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \ |
84 | 84 |
$(top_srcdir)/configure $(am__configure_deps) \ |
85 |
- $(top_srcdir)/lib/gstruct.pc.in COPYING config/compile \ |
|
86 |
- config/config.guess config/config.sub config/depcomp \ |
|
87 |
- config/install-sh config/missing config/ltmain.sh \ |
|
88 |
- $(top_srcdir)/config/compile $(top_srcdir)/config/config.guess \ |
|
85 |
+ $(top_srcdir)/lib/gstruct.pc.in config/compile \ |
|
86 |
+ config/config.guess config/config.sub config/install-sh \ |
|
87 |
+ config/missing config/ltmain.sh $(top_srcdir)/config/compile \ |
|
88 |
+ $(top_srcdir)/config/config.guess \ |
|
89 | 89 |
$(top_srcdir)/config/config.sub \ |
90 | 90 |
$(top_srcdir)/config/install-sh $(top_srcdir)/config/ltmain.sh \ |
91 | 91 |
$(top_srcdir)/config/missing |
... | ... |
@@ -259,7 +259,6 @@ LIBTOOL = @LIBTOOL@ |
259 | 259 |
LIPO = @LIPO@ |
260 | 260 |
LN_S = @LN_S@ |
261 | 261 |
LTLIBOBJS = @LTLIBOBJS@ |
262 |
-MAINT = @MAINT@ |
|
263 | 262 |
MAKEINFO = @MAKEINFO@ |
264 | 263 |
MANIFEST_TOOL = @MANIFEST_TOOL@ |
265 | 264 |
MKDIR_P = @MKDIR_P@ |
... | ... |
@@ -357,7 +356,7 @@ all: all-recursive |
357 | 356 |
.SUFFIXES: |
358 | 357 |
am--refresh: Makefile |
359 | 358 |
@: |
360 |
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) |
|
359 |
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) |
|
361 | 360 |
@for dep in $?; do \ |
362 | 361 |
case '$(am__configure_deps)' in \ |
363 | 362 |
*$$dep*) \ |
... | ... |
@@ -384,9 +383,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status |
384 | 383 |
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
385 | 384 |
$(SHELL) ./config.status --recheck |
386 | 385 |
|
387 |
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) |
|
386 |
+$(top_srcdir)/configure: $(am__configure_deps) |
|
388 | 387 |
$(am__cd) $(srcdir) && $(AUTOCONF) |
389 |
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) |
|
388 |
+$(ACLOCAL_M4): $(am__aclocal_m4_deps) |
|
390 | 389 |
$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) |
391 | 390 |
$(am__aclocal_m4_deps): |
392 | 391 |
lib/gstruct-${LIBGSTRUCT_API_VERSION}.pc: $(top_builddir)/config.status $(top_srcdir)/lib/gstruct.pc.in |
... | ... |
@@ -642,9 +641,10 @@ distcheck: dist |
642 | 641 |
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ |
643 | 642 |
&& am__cwd=`pwd` \ |
644 | 643 |
&& $(am__cd) $(distdir)/_build \ |
645 |
- && ../configure --srcdir=.. --prefix="$$dc_install_base" \ |
|
644 |
+ && ../configure \ |
|
646 | 645 |
$(AM_DISTCHECK_CONFIGURE_FLAGS) \ |
647 | 646 |
$(DISTCHECK_CONFIGURE_FLAGS) \ |
647 |
+ --srcdir=.. --prefix="$$dc_install_base" \ |
|
648 | 648 |
&& $(MAKE) $(AM_MAKEFLAGS) \ |
649 | 649 |
&& $(MAKE) $(AM_MAKEFLAGS) dvi \ |
650 | 650 |
&& $(MAKE) $(AM_MAKEFLAGS) check \ |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-# generated automatically by aclocal 1.14 -*- Autoconf -*- |
|
1 |
+# generated automatically by aclocal 1.14.1 -*- Autoconf -*- |
|
2 | 2 |
|
3 | 3 |
# Copyright (C) 1996-2013 Free Software Foundation, Inc. |
4 | 4 |
|
... | ... |
@@ -35,7 +35,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION], |
35 | 35 |
[am__api_version='1.14' |
36 | 36 |
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to |
37 | 37 |
dnl require some minimum version. Point them to the right macro. |
38 |
-m4_if([$1], [1.14], [], |
|
38 |
+m4_if([$1], [1.14.1], [], |
|
39 | 39 |
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl |
40 | 40 |
]) |
41 | 41 |
|
... | ... |
@@ -51,7 +51,7 @@ m4_define([_AM_AUTOCONF_VERSION], []) |
51 | 51 |
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. |
52 | 52 |
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. |
53 | 53 |
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], |
54 |
-[AM_AUTOMAKE_VERSION([1.14])dnl |
|
54 |
+[AM_AUTOMAKE_VERSION([1.14.1])dnl |
|
55 | 55 |
m4_ifndef([AC_AUTOCONF_VERSION], |
56 | 56 |
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl |
57 | 57 |
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) |
... | ... |
@@ -642,42 +642,6 @@ fi |
642 | 642 |
rmdir .tst 2>/dev/null |
643 | 643 |
AC_SUBST([am__leading_dot])]) |
644 | 644 |
|
645 |
-# Add --enable-maintainer-mode option to configure. -*- Autoconf -*- |
|
646 |
-# From Jim Meyering |
|
647 |
- |
|
648 |
-# Copyright (C) 1996-2013 Free Software Foundation, Inc. |
|
649 |
-# |
|
650 |
-# This file is free software; the Free Software Foundation |
|
651 |
-# gives unlimited permission to copy and/or distribute it, |
|
652 |
-# with or without modifications, as long as this notice is preserved. |
|
653 |
- |
|
654 |
-# AM_MAINTAINER_MODE([DEFAULT-MODE]) |
|
655 |
-# ---------------------------------- |
|
656 |
-# Control maintainer-specific portions of Makefiles. |
|
657 |
-# Default is to disable them, unless 'enable' is passed literally. |
|
658 |
-# For symmetry, 'disable' may be passed as well. Anyway, the user |
|
659 |
-# can override the default with the --enable/--disable switch. |
|
660 |
-AC_DEFUN([AM_MAINTAINER_MODE], |
|
661 |
-[m4_case(m4_default([$1], [disable]), |
|
662 |
- [enable], [m4_define([am_maintainer_other], [disable])], |
|
663 |
- [disable], [m4_define([am_maintainer_other], [enable])], |
|
664 |
- [m4_define([am_maintainer_other], [enable]) |
|
665 |
- m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])]) |
|
666 |
-AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) |
|
667 |
- dnl maintainer-mode's default is 'disable' unless 'enable' is passed |
|
668 |
- AC_ARG_ENABLE([maintainer-mode], |
|
669 |
- [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode], |
|
670 |
- am_maintainer_other[ make rules and dependencies not useful |
|
671 |
- (and sometimes confusing) to the casual installer])], |
|
672 |
- [USE_MAINTAINER_MODE=$enableval], |
|
673 |
- [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes])) |
|
674 |
- AC_MSG_RESULT([$USE_MAINTAINER_MODE]) |
|
675 |
- AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes]) |
|
676 |
- MAINT=$MAINTAINER_MODE_TRUE |
|
677 |
- AC_SUBST([MAINT])dnl |
|
678 |
-] |
|
679 |
-) |
|
680 |
- |
|
681 | 645 |
# Check to see how 'make' treats includes. -*- Autoconf -*- |
682 | 646 |
|
683 | 647 |
# Copyright (C) 2001-2013 Free Software Foundation, Inc. |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
#! /bin/sh |
2 | 2 |
# Attempt to guess a canonical system name. |
3 |
-# Copyright 1992-2013 Free Software Foundation, Inc. |
|
3 |
+# Copyright 1992-2014 Free Software Foundation, Inc. |
|
4 | 4 |
|
5 |
-timestamp='2013-05-16' |
|
5 |
+timestamp='2014-02-12' |
|
6 | 6 |
|
7 | 7 |
# This file is free software; you can redistribute it and/or modify it |
8 | 8 |
# under the terms of the GNU General Public License as published by |
... | ... |
@@ -50,7 +50,7 @@ version="\ |
50 | 50 |
GNU config.guess ($timestamp) |
51 | 51 |
|
52 | 52 |
Originally written by Per Bothner. |
53 |
-Copyright 1992-2013 Free Software Foundation, Inc. |
|
53 |
+Copyright 1992-2014 Free Software Foundation, Inc. |
|
54 | 54 |
|
55 | 55 |
This is free software; see the source for copying conditions. There is NO |
56 | 56 |
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." |
... | ... |
@@ -149,7 +149,7 @@ Linux|GNU|GNU/*) |
149 | 149 |
LIBC=gnu |
150 | 150 |
#endif |
151 | 151 |
EOF |
152 |
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` |
|
152 |
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` |
|
153 | 153 |
;; |
154 | 154 |
esac |
155 | 155 |
|
... | ... |
@@ -995,6 +995,12 @@ EOF |
995 | 995 |
ppc:Linux:*:*) |
996 | 996 |
echo powerpc-unknown-linux-${LIBC} |
997 | 997 |
exit ;; |
998 |
+ ppc64le:Linux:*:*) |
|
999 |
+ echo powerpc64le-unknown-linux-${LIBC} |
|
1000 |
+ exit ;; |
|
1001 |
+ ppcle:Linux:*:*) |
|
1002 |
+ echo powerpcle-unknown-linux-${LIBC} |
|
1003 |
+ exit ;; |
|
998 | 1004 |
s390:Linux:*:* | s390x:Linux:*:*) |
999 | 1005 |
echo ${UNAME_MACHINE}-ibm-linux-${LIBC} |
1000 | 1006 |
exit ;; |
... | ... |
@@ -1014,7 +1020,18 @@ EOF |
1014 | 1020 |
echo ${UNAME_MACHINE}-dec-linux-${LIBC} |
1015 | 1021 |
exit ;; |
1016 | 1022 |
x86_64:Linux:*:*) |
1017 |
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC} |
|
1023 |
+ eval $set_cc_for_build |
|
1024 |
+ X86_64_ABI= |
|
1025 |
+ # If there is a compiler, see if it is configured for 32-bit objects. |
|
1026 |
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then |
|
1027 |
+ if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ |
|
1028 |
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ |
|
1029 |
+ grep IS_X32 >/dev/null |
|
1030 |
+ then |
|
1031 |
+ X86_64_ABI=x32 |
|
1032 |
+ fi |
|
1033 |
+ fi |
|
1034 |
+ echo x86_64-unknown-linux-gnu${X86_64_ABI} |
|
1018 | 1035 |
exit ;; |
1019 | 1036 |
xtensa*:Linux:*:*) |
1020 | 1037 |
echo ${UNAME_MACHINE}-unknown-linux-${LIBC} |
... | ... |
@@ -1254,16 +1271,26 @@ EOF |
1254 | 1271 |
if test "$UNAME_PROCESSOR" = unknown ; then |
1255 | 1272 |
UNAME_PROCESSOR=powerpc |
1256 | 1273 |
fi |
1257 |
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then |
|
1258 |
- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ |
|
1259 |
- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ |
|
1260 |
- grep IS_64BIT_ARCH >/dev/null |
|
1261 |
- then |
|
1262 |
- case $UNAME_PROCESSOR in |
|
1263 |
- i386) UNAME_PROCESSOR=x86_64 ;; |
|
1264 |
- powerpc) UNAME_PROCESSOR=powerpc64 ;; |
|
1265 |
- esac |
|
1274 |
+ if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then |
|
1275 |
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then |
|
1276 |
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ |
|
1277 |
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ |
|
1278 |
+ grep IS_64BIT_ARCH >/dev/null |
|
1279 |
+ then |
|
1280 |
+ case $UNAME_PROCESSOR in |
|
1281 |
+ i386) UNAME_PROCESSOR=x86_64 ;; |
|
1282 |
+ powerpc) UNAME_PROCESSOR=powerpc64 ;; |
|
1283 |
+ esac |
|
1284 |
+ fi |
|
1266 | 1285 |
fi |
1286 |
+ elif test "$UNAME_PROCESSOR" = i386 ; then |
|
1287 |
+ # Avoid executing cc on OS X 10.9, as it ships with a stub |
|
1288 |
+ # that puts up a graphical alert prompting to install |
|
1289 |
+ # developer tools. Any system running Mac OS X 10.7 or |
|
1290 |
+ # later (Darwin 11 and later) is required to have a 64-bit |
|
1291 |
+ # processor. This is not true of the ARM version of Darwin |
|
1292 |
+ # that Apple uses in portable devices. |
|
1293 |
+ UNAME_PROCESSOR=x86_64 |
|
1267 | 1294 |
fi |
1268 | 1295 |
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} |
1269 | 1296 |
exit ;; |
... | ... |
@@ -1355,154 +1382,6 @@ EOF |
1355 | 1382 |
exit ;; |
1356 | 1383 |
esac |
1357 | 1384 |
|
1358 |
-eval $set_cc_for_build |
|
1359 |
-cat >$dummy.c <<EOF |
|
1360 |
-#ifdef _SEQUENT_ |
|
1361 |
-# include <sys/types.h> |
|
1362 |
-# include <sys/utsname.h> |
|
1363 |
-#endif |
|
1364 |
-main () |
|
1365 |
-{ |
|
1366 |
-#if defined (sony) |
|
1367 |
-#if defined (MIPSEB) |
|
1368 |
- /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, |
|
1369 |
- I don't know.... */ |
|
1370 |
- printf ("mips-sony-bsd\n"); exit (0); |
|
1371 |
-#else |
|
1372 |
-#include <sys/param.h> |
|
1373 |
- printf ("m68k-sony-newsos%s\n", |
|
1374 |
-#ifdef NEWSOS4 |
|
1375 |
- "4" |
|
1376 |
-#else |
|
1377 |
- "" |
|
1378 |
-#endif |
|
1379 |
- ); exit (0); |
|
1380 |
-#endif |
|
1381 |
-#endif |
|
1382 |
- |
|
1383 |
-#if defined (__arm) && defined (__acorn) && defined (__unix) |
|
1384 |
- printf ("arm-acorn-riscix\n"); exit (0); |
|
1385 |
-#endif |
|
1386 |
- |
|
1387 |
-#if defined (hp300) && !defined (hpux) |
|
1388 |
- printf ("m68k-hp-bsd\n"); exit (0); |
|
1389 |
-#endif |
|
1390 |
- |
|
1391 |
-#if defined (NeXT) |
|
1392 |
-#if !defined (__ARCHITECTURE__) |
|
1393 |
-#define __ARCHITECTURE__ "m68k" |
|
1394 |
-#endif |
|
1395 |
- int version; |
|
1396 |
- version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; |
|
1397 |
- if (version < 4) |
|
1398 |
- printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); |
|
1399 |
- else |
|
1400 |
- printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); |
|
1401 |
- exit (0); |
|
1402 |
-#endif |
|
1403 |
- |
|
1404 |
-#if defined (MULTIMAX) || defined (n16) |
|
1405 |
-#if defined (UMAXV) |
|
1406 |
- printf ("ns32k-encore-sysv\n"); exit (0); |
|
1407 |
-#else |
|
1408 |
-#if defined (CMU) |
|
1409 |
- printf ("ns32k-encore-mach\n"); exit (0); |
|
1410 |
-#else |
|
1411 |
- printf ("ns32k-encore-bsd\n"); exit (0); |
|
1412 |
-#endif |
|
1413 |
-#endif |
|
1414 |
-#endif |
|
1415 |
- |
|
1416 |
-#if defined (__386BSD__) |
|
1417 |
- printf ("i386-pc-bsd\n"); exit (0); |
|
1418 |
-#endif |
|
1419 |
- |
|
1420 |
-#if defined (sequent) |
|
1421 |
-#if defined (i386) |
|
1422 |
- printf ("i386-sequent-dynix\n"); exit (0); |
|
1423 |
-#endif |
|
1424 |
-#if defined (ns32000) |
|
1425 |
- printf ("ns32k-sequent-dynix\n"); exit (0); |
|
1426 |
-#endif |
|
1427 |
-#endif |
|
1428 |
- |
|
1429 |
-#if defined (_SEQUENT_) |
|
1430 |
- struct utsname un; |
|
1431 |
- |
|
1432 |
- uname(&un); |
|
1433 |
- |
|
1434 |
- if (strncmp(un.version, "V2", 2) == 0) { |
|
1435 |
- printf ("i386-sequent-ptx2\n"); exit (0); |
|
1436 |
- } |
|
1437 |
- if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ |
|
1438 |
- printf ("i386-sequent-ptx1\n"); exit (0); |
|
1439 |
- } |
|
1440 |
- printf ("i386-sequent-ptx\n"); exit (0); |
|
1441 |
- |
|
1442 |
-#endif |
|
1443 |
- |
|
1444 |
-#if defined (vax) |
|
1445 |
-# if !defined (ultrix) |
|
1446 |
-# include <sys/param.h> |
|
1447 |
-# if defined (BSD) |
|
1448 |
-# if BSD == 43 |
|
1449 |
- printf ("vax-dec-bsd4.3\n"); exit (0); |
|
1450 |
-# else |
|
1451 |
-# if BSD == 199006 |
|
1452 |
- printf ("vax-dec-bsd4.3reno\n"); exit (0); |
|
1453 |
-# else |
|
1454 |
- printf ("vax-dec-bsd\n"); exit (0); |
|
1455 |
-# endif |
|
1456 |
-# endif |
|
1457 |
-# else |
|
1458 |
- printf ("vax-dec-bsd\n"); exit (0); |
|
1459 |
-# endif |
|
1460 |
-# else |
|
1461 |
- printf ("vax-dec-ultrix\n"); exit (0); |
|
1462 |
-# endif |
|
1463 |
-#endif |
|
1464 |
- |
|
1465 |
-#if defined (alliant) && defined (i860) |
|
1466 |
- printf ("i860-alliant-bsd\n"); exit (0); |
|
1467 |
-#endif |
|
1468 |
- |
|
1469 |
- exit (1); |
|
1470 |
-} |
|
1471 |
-EOF |
|
1472 |
- |
|
1473 |
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && |
|
1474 |
- { echo "$SYSTEM_NAME"; exit; } |
|
1475 |
- |
|
1476 |
-# Apollos put the system type in the environment. |
|
1477 |
- |
|
1478 |
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } |
|
1479 |
- |
|
1480 |
-# Convex versions that predate uname can use getsysinfo(1) |
|
1481 |
- |
|
1482 |
-if [ -x /usr/convex/getsysinfo ] |
|
1483 |
-then |
|
1484 |
- case `getsysinfo -f cpu_type` in |
|
1485 |
- c1*) |
|
1486 |
- echo c1-convex-bsd |
|
1487 |
- exit ;; |
|
1488 |
- c2*) |
|
1489 |
- if getsysinfo -f scalar_acc |
|
1490 |
- then echo c32-convex-bsd |
|
1491 |
- else echo c2-convex-bsd |
|
1492 |
- fi |
|
1493 |
- exit ;; |
|
1494 |
- c34*) |
|
1495 |
- echo c34-convex-bsd |
|
1496 |
- exit ;; |
|
1497 |
- c38*) |
|
1498 |
- echo c38-convex-bsd |
|
1499 |
- exit ;; |
|
1500 |
- c4*) |
|
1501 |
- echo c4-convex-bsd |
|
1502 |
- exit ;; |
|
1503 |
- esac |
|
1504 |
-fi |
|
1505 |
- |
|
1506 | 1385 |
cat >&2 <<EOF |
1507 | 1386 |
$0: unable to guess system type |
1508 | 1387 |
|
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
#! /bin/sh |
2 | 2 |
# Configuration validation subroutine script. |
3 |
-# Copyright 1992-2013 Free Software Foundation, Inc. |
|
3 |
+# Copyright 1992-2014 Free Software Foundation, Inc. |
|
4 | 4 |
|
5 |
-timestamp='2013-04-24' |
|
5 |
+timestamp='2014-01-01' |
|
6 | 6 |
|
7 | 7 |
# This file is free software; you can redistribute it and/or modify it |
8 | 8 |
# under the terms of the GNU General Public License as published by |
... | ... |
@@ -68,7 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>." |
68 | 68 |
version="\ |
69 | 69 |
GNU config.sub ($timestamp) |
70 | 70 |
|
71 |
-Copyright 1992-2013 Free Software Foundation, Inc. |
|
71 |
+Copyright 1992-2014 Free Software Foundation, Inc. |
|
72 | 72 |
|
73 | 73 |
This is free software; see the source for copying conditions. There is NO |
74 | 74 |
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." |
... | ... |
@@ -257,7 +257,7 @@ case $basic_machine in |
257 | 257 |
| avr | avr32 \ |
258 | 258 |
| be32 | be64 \ |
259 | 259 |
| bfin \ |
260 |
- | c4x | clipper \ |
|
260 |
+ | c4x | c8051 | clipper \ |
|
261 | 261 |
| d10v | d30v | dlx | dsp16xx | dvp \ |
262 | 262 |
| epiphany \ |
263 | 263 |
| fido | fr30 | frv \ |
... | ... |
@@ -265,6 +265,7 @@ case $basic_machine in |
265 | 265 |
| hexagon \ |
266 | 266 |
| i370 | i860 | i960 | ia64 \ |
267 | 267 |
| ip2k | iq2000 \ |
268 |
+ | k1om \ |
|
268 | 269 |
| le32 | le64 \ |
269 | 270 |
| lm32 \ |
270 | 271 |
| m32c | m32r | m32rle | m68000 | m68k | m88k \ |
... | ... |
@@ -324,7 +325,7 @@ case $basic_machine in |
324 | 325 |
c6x) |
325 | 326 |
basic_machine=tic6x-unknown |
326 | 327 |
;; |
327 |
- m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) |
|
328 |
+ m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) |
|
328 | 329 |
basic_machine=$basic_machine-unknown |
329 | 330 |
os=-none |
330 | 331 |
;; |
... | ... |
@@ -372,7 +373,7 @@ case $basic_machine in |
372 | 373 |
| be32-* | be64-* \ |
373 | 374 |
| bfin-* | bs2000-* \ |
374 | 375 |
| c[123]* | c30-* | [cjt]90-* | c4x-* \ |
375 |
- | clipper-* | craynv-* | cydra-* \ |
|
376 |
+ | c8051-* | clipper-* | craynv-* | cydra-* \ |
|
376 | 377 |
| d10v-* | d30v-* | dlx-* \ |
377 | 378 |
| elxsi-* \ |
378 | 379 |
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ |
... | ... |
@@ -381,6 +382,7 @@ case $basic_machine in |
381 | 382 |
| hexagon-* \ |
382 | 383 |
| i*86-* | i860-* | i960-* | ia64-* \ |
383 | 384 |
| ip2k-* | iq2000-* \ |
385 |
+ | k1om-* \ |
|
384 | 386 |
| le32-* | le64-* \ |
385 | 387 |
| lm32-* \ |
386 | 388 |
| m32c-* | m32r-* | m32rle-* \ |
... | ... |
@@ -794,7 +796,7 @@ case $basic_machine in |
794 | 796 |
os=-mingw64 |
795 | 797 |
;; |
796 | 798 |
mingw32) |
797 |
- basic_machine=i386-pc |
|
799 |
+ basic_machine=i686-pc |
|
798 | 800 |
os=-mingw32 |
799 | 801 |
;; |
800 | 802 |
mingw32ce) |
... | ... |
@@ -848,7 +850,7 @@ case $basic_machine in |
848 | 850 |
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` |
849 | 851 |
;; |
850 | 852 |
msys) |
851 |
- basic_machine=i386-pc |
|
853 |
+ basic_machine=i686-pc |
|
852 | 854 |
os=-msys |
853 | 855 |
;; |
854 | 856 |
mvs) |
... | ... |
@@ -1564,6 +1566,9 @@ case $basic_machine in |
1564 | 1566 |
c4x-* | tic4x-*) |
1565 | 1567 |
os=-coff |
1566 | 1568 |
;; |
1569 |
+ c8051-*) |
|
1570 |
+ os=-elf |
|
1571 |
+ ;; |
|
1567 | 1572 |
hexagon-*) |
1568 | 1573 |
os=-elf |
1569 | 1574 |
;; |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
#! /bin/sh |
2 | 2 |
# Common wrapper for a few potentially missing GNU programs. |
3 | 3 |
|
4 |
-scriptversion=2012-06-26.16; # UTC |
|
4 |
+scriptversion=2013-10-28.13; # UTC |
|
5 | 5 |
|
6 | 6 |
# Copyright (C) 1996-2013 Free Software Foundation, Inc. |
7 | 7 |
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996. |
... | ... |
@@ -160,7 +160,7 @@ give_advice () |
160 | 160 |
;; |
161 | 161 |
autom4te*) |
162 | 162 |
echo "You might have modified some maintainer files that require" |
163 |
- echo "the 'automa4te' program to be rebuilt." |
|
163 |
+ echo "the 'autom4te' program to be rebuilt." |
|
164 | 164 |
program_details 'autom4te' |
165 | 165 |
;; |
166 | 166 |
bison*|yacc*) |
... | ... |
@@ -684,9 +684,6 @@ MAINTAINER_FALSE |
684 | 684 |
MAINTAINER_TRUE |
685 | 685 |
FULLDIST_FALSE |
686 | 686 |
FULLDIST_TRUE |
687 |
-MAINT |
|
688 |
-MAINTAINER_MODE_FALSE |
|
689 |
-MAINTAINER_MODE_TRUE |
|
690 | 687 |
AM_BACKSLASH |
691 | 688 |
AM_DEFAULT_VERBOSITY |
692 | 689 |
AM_DEFAULT_V |
... | ... |
@@ -789,7 +786,6 @@ enable_option_checking |
789 | 786 |
enable_largefile |
790 | 787 |
enable_dependency_tracking |
791 | 788 |
enable_silent_rules |
792 |
-enable_maintainer_mode |
|
793 | 789 |
enable_fulldist |
794 | 790 |
enable_maintainer |
795 | 791 |
enable_static_linking |
... | ... |
@@ -1441,9 +1437,6 @@ Optional Features: |
1441 | 1437 |
speeds up one-time build |
1442 | 1438 |
--enable-silent-rules less verbose build output (undo: "make V=1") |
1443 | 1439 |
--disable-silent-rules verbose build output (undo: "make V=0") |
1444 |
- --disable-maintainer-mode |
|
1445 |
- disable make rules and dependencies not useful (and |
|
1446 |
- sometimes confusing) to the casual installer |
|
1447 | 1440 |
--enable-fulldist For use by program maintainer |
1448 | 1441 |
--enable-maintainer For use by program maintainer |
1449 | 1442 |
--enable-static-linking Link binaries statically (default=no) |
... | ... |
@@ -4538,29 +4531,6 @@ END |
4538 | 4531 |
fi |
4539 | 4532 |
fi |
4540 | 4533 |
|
4541 |
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 |
|
4542 |
-$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } |
|
4543 |
- # Check whether --enable-maintainer-mode was given. |
|
4544 |
-if test "${enable_maintainer_mode+set}" = set; then : |
|
4545 |
- enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval |
|
4546 |
-else |
|
4547 |
- USE_MAINTAINER_MODE=yes |
|
4548 |
-fi |
|
4549 |
- |
|
4550 |
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 |
|
4551 |
-$as_echo "$USE_MAINTAINER_MODE" >&6; } |
|
4552 |
- if test $USE_MAINTAINER_MODE = yes; then |
|
4553 |
- MAINTAINER_MODE_TRUE= |
|
4554 |
- MAINTAINER_MODE_FALSE='#' |
|
4555 |
-else |
|
4556 |
- MAINTAINER_MODE_TRUE='#' |
|
4557 |
- MAINTAINER_MODE_FALSE= |
|
4558 |
-fi |
|
4559 |
- |
|
4560 |
- MAINT=$MAINTAINER_MODE_TRUE |
|
4561 |
- |
|
4562 |
- |
|
4563 |
- |
|
4564 | 4534 |
if test "x$enable_fulldist" = xyes; then |
4565 | 4535 |
FULLDIST_TRUE= |
4566 | 4536 |
FULLDIST_FALSE='#' |
... | ... |
@@ -15497,10 +15467,6 @@ else |
15497 | 15467 |
am__EXEEXT_FALSE= |
15498 | 15468 |
fi |
15499 | 15469 |
|
15500 |
-if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then |
|
15501 |
- as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. |
|
15502 |
-Usually this means the macro was only invoked conditionally." "$LINENO" 5 |
|
15503 |
-fi |
|
15504 | 15470 |
if test -z "${FULLDIST_TRUE}" && test -z "${FULLDIST_FALSE}"; then |
15505 | 15471 |
as_fn_error $? "conditional \"FULLDIST\" was never defined. |
15506 | 15472 |
Usually this means the macro was only invoked conditionally." "$LINENO" 5 |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-# Makefile.in generated by automake 1.14 from Makefile.am. |
|
1 |
+# Makefile.in generated by automake 1.14.1 from Makefile.am. |
|
2 | 2 |
# @configure_input@ |
3 | 3 |
|
4 | 4 |
# Copyright (C) 1994-2013 Free Software Foundation, Inc. |
... | ... |
@@ -388,7 +388,6 @@ LIBTOOL = @LIBTOOL@ |
388 | 388 |
LIPO = @LIPO@ |
389 | 389 |
LN_S = @LN_S@ |
390 | 390 |
LTLIBOBJS = @LTLIBOBJS@ |
391 |
-MAINT = @MAINT@ |
|
392 | 391 |
MAKEINFO = @MAKEINFO@ |
393 | 392 |
MANIFEST_TOOL = @MANIFEST_TOOL@ |
394 | 393 |
MKDIR_P = @MKDIR_P@ |
... | ... |
@@ -733,7 +732,7 @@ all: config.h |
733 | 732 |
|
734 | 733 |
.SUFFIXES: |
735 | 734 |
.SUFFIXES: .c .lo .o .obj |
736 |
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) |
|
735 |
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) |
|
737 | 736 |
@for dep in $?; do \ |
738 | 737 |
case '$(am__configure_deps)' in \ |
739 | 738 |
*$$dep*) \ |
... | ... |
@@ -758,9 +757,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status |
758 | 757 |
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
759 | 758 |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
760 | 759 |
|
761 |
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) |
|
760 |
+$(top_srcdir)/configure: $(am__configure_deps) |
|
762 | 761 |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
763 |
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) |
|
762 |
+$(ACLOCAL_M4): $(am__aclocal_m4_deps) |
|
764 | 763 |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
765 | 764 |
$(am__aclocal_m4_deps): |
766 | 765 |
|
... | ... |
@@ -771,7 +770,7 @@ config.h: stamp-h1 |
771 | 770 |
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status |
772 | 771 |
@rm -f stamp-h1 |
773 | 772 |
cd $(top_builddir) && $(SHELL) ./config.status src/config.h |
774 |
-$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) |
|
773 |
+$(srcdir)/config.h.in: $(am__configure_deps) |
|
775 | 774 |
($(am__cd) $(top_srcdir) && $(AUTOHEADER)) |
776 | 775 |
rm -f stamp-h1 |
777 | 776 |
touch $@ |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-static char rcsid[] = "$Id: bamread.c 114330 2013-11-07 19:55:05Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bamread.c 129099 2014-03-04 18:44:44Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
... | ... |
@@ -370,7 +370,7 @@ static void |
370 | 370 |
parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos, |
371 | 371 |
char **mate_chr, Genomicpos_T *mate_chrpos, int *insert_length, |
372 | 372 |
Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength, |
373 |
- int *readlength, char **read, char **quality_string, char **read_group) { |
|
373 |
+ int *readlength, char **read, char **quality_string, char **read_group, bool *terminalp) { |
|
374 | 374 |
int type; |
375 | 375 |
int i; |
376 | 376 |
unsigned int length; |
... | ... |
@@ -435,6 +435,15 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom |
435 | 435 |
*read_group = bam_aux2Z(ptr); |
436 | 436 |
} |
437 | 437 |
|
438 |
+ ptr = bam_aux_get(this->bam,"PG"); |
|
439 |
+ if (ptr == NULL) { |
|
440 |
+ *terminalp = false; |
|
441 |
+ } else if (strcmp((char *) ptr,"T")) { |
|
442 |
+ *terminalp = false; |
|
443 |
+ } else { |
|
444 |
+ *terminalp = true; |
|
445 |
+ } |
|
446 |
+ |
|
438 | 447 |
/* Cigar */ |
439 | 448 |
*cigarlength = 0; |
440 | 449 |
*cigartypes = (Intlist_T) NULL; |
... | ... |
@@ -475,7 +484,8 @@ int |
475 | 484 |
Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos, |
476 | 485 |
char **mate_chr, Genomicpos_T *mate_chrpos, |
477 | 486 |
Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength, |
478 |
- int *readlength, char **read, char **quality_string, char **read_group) { |
|
487 |
+ int *readlength, char **read, char **quality_string, char **read_group, |
|
488 |
+ bool *terminalp) { |
|
479 | 489 |
int insert_length; |
480 | 490 |
|
481 | 491 |
#ifndef HAVE_SAMTOOLS |
... | ... |
@@ -488,7 +498,8 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr |
488 | 498 |
parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos), |
489 | 499 |
&(*mate_chr),&(*mate_chrpos),&insert_length, |
490 | 500 |
&(*cigartypes),&(*cigarlengths),&(*cigarlength), |
491 |
- &(*readlength),&(*read),&(*quality_string),&(*read_group)); |
|
501 |
+ &(*readlength),&(*read),&(*quality_string),&(*read_group), |
|
502 |
+ &(*terminalp)); |
|
492 | 503 |
return 1; |
493 | 504 |
} |
494 | 505 |
} else { |
... | ... |
@@ -498,7 +509,8 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr |
498 | 509 |
parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos), |
499 | 510 |
&(*mate_chr),&(*mate_chrpos),&insert_length, |
500 | 511 |
&(*cigartypes),&(*cigarlengths),&(*cigarlength), |
501 |
- &(*readlength),&(*read),&(*quality_string),&(*read_group)); |
|
512 |
+ &(*readlength),&(*read),&(*quality_string),&(*read_group), |
|
513 |
+ &(*terminalp)); |
|
502 | 514 |
return 1; |
503 | 515 |
} |
504 | 516 |
} |
... | ... |
@@ -525,6 +537,7 @@ struct Bamline_T { |
525 | 537 |
int readlength; |
526 | 538 |
char *read; |
527 | 539 |
char *quality_string; |
540 |
+ bool terminalp; |
|
528 | 541 |
unsigned char *aux_start; |
529 | 542 |
unsigned char *aux_end; |
530 | 543 |
}; |
... | ... |
@@ -707,6 +720,11 @@ Bamline_quality_string (Bamline_T this) { |
707 | 720 |
return this->quality_string; |
708 | 721 |
} |
709 | 722 |
|
723 |
+bool |
|
724 |
+Bamline_terminalp (Bamline_T this) { |
|
725 |
+ return this->terminalp; |
|
726 |
+} |
|
727 |
+ |
|
710 | 728 |
|
711 | 729 |
static void |
712 | 730 |
aux_print (FILE *fp, unsigned char *s, unsigned char *aux_end) { |
... | ... |
@@ -1211,7 +1229,8 @@ Bamline_new (char *acc, unsigned int flag, int nhits, bool good_unique_p, int ma |
1211 | 1229 |
char splice_strand, char *chr, Genomicpos_T chrpos_low, |
1212 | 1230 |
char *mate_chr, Genomicpos_T mate_chrpos_low, int insert_length, |
1213 | 1231 |
Intlist_T cigar_types, Uintlist_T cigar_npositions, int cigar_querylength, int readlength, |
1214 |
- char *read, char *quality_string, unsigned char *aux_start, unsigned char *aux_end) { |
|
1232 |
+ char *read, char *quality_string, bool terminalp, |
|
1233 |
+ unsigned char *aux_start, unsigned char *aux_end) { |
|
1215 | 1234 |
Bamline_T new = (Bamline_T) MALLOC(sizeof(*new)); |
1216 | 1235 |
|
1217 | 1236 |
new->acc = (char *) CALLOC(strlen(acc)+1,sizeof(char)); |
... | ... |
@@ -1258,6 +1277,8 @@ Bamline_new (char *acc, unsigned int flag, int nhits, bool good_unique_p, int ma |
1258 | 1277 |
strncpy(new->quality_string,quality_string,readlength); |
1259 | 1278 |
} |
1260 | 1279 |
|
1280 |
+ new->terminalp = terminalp; |
|
1281 |
+ |
|
1261 | 1282 |
new->aux_start = aux_start; |
1262 | 1283 |
new->aux_end = aux_end; |
1263 | 1284 |
|
... | ... |
@@ -1282,6 +1303,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
1282 | 1303 |
char *read; |
1283 | 1304 |
char *quality_string; |
1284 | 1305 |
char *read_group; |
1306 |
+ bool terminalp; |
|
1285 | 1307 |
|
1286 | 1308 |
#ifndef HAVE_SAMTOOLS |
1287 | 1309 |
return (Bamline_T) NULL; |
... | ... |
@@ -1293,7 +1315,8 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
1293 | 1315 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
1294 | 1316 |
&mate_chr,&mate_chrpos_low,&insert_length, |
1295 | 1317 |
&cigar_types,&cigarlengths,&cigar_querylength, |
1296 |
- &readlength,&read,&quality_string,&read_group); |
|
1318 |
+ &readlength,&read,&quality_string,&read_group, |
|
1319 |
+ &terminalp); |
|
1297 | 1320 |
if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) { |
1298 | 1321 |
debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group)); |
1299 | 1322 |
Intlist_free(&cigar_types); |
... | ... |
@@ -1336,7 +1359,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
1336 | 1359 |
return Bamline_new(acc,flag,nhits,good_unique_p,mapq,splice_strand,chr,chrpos_low, |
1337 | 1360 |
mate_chr,mate_chrpos_low,insert_length, |
1338 | 1361 |
cigar_types,cigarlengths,cigar_querylength, |
1339 |
- readlength,read,quality_string, |
|
1362 |
+ readlength,read,quality_string,terminalp, |
|
1340 | 1363 |
/*aux_start*/bam1_aux(this->bam), |
1341 | 1364 |
/*aux_end*/this->bam->data + this->bam->data_len); |
1342 | 1365 |
} |
... | ... |
@@ -1348,7 +1371,8 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
1348 | 1371 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
1349 | 1372 |
&mate_chr,&mate_chrpos_low,&insert_length, |
1350 | 1373 |
&cigar_types,&cigarlengths,&cigar_querylength, |
1351 |
- &readlength,&read,&quality_string,&read_group); |
|
1374 |
+ &readlength,&read,&quality_string,&read_group, |
|
1375 |
+ &terminalp); |
|
1352 | 1376 |
if (mapq >= minimum_mapq && |
1353 | 1377 |
(nhits = aux_nhits(this)) <= maximum_nhits && |
1354 | 1378 |
(need_unique_p == false || nhits == 1) && |
... | ... |
@@ -1359,7 +1383,7 @@ Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int go |
1359 | 1383 |
return Bamline_new(acc,flag,nhits,good_unique_p,mapq,splice_strand,chr,chrpos_low, |
1360 | 1384 |
mate_chr,mate_chrpos_low,insert_length, |
1361 | 1385 |
cigar_types,cigarlengths,cigar_querylength, |
1362 |
- readlength,read,quality_string, |
|
1386 |
+ readlength,read,quality_string,terminalp, |
|
1363 | 1387 |
/*aux_start*/bam1_aux(this->bam), |
1364 | 1388 |
/*aux_end*/this->bam->data + this->bam->data_len); |
1365 | 1389 |
} else { |
... | ... |
@@ -1390,6 +1414,7 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d |
1390 | 1414 |
char *read; |
1391 | 1415 |
char *quality_string; |
1392 | 1416 |
char *read_group; |
1417 |
+ bool terminalp; |
|
1393 | 1418 |
|
1394 | 1419 |
Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos); |
1395 | 1420 |
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) { |
... | ... |
@@ -1397,14 +1422,15 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d |
1397 | 1422 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
1398 | 1423 |
&mate_chr,&mate_chrpos_low,&insert_length, |
1399 | 1424 |
&cigar_types,&cigarlengths,&cigar_querylength, |
1400 |
- &readlength,&read,&quality_string,&read_group); |
|
1425 |
+ &readlength,&read,&quality_string,&read_group, |
|
1426 |
+ &terminalp); |
|
1401 | 1427 |
splice_strand = aux_splice_strand(this); |
1402 | 1428 |
if (!strcmp(acc,desired_acc) && chrpos_low == desired_chrpos) { |
1403 | 1429 |
Bamread_unlimit_region(this); |
1404 | 1430 |
return Bamline_new(acc,flag,nhits,/*good_unique_p*/true,mapq,splice_strand,chr,chrpos_low, |
1405 | 1431 |
mate_chr,mate_chrpos_low,insert_length, |
1406 | 1432 |
cigar_types,cigarlengths,cigar_querylength, |
1407 |
- readlength,read,quality_string, |
|
1433 |
+ readlength,read,quality_string,terminalp, |
|
1408 | 1434 |
/*aux_start*/bam1_aux(this->bam), |
1409 | 1435 |
/*aux_end*/this->bam->data + this->bam->data_len); |
1410 | 1436 |
} |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-/* $Id: bamread.h 108654 2013-09-19 23:11:00Z twu $ */ |
|
1 |
+/* $Id: bamread.h 129099 2014-03-04 18:44:44Z twu $ */ |
|
2 | 2 |
#ifndef BAMREAD_INCLUDED |
3 | 3 |
#define BAMREAD_INCLUDED |
4 | 4 |
/* Cannot use bool, since it appears to conflict with samtools */ |
... | ... |
@@ -43,7 +43,8 @@ extern int |
43 | 43 |
Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos, |
44 | 44 |
char **mate_chr, Genomicpos_T *mate_chrpos, |
45 | 45 |
Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength, |
46 |
- int *readlength, char **read, char **quality_string, char **read_group); |
|
46 |
+ int *readlength, char **read, char **quality_string, char **read_group, |
|
47 |
+ bool *terminalp); |
|
47 | 48 |
|
48 | 49 |
typedef struct Bamline_T *Bamline_T; |
49 | 50 |
|
... | ... |
@@ -91,6 +92,8 @@ extern char * |
91 | 92 |
Bamline_read (Bamline_T this); |
92 | 93 |
extern char * |
93 | 94 |
Bamline_quality_string (Bamline_T this); |
95 |
+extern bool |
|
96 |
+Bamline_terminalp (Bamline_T this); |
|
94 | 97 |
extern char * |
95 | 98 |
Bamline_read_group (Bamline_T this); |
96 | 99 |
extern void |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-static char rcsid[] = "$Id: bamtally.c 123611 2014-01-15 21:22:04Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bamtally.c 129100 2014-03-04 18:45:02Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
... | ... |
@@ -69,11 +69,11 @@ static bool totals_only_p = false; |
69 | 69 |
#endif |
70 | 70 |
|
71 | 71 |
|
72 |
+ |
|
72 | 73 |
#define T Tally_T |
73 | 74 |
typedef struct T *T; |
74 | 75 |
|
75 | 76 |
|
76 |
- |
|
77 | 77 |
typedef enum {FIRST, SECOND} Pairend_T; |
78 | 78 |
|
79 | 79 |
|
... | ... |
@@ -1525,6 +1525,7 @@ make_mismatches_unique_signed (List_T mismatches) { |
1525 | 1525 |
for (ptr = mismatches; ptr != NULL; ptr = List_next(ptr)) { |
1526 | 1526 |
mismatch = (Mismatch_T) List_head(ptr); |
1527 | 1527 |
if ((mismatch0 = find_mismatch_nt(unique_mismatches,mismatch->nt)) != NULL) { |
1528 |
+ mismatch0->count += mismatch->count; |
|
1528 | 1529 |
if (mismatch->shift > 0) { |
1529 | 1530 |
mismatch0->count_plus += mismatch->count; |
1530 | 1531 |
} else { |
... | ... |
@@ -1538,6 +1539,7 @@ make_mismatches_unique_signed (List_T mismatches) { |
1538 | 1539 |
mismatch0->shift += 1; /* Used here as nshifts */ |
1539 | 1540 |
} else { |
1540 | 1541 |
mismatch0 = Mismatch_new(mismatch->nt,/*shift, used here as nshifts*/1,/*mapq*/0,' '); |
1542 |
+ mismatch0->count = mismatch->count; |
|
1541 | 1543 |
if (mismatch->shift > 0) { |
1542 | 1544 |
mismatch0->count_plus = mismatch->count; |
1543 | 1545 |
mismatch0->count_minus = 0; |
... | ... |
@@ -1791,7 +1793,6 @@ print_block (T *block_tallies, Genomicpos_T blockstart, Genomicpos_T blockptr, |
1791 | 1793 |
|
1792 | 1794 |
|
1793 | 1795 |
/* Handle mismatches */ |
1794 |
- |
|
1795 | 1796 |
if (pass_variant_filter_p(this->nmatches,this->mismatches_byshift, |
1796 | 1797 |
min_depth,variant_strands) == false) { |
1797 | 1798 |
if (blockp == true) { |
... | ... |
@@ -1983,7 +1984,7 @@ print_block (T *block_tallies, Genomicpos_T blockstart, Genomicpos_T blockptr, |
1983 | 1984 |
for (i = 0; i < List_length(unique_mismatches_byshift); i++) { |
1984 | 1985 |
mismatch0 = mm_array[i]; |
1985 | 1986 |
if (signed_counts_p == false) { |
1986 |
- printf(" %c%ld",mismatch0->nt,mismatch0->count_plus+mismatch0->count_minus); |
|
1987 |
+ printf(" %c%ld",mismatch0->nt,mismatch0->count); |
|
1987 | 1988 |
} else { |
1988 | 1989 |
printf(" %c%ld|%ld",mismatch0->nt,mismatch0->count_plus,mismatch0->count_minus); |
1989 | 1990 |
} |
... | ... |
@@ -3009,6 +3010,7 @@ revise_position (char querynt, char genomicnt, int mapq, int quality, int signed |
3009 | 3010 |
/* Skip query N's */ |
3010 | 3011 |
|
3011 | 3012 |
} else { |
3013 |
+ |
|
3012 | 3014 |
if ((mismatch = find_mismatch_byshift(this->mismatches_byshift,toupper(querynt),signed_shift)) != NULL) { |
3013 | 3015 |
mismatch->count += 1; |
3014 | 3016 |
} else { |
... | ... |
@@ -3083,10 +3085,10 @@ average (char *quality_scores, int n) { |
3083 | 3085 |
static void |
3084 | 3086 |
revise_read (T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend, Genomicpos_T chrpos_low, |
3085 | 3087 |
unsigned int flag, Intlist_T types, Uintlist_T npositions, int cigar_querylength, |
3086 |
- char *shortread, int mapq, char *quality_string, Genomicpos_T alloc_low, |
|
3087 |
- int *quality_counts_match, int *quality_counts_mismatch, |
|
3088 |
+ char *shortread, int mapq, char *quality_string, bool terminalp, |
|
3089 |
+ Genomicpos_T alloc_low, int *quality_counts_match, int *quality_counts_mismatch, |
|
3088 | 3090 |
Genome_T genome, Genomicpos_T chroffset, bool ignore_query_Ns_p, |
3089 |
- bool print_indels_p, bool readlevel_p) { |
|
3091 |
+ bool print_indels_p, bool readlevel_p, int max_softclip) { |
|
3090 | 3092 |
T this, right; |
3091 | 3093 |
int alloci; |
3092 | 3094 |
int shift, signed_shift; |
... | ... |
@@ -3113,12 +3115,43 @@ revise_read (T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend, Genom |
3113 | 3115 |
|
3114 | 3116 |
pos = chrpos_low - 1U; /* Bamread reports chrpos as 1-based */ |
3115 | 3117 |
|
3118 |
+ if (terminalp == false) { |
|
3119 |
+ /* Don't handle soft clip for terminal alignments */ |
|
3120 |
+ max_softclip = 0; |
|
3121 |
+ } |
|
3122 |
+ |
|
3116 | 3123 |
p = shortread; |
3117 | 3124 |
r = quality_string; |
3125 |
+ if (max_softclip > 0 && types != NULL) { |
|
3126 |
+ if (Intlist_head(types) == 'S') { |
|
3127 |
+ /* Revise pos, so we handle the initial soft clip */ |
|
3128 |
+ mlength = Uintlist_head(npositions); |
|
3129 |
+ if (pos < mlength) { |
|
3130 |
+ /* Make sure initial soft clip does not extend past beginning of chromosome */ |
|
3131 |
+ pos = 0U; |
|
3132 |
+ p += (mlength - pos); |
|
3133 |
+ r += (mlength - pos); |
|
3134 |
+ Uintlist_head_set(npositions,pos); |
|
3135 |
+ } else { |
|
3136 |
+ pos -= mlength; |
|
3137 |
+ } |
|
3138 |
+ |
|
3139 |
+ mlength = Uintlist_head(npositions); |
|
3140 |
+ if (mlength > max_softclip) { |
|
3141 |
+ /* Make sure initial soft clip does not extend past max_softclip */ |
|
3142 |
+ fprintf(stderr,"Read has initial soft clip %d greater than max_softclip %d\n",mlength,max_softclip); |
|
3143 |
+ pos += (mlength - max_softclip); |
|
3144 |
+ p += (mlength - max_softclip); |
|
3145 |
+ r += (mlength - max_softclip); |
|
3146 |
+ Uintlist_head_set(npositions,max_softclip); |
|
3147 |
+ } |
|
3148 |
+ } |
|
3149 |
+ } |
|
3150 |
+ |
|
3118 | 3151 |
for (a = types, b = npositions; a != NULL; a = Intlist_next(a), b = Uintlist_next(b)) { |
3119 | 3152 |
type = Intlist_head(a); |
3120 | 3153 |
mlength = Uintlist_head(b); |
3121 |
- if (type == 'S') { |
|
3154 |
+ if (type == 'S' && max_softclip == 0) { |
|
3122 | 3155 |
/* pos += mlength; -- SAM assumes genome coordinates are of clipped region */ |
3123 | 3156 |
p += mlength; |
3124 | 3157 |
r += mlength; |
... | ... |
@@ -3187,7 +3220,7 @@ revise_read (T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend, Genom |
3187 | 3220 |
|
3188 | 3221 |
pos += mlength; |
3189 | 3222 |
|
3190 |
- } else if (type == 'M') { |
|
3223 |
+ } else if (type == 'M' || (type == 'S' && max_softclip > 0)) { |
|
3191 | 3224 |
if (0 /* mlength < min_mlength */) { |
3192 | 3225 |
p += mlength; |
3193 | 3226 |
r += mlength; |
... | ... |
@@ -3195,6 +3228,12 @@ revise_read (T *alloc_tallies, Genomicpos_T chrstart, Genomicpos_T chrend, Genom |
3195 | 3228 |
shift += ((strand == '+') ? +mlength : -mlength); |
3196 | 3229 |
|
3197 | 3230 |
} else { |
3231 |
+ if (type == 'S' && mlength > max_softclip) { |
|
3232 |
+ /* Must be final softclip, because we handled initial one already */ |
|
3233 |
+ fprintf(stderr,"Read has final soft clip %d greater than max_softclip %d\n",mlength,max_softclip); |
|
3234 |
+ mlength = max_softclip; |
|
3235 |
+ } |
|
3236 |
+ |
|
3198 | 3237 |
debug1(printf("Genomic pos is %u + %u = %u\n",chroffset,pos,chroffset+pos)); |
3199 | 3238 |
if (genome == NULL) { |
3200 | 3239 |
q = &(*p); |
... | ... |
@@ -3287,10 +3326,10 @@ static void |
3287 | 3326 |
revise_read_lh (T *alloc_tallies_low, T *alloc_tallies_high, Genomicpos_T chrstart, Genomicpos_T chrend, |
3288 | 3327 |
bool lowend_p, Genomicpos_T chrpos_low, |
3289 | 3328 |
unsigned int flag, Intlist_T types, Uintlist_T npositions, int cigar_querylength, |
3290 |
- char *shortread, int mapq, char *quality_string, Genomicpos_T alloc_low, |
|
3291 |
- int *quality_counts_match, int *quality_counts_mismatch, |
|
3329 |
+ char *shortread, int mapq, char *quality_string, bool terminalp, |
|
3330 |
+ Genomicpos_T alloc_low, int *quality_counts_match, int *quality_counts_mismatch, |
|
3292 | 3331 |
Genome_T genome, Genomicpos_T chroffset, bool ignore_query_Ns_p, bool print_indels_p, |
3293 |
- bool readlevel_p) { |
|
3332 |
+ bool readlevel_p, int max_softclip) { |
|
3294 | 3333 |
T this, right; |
3295 | 3334 |
int alloci; |
3296 | 3335 |
int shift, signed_shift; |
... | ... |
@@ -3317,12 +3356,43 @@ revise_read_lh (T *alloc_tallies_low, T *alloc_tallies_high, Genomicpos_T chrsta |
3317 | 3356 |
|
3318 | 3357 |
pos = chrpos_low - 1U; /* Bamread reports chrpos as 1-based */ |
3319 | 3358 |
|
3359 |
+ if (terminalp == false) { |
|
3360 |
+ /* Don't handle soft clip for terminal alignments */ |
|
3361 |
+ max_softclip = 0; |
|
3362 |
+ } |
|
3363 |
+ |
|
3320 | 3364 |
p = shortread; |
3321 | 3365 |
r = quality_string; |
3366 |
+ if (max_softclip > 0 && types != NULL) { |
|
3367 |
+ if (Intlist_head(types) == 'S') { |
|
3368 |
+ /* Revise pos, so we handle the initial soft clip */ |
|
3369 |
+ mlength = Uintlist_head(npositions); |
|
3370 |
+ if (pos < mlength) { |
|
3371 |
+ /* Make sure initial soft clip does not extend past beginning of chromosome */ |
|
3372 |
+ pos = 0U; |
|
3373 |
+ p += (mlength - pos); |
|
3374 |
+ r += (mlength - pos); |
|
3375 |
+ Uintlist_head_set(npositions,pos); |
|
3376 |
+ } else { |
|
3377 |
+ pos -= mlength; |
|
3378 |
+ } |
|
3379 |
+ |
|
3380 |
+ mlength = Uintlist_head(npositions); |
|
3381 |
+ if (mlength > max_softclip) { |
|
3382 |
+ /* Make sure initial soft clip does not extend past max_softclip */ |
|
3383 |
+ fprintf(stderr,"Read has initial soft clip %d greater than max_softclip %d\n",mlength,max_softclip); |
|
3384 |
+ pos += (mlength - max_softclip); |
|
3385 |
+ p += (mlength - max_softclip); |
|
3386 |
+ r += (mlength - max_softclip); |
|
3387 |
+ Uintlist_head_set(npositions,max_softclip); |
|
3388 |
+ } |
|
3389 |
+ } |
|
3390 |
+ } |
|
3391 |
+ |
|
3322 | 3392 |
for (a = types, b = npositions; a != NULL; a = Intlist_next(a), b = Uintlist_next(b)) { |
3323 | 3393 |
type = Intlist_head(a); |
3324 | 3394 |
mlength = Uintlist_head(b); |
3325 |
- if (type == 'S') { |
|
3395 |
+ if (type == 'S' && max_softclip == 0) { |
|
3326 | 3396 |
/* pos += mlength; -- SAM assumes genome coordinates are of clipped region */ |
3327 | 3397 |
p += mlength; |
3328 | 3398 |
r += mlength; |
... | ... |
@@ -3389,7 +3459,7 @@ revise_read_lh (T *alloc_tallies_low, T *alloc_tallies_high, Genomicpos_T chrsta |
3389 | 3459 |
|
3390 | 3460 |
pos += mlength; |
3391 | 3461 |
|
3392 |
- } else if (type == 'M') { |
|
3462 |
+ } else if (type == 'M' || (type == 'S' && max_softclip > 0)) { |
|
3393 | 3463 |
if (0 /* mlength < min_mlength */) { |
3394 | 3464 |
p += mlength; |
3395 | 3465 |
r += mlength; |
... | ... |
@@ -3397,6 +3467,12 @@ revise_read_lh (T *alloc_tallies_low, T *alloc_tallies_high, Genomicpos_T chrsta |
3397 | 3467 |
shift += ((strand == '+') ? +mlength : -mlength); |
3398 | 3468 |
|
3399 | 3469 |
} else { |
3470 |
+ if (type == 'S' && mlength > max_softclip) { |
|
3471 |
+ /* Must be final softclip, because we handled initial one already */ |
|
3472 |
+ fprintf(stderr,"Read has final soft clip %d greater than max_softclip %d\n",mlength,max_softclip); |
|
3473 |
+ mlength = max_softclip; |
|
3474 |
+ } |
|
3475 |
+ |
|
3400 | 3476 |
debug1(printf("Genomic pos is %u + %u = %u\n",chroffset,pos,chroffset+pos)); |
3401 | 3477 |
if (genome == NULL) { |
3402 | 3478 |
q = &(*p); |
... | ... |
@@ -3511,7 +3587,7 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3511 | 3587 |
bool genomic_diff_p, bool signed_counts_p, bool ignore_query_Ns_p, |
3512 | 3588 |
bool print_indels_p, bool print_totals_p, bool print_cycles_p, |
3513 | 3589 |
bool print_quality_scores_p, bool print_mapq_scores_p, bool want_genotypes_p, |
3514 |
- bool verbosep, bool readlevel_p) { |
|
3590 |
+ bool verbosep, bool readlevel_p, int max_softclip) { |
|
3515 | 3591 |
long int grand_total = 0; |
3516 | 3592 |
T this; |
3517 | 3593 |
Genomicpos_T alloc_ptr, alloc_low, alloc_high, chrpos_low, chrpos_high, chrpos; |
... | ... |
@@ -3527,8 +3603,8 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3527 | 3603 |
|
3528 | 3604 |
|
3529 | 3605 |
/* Create tally at position N to store n_fromleft */ |
3530 |
- alloc_tallies_alloc = (T *) CALLOC(alloclength+1,sizeof(T)); |
|
3531 |
- for (i = 0; i < alloclength+1; i++) { |
|
3606 |
+ alloc_tallies_alloc = (T *) CALLOC(alloclength + 2*max_softclip + 1,sizeof(T)); |
|
3607 |
+ for (i = 0; i < alloclength + 2*max_softclip + 1; i++) { |
|
3532 | 3608 |
alloc_tallies_alloc[i] = Tally_new(); |
3533 | 3609 |
} |
3534 | 3610 |
alloc_tallies = &(alloc_tallies_alloc[0]); |
... | ... |
@@ -3552,15 +3628,20 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3552 | 3628 |
|
3553 | 3629 |
alloc_ptr = 0U; |
3554 | 3630 |
alloc_low = 0U; |
3555 |
- alloc_high = alloc_low + alloclength; |
|
3631 |
+ alloc_high = alloc_low + alloclength + 2*max_softclip; |
|
3556 | 3632 |
goodp = false; |
3557 | 3633 |
|
3634 |
+ blockstart = 0U; |
|
3635 |
+ blockend = 0U; |
|
3636 |
+ blockptr = 0U; |
|
3637 |
+ |
|
3558 | 3638 |
while (goodp == false && |
3559 | 3639 |
(bamline = Bamread_next_bamline(bamreader,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits, |
3560 | 3640 |
need_unique_p,need_primary_p,ignore_duplicates_p, |
3561 | 3641 |
need_concordant_p)) != NULL) { |
3562 | 3642 |
/* chrpos_high = Samread_chrpos_high(types,npositions,chrpos_low); */ |
3563 |
- debug0(printf(">%s:%u..%u ",chr,chrpos_low,chrpos_high)); |
|
3643 |
+ debug0(printf("\n")); |
|
3644 |
+ debug0(printf(">%s:%u..%u ",printchr,Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline))); |
|
3564 | 3645 |
debug0(Bamread_print_cigar(stdout,bamline)); |
3565 | 3646 |
debug0(printf("\n")); |
3566 | 3647 |
|
... | ... |
@@ -3573,34 +3654,31 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3573 | 3654 |
chrpos_low = Bamline_chrpos_low(bamline); |
3574 | 3655 |
chrpos_high = Bamline_chrpos_high(bamline); |
3575 | 3656 |
|
3576 |
- alloc_low = chrpos_low; |
|
3577 |
- alloc_high = alloc_low + alloclength; |
|
3578 |
- alloc_ptr = chrpos_low; |
|
3579 |
- |
|
3580 |
- blockstart = chrpos_low; |
|
3581 |
- blockend = blockstart + blocksize; |
|
3582 |
- blockptr = chrpos_low; |
|
3657 |
+ alloc_low = (chrpos_low < max_softclip) ? 0U : (chrpos_low - max_softclip); |
|
3658 |
+ alloc_high = alloc_low + alloclength + 2*max_softclip; |
|
3659 |
+ alloc_ptr = alloc_low; |
|
3583 | 3660 |
|
3584 | 3661 |
debug0(printf(" initialize alloc_low %u, alloc_high = %u, blockstart = %u, blockend = %u\n", |
3585 | 3662 |
alloc_low,alloc_high,blockstart,blockend)); |
3586 | 3663 |
|
3587 |
- if (chrpos_high >= alloc_high) { |
|
3664 |
+ if (chrpos_high + max_softclip >= alloc_high) { |
|
3588 | 3665 |
if (verbosep == true) { |
3589 | 3666 |
fprintf(stderr,"read %s at %s:%u..%u is longer than allocated buffer ending at %u => skipping\n", |
3590 | 3667 |
Bamline_acc(bamline),printchr,chrpos_low,chrpos_high,alloclength); |
3591 | 3668 |
} |
3592 | 3669 |
} else { |
3593 |
- if (chrpos_high + 1U > alloc_ptr) { |
|
3594 |
- alloc_ptr = chrpos_high + 1U; |
|
3670 |
+ if (chrpos_high + max_softclip + 1U > alloc_ptr) { |
|
3671 |
+ alloc_ptr = chrpos_high + max_softclip + 1U; |
|
3595 | 3672 |
debug0(printf(" revising alloc_ptr to be %u\n",alloc_ptr)); |
3596 | 3673 |
} |
3597 | 3674 |
|
3598 | 3675 |
revise_read(alloc_tallies,chrstart,chrend,chrpos_low,Bamline_flag(bamline), |
3599 | 3676 |
Bamline_cigar_types(bamline),Bamline_cigar_npositions(bamline), |
3600 | 3677 |
Bamline_cigar_querylength(bamline),Bamline_read(bamline), |
3601 |
- Bamline_mapq(bamline),Bamline_quality_string(bamline),alloc_low, |
|
3602 |
- quality_counts_match,quality_counts_mismatch, |
|
3603 |
- genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p); |
|
3678 |
+ Bamline_mapq(bamline),Bamline_quality_string(bamline), |
|
3679 |
+ Bamline_terminalp(bamline),alloc_low,quality_counts_match,quality_counts_mismatch, |
|
3680 |
+ genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p, |
|
3681 |
+ max_softclip); |
|
3604 | 3682 |
|
3605 | 3683 |
goodp = true; |
3606 | 3684 |
} |
... | ... |
@@ -3616,7 +3694,8 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3616 | 3694 |
|
3617 | 3695 |
debug0(printf("* alloc: %u..%u..%u block: %u..%u..%u ", |
3618 | 3696 |
alloc_low,alloc_ptr,alloc_high,blockstart,blockptr,blockend)); |
3619 |
- debug0(printf("%s:%u..%u ",printchr,chrpos_low,chrpos_high)); |
|
3697 |
+ debug0(printf("\n")); |
|
3698 |
+ debug0(printf(">%s:%u..%u ",printchr,Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline))); |
|
3620 | 3699 |
debug0(Bamread_print_cigar(stdout,bamline)); |
3621 | 3700 |
debug0(printf("\n")); |
3622 | 3701 |
|
... | ... |
@@ -3633,8 +3712,8 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3633 | 3712 |
chrpos_low = Bamline_chrpos_low(bamline); |
3634 | 3713 |
chrpos_high = Bamline_chrpos_high(bamline); |
3635 | 3714 |
|
3636 |
- if (chrpos_low > alloc_ptr) { |
|
3637 |
- debug0(printf("Case 1: chrpos_low %u > alloc_ptr %u\n",chrpos_low,alloc_ptr)); |
|
3715 |
+ if (chrpos_low > alloc_ptr + max_softclip) { |
|
3716 |
+ debug0(printf("Case 1: chrpos_low %u > alloc_ptr %u + max_softclip %d\n",chrpos_low,alloc_ptr,max_softclip)); |
|
3638 | 3717 |
debug0(printf(" transfer from alloc_low %u to alloc_ptr %u\n",alloc_low,alloc_ptr)); |
3639 | 3718 |
for (chrpos = alloc_low; chrpos < alloc_ptr; chrpos++) { |
3640 | 3719 |
alloci = chrpos - alloc_low; |
... | ... |
@@ -3654,16 +3733,16 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3654 | 3733 |
} |
3655 | 3734 |
} |
3656 | 3735 |
|
3657 |
- debug0(printf(" reset alloc_low = chrpos_low\n")); |
|
3658 |
- alloc_low = chrpos_low; |
|
3659 |
- alloc_high = alloc_low + alloclength; |
|
3660 |
- alloc_tallies[alloclength]->n_fromleft_plus = 0; |
|
3661 |
- alloc_tallies[alloclength]->n_fromleft_minus = 0; |
|
3736 |
+ debug0(printf(" reset alloc_low = chrpos_low - max_softclip\n")); |
|
3737 |
+ alloc_low = (chrpos_low < max_softclip) ? 0U : chrpos_low - max_softclip; |
|
3738 |
+ alloc_high = alloc_low + alloclength + 2*max_softclip; |
|
3739 |
+ alloc_tallies[alloclength + 2*max_softclip]->n_fromleft_plus = 0; |
|
3740 |
+ alloc_tallies[alloclength + 2*max_softclip]->n_fromleft_minus = 0; |
|
3662 | 3741 |
|
3663 |
- } else if (chrpos_low > alloc_low) { |
|
3664 |
- debug0(printf("Case 2: chrpos_low %u > alloc_low %u\n",chrpos_low,alloc_low)); |
|
3742 |
+ } else if (chrpos_low > alloc_low + max_softclip) { |
|
3743 |
+ debug0(printf("Case 2: chrpos_low %u > alloc_low %u + max_softclip %d\n",chrpos_low,alloc_low,max_softclip)); |
|
3665 | 3744 |
debug0(printf(" transfer from alloc_low %u up to chrpos_low %u\n",alloc_low,chrpos_low)); |
3666 |
- for (chrpos = alloc_low; chrpos < chrpos_low; chrpos++) { |
|
3745 |
+ for (chrpos = alloc_low; chrpos + max_softclip < chrpos_low; chrpos++) { |
|
3667 | 3746 |
alloci = chrpos - alloc_low; |
3668 | 3747 |
this = alloc_tallies[alloci]; |
3669 | 3748 |
if (this->nmatches > 0 || this->mismatches_byshift != NULL || |
... | ... |
@@ -3681,8 +3760,12 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3681 | 3760 |
} |
3682 | 3761 |
} |
3683 | 3762 |
|
3684 |
- delta = chrpos_low - alloc_low; |
|
3685 |
- debug0(printf(" shift alloc downward by %d so alloc_low = chrpos_low\n",delta)); |
|
3763 |
+ if (chrpos_low < max_softclip) { |
|
3764 |
+ delta = chrpos_low /* - alloc_low (should be 0) */; |
|
3765 |
+ } else { |
|
3766 |
+ delta = chrpos_low - (alloc_low + max_softclip); |
|
3767 |
+ } |
|
3768 |
+ debug0(printf(" shift alloc upward by %d so alloc_low = chrpos_low - max_softclip\n",delta,max_softclip)); |
|
3686 | 3769 |
for (alloci = 0; alloci < (int) (alloc_ptr - alloc_low - delta); alloci++) { |
3687 | 3770 |
Tally_transfer(&(alloc_tallies[alloci]),&(alloc_tallies[alloci+delta])); |
3688 | 3771 |
} |
... | ... |
@@ -3692,35 +3775,36 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3692 | 3775 |
debug1(printf("Resetting alloci %d\n",alloci)); |
3693 | 3776 |
Tally_clear(alloc_tallies[alloci]); |
3694 | 3777 |
} |
3695 |
- alloc_low = chrpos_low; |
|
3696 |
- alloc_high = alloc_low + alloclength; |
|
3778 |
+ alloc_low = (chrpos_low < max_softclip) ? 0U : (chrpos_low - max_softclip); |
|
3779 |
+ alloc_high = alloc_low + alloclength + 2*max_softclip; |
|
3697 | 3780 |
|
3698 |
- } else if (chrpos_low == alloc_low) { |
|
3699 |
- debug0(printf("Case 3: chrpos_low %u == alloc_low %u\n",chrpos_low,alloc_low)); |
|
3781 |
+ } else if (chrpos_low == alloc_low + max_softclip) { |
|
3782 |
+ debug0(printf("Case 3: chrpos_low %u == alloc_low %u + max_softclip %d\n",chrpos_low,alloc_low,max_softclip)); |
|
3700 | 3783 |
|
3701 | 3784 |
} else { |
3702 |
- fprintf(stderr,"Sequences are not in order. Got chrpos_low %u < alloc_low %u\n", |
|
3703 |
- chrpos_low,alloc_low); |
|
3785 |
+ fprintf(stderr,"Sequences are not in order. Got chrpos_low %u < alloc_low %u + max_softclip %d\n", |
|
3786 |
+ chrpos_low,alloc_low,max_softclip); |
|
3704 | 3787 |
abort(); |
3705 | 3788 |
} |
3706 | 3789 |
|
3707 |
- if (chrpos_high >= alloc_high) { |
|
3790 |
+ if (chrpos_high + max_softclip >= alloc_high) { |
|
3708 | 3791 |
if (verbosep == true) { |
3709 | 3792 |
fprintf(stderr,"read %s at %s:%u..%u is longer than allocated buffer ending at %u => skipping\n", |
3710 | 3793 |
Bamline_acc(bamline),printchr,chrpos_low,chrpos_high,alloc_high); |
3711 | 3794 |
} |
3712 | 3795 |
} else { |
3713 |
- if (chrpos_high + 1U > alloc_ptr) { |
|
3714 |
- alloc_ptr = chrpos_high + 1U; |
|
3796 |
+ if (chrpos_high + max_softclip + 1U > alloc_ptr) { |
|
3797 |
+ alloc_ptr = chrpos_high + max_softclip + 1U; |
|
3715 | 3798 |
debug0(printf(" revising alloc_ptr to be %u\n",alloc_ptr)); |
3716 | 3799 |
} |
3717 | 3800 |
|
3718 | 3801 |
revise_read(alloc_tallies,chrstart,chrend,chrpos_low,Bamline_flag(bamline), |
3719 | 3802 |
Bamline_cigar_types(bamline),Bamline_cigar_npositions(bamline), |
3720 | 3803 |
Bamline_cigar_querylength(bamline),Bamline_read(bamline), |
3721 |
- Bamline_mapq(bamline),Bamline_quality_string(bamline),alloc_low, |
|
3722 |
- quality_counts_match,quality_counts_mismatch, |
|
3723 |
- genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p); |
|
3804 |
+ Bamline_mapq(bamline),Bamline_quality_string(bamline), |
|
3805 |
+ Bamline_terminalp(bamline),alloc_low,quality_counts_match,quality_counts_mismatch, |
|
3806 |
+ genome,chroffset,ignore_query_Ns_p,print_indels_p,readlevel_p, |
|
3807 |
+ max_softclip); |
|
3724 | 3808 |
} |
3725 | 3809 |
} |
3726 | 3810 |
|
... | ... |
@@ -3772,7 +3856,7 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3772 | 3856 |
} |
3773 | 3857 |
|
3774 | 3858 |
|
3775 |
- for (i = 0; i < alloclength+1; i++) { |
|
3859 |
+ for (i = 0; i < alloclength + 2*max_softclip + 1; i++) { |
|
3776 | 3860 |
Tally_free(&(alloc_tallies_alloc[i])); |
3777 | 3861 |
} |
3778 | 3862 |
FREE(alloc_tallies_alloc); |
... | ... |
@@ -3788,15 +3872,16 @@ Bamtally_run (long int **tally_matches, long int **tally_mismatches, |
3788 | 3872 |
|
3789 | 3873 |
/* Assumes output_type is OUTPUT_TALLY */ |
3790 | 3874 |
void |
3791 |
-Bamtally_run_lh (long int *tally_matches_low, long int *tally_mismatches_low, |
|
3792 |
- long int *tally_matches_high, long int *tally_mismatches_high, |
|
3875 |
+Bamtally_run_lh (long int **tally_matches_low, long int **tally_mismatches_low, |
|
3876 |
+ long int **tally_matches_high, long int **tally_mismatches_high, |
|
3793 | 3877 |
int *quality_counts_match, int *quality_counts_mismatch, |
3794 | 3878 |
Bamreader_T bamreader, Genome_T genome, char *printchr, |
3795 | 3879 |
Genomicpos_T chroffset, Genomicpos_T chrstart, Genomicpos_T chrend, int alloclength, |
3796 | 3880 |
char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
3797 | 3881 |
bool need_concordant_p, bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
3798 | 3882 |
int blocksize, int quality_score_adj, int min_depth, int variant_strands, |
3799 |
- bool genomic_diff_p, bool ignore_query_Ns_p, bool verbosep, bool readlevel_p) { |
|
3883 |
+ bool genomic_diff_p, bool ignore_query_Ns_p, bool verbosep, bool readlevel_p, |
|
3884 |
+ int max_softclip) { |
|
3800 | 3885 |
T this_low, this_high; |
3801 | 3886 |
Genomicpos_T alloc_ptr, alloc_low, alloc_high, chrpos_low, chrpos_high, chrpos; |
3802 | 3887 |
Genomicpos_T blockptr = 0U, blockstart = 0U, blockend = 0U; |
... | ... |
@@ -3809,8 +3894,8 @@ Bamtally_run_lh (long int *tally_matches_low, long int *tally_mismatches_low, |
3809 | 3894 |
T *alloc_tallies_low_alloc, *alloc_tallies_high_alloc, *block_tallies_low_alloc, *block_tallies_high_alloc; |
3810 | 3895 |
|
3811 | 3896 |
|
3812 |
- alloc_tallies_low_alloc = (T *) CALLOC(alloclength+1,sizeof(T)); |
|
3813 |
- for (i = 0; i < alloclength+1; i++) { |
|
3897 |
+ alloc_tallies_low_alloc = (T *) CALLOC(alloclength + 2*max_softclip + 1,sizeof(T)); |
|
3898 |
+ for (i = 0; i < alloclength + 2*max_softclip + 1; i++) { |
|
3814 | 3899 |
alloc_tallies_low_alloc[i] = Tally_new(); |
3815 | 3900 |
} |
3816 | 3901 |
alloc_tallies_low = &(alloc_tallies_low_alloc[0]); |
... |