git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@80611 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -10,7 +10,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read |
10 | 10 |
to work with GMAP and GSNAP from within R. In addition, it provides |
11 | 11 |
methods to tally alignment results on a per-nucleotide basis using |
12 | 12 |
the bam_tally tool. |
13 |
-Version: 1.3.9 |
|
13 |
+Version: 1.3.10 |
|
14 | 14 |
Depends: R (>= 2.15.0), methods, GenomicRanges |
15 | 15 |
Imports: IRanges, Rsamtools (>= 1.7.4), rtracklayer (>= 1.17.15), GenomicRanges, |
16 | 16 |
GenomicFeatures, Biostrings, VariantAnnotation (>= 1.7.34), tools, |
... | ... |
@@ -8,6 +8,7 @@ |
8 | 8 |
setClass("BamTallyParam", |
9 | 9 |
representation(genome = "GmapGenome", |
10 | 10 |
which = "GenomicRanges", |
11 |
+ desired_read_group = "characterORNULL", |
|
11 | 12 |
minimum_mapq = "integer", |
12 | 13 |
concordant_only = "logical", |
13 | 14 |
unique_only = "logical", |
... | ... |
@@ -32,13 +33,15 @@ normArgWhich <- function(x, genome) { |
32 | 33 |
} |
33 | 34 |
|
34 | 35 |
BamTallyParam <- function(genome, which = GRanges(), |
35 |
- minimum_mapq = 0L, |
|
36 |
+ desired_read_group = NULL, minimum_mapq = 0L, |
|
36 | 37 |
concordant_only = FALSE, unique_only = FALSE, |
37 | 38 |
primary_only = FALSE, ignore_duplicates = FALSE, |
38 | 39 |
min_depth = 0L, variant_strand = 0L, |
39 | 40 |
ignore_query_Ns = FALSE, |
40 | 41 |
indels = FALSE) |
41 | 42 |
{ |
43 |
+ if (!is.null(desired_read_group) && !isSingleString(desired_read_group)) |
|
44 |
+ stop("'desired_read_group' must be NULL or a single, non-NA string") |
|
42 | 45 |
if (!isSingleNumber(minimum_mapq) || minimum_mapq < 0) |
43 | 46 |
stop("minimum_mapq must be a single, non-negative, non-NA number") |
44 | 47 |
if (!isTRUEorFALSE(concordant_only)) |
... | ... |
@@ -120,11 +120,11 @@ normalizeIndelAlleles <- function(x, genome) { |
120 | 120 |
is.indel <- nchar(ref(x)) == 0L | nchar(alt(x)) == 0L |
121 | 121 |
if (any(is.indel)) { |
122 | 122 |
indels <- x[is.indel] |
123 |
- indels <- flank(indels, 1) |
|
124 |
- anchor <- getSeq(genome, indels) |
|
125 |
- ref(indels) <- paste0(anchor, ref(indels)) |
|
126 |
- alt(indels) <- paste0(anchor, alt(indels)) |
|
127 |
- x[is.indel] <- indels |
|
123 |
+ flanks <- flank(indels, 1) |
|
124 |
+ anchor <- getSeq(genome, flanks) |
|
125 |
+ ref(x)[is.indel] <- paste0(anchor, ref(indels)) |
|
126 |
+ alt(x)[is.indel] <- paste0(anchor, alt(indels)) |
|
127 |
+ ranges(x)[is.indel] <- resize(ranges(indels), nchar(ref(x)[is.indel]), "end") |
|
128 | 128 |
} |
129 | 129 |
x |
130 | 130 |
} |
... | ... |
@@ -136,7 +136,7 @@ normalizeIndelAlleles <- function(x, genome) { |
136 | 136 |
normArgSingleInteger <- function(x) { |
137 | 137 |
name <- deparse(substitute(x)) |
138 | 138 |
x <- as.integer(x) |
139 |
- if (!IRanges:::isSingleInteger(x)) |
|
139 |
+ if (!isSingleInteger(x)) |
|
140 | 140 |
stop("'", name, "' should be a single, non-NA integer") |
141 | 141 |
x |
142 | 142 |
} |
... | ... |
@@ -149,7 +149,8 @@ normArgTRUEorFALSE <- function(x) { |
149 | 149 |
|
150 | 150 |
.bam_tally_C <- function(bamreader, genome_dir = NULL, db = NULL, |
151 | 151 |
which = NULL, read_pos_breaks = NULL, |
152 |
- high_base_quality = 0L, alloclength = 200000L, |
|
152 |
+ high_base_quality = 0L, desired_read_group = NULL, |
|
153 |
+ alloclength = 200000L, |
|
153 | 154 |
minimum_mapq = 0L, good_unique_mapq = 35L, |
154 | 155 |
maximum_nhits = 1000000L, |
155 | 156 |
concordant_only = FALSE, unique_only = FALSE, |
... | ... |
@@ -164,10 +165,12 @@ normArgTRUEorFALSE <- function(x) { |
164 | 165 |
if (length(which) > 0L) { |
165 | 166 |
which <- list(as.character(seqnames(which)), start(which), end(which)) |
166 | 167 |
} else which <- NULL |
167 |
- if (!is.null(genome_dir) && !IRanges:::isSingleString(genome_dir)) |
|
168 |
+ if (!is.null(genome_dir) && !isSingleString(genome_dir)) |
|
168 | 169 |
stop("'genome_dir' must be NULL or a single, non-NA string") |
169 |
- if (!is.null(db) && !IRanges:::isSingleString(db)) |
|
170 |
+ if (!is.null(db) && !isSingleString(db)) |
|
170 | 171 |
stop("'db' must be NULL or a single, non-NA string") |
172 |
+ if (!is.null(desired_read_group) && !isSingleString(desired_read_group)) |
|
173 |
+ stop("'desired_read_group' must be NULL or a single, non-NA string") |
|
171 | 174 |
if (!is.null(read_pos_breaks)) { |
172 | 175 |
read_pos_breaks <- as.integer(read_pos_breaks) |
173 | 176 |
if (any(is.na(read_pos_breaks))) |
... | ... |
@@ -178,6 +181,7 @@ normArgTRUEorFALSE <- function(x) { |
178 | 181 |
stop("'read_pos_breaks' must be sorted") |
179 | 182 |
} |
180 | 183 |
.Call(R_Bamtally_iit, bamreader@.extptr, genome_dir, db, which, |
184 |
+ desired_read_group, |
|
181 | 185 |
normArgSingleInteger(alloclength), |
182 | 186 |
normArgSingleInteger(minimum_mapq), |
183 | 187 |
normArgSingleInteger(good_unique_mapq), |
... | ... |
@@ -14,14 +14,16 @@ |
14 | 14 |
} |
15 | 15 |
|
16 | 16 |
\usage{ |
17 |
- BamTallyParam(genome, which = RangesList(), read_pos_breaks = NULL, |
|
17 |
+ BamTallyParam(genome, which = RangesList(), |
|
18 |
+ desired_read_group = NULL, |
|
19 |
+ read_pos_breaks = NULL, |
|
18 | 20 |
high_base_quality = 0L, |
19 | 21 |
minimum_mapq = 0L, |
20 | 22 |
concordant_only = FALSE, unique_only = FALSE, |
21 | 23 |
primary_only = FALSE, ignore_duplicates = FALSE, |
22 | 24 |
min_depth = 0L, variant_strand = 0L, |
23 | 25 |
ignore_query_Ns = FALSE, |
24 |
- indels = FALSE) |
|
26 |
+ indels = TRUE) |
|
25 | 27 |
} |
26 | 28 |
\arguments{ |
27 | 29 |
\item{genome}{A \code{GmapGenome} object, or something coercible to one.} |
... | ... |
@@ -29,6 +31,8 @@ |
29 | 31 |
one that limits the tally to that range or set of ranges. By |
30 | 32 |
default, the entire genome is processed. |
31 | 33 |
} |
34 |
+ \item{desired_read_group}{The name of the read group to which to limit |
|
35 |
+ the tallying; if not NULL, must be a single, non-NA string.} |
|
32 | 36 |
\item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}} |
33 | 37 |
for aggregating the per-read position counts. If \code{NULL}, no per-cycle |
34 | 38 |
counts are returned.} |
... | ... |
@@ -54,7 +58,12 @@ |
54 | 58 |
is a good way to save resources.} |
55 | 59 |
\item{ignore_query_Ns}{Whether to ignore the N base pairs when |
56 | 60 |
counting. Can save a lot of resources when processing low quality data.} |
57 |
- \item{indels}{Whether to return indel counts; not supported yet.} |
|
61 |
+ \item{indels}{Whether to return indel counts. The \code{ref} and |
|
62 |
+ \code{alt} columns in the returned \code{VRanges} conform to VCF |
|
63 |
+ conventions; i.e., the first base upstream is included. The range |
|
64 |
+ always spans the sequence in \code{ref}; so e.g. a deletion extends |
|
65 |
+ one nt upstream of the actual deleted sequence. |
|
66 |
+ } |
|
58 | 67 |
} |
59 | 68 |
|
60 | 69 |
\seealso{ |
... | ... |
@@ -49,7 +49,7 @@ gstruct/Makefile: gstruct/configure ${R_SRC_DIR}/samtools |
49 | 49 |
--prefix=${PREFIX} --includedir=${GSTRUCT_INCLUDE_DIR} \ |
50 | 50 |
--libdir=${PREFIX}/${LIBnn} \ |
51 | 51 |
--with-samtools=${R_SRC_DIR}/samtools \ |
52 |
- --disable-binaries --disable-maintainer-mode #\ |
|
52 |
+ --disable-binaries #--disable-maintainer-mode #\ |
|
53 | 53 |
## does not appear to be a true dependency yet |
54 | 54 |
## --with-gmap=${PREFIX}/bin |
55 | 55 |
|
... | ... |
@@ -13,7 +13,7 @@ |
13 | 13 |
|
14 | 14 |
SEXP |
15 | 15 |
R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
16 |
- SEXP which_R, |
|
16 |
+ SEXP which_R, SEXP desired_read_group_R, |
|
17 | 17 |
SEXP alloclength_R, |
18 | 18 |
SEXP minimum_mapq_R, SEXP good_unique_mapq_R, |
19 | 19 |
SEXP maximum_nhits_R, |
... | ... |
@@ -30,6 +30,9 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
30 | 30 |
genome_dir_R == R_NilValue ? NULL : CHAR(asChar(genome_dir_R)); |
31 | 31 |
const char *db = CHAR(asChar(db_R)); |
32 | 32 |
int alloclength = asInteger(alloclength_R); |
33 |
+ const char *desired_read_group = |
|
34 |
+ desired_read_group_R == R_NilValue ? NULL : |
|
35 |
+ CHAR(asChar(desired_read_group_R)); |
|
33 | 36 |
int minimum_mapq = asInteger(minimum_mapq_R); |
34 | 37 |
int good_unique_mapq = asInteger(good_unique_mapq_R); |
35 | 38 |
int maximum_nhits = asInteger(maximum_nhits_R); |
... | ... |
@@ -57,9 +60,12 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
57 | 60 |
end = asInteger(VECTOR_ELT(which_R, 2)); |
58 | 61 |
} |
59 | 62 |
|
60 |
- IIT_T tally_iit = Bamtally_iit(bamreader, (char *)chr, start, end, |
|
61 |
- genome, chromosome_iit, |
|
62 |
- alloclength, minimum_mapq, good_unique_mapq, |
|
63 |
+ IIT_T tally_iit = Bamtally_iit(bamreader, (char *)chr, |
|
64 |
+ /* TODO: bam_lacks_chr */ NULL, |
|
65 |
+ start, end, |
|
66 |
+ genome, chromosome_iit, alloclength, |
|
67 |
+ (char *)desired_read_group, |
|
68 |
+ minimum_mapq, good_unique_mapq, |
|
63 | 69 |
maximum_nhits, need_concordant_p, |
64 | 70 |
need_unique_p, need_primary_p, |
65 | 71 |
ignore_duplicates_p, |
... | ... |
@@ -10,7 +10,7 @@ R_Bamread_new (SEXP bamfile_R); |
10 | 10 |
|
11 | 11 |
SEXP |
12 | 12 |
R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
13 |
- SEXP which_R, |
|
13 |
+ SEXP which_R, SEXP desired_read_group_R, |
|
14 | 14 |
SEXP alloclength_R, |
15 | 15 |
SEXP minimum_mapq_R, SEXP good_unique_mapq_R, |
16 | 16 |
SEXP maximum_nhits_R, |
... | ... |
@@ -51,13 +51,13 @@ build_triplet = @build@ |
51 | 51 |
host_triplet = @host@ |
52 | 52 |
target_triplet = @target@ |
53 | 53 |
subdir = . |
54 |
-DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \ |
|
54 |
+DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog COPYING \ |
|
55 | 55 |
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \ |
56 | 56 |
$(top_srcdir)/configure $(am__configure_deps) \ |
57 |
- $(top_srcdir)/lib/gstruct.pc.in COPYING config/compile \ |
|
58 |
- config/config.guess config/config.sub config/depcomp \ |
|
59 |
- config/install-sh config/missing config/ltmain.sh \ |
|
60 |
- $(top_srcdir)/config/compile $(top_srcdir)/config/config.guess \ |
|
57 |
+ $(top_srcdir)/lib/gstruct.pc.in config/compile \ |
|
58 |
+ config/config.guess config/config.sub config/install-sh \ |
|
59 |
+ config/missing config/ltmain.sh $(top_srcdir)/config/compile \ |
|
60 |
+ $(top_srcdir)/config/config.guess \ |
|
61 | 61 |
$(top_srcdir)/config/config.sub \ |
62 | 62 |
$(top_srcdir)/config/install-sh $(top_srcdir)/config/ltmain.sh \ |
63 | 63 |
$(top_srcdir)/config/missing |
... | ... |
@@ -231,7 +231,6 @@ LIBTOOL = @LIBTOOL@ |
231 | 231 |
LIPO = @LIPO@ |
232 | 232 |
LN_S = @LN_S@ |
233 | 233 |
LTLIBOBJS = @LTLIBOBJS@ |
234 |
-MAINT = @MAINT@ |
|
235 | 234 |
MAKEINFO = @MAKEINFO@ |
236 | 235 |
MANIFEST_TOOL = @MANIFEST_TOOL@ |
237 | 236 |
MKDIR_P = @MKDIR_P@ |
... | ... |
@@ -329,7 +328,7 @@ all: all-recursive |
329 | 328 |
.SUFFIXES: |
330 | 329 |
am--refresh: Makefile |
331 | 330 |
@: |
332 |
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) |
|
331 |
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) |
|
333 | 332 |
@for dep in $?; do \ |
334 | 333 |
case '$(am__configure_deps)' in \ |
335 | 334 |
*$$dep*) \ |
... | ... |
@@ -356,9 +355,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status |
356 | 355 |
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
357 | 356 |
$(SHELL) ./config.status --recheck |
358 | 357 |
|
359 |
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) |
|
358 |
+$(top_srcdir)/configure: $(am__configure_deps) |
|
360 | 359 |
$(am__cd) $(srcdir) && $(AUTOCONF) |
361 |
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) |
|
360 |
+$(ACLOCAL_M4): $(am__aclocal_m4_deps) |
|
362 | 361 |
$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) |
363 | 362 |
$(am__aclocal_m4_deps): |
364 | 363 |
lib/gstruct-${LIBGSTRUCT_API_VERSION}.pc: $(top_builddir)/config.status $(top_srcdir)/lib/gstruct.pc.in |
... | ... |
@@ -1,22 +1,2 @@ |
1 | 1 |
|
2 | 2 |
|
3 |
- |
|
4 |
-Usage: |
|
5 |
- |
|
6 |
-bam_singlemap -d <genome> <bam> | samtools view -Sb - > <new bam> |
|
7 |
- |
|
8 |
-bam_tally -d <genome> <bam> |
|
9 |
-bam_gstruct -d <genome> <bam> | iit_store -o <pred> |
|
10 |
-bam_print -d <genome> -m <genes>,<pred> <bam> <coords> |
|
11 |
- |
|
12 |
- |
|
13 |
-genecompare -d <genome> -m <genes> <pred.iit> > x, or |
|
14 |
-genecompare <goldstd.iit> <pred.iit> > x |
|
15 |
- |
|
16 |
- |
|
17 |
-Depends on samtools and gmap packages. |
|
18 |
- |
|
19 |
-gfusion depends on gmap and qsub |
|
20 |
- |
|
21 |
- |
|
22 |
- |
... | ... |
@@ -596,42 +596,6 @@ fi |
596 | 596 |
rmdir .tst 2>/dev/null |
597 | 597 |
AC_SUBST([am__leading_dot])]) |
598 | 598 |
|
599 |
-# Add --enable-maintainer-mode option to configure. -*- Autoconf -*- |
|
600 |
-# From Jim Meyering |
|
601 |
- |
|
602 |
-# Copyright (C) 1996-2013 Free Software Foundation, Inc. |
|
603 |
-# |
|
604 |
-# This file is free software; the Free Software Foundation |
|
605 |
-# gives unlimited permission to copy and/or distribute it, |
|
606 |
-# with or without modifications, as long as this notice is preserved. |
|
607 |
- |
|
608 |
-# AM_MAINTAINER_MODE([DEFAULT-MODE]) |
|
609 |
-# ---------------------------------- |
|
610 |
-# Control maintainer-specific portions of Makefiles. |
|
611 |
-# Default is to disable them, unless 'enable' is passed literally. |
|
612 |
-# For symmetry, 'disable' may be passed as well. Anyway, the user |
|
613 |
-# can override the default with the --enable/--disable switch. |
|
614 |
-AC_DEFUN([AM_MAINTAINER_MODE], |
|
615 |
-[m4_case(m4_default([$1], [disable]), |
|
616 |
- [enable], [m4_define([am_maintainer_other], [disable])], |
|
617 |
- [disable], [m4_define([am_maintainer_other], [enable])], |
|
618 |
- [m4_define([am_maintainer_other], [enable]) |
|
619 |
- m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])]) |
|
620 |
-AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) |
|
621 |
- dnl maintainer-mode's default is 'disable' unless 'enable' is passed |
|
622 |
- AC_ARG_ENABLE([maintainer-mode], |
|
623 |
- [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode], |
|
624 |
- am_maintainer_other[ make rules and dependencies not useful |
|
625 |
- (and sometimes confusing) to the casual installer])], |
|
626 |
- [USE_MAINTAINER_MODE=$enableval], |
|
627 |
- [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes])) |
|
628 |
- AC_MSG_RESULT([$USE_MAINTAINER_MODE]) |
|
629 |
- AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes]) |
|
630 |
- MAINT=$MAINTAINER_MODE_TRUE |
|
631 |
- AC_SUBST([MAINT])dnl |
|
632 |
-] |
|
633 |
-) |
|
634 |
- |
|
635 | 599 |
# Check to see how 'make' treats includes. -*- Autoconf -*- |
636 | 600 |
|
637 | 601 |
# Copyright (C) 2001-2013 Free Software Foundation, Inc. |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
#! /bin/sh |
2 | 2 |
# Guess values for system-dependent variables and create Makefiles. |
3 |
-# Generated by GNU Autoconf 2.69 for gstruct 2013-02-14. |
|
3 |
+# Generated by GNU Autoconf 2.69 for gstruct 2013-09-12. |
|
4 | 4 |
# |
5 | 5 |
# Report bugs to <Thomas Wu <twu@gene.com>>. |
6 | 6 |
# |
... | ... |
@@ -590,8 +590,8 @@ MAKEFLAGS= |
590 | 590 |
# Identity of this package. |
591 | 591 |
PACKAGE_NAME='gstruct' |
592 | 592 |
PACKAGE_TARNAME='gstruct' |
593 |
-PACKAGE_VERSION='2013-02-14' |
|
594 |
-PACKAGE_STRING='gstruct 2013-02-14' |
|
593 |
+PACKAGE_VERSION='2013-09-12' |
|
594 |
+PACKAGE_STRING='gstruct 2013-09-12' |
|
595 | 595 |
PACKAGE_BUGREPORT='Thomas Wu <twu@gene.com>' |
596 | 596 |
PACKAGE_URL='' |
597 | 597 |
|
... | ... |
@@ -684,9 +684,6 @@ MAINTAINER_FALSE |
684 | 684 |
MAINTAINER_TRUE |
685 | 685 |
FULLDIST_FALSE |
686 | 686 |
FULLDIST_TRUE |
687 |
-MAINT |
|
688 |
-MAINTAINER_MODE_FALSE |
|
689 |
-MAINTAINER_MODE_TRUE |
|
690 | 687 |
AM_BACKSLASH |
691 | 688 |
AM_DEFAULT_VERBOSITY |
692 | 689 |
AM_DEFAULT_V |
... | ... |
@@ -789,7 +786,6 @@ enable_option_checking |
789 | 786 |
enable_largefile |
790 | 787 |
enable_dependency_tracking |
791 | 788 |
enable_silent_rules |
792 |
-enable_maintainer_mode |
|
793 | 789 |
enable_fulldist |
794 | 790 |
enable_maintainer |
795 | 791 |
enable_static_linking |
... | ... |
@@ -1355,7 +1351,7 @@ if test "$ac_init_help" = "long"; then |
1355 | 1351 |
# Omit some internal or obsolete options to make the list less imposing. |
1356 | 1352 |
# This message is too long to be a string in the A/UX 3.1 sh. |
1357 | 1353 |
cat <<_ACEOF |
1358 |
-\`configure' configures gstruct 2013-02-14 to adapt to many kinds of systems. |
|
1354 |
+\`configure' configures gstruct 2013-09-12 to adapt to many kinds of systems. |
|
1359 | 1355 |
|
1360 | 1356 |
Usage: $0 [OPTION]... [VAR=VALUE]... |
1361 | 1357 |
|
... | ... |
@@ -1426,7 +1422,7 @@ fi |
1426 | 1422 |
|
1427 | 1423 |
if test -n "$ac_init_help"; then |
1428 | 1424 |
case $ac_init_help in |
1429 |
- short | recursive ) echo "Configuration of gstruct 2013-02-14:";; |
|
1425 |
+ short | recursive ) echo "Configuration of gstruct 2013-09-12:";; |
|
1430 | 1426 |
esac |
1431 | 1427 |
cat <<\_ACEOF |
1432 | 1428 |
|
... | ... |
@@ -1441,9 +1437,6 @@ Optional Features: |
1441 | 1437 |
speeds up one-time build |
1442 | 1438 |
--enable-silent-rules less verbose build output (undo: "make V=1") |
1443 | 1439 |
--disable-silent-rules verbose build output (undo: "make V=0") |
1444 |
- --disable-maintainer-mode |
|
1445 |
- disable make rules and dependencies not useful (and |
|
1446 |
- sometimes confusing) to the casual installer |
|
1447 | 1440 |
--enable-fulldist For use by program maintainer |
1448 | 1441 |
--enable-maintainer For use by program maintainer |
1449 | 1442 |
--enable-static-linking Link binaries statically (default=no) |
... | ... |
@@ -1545,7 +1538,7 @@ fi |
1545 | 1538 |
test -n "$ac_init_help" && exit $ac_status |
1546 | 1539 |
if $ac_init_version; then |
1547 | 1540 |
cat <<\_ACEOF |
1548 |
-gstruct configure 2013-02-14 |
|
1541 |
+gstruct configure 2013-09-12 |
|
1549 | 1542 |
generated by GNU Autoconf 2.69 |
1550 | 1543 |
|
1551 | 1544 |
Copyright (C) 2012 Free Software Foundation, Inc. |
... | ... |
@@ -2151,7 +2144,7 @@ cat >config.log <<_ACEOF |
2151 | 2144 |
This file contains any messages produced by compilers while |
2152 | 2145 |
running configure, to aid debugging if configure makes a mistake. |
2153 | 2146 |
|
2154 |
-It was created by gstruct $as_me 2013-02-14, which was |
|
2147 |
+It was created by gstruct $as_me 2013-09-12, which was |
|
2155 | 2148 |
generated by GNU Autoconf 2.69. Invocation command line was |
2156 | 2149 |
|
2157 | 2150 |
$ $0 $@ |
... | ... |
@@ -2501,8 +2494,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu |
2501 | 2494 |
|
2502 | 2495 |
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5 |
2503 | 2496 |
$as_echo_n "checking package version... " >&6; } |
2504 |
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2013-02-14" >&5 |
|
2505 |
-$as_echo "2013-02-14" >&6; } |
|
2497 |
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2013-09-12" >&5 |
|
2498 |
+$as_echo "2013-09-12" >&6; } |
|
2506 | 2499 |
|
2507 | 2500 |
### Read defaults |
2508 | 2501 |
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking CONFIG_SITE" >&5 |
... | ... |
@@ -4256,8 +4249,9 @@ fi |
4256 | 4249 |
|
4257 | 4250 |
|
4258 | 4251 |
# Define the identity of the package. |
4259 |
- PACKAGE='gstruct' |
|
4260 |
- VERSION='2013-02-14' |
|
4252 |
+ |
|
4253 |
+ PACKAGE=gstruct |
|
4254 |
+ VERSION=2013-09-12 |
|
4261 | 4255 |
|
4262 | 4256 |
|
4263 | 4257 |
cat >>confdefs.h <<_ACEOF |
... | ... |
@@ -4432,29 +4426,6 @@ fi |
4432 | 4426 |
|
4433 | 4427 |
|
4434 | 4428 |
|
4435 |
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 |
|
4436 |
-$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } |
|
4437 |
- # Check whether --enable-maintainer-mode was given. |
|
4438 |
-if test "${enable_maintainer_mode+set}" = set; then : |
|
4439 |
- enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval |
|
4440 |
-else |
|
4441 |
- USE_MAINTAINER_MODE=yes |
|
4442 |
-fi |
|
4443 |
- |
|
4444 |
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 |
|
4445 |
-$as_echo "$USE_MAINTAINER_MODE" >&6; } |
|
4446 |
- if test $USE_MAINTAINER_MODE = yes; then |
|
4447 |
- MAINTAINER_MODE_TRUE= |
|
4448 |
- MAINTAINER_MODE_FALSE='#' |
|
4449 |
-else |
|
4450 |
- MAINTAINER_MODE_TRUE='#' |
|
4451 |
- MAINTAINER_MODE_FALSE= |
|
4452 |
-fi |
|
4453 |
- |
|
4454 |
- MAINT=$MAINTAINER_MODE_TRUE |
|
4455 |
- |
|
4456 |
- |
|
4457 |
- |
|
4458 | 4429 |
if test "x$enable_fulldist" = xyes; then |
4459 | 4430 |
FULLDIST_TRUE= |
4460 | 4431 |
FULLDIST_FALSE='#' |
... | ... |
@@ -15457,10 +15428,6 @@ else |
15457 | 15428 |
am__EXEEXT_FALSE= |
15458 | 15429 |
fi |
15459 | 15430 |
|
15460 |
-if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then |
|
15461 |
- as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. |
|
15462 |
-Usually this means the macro was only invoked conditionally." "$LINENO" 5 |
|
15463 |
-fi |
|
15464 | 15431 |
if test -z "${FULLDIST_TRUE}" && test -z "${FULLDIST_FALSE}"; then |
15465 | 15432 |
as_fn_error $? "conditional \"FULLDIST\" was never defined. |
15466 | 15433 |
Usually this means the macro was only invoked conditionally." "$LINENO" 5 |
... | ... |
@@ -15875,7 +15842,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 |
15875 | 15842 |
# report actual input values of CONFIG_FILES etc. instead of their |
15876 | 15843 |
# values after options handling. |
15877 | 15844 |
ac_log=" |
15878 |
-This file was extended by gstruct $as_me 2013-02-14, which was |
|
15845 |
+This file was extended by gstruct $as_me 2013-09-12, which was |
|
15879 | 15846 |
generated by GNU Autoconf 2.69. Invocation command line was |
15880 | 15847 |
|
15881 | 15848 |
CONFIG_FILES = $CONFIG_FILES |
... | ... |
@@ -15941,7 +15908,7 @@ _ACEOF |
15941 | 15908 |
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 |
15942 | 15909 |
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" |
15943 | 15910 |
ac_cs_version="\\ |
15944 |
-gstruct config.status 2013-02-14 |
|
15911 |
+gstruct config.status 2013-09-12 |
|
15945 | 15912 |
configured by $0, generated by GNU Autoconf 2.69, |
15946 | 15913 |
with options \\"\$ac_cs_config\\" |
15947 | 15914 |
|
... | ... |
@@ -63,8 +63,7 @@ AC_SYS_LARGEFILE |
63 | 63 |
AC_ARG_PROGRAM |
64 | 64 |
|
65 | 65 |
#AM_INIT_AUTOMAKE([no-dependencies]) |
66 |
-AM_INIT_AUTOMAKE([1.10.0]) |
|
67 |
-AM_MAINTAINER_MODE([enable]) |
|
66 |
+AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION) |
|
68 | 67 |
|
69 | 68 |
AM_CONDITIONAL(FULLDIST,test "x$enable_fulldist" = xyes) |
70 | 69 |
AC_ARG_ENABLE([fulldist], |
... | ... |
@@ -360,7 +360,6 @@ LIBTOOL = @LIBTOOL@ |
360 | 360 |
LIPO = @LIPO@ |
361 | 361 |
LN_S = @LN_S@ |
362 | 362 |
LTLIBOBJS = @LTLIBOBJS@ |
363 |
-MAINT = @MAINT@ |
|
364 | 363 |
MAKEINFO = @MAKEINFO@ |
365 | 364 |
MANIFEST_TOOL = @MANIFEST_TOOL@ |
366 | 365 |
MKDIR_P = @MKDIR_P@ |
... | ... |
@@ -705,7 +704,7 @@ all: config.h |
705 | 704 |
|
706 | 705 |
.SUFFIXES: |
707 | 706 |
.SUFFIXES: .c .lo .o .obj |
708 |
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) |
|
707 |
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) |
|
709 | 708 |
@for dep in $?; do \ |
710 | 709 |
case '$(am__configure_deps)' in \ |
711 | 710 |
*$$dep*) \ |
... | ... |
@@ -730,9 +729,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status |
730 | 729 |
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
731 | 730 |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
732 | 731 |
|
733 |
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) |
|
732 |
+$(top_srcdir)/configure: $(am__configure_deps) |
|
734 | 733 |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
735 |
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) |
|
734 |
+$(ACLOCAL_M4): $(am__aclocal_m4_deps) |
|
736 | 735 |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
737 | 736 |
$(am__aclocal_m4_deps): |
738 | 737 |
|
... | ... |
@@ -743,7 +742,7 @@ config.h: stamp-h1 |
743 | 742 |
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status |
744 | 743 |
@rm -f stamp-h1 |
745 | 744 |
cd $(top_builddir) && $(SHELL) ./config.status src/config.h |
746 |
-$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) |
|
745 |
+$(srcdir)/config.h.in: $(am__configure_deps) |
|
747 | 746 |
($(am__cd) $(top_srcdir) && $(AUTOHEADER)) |
748 | 747 |
rm -f stamp-h1 |
749 | 748 |
touch $@ |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-static char rcsid[] = "$Id: bam_fastq.c 87713 2013-03-01 18:32:34Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bam_fastq.c 108654 2013-09-19 23:11:00Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
... | ... |
@@ -57,6 +57,7 @@ static char rcsid[] = "$Id: bam_fastq.c 87713 2013-03-01 18:32:34Z twu $"; |
57 | 57 |
static char *output_root = "out"; |
58 | 58 |
static char *error_root = NULL; |
59 | 59 |
static bool fastap = false; |
60 |
+static bool add_bounds_p = false; |
|
60 | 61 |
static bool allp = true; |
61 | 62 |
static bool single_end_output_p = false; /* print paired-end BAM as single-end file */ |
62 | 63 |
static int terminal_minlength = -1; |
... | ... |
@@ -75,6 +76,7 @@ static struct option long_options[] = { |
75 | 76 |
{"output", required_argument, 0, 'o'}, /* output_root */ |
76 | 77 |
{"error", required_argument, 0, 'e'}, /* error_root */ |
77 | 78 |
{"fasta", no_argument, 0, 0}, /* fastap */ |
79 |
+ {"add-bounds", no_argument, 0, 0}, /* add_bounds_p */ |
|
78 | 80 |
{"all", no_argument, 0, 'A'}, /* allp */ |
79 | 81 |
{"single-end-output", no_argument, 0, '1'}, /* single_end_output_p */ |
80 | 82 |
{"spliced", no_argument, 0, 0}, /* splicedp */ |
... | ... |
@@ -109,6 +111,7 @@ Usage: bam_fastq [options] [-o <output>] <bam file...>\n\ |
109 | 111 |
\n\ |
110 | 112 |
Options:\n\ |
111 | 113 |
--fasta Writes to stdout in FASTA format\n\ |
114 |
+ --add-lh Add chromosomal low/high designation for concordant paired-end alignments\n\ |
|
112 | 115 |
-o, --output=STRING Writes FASTQ to <output>_1.fq and <output>_2.fq (default=out)\n\ |
113 | 116 |
-1, --single-end-output Prints reads as single-end (even if BAM files have paired-end reads)\n\ |
114 | 117 |
-S, --terminal-minlength=INT Print only reads with terminal ends (cigar S length) equal to\n\ |
... | ... |
@@ -197,11 +200,35 @@ struct T { |
197 | 200 |
char *queryseq; |
198 | 201 |
char *quality_string; |
199 | 202 |
bool splicedp; |
203 |
+ |
|
204 |
+ char querystart_strand; |
|
205 |
+ char *querystart_chr; |
|
206 |
+ Genomicpos_T querystart_chrpos; |
|
207 |
+ |
|
208 |
+ char querybreak1_strand; |
|
209 |
+ char *querybreak1_chr; |
|
210 |
+ Genomicpos_T querybreak1_chrpos; |
|
211 |
+ |
|
212 |
+ char querybreak2_strand; |
|
213 |
+ char *querybreak2_chr; |
|
214 |
+ Genomicpos_T querybreak2_chrpos; |
|
215 |
+ |
|
216 |
+ char queryend_strand; |
|
217 |
+ char *queryend_chr; |
|
218 |
+ Genomicpos_T queryend_chrpos; |
|
200 | 219 |
}; |
201 | 220 |
|
202 | 221 |
|
203 | 222 |
static void |
204 | 223 |
Read_free (T *old) { |
224 |
+ if ((*old)->querybreak1_chrpos != 0) { |
|
225 |
+ FREE((*old)->querybreak1_chr); |
|
226 |
+ } |
|
227 |
+ if ((*old)->querybreak2_chrpos != 0) { |
|
228 |
+ FREE((*old)->querybreak2_chr); |
|
229 |
+ } |
|
230 |
+ FREE((*old)->queryend_chr); |
|
231 |
+ FREE((*old)->querystart_chr); |
|
205 | 232 |
FREE((*old)->queryseq); |
206 | 233 |
if ((*old)->quality_string != NULL) { |
207 | 234 |
FREE((*old)->quality_string); |
... | ... |
@@ -211,6 +238,102 @@ Read_free (T *old) { |
211 | 238 |
} |
212 | 239 |
|
213 | 240 |
|
241 |
+static void |
|
242 |
+Read_add_chrinfo_querystart (Read_T read, int flag, char *chr, Genomicpos_T chrpos_low, Genomicpos_T chrpos_high) { |
|
243 |
+ |
|
244 |
+ if (chr == NULL) { |
|
245 |
+ read->querystart_strand = '*'; |
|
246 |
+ read->querystart_chr = (char *) CALLOC(1,sizeof(char)); |
|
247 |
+ read->querystart_chr[0] = '\0'; |
|
248 |
+ read->querystart_chrpos = 0; |
|
249 |
+ } else { |
|
250 |
+ read->querystart_chr = (char *) CALLOC(strlen(chr)+1,sizeof(char)); |
|
251 |
+ strcpy(read->querystart_chr,chr); |
|
252 |
+ if (flag & QUERY_MINUSP) { |
|
253 |
+ read->querystart_strand = '-'; |
|
254 |
+ read->querystart_chrpos = chrpos_high; |
|
255 |
+ } else { |
|
256 |
+ read->querystart_strand = '+'; |
|
257 |
+ read->querystart_chrpos = chrpos_low; |
|
258 |
+ } |
|
259 |
+ } |
|
260 |
+ |
|
261 |
+ return; |
|
262 |
+} |
|
263 |
+ |
|
264 |
+static void |
|
265 |
+Read_add_chrinfo_querybreak2 (Read_T read, int flag, char *chr, Genomicpos_T chrpos_low, Genomicpos_T chrpos_high) { |
|
266 |
+ |
|
267 |
+ if (chr == NULL) { |
|
268 |
+ abort(); |
|
269 |
+ read->querybreak2_strand = '*'; |
|
270 |
+ read->querybreak2_chr = (char *) CALLOC(1,sizeof(char)); |
|
271 |
+ read->querybreak2_chr[0] = '\0'; |
|
272 |
+ read->querybreak2_chrpos = 0; |
|
273 |
+ } else { |
|
274 |
+ read->querybreak2_chr = (char *) CALLOC(strlen(chr)+1,sizeof(char)); |
|
275 |
+ strcpy(read->querybreak2_chr,chr); |
|
276 |
+ if (flag & QUERY_MINUSP) { |
|
277 |
+ read->querybreak2_strand = '-'; |
|
278 |
+ read->querybreak2_chrpos = chrpos_high; |
|
279 |
+ } else { |
|
280 |
+ read->querybreak2_strand = '+'; |
|
281 |
+ read->querybreak2_chrpos = chrpos_low; |
|
282 |
+ } |
|
283 |
+ } |
|
284 |
+ |
|
285 |
+ return; |
|
286 |
+} |
|
287 |
+ |
|
288 |
+ |
|
289 |
+static void |
|
290 |
+Read_add_chrinfo_queryend (Read_T read, int flag, char *chr, Genomicpos_T chrpos_low, Genomicpos_T chrpos_high) { |
|
291 |
+ |
|
292 |
+ if (chr == NULL) { |
|
293 |
+ read->queryend_strand = '*'; |
|
294 |
+ read->queryend_chr = (char *) CALLOC(1,sizeof(char)); |
|
295 |
+ read->queryend_chr[0] = '\0'; |
|
296 |
+ read->queryend_chrpos = 0; |
|
297 |
+ } else { |
|
298 |
+ read->queryend_chr = (char *) CALLOC(strlen(chr)+1,sizeof(char)); |
|
299 |
+ strcpy(read->queryend_chr,chr); |
|
300 |
+ if (flag & QUERY_MINUSP) { |
|
301 |
+ read->queryend_strand = '-'; |
|
302 |
+ read->queryend_chrpos = chrpos_low; |
|
303 |
+ } else { |
|
304 |
+ read->queryend_strand = '+'; |
|
305 |
+ read->queryend_chrpos = chrpos_high; |
|
306 |
+ } |
|
307 |
+ } |
|
308 |
+ |
|
309 |
+ return; |
|
310 |
+} |
|
311 |
+ |
|
312 |
+static void |
|
313 |
+Read_add_chrinfo_querybreak1 (Read_T read, int flag, char *chr, Genomicpos_T chrpos_low, Genomicpos_T chrpos_high) { |
|
314 |
+ |
|
315 |
+ if (chr == NULL) { |
|
316 |
+ abort(); |
|
317 |
+ read->querybreak1_strand = '*'; |
|
318 |
+ read->querybreak1_chr = (char *) CALLOC(1,sizeof(char)); |
|
319 |
+ read->querybreak1_chr[0] = '\0'; |
|
320 |
+ read->querybreak1_chrpos = 0; |
|
321 |
+ } else { |
|
322 |
+ read->querybreak1_chr = (char *) CALLOC(strlen(chr)+1,sizeof(char)); |
|
323 |
+ strcpy(read->querybreak1_chr,chr); |
|
324 |
+ if (flag & QUERY_MINUSP) { |
|
325 |
+ read->querybreak1_strand = '-'; |
|
326 |
+ read->querybreak1_chrpos = chrpos_low; |
|
327 |
+ } else { |
|
328 |
+ read->querybreak1_strand = '+'; |
|
329 |
+ read->querybreak1_chrpos = chrpos_high; |
|
330 |
+ } |
|
331 |
+ } |
|
332 |
+ |
|
333 |
+ return; |
|
334 |
+} |
|
335 |
+ |
|
336 |
+ |
|
214 | 337 |
static T |
215 | 338 |
Read_create_bam (bool *completep, T read, Bamline_T bamline) { |
216 | 339 |
unsigned int flag; |
... | ... |
@@ -220,6 +343,7 @@ Read_create_bam (bool *completep, T read, Bamline_T bamline) { |
220 | 343 |
char strand; |
221 | 344 |
int type; |
222 | 345 |
int cigar_querylength, mlength, querypos, i; |
346 |
+ /* bool insidep; */ |
|
223 | 347 |
|
224 | 348 |
flag = Bamline_flag(bamline); |
225 | 349 |
shortread = Bamline_read(bamline); |
... | ... |
@@ -235,6 +359,9 @@ Read_create_bam (bool *completep, T read, Bamline_T bamline) { |
235 | 359 |
strcpy(read->quality_string,quality_string); |
236 | 360 |
} |
237 | 361 |
read->splicedp = true; |
362 |
+ read->querybreak1_chrpos = read->querybreak2_chrpos = 0; |
|
363 |
+ Read_add_chrinfo_querystart(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
364 |
+ Read_add_chrinfo_queryend(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
238 | 365 |
*completep = true; |
239 | 366 |
return read; |
240 | 367 |
} |
... | ... |
@@ -244,6 +371,7 @@ Read_create_bam (bool *completep, T read, Bamline_T bamline) { |
244 | 371 |
cigar_querylength = Bamline_cigar_querylength(bamline); |
245 | 372 |
|
246 | 373 |
if (read == NULL) { |
374 |
+ /* Initialize */ |
|
247 | 375 |
read = (T) MALLOC(sizeof(struct T)); |
248 | 376 |
read->queryseq = (char *) CALLOC(cigar_querylength+1,sizeof(char)); |
249 | 377 |
for (i = 0; i < cigar_querylength; i++) { |
... | ... |
@@ -255,24 +383,42 @@ Read_create_bam (bool *completep, T read, Bamline_T bamline) { |
255 | 383 |
read->quality_string = (char *) CALLOC(cigar_querylength+1,sizeof(char)); |
256 | 384 |
} |
257 | 385 |
read->splicedp = false; |
386 |
+ read->querybreak1_chrpos = read->querybreak2_chrpos = 0; |
|
258 | 387 |
} |
259 | 388 |
|
260 | 389 |
if (flag & QUERY_MINUSP) { |
261 | 390 |
strand = '-'; |
262 | 391 |
querypos = cigar_querylength - 1; |
392 |
+ if (Intlist_head(types) == 'H') { |
|
393 |
+ Read_add_chrinfo_querybreak1(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
394 |
+ } else { |
|
395 |
+ Read_add_chrinfo_queryend(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
396 |
+ } |
|
263 | 397 |
} else { |
264 | 398 |
strand = '+'; |
265 | 399 |
querypos = 0; |
400 |
+ if (Intlist_head(types) == 'H') { |
|
401 |
+ Read_add_chrinfo_querybreak2(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
402 |
+ } else { |
|
403 |
+ Read_add_chrinfo_querystart(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
404 |
+ } |
|
266 | 405 |
} |
267 | 406 |
|
407 |
+ |
|
408 |
+ |
|
268 | 409 |
p = shortread; |
269 | 410 |
q = quality_string; |
411 |
+ /* insidep = true; */ |
|
270 | 412 |
for (a = types, b = npositions; a != NULL; a = Intlist_next(a), b = Uintlist_next(b)) { |
271 | 413 |
type = Intlist_head(a); |
272 | 414 |
mlength = Uintlist_head(b); |
273 | 415 |
|
274 | 416 |
if (type == 'H') { |
275 |
- querypos += ((strand == '+') ? +mlength : -mlength); |
|
417 |
+ if (strand == '+') { |
|
418 |
+ querypos += mlength; |
|
419 |
+ } else { |
|
420 |
+ querypos -= mlength; |
|
421 |
+ } |
|
276 | 422 |
|
277 | 423 |
} else if (type == 'N') { |
278 | 424 |
/* Ignore */ |
... | ... |
@@ -328,6 +474,20 @@ Read_create_bam (bool *completep, T read, Bamline_T bamline) { |
328 | 474 |
} |
329 | 475 |
} |
330 | 476 |
|
477 |
+ if (type == 'H') { |
|
478 |
+ if (flag & QUERY_MINUSP) { |
|
479 |
+ Read_add_chrinfo_querybreak2(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
480 |
+ } else { |
|
481 |
+ Read_add_chrinfo_querybreak1(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
482 |
+ } |
|
483 |
+ } else { |
|
484 |
+ if (flag & QUERY_MINUSP) { |
|
485 |
+ Read_add_chrinfo_querystart(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
486 |
+ } else { |
|
487 |
+ Read_add_chrinfo_queryend(read,flag,Bamline_chr(bamline),Bamline_chrpos_low(bamline),Bamline_chrpos_high(bamline)); |
|
488 |
+ } |
|
489 |
+ } |
|
490 |
+ |
|
331 | 491 |
*completep = true; |
332 | 492 |
for (i = 0; i < cigar_querylength; i++) { |
333 | 493 |
if (read->queryseq[i] == '*') { |
... | ... |
@@ -335,6 +495,16 @@ Read_create_bam (bool *completep, T read, Bamline_T bamline) { |
335 | 495 |
} |
336 | 496 |
} |
337 | 497 |
|
498 |
+#if 0 |
|
499 |
+ if (insidep == true && Bamline_paired_read_p(bamline) == true) { |
|
500 |
+ if (Bamline_lowend_p(bamline) == true) { |
|
501 |
+ read->lowp = true; |
|
502 |
+ } else { |
|
503 |
+ read->highp = true; |
|
504 |
+ } |
|
505 |
+ } |
|
506 |
+#endif |
|
507 |
+ |
|
338 | 508 |
return read; |
339 | 509 |
} |
340 | 510 |
|
... | ... |
@@ -349,14 +519,20 @@ Read_create_sam (bool *completep, T read, char *line) { |
349 | 519 |
int type; |
350 | 520 |
int cigar_querylength, mlength, querypos, i; |
351 | 521 |
|
352 |
- Genomicpos_T chrpos_low; |
|
353 |
- char *chr, *acc, *cigar, *auxinfo; |
|
522 |
+ Genomicpos_T chrpos_low, chrpos_high, mate_chrpos_low; |
|
523 |
+ char *chr, *mate_chr, *acc, *cigar, *auxinfo; |
|
354 | 524 |
int mapq; |
355 | 525 |
int readlength; |
526 |
+ /* bool insidep; */ |
|
356 | 527 |
|
357 |
- auxinfo = Samread_parse_line(&acc,&flag,&mapq,&chr,&chrpos_low,&cigar,&readlength,&shortread,&quality_string,line); |
|
528 |
+ auxinfo = Samread_parse_line(&acc,&flag,&mapq,&chr,&chrpos_low,&cigar, |
|
529 |
+ &mate_chr,&mate_chrpos_low,&readlength, |
|
530 |
+ &shortread,&quality_string,line); |
|
531 |
+ types = Samread_parse_cigar(&npositions,&cigar_querylength,cigar); |
|
532 |
+ chrpos_high = Samread_chrpos_high(types,npositions,chrpos_low); |
|
533 |
+ FREE(mate_chr); |
|
534 |
+ FREE(cigar); |
|
358 | 535 |
FREE(acc); |
359 |
- FREE(chr); |
|
360 | 536 |
|
361 | 537 |
if (flag & QUERY_UNMAPPED) { |
362 | 538 |
read = (T) MALLOC(sizeof(struct T)); |
... | ... |
@@ -369,19 +545,24 @@ Read_create_sam (bool *completep, T read, char *line) { |
369 | 545 |
strcpy(read->quality_string,quality_string); |
370 | 546 |
} |
371 | 547 |
read->splicedp = true; |
548 |
+ read->querybreak1_chrpos = read->querybreak2_chrpos = 0; |
|
549 |
+ Read_add_chrinfo_querystart(read,flag,chr,chrpos_low,chrpos_high); |
|
550 |
+ Read_add_chrinfo_queryend(read,flag,chr,chrpos_low,chrpos_high); |
|
372 | 551 |
*completep = true; |
373 | 552 |
|
553 |
+ FREE(chr); |
|
374 | 554 |
FREE(cigar); |
375 | 555 |
FREE(quality_string); |
376 | 556 |
FREE(shortread); |
377 | 557 |
|
558 |
+ Uintlist_free(&npositions); |
|
559 |
+ Intlist_free(&types); |
|
378 | 560 |
return read; |
379 | 561 |
} |
380 | 562 |
|
381 |
- types = Samread_parse_cigar(&npositions,&cigar_querylength,cigar); |
|
382 |
- FREE(cigar); |
|
383 | 563 |
|
384 | 564 |
if (read == NULL) { |
565 |
+ /* Initialize */ |
|
385 | 566 |
read = (T) MALLOC(sizeof(struct T)); |
386 | 567 |
read->queryseq = (char *) CALLOC(cigar_querylength+1,sizeof(char)); |
387 | 568 |
for (i = 0; i < cigar_querylength; i++) { |
... | ... |
@@ -393,24 +574,52 @@ Read_create_sam (bool *completep, T read, char *line) { |
393 | 574 |
read->quality_string = (char *) CALLOC(cigar_querylength+1,sizeof(char)); |
394 | 575 |
} |
395 | 576 |
read->splicedp = false; |
577 |
+ read->querybreak1_chrpos = read->querybreak2_chrpos = 0; |
|
396 | 578 |
} |
397 | 579 |
|
398 | 580 |
if (flag & QUERY_MINUSP) { |
399 | 581 |
strand = '-'; |
400 | 582 |
querypos = cigar_querylength - 1; |
583 |
+ if (Intlist_head(types) == 'H') { |
|
584 |
+ Read_add_chrinfo_querybreak1(read,flag,chr,chrpos_low,chrpos_high); |
|
585 |
+ } else { |
|
586 |
+ Read_add_chrinfo_queryend(read,flag,chr,chrpos_low,chrpos_high); |
|
587 |
+ } |
|
401 | 588 |
} else { |
402 | 589 |
strand = '+'; |
403 | 590 |
querypos = 0; |
591 |
+ if (Intlist_head(types) == 'H') { |
|
592 |
+ Read_add_chrinfo_querybreak2(read,flag,chr,chrpos_low,chrpos_high); |
|
593 |
+ } else { |
|
594 |
+ Read_add_chrinfo_querystart(read,flag,chr,chrpos_low,chrpos_high); |
|
595 |
+ } |
|
404 | 596 |
} |
405 | 597 |
|
406 | 598 |
p = shortread; |
407 | 599 |
q = quality_string; |
600 |
+ /* insidep = true; */ |
|
408 | 601 |
for (a = types, b = npositions; a != NULL; a = Intlist_next(a), b = Uintlist_next(b)) { |
409 | 602 |
type = Intlist_head(a); |
410 | 603 |
mlength = Uintlist_head(b); |
411 | 604 |
|
412 | 605 |
if (type == 'H') { |
413 |
- querypos += ((strand == '+') ? +mlength : -mlength); |
|
606 |
+ if (strand == '+') { |
|
607 |
+#if 0 |
|
608 |
+ if (querypos > 0) { |
|
609 |
+ /* Already filled in beginning of read and skipping end of read */ |
|
610 |
+ insidep = false; |
|
611 |
+ } |
|
612 |
+#endif |
|
613 |
+ querypos += mlength; |
|
614 |
+ } else { |
|
615 |
+#if 0 |
|
616 |
+ if (querypos == cigar_querylength - 1) { |
|
617 |
+ /* About to skip end of read */ |
|
618 |
+ insidep = false; |
|
619 |
+ } |
|
620 |
+#endif |
|
621 |
+ querypos -= mlength; |
|
622 |
+ } |
|
414 | 623 |
|
415 | 624 |
} else if (type == 'N') { |
416 | 625 |
/* Ignore */ |
... | ... |
@@ -471,7 +680,20 @@ Read_create_sam (bool *completep, T read, char *line) { |
471 | 680 |
FREE(quality_string); |
472 | 681 |
FREE(shortread); |
473 | 682 |
|
474 |
- |
|
683 |
+ if (type == 'H') { |
|
684 |
+ if (flag & QUERY_MINUSP) { |
|
685 |
+ Read_add_chrinfo_querybreak2(read,flag,chr,chrpos_low,chrpos_high); |
|
686 |
+ } else { |
|
687 |
+ Read_add_chrinfo_querybreak1(read,flag,chr,chrpos_low,chrpos_high); |
|
688 |
+ } |
|
689 |
+ } else { |
|
690 |
+ if (flag & QUERY_MINUSP) { |
|
691 |
+ Read_add_chrinfo_querystart(read,flag,chr,chrpos_low,chrpos_high); |
|
692 |
+ } else { |
|
693 |
+ Read_add_chrinfo_queryend(read,flag,chr,chrpos_low,chrpos_high); |
|
694 |
+ } |
|
695 |
+ } |
|
696 |
+ |
|
475 | 697 |
*completep = true; |
476 | 698 |
for (i = 0; i < cigar_querylength; i++) { |
477 | 699 |
if (read->queryseq[i] == '*') { |
... | ... |
@@ -479,6 +701,18 @@ Read_create_sam (bool *completep, T read, char *line) { |
479 | 701 |
} |
480 | 702 |
} |
481 | 703 |
|
704 |
+#if 0 |
|
705 |
+ if (insidep == true && mate_chr == NULL) { |
|
706 |
+ if (chrpos_low < mate_chrpos_low) { |
|
707 |
+ read->lowp = true; |
|
708 |
+ } else if (chrpos_low > mate_chrpos_low) { |
|
709 |
+ read->highp = true; |
|
710 |
+ } |
|
711 |
+ } |
|
712 |
+#endif |
|
713 |
+ |
|
714 |
+ FREE(chr); |
|
715 |
+ |
|
482 | 716 |
return read; |
483 | 717 |
} |
484 | 718 |
|
... | ... |
@@ -567,36 +801,99 @@ print_upper_case (FILE *fp, char *queryseq) { |
567 | 801 |
static void |
568 | 802 |
print_fasta_single (FILE *fp, char *acc, T read) { |
569 | 803 |
if (want_splicedp == true && read->splicedp == true) { |
570 |
- fprintf(fp,">%s\n",acc); |
|
804 |
+ fprintf(fp,">%s",acc); |
|
805 |
+ if (add_bounds_p == true) { |
|
806 |
+ fprintf(fp,"(%c%s:%u",read->querystart_strand,read->querystart_chr,read->querystart_chrpos); |
|
807 |
+ if (read->querybreak1_chrpos != 0) { |
|
808 |
+ fprintf(fp,",%c%s:%u",read->querybreak1_strand,read->querybreak1_chr,read->querybreak1_chrpos); |
|
809 |
+ fprintf(fp,",%c%s:%u",read->querybreak2_strand,read->querybreak2_chr,read->querybreak2_chrpos); |
|
810 |
+ } |
|
811 |
+ fprintf(fp,",%c%s:%u)",read->queryend_strand,read->queryend_chr,read->queryend_chrpos); |
|
812 |
+ } |
|
813 |
+ fprintf(fp,"\n"); |
|
571 | 814 |
print_upper_case(fp,read->queryseq); |
572 | 815 |
} else if (terminal_minlength < 0 || terminal_length(read->queryseq) >= terminal_minlength) { |
573 |
- fprintf(fp,">%s\n",acc); |
|
816 |
+ fprintf(fp,">%s",acc); |
|
817 |
+ if (add_bounds_p == true) { |
|
818 |
+ fprintf(fp,"(%c%s:%u",read->querystart_strand,read->querystart_chr,read->querystart_chrpos); |
|
819 |
+ if (read->querybreak1_chrpos != 0) { |
|
820 |
+ fprintf(fp,",%c%s:%u",read->querybreak1_strand,read->querybreak1_chr,read->querybreak1_chrpos); |
|
821 |
+ fprintf(fp,",%c%s:%u",read->querybreak2_strand,read->querybreak2_chr,read->querybreak2_chrpos); |
|
822 |
+ } |
|
823 |
+ fprintf(fp,",%c%s:%u)",read->queryend_strand,read->queryend_chr,read->queryend_chrpos); |
|
824 |
+ } |
|
825 |
+ fprintf(fp,"\n"); |
|
574 | 826 |
fprintf(fp,"%s\n",read->queryseq); |
575 | 827 |
} |
576 | 828 |
return; |
577 | 829 |
} |
578 | 830 |
|
579 | 831 |
|
832 |
+/* Bounds are from given read querystart to mate querystart */ |
|
580 | 833 |
static void |
581 | 834 |
print_fasta_paired (FILE *fp, char *acc, T read1, T read2) { |
582 | 835 |
int terminal_length_1, terminal_length_2; |
583 | 836 |
|
584 | 837 |
if (single_end_output_p == true) { |
585 | 838 |
/* Important to add the /1 and /2 endings for the remove-intragenic program, which also requires unique alignments */ |
839 |
+ /* First read. Mate is read2 */ |
|
586 | 840 |
if (want_splicedp == true && read1->splicedp == true) { |
587 |
- fprintf(fp,">%s/1\n",acc); |
|
841 |
+ fprintf(fp,">%s",acc); |
|
842 |
+ if (add_bounds_p == true) { |
|
843 |
+ fprintf(fp,"(%c%s:%u",read1->querystart_strand,read1->querystart_chr,read1->querystart_chrpos); |
|
844 |
+ if (read1->querybreak1_chrpos != 0) { |
|
845 |
+ fprintf(fp,",%c%s:%u",read1->querybreak1_strand,read1->querybreak1_chr,read1->querybreak1_chrpos); |
|
846 |
+ fprintf(fp,",%c%s:%u",read1->querybreak2_strand,read1->querybreak2_chr,read1->querybreak2_chrpos); |
|
847 |
+ } |
|
848 |
+ fprintf(fp,",%c%s:%u",read1->queryend_strand,read1->queryend_chr,read1->queryend_chrpos); |
|
849 |
+ fprintf(fp,"..%c%s:%u)",read2->querystart_strand,read2->querystart_chr,read2->querystart_chrpos); |
|
850 |
+ } |
|
851 |
+ fprintf(fp,"/1\n"); |
|
588 | 852 |
print_upper_case(fp,read1->queryseq); |
589 | 853 |
} else if (terminal_minlength < 0 || terminal_length(read1->queryseq) >= terminal_minlength) { |
590 |
- fprintf(fp,">%s/1\n",acc); |
|
854 |
+ fprintf(fp,">%s",acc); |
|
855 |
+ if (add_bounds_p == true) { |
|
856 |
+ fprintf(fp,"(%c%s:%u",read1->querystart_strand,read1->querystart_chr,read1->querystart_chrpos); |
|
857 |
+ if (read1->querybreak1_chrpos != 0) { |
|
858 |
+ fprintf(fp,",%c%s:%u",read1->querybreak1_strand,read1->querybreak1_chr,read1->querybreak1_chrpos); |
|
859 |
+ fprintf(fp,",%c%s:%u",read1->querybreak2_strand,read1->querybreak2_chr,read1->querybreak2_chrpos); |
|
860 |
+ } |
|
861 |
+ fprintf(fp,",%c%s:%u",read1->queryend_strand,read1->queryend_chr,read1->queryend_chrpos); |
|
862 |
+ fprintf(fp,"..%c%s:%u)",read2->querystart_strand,read2->querystart_chr,read2->querystart_chrpos); |
|
863 |
+ } |
|
864 |
+ fprintf(fp,"/1\n"); |
|
591 | 865 |
fprintf(fp,"%s\n",read1->queryseq); |
592 | 866 |
} |
867 |
+ |
|
868 |
+ /* Second read. Mate is read1 */ |
|
593 | 869 |
if (want_splicedp == true && read2->splicedp == true) { |
594 |
- fprintf(fp,">%s/2\n",acc); |
|
870 |
+ fprintf(fp,">%s",acc); |
|
871 |
+ if (add_bounds_p == true) { |
|
872 |
+ fprintf(fp,"(%c%s:%u",read2->querystart_strand,read2->querystart_chr,read2->querystart_chrpos); |
|
873 |
+ if (read2->querybreak1_chrpos != 0) { |
|
874 |
+ fprintf(fp,",%c%s:%u",read2->querybreak1_strand,read2->querybreak1_chr,read2->querybreak1_chrpos); |
|
875 |
+ fprintf(fp,",%c%s:%u",read2->querybreak2_strand,read2->querybreak2_chr,read2->querybreak2_chrpos); |
|
876 |
+ } |
|
877 |
+ fprintf(fp,",%c%s:%u",read2->queryend_strand,read2->queryend_chr,read2->queryend_chrpos); |
|
878 |
+ fprintf(fp,"..%c%s:%u)",read1->querystart_strand,read1->querystart_chr,read1->querystart_chrpos); |
|
879 |
+ } |
|
880 |
+ fprintf(fp,"/2\n"); |
|
595 | 881 |
print_upper_case(fp,read2->queryseq); |
596 | 882 |
} else if (terminal_minlength < 0 || terminal_length(read2->queryseq) >= terminal_minlength) { |
597 |
- fprintf(fp,">%s/2\n",acc); |
|
883 |
+ fprintf(fp,">%s",acc); |
|
884 |
+ if (add_bounds_p == true) { |
|
885 |
+ fprintf(fp,"(%c%s:%u",read2->querystart_strand,read2->querystart_chr,read2->querystart_chrpos); |
|
886 |
+ if (read2->querybreak1_chrpos != 0) { |
|
887 |
+ fprintf(fp,",%c%s:%u",read2->querybreak1_strand,read2->querybreak1_chr,read2->querybreak1_chrpos); |
|
888 |
+ fprintf(fp,",%c%s:%u",read2->querybreak2_strand,read2->querybreak2_chr,read2->querybreak2_chrpos); |
|
889 |
+ } |
|
890 |
+ fprintf(fp,",%c%s:%u",read2->queryend_strand,read2->queryend_chr,read2->queryend_chrpos); |
|
891 |
+ fprintf(fp,"..%c%s:%u)",read1->querystart_strand,read1->querystart_chr,read1->querystart_chrpos); |
|
892 |
+ } |
|
893 |
+ fprintf(fp,"/2\n"); |
|
598 | 894 |
fprintf(fp,"%s\n",read2->queryseq); |
599 | 895 |
} |
896 |
+ |
|
600 | 897 |
} else { |
601 | 898 |
if (want_splicedp == true && (read1->splicedp == true || read2->splicedp == true)) { |
602 | 899 |
fprintf(fp,">%s\n",acc); |
... | ... |
@@ -789,9 +1086,9 @@ parse_bam_input (FILE *fp1, FILE *fp2, Bamreader_T bamreader, Table_T read_table |
789 | 1086 |
bool found1p, found2p, complete1p, complete2p; |
790 | 1087 |
bool copyp; |
791 | 1088 |
|
792 |
- while ((bamline = Bamread_next_bamline(bamreader,/*minimum_mapq*/0,/*good_unique_mapq*/0,/*maximum_nhits*/1000000, |
|
793 |
- /*need_unique_p*/false,/*need_primary_p*/true,/*ignore_duplicates_p*/false, |
|
794 |
- /*need_concordant_p*/false)) != NULL) { |
|
1089 |
+ while ((bamline = Bamread_next_bamline(bamreader,/*desired_read_group*/NULL,/*minimum_mapq*/0,/*good_unique_mapq*/0, |
|
1090 |
+ /*maximum_nhits*/1000000,/*need_unique_p*/false,/*need_primary_p*/true, |
|
1091 |
+ /*ignore_duplicates_p*/false,/*need_concordant_p*/false)) != NULL) { |
|
795 | 1092 |
acc = Bamline_acc(bamline); |
796 | 1093 |
acc = strip_illumina_acc_ending(©p,acc); |
797 | 1094 |
debug(printf("Acc is %s",acc)); |
... | ... |
@@ -1050,6 +1347,9 @@ main (int argc, char *argv[]) { |
1050 | 1347 |
} else if (!strcmp(long_name,"fasta")) { |
1051 | 1348 |
fastap = true; |
1052 | 1349 |
|
1350 |
+ } else if (!strcmp(long_name,"add-bounds")) { |
|
1351 |
+ add_bounds_p = true; |
|
1352 |
+ |
|
1053 | 1353 |
} else if (!strcmp(long_name,"terminal-minlength")) { |
1054 | 1354 |
terminal_minlength = atoi(optarg); |
1055 | 1355 |
|
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-static char rcsid[] = "$Id: bam_get.c 87713 2013-03-01 18:32:34Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bam_get.c 108654 2013-09-19 23:11:00Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
... | ... |
@@ -31,6 +31,7 @@ static char rcsid[] = "$Id: bam_get.c 87713 2013-03-01 18:32:34Z twu $"; |
31 | 31 |
|
32 | 32 |
|
33 | 33 |
/* Filters */ |
34 |
+static char *desired_read_group = NULL; |
|
34 | 35 |
static int minimum_mapq = 0; |
35 | 36 |
static int good_unique_mapq = 35; |
36 | 37 |
static int maximum_nhits = 1000000; |
... | ... |
@@ -50,6 +51,7 @@ static struct option long_options[] = { |
50 | 51 |
{"primary", required_argument, 0, 'P'}, /* need_primary_p */ |
51 | 52 |
{"ignore-duplicates", no_argument, 0, 0}, /* ignore_duplicates_p */ |
52 | 53 |
{"allow-duplicates", no_argument, 0, 0}, /* ignore_duplicates_p */ |
54 |
+ {"read-group", required_argument, 0, 0}, /* desired_read_group */ |
|
53 | 55 |
|
54 | 56 |
/* Help options */ |
55 | 57 |
{"version", no_argument, 0, 0}, /* print_program_version */ |
... | ... |
@@ -111,6 +113,9 @@ main (int argc, char *argv[]) { |
111 | 113 |
} else if (!strcmp(long_name,"allow-duplicates")) { |
112 | 114 |
ignore_duplicates_p = false; |
113 | 115 |
|
116 |
+ } else if (!strcmp(long_name,"read-group")) { |
|
117 |
+ desired_read_group = optarg; |
|
118 |
+ |
|
114 | 119 |
} else { |
115 | 120 |
/* Shouldn't reach here */ |
116 | 121 |
fprintf(stderr,"Don't recognize option %s. For usage, run 'gsnap --help'",long_name); |
... | ... |
@@ -164,7 +169,7 @@ main (int argc, char *argv[]) { |
164 | 169 |
|
165 | 170 |
Bamread_write_header(bamreader); |
166 | 171 |
Bamread_limit_region(bamreader,chromosome,chrstart,chrend); |
167 |
- while ((bamline = Bamread_next_bamline(bamreader,minimum_mapq,good_unique_mapq,maximum_nhits, |
|
172 |
+ while ((bamline = Bamread_next_bamline(bamreader,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits, |
|
168 | 173 |
need_unique_p,need_primary_p,ignore_duplicates_p, |
169 | 174 |
/*need_concordant_p*/true)) != NULL) { |
170 | 175 |
Bamline_print(stdout,bamline,Bamline_flag(bamline),quality_score_adj); |
... | ... |
@@ -212,6 +217,8 @@ where\n\ |
212 | 217 |
or startposition+length (+ strand)\n\ |
213 | 218 |
\n\ |
214 | 219 |
Filtering options\n\ |
220 |
+ --read-group=STRING Require alignments to have this read group in the RG field of\n\ |
|
221 |
+ the BAM line\n\ |
|
215 | 222 |
-q, --min-mapq=INT Require alignments to have this mapping quality and higher\n\ |
216 | 223 |
(default 0)\n\ |
217 | 224 |
-n, --nhits=INT Require alignments to have this many hits or fewer\n\ |
... | ... |
@@ -1,10 +1,11 @@ |
1 |
-static char rcsid[] = "$Id: bamread.c 87713 2013-03-01 18:32:34Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bamread.c 108654 2013-09-19 23:11:00Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
5 | 5 |
|
6 | 6 |
#include "bamread.h" |
7 | 7 |
#include <stdlib.h> |
8 |
+#include "assert.h" |
|
8 | 9 |
#include "mem.h" |
9 | 10 |
#include "complement.h" |
10 | 11 |
#include "bool.h" |
... | ... |
@@ -366,7 +367,7 @@ static void |
366 | 367 |
parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos, |
367 | 368 |
char **mate_chr, Genomicpos_T *mate_chrpos, int *insert_length, |
368 | 369 |
Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength, |
369 |
- int *readlength, char **read, char **quality_string) { |
|
370 |
+ int *readlength, char **read, char **quality_string, char **read_group) { |
|
370 | 371 |
int type; |
371 | 372 |
int i; |
372 | 373 |
unsigned int length; |
... | ... |
@@ -424,6 +425,13 @@ parse_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genom |
424 | 425 |
*quality_string = (char *) ptr; |
425 | 426 |
} |
426 | 427 |
|
428 |
+ ptr = bam_aux_get(this->bam,"RG"); |
|
429 |
+ if (ptr == NULL) { |
|
430 |
+ *read_group = (char *) NULL; |
|
431 |
+ } else { |
|
432 |
+ *read_group = bam_aux2Z(ptr); |
|
433 |
+ } |
|
434 |
+ |
|
427 | 435 |
/* Cigar */ |
428 | 436 |
*cigarlength = 0; |
429 | 437 |
*cigartypes = (Intlist_T) NULL; |
... | ... |
@@ -464,7 +472,7 @@ int |
464 | 472 |
Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos, |
465 | 473 |
char **mate_chr, Genomicpos_T *mate_chrpos, |
466 | 474 |
Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength, |
467 |
- int *readlength, char **read, char **quality_string) { |
|
475 |
+ int *readlength, char **read, char **quality_string, char **read_group) { |
|
468 | 476 |
int insert_length; |
469 | 477 |
|
470 | 478 |
#ifndef HAVE_SAMTOOLS |
... | ... |
@@ -477,7 +485,7 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr |
477 | 485 |
parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos), |
478 | 486 |
&(*mate_chr),&(*mate_chrpos),&insert_length, |
479 | 487 |
&(*cigartypes),&(*cigarlengths),&(*cigarlength), |
480 |
- &(*readlength),&(*read),&(*quality_string)); |
|
488 |
+ &(*readlength),&(*read),&(*quality_string),&(*read_group)); |
|
481 | 489 |
return 1; |
482 | 490 |
} |
483 | 491 |
} else { |
... | ... |
@@ -487,7 +495,7 @@ Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr |
487 | 495 |
parse_line(this,&(*acc),&(*flag),&(*mapq),&(*chr),&(*chrpos), |
488 | 496 |
&(*mate_chr),&(*mate_chrpos),&insert_length, |
489 | 497 |
&(*cigartypes),&(*cigarlengths),&(*cigarlength), |
490 |
- &(*readlength),&(*read),&(*quality_string)); |
|
498 |
+ &(*readlength),&(*read),&(*quality_string),&(*read_group)); |
|
491 | 499 |
return 1; |
492 | 500 |
} |
493 | 501 |
} |
... | ... |
@@ -653,6 +661,29 @@ Bamread_print_cigar (FILE *fp, Bamline_T this) { |
653 | 661 |
return; |
654 | 662 |
} |
655 | 663 |
|
664 |
+char * |
|
665 |
+Bamline_cigar_string (Bamline_T this) { |
|
666 |
+ char *string, number[12]; |
|
667 |
+ Intlist_T p; |
|
668 |
+ Uintlist_T q; |
|
669 |
+ int string_length = 0; |
|
670 |
+ |
|
671 |
+ for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
|
672 |
+ sprintf(number,"%u",Uintlist_head(q)); |
|
673 |
+ string_length += strlen(number) + 1; |
|
674 |
+ } |
|
675 |
+ string = (char *) CALLOC(string_length+1,sizeof(char)); |
|
676 |
+ |
|
677 |
+ string_length = 0; |
|
678 |
+ for (p = this->cigar_types, q = this->cigar_npositions; p != NULL; p = Intlist_next(p), q = Uintlist_next(q)) { |
|
679 |
+ sprintf(number,"%u",Uintlist_head(q)); |
|
680 |
+ sprintf(&(string[string_length]),"%u%c",Uintlist_head(q),Intlist_head(p)); |
|
681 |
+ string_length += strlen(number) + 1; |
|
682 |
+ } |
|
683 |
+ |
|
684 |
+ return string; |
|
685 |
+} |
|
686 |
+ |
|
656 | 687 |
int |
657 | 688 |
Bamline_cigar_querylength (Bamline_T this) { |
658 | 689 |
return this->cigar_querylength; |
... | ... |
@@ -1232,7 +1263,7 @@ Bamline_new (char *acc, unsigned int flag, int nhits, bool good_unique_p, int ma |
1232 | 1263 |
|
1233 | 1264 |
|
1234 | 1265 |
Bamline_T |
1235 |
-Bamread_next_bamline (T this, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
1266 |
+Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
1236 | 1267 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
1237 | 1268 |
bool need_concordant_p) { |
1238 | 1269 |
char *acc, *chr, *mate_chr, splice_strand; |
... | ... |
@@ -1247,6 +1278,7 @@ Bamread_next_bamline (T this, int minimum_mapq, int good_unique_mapq, int maximu |
1247 | 1278 |
int cigar_querylength, readlength; |
1248 | 1279 |
char *read; |
1249 | 1280 |
char *quality_string; |
1281 |
+ char *read_group; |
|
1250 | 1282 |
|
1251 | 1283 |
#ifndef HAVE_SAMTOOLS |
1252 | 1284 |
return (Bamline_T) NULL; |
... | ... |
@@ -1258,8 +1290,13 @@ Bamread_next_bamline (T this, int minimum_mapq, int good_unique_mapq, int maximu |
1258 | 1290 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
1259 | 1291 |
&mate_chr,&mate_chrpos_low,&insert_length, |
1260 | 1292 |
&cigar_types,&cigarlengths,&cigar_querylength, |
1261 |
- &readlength,&read,&quality_string); |
|
1262 |
- if (mapq < minimum_mapq) { |
|
1293 |
+ &readlength,&read,&quality_string,&read_group); |
|
1294 |
+ if (desired_read_group != NULL && (read_group == NULL || strcmp(desired_read_group,read_group))) { |
|
1295 |
+ debug1(fprintf(stderr,"Fails because doesn't have desired read group %s\n",desired_read_group)); |
|
1296 |
+ Intlist_free(&cigar_types); |
|
1297 |
+ Uintlist_free(&cigarlengths); |
|
1298 |
+ FREE(read); |
|
1299 |
+ } else if (mapq < minimum_mapq) { |
|
1263 | 1300 |
debug1(fprintf(stderr,"Fails because mapq %d < minimum %d\n",mapq,minimum_mapq)); |
1264 | 1301 |
Intlist_free(&cigar_types); |
1265 | 1302 |
Uintlist_free(&cigarlengths); |
... | ... |
@@ -1308,7 +1345,7 @@ Bamread_next_bamline (T this, int minimum_mapq, int good_unique_mapq, int maximu |
1308 | 1345 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
1309 | 1346 |
&mate_chr,&mate_chrpos_low,&insert_length, |
1310 | 1347 |
&cigar_types,&cigarlengths,&cigar_querylength, |
1311 |
- &readlength,&read,&quality_string); |
|
1348 |
+ &readlength,&read,&quality_string,&read_group); |
|
1312 | 1349 |
if (mapq >= minimum_mapq && |
1313 | 1350 |
(nhits = aux_nhits(this)) <= maximum_nhits && |
1314 | 1351 |
(need_unique_p == false || nhits == 1) && |
... | ... |
@@ -1349,7 +1386,7 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d |
1349 | 1386 |
int cigar_querylength, readlength; |
1350 | 1387 |
char *read; |
1351 | 1388 |
char *quality_string; |
1352 |
- |
|
1389 |
+ char *read_group; |
|
1353 | 1390 |
|
1354 | 1391 |
Bamread_limit_region(this,desired_chr,desired_chrpos,desired_chrpos); |
1355 | 1392 |
while (bam_iter_read(this->fp,this->iter,this->bam) >= 0) { |
... | ... |
@@ -1357,7 +1394,7 @@ Bamread_get_acc (T this, char *desired_chr, Genomicpos_T desired_chrpos, char *d |
1357 | 1394 |
parse_line(this,&acc,&flag,&mapq,&chr,&chrpos_low, |
1358 | 1395 |
&mate_chr,&mate_chrpos_low,&insert_length, |
1359 | 1396 |
&cigar_types,&cigarlengths,&cigar_querylength, |
1360 |
- &readlength,&read,&quality_string); |
|
1397 |
+ &readlength,&read,&quality_string,&read_group); |
|
1361 | 1398 |
splice_strand = aux_splice_strand(this); |
1362 | 1399 |
if (!strcmp(acc,desired_acc) && chrpos_low == desired_chrpos) { |
1363 | 1400 |
Bamread_unlimit_region(this); |
... | ... |
@@ -1653,7 +1690,7 @@ Bampair_details (Uintlist_T *chrpos_lows, Uintlist_T *chrpos_highs, |
1653 | 1690 |
|
1654 | 1691 |
|
1655 | 1692 |
List_T |
1656 |
-Bamread_all_pairs (T bamreader, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
1693 |
+Bamread_all_pairs (T bamreader, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
1657 | 1694 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
1658 | 1695 |
bool need_concordant_p) { |
1659 | 1696 |
List_T lines = NULL, p; |
... | ... |
@@ -1670,7 +1707,7 @@ Bamread_all_pairs (T bamreader, int minimum_mapq, int good_unique_mapq, int maxi |
1670 | 1707 |
|
1671 | 1708 |
bamstore_chrtable = Table_new(100,Chrom_compare_table,Chrom_hash_table); |
1672 | 1709 |
|
1673 |
- while ((bamline = Bamread_next_bamline(bamreader,minimum_mapq,good_unique_mapq,maximum_nhits, |
|
1710 |
+ while ((bamline = Bamread_next_bamline(bamreader,desired_read_group,minimum_mapq,good_unique_mapq,maximum_nhits, |
|
1674 | 1711 |
need_unique_p,need_primary_p,ignore_duplicates_p, |
1675 | 1712 |
need_concordant_p)) != NULL) { |
1676 | 1713 |
if (Bamline_concordantp(bamline) == false) { |
... | ... |
@@ -1770,7 +1807,7 @@ level_cmp (const void *x, const void *y) { |
1770 | 1807 |
int |
1771 | 1808 |
Bampair_compute_levels (List_T bampairs, Genomicpos_T mincoord, |
1772 | 1809 |
Genomicpos_T maxcoord, int max_allowed_levels, |
1773 |
- double xfactor, Genomicpos_T min_pairlength) { |
|
1810 |
+ double xfactor, Genomicpos_T min_pairlength, bool only_internal_p) { |
|
1774 | 1811 |
int nbampairs, i; |
1775 | 1812 |
int maxlevel = -1, level; |
1776 | 1813 |
bool donep; |
... | ... |
@@ -1790,6 +1827,8 @@ Bampair_compute_levels (List_T bampairs, Genomicpos_T mincoord, |
1790 | 1827 |
bampair = array[i]; |
1791 | 1828 |
if (bampair->chrpos_high - bampair->chrpos_low < min_pairlength) { |
1792 | 1829 |
bampair->level = -1; |
1830 |
+ } else if (only_internal_p == true && bampair->chrpos_low < mincoord && bampair->chrpos_high > maxcoord) { |
|
1831 |
+ bampair->level = -1; |
|
1793 | 1832 |
} else { |
1794 | 1833 |
/* Find appropriate level */ |
1795 | 1834 |
level = 0; |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-/* $Id: bamread.h 87713 2013-03-01 18:32:34Z twu $ */ |
|
1 |
+/* $Id: bamread.h 108654 2013-09-19 23:11:00Z twu $ */ |
|
2 | 2 |
#ifndef BAMREAD_INCLUDED |
3 | 3 |
#define BAMREAD_INCLUDED |
4 | 4 |
/* Cannot use bool, since it appears to conflict with samtools */ |
... | ... |
@@ -43,7 +43,7 @@ extern int |
43 | 43 |
Bamread_next_line (T this, char **acc, unsigned int *flag, int *mapq, char **chr, Genomicpos_T *chrpos, |
44 | 44 |
char **mate_chr, Genomicpos_T *mate_chrpos, |
45 | 45 |
Intlist_T *cigartypes, Uintlist_T *cigarlengths, int *cigarlength, |
46 |
- int *readlength, char **read, char **quality_string); |
|
46 |
+ int *readlength, char **read, char **quality_string, char **read_group); |
|
47 | 47 |
|
48 | 48 |
typedef struct Bamline_T *Bamline_T; |
49 | 49 |
|
... | ... |
@@ -83,12 +83,16 @@ extern int |
83 | 83 |
Bamline_cigar_querylength (Bamline_T this); |
84 | 84 |
extern void |
85 | 85 |
Bamread_print_cigar (FILE *fp, Bamline_T this); |
86 |
+extern char * |
|
87 |
+Bamline_cigar_string (Bamline_T this); |
|
86 | 88 |
extern int |
87 | 89 |
Bamline_readlength (Bamline_T this); |
88 | 90 |
extern char * |
89 | 91 |
Bamline_read (Bamline_T this); |
90 | 92 |
extern char * |
91 | 93 |
Bamline_quality_string (Bamline_T this); |
94 |
+extern char * |
|
95 |
+Bamline_read_group (Bamline_T this); |
|
92 | 96 |
extern void |
93 | 97 |
Bamline_print (FILE *fp, Bamline_T this, unsigned int newflag, int quality_score_adj); |
94 | 98 |
|
... | ... |
@@ -104,7 +108,7 @@ Bamline_chrpos_high (Bamline_T this); |
104 | 108 |
extern void |
105 | 109 |
Bamline_free (Bamline_T *old); |
106 | 110 |
extern Bamline_T |
107 |
-Bamread_next_bamline (T this, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
111 |
+Bamread_next_bamline (T this, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
108 | 112 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
109 | 113 |
bool need_concordant_p); |
110 | 114 |
extern Bamline_T |
... | ... |
@@ -160,14 +164,14 @@ Bampair_details (Uintlist_T *chrpos_lows, Uintlist_T *chrpos_highs, |
160 | 164 |
Bampair_T this); |
161 | 165 |
|
162 | 166 |
extern List_T |
163 |
-Bamread_all_pairs (T bamreader, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
167 |
+Bamread_all_pairs (T bamreader, char *desired_read_group, int minimum_mapq, int good_unique_mapq, int maximum_nhits, |
|
164 | 168 |
bool need_unique_p, bool need_primary_p, bool ignore_duplicates_p, |
165 | 169 |
bool need_concordant_p); |
166 | 170 |
|
167 | 171 |
extern int |
168 | 172 |
Bampair_compute_levels (List_T bampairs, Genomicpos_T mincoord, |
169 | 173 |
Genomicpos_T maxcoord, int max_allowed_levels, |
170 |
- double xfactor, Genomicpos_T min_pairlength); |
|
174 |
+ double xfactor, Genomicpos_T min_pairlength, bool only_internal_p); |
|
171 | 175 |
|
172 | 176 |
|
173 | 177 |
#undef T |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-static char rcsid[] = "$Id: bamtally.c 87717 2013-03-01 18:34:33Z twu $"; |
|
1 |
+static char rcsid[] = "$Id: bamtally.c 108654 2013-09-19 23:11:00Z twu $"; |
|
2 | 2 |
#ifdef HAVE_CONFIG_H |
3 | 3 |
#include <config.h> |
4 | 4 |
#endif |
... | ... |
@@ -38,6 +38,12 @@ static char rcsid[] = "$Id: bamtally.c 87717 2013-03-01 18:34:33Z twu $"; |
38 | 38 |
#include "parserange.h" |
39 | 39 |
|
40 | 40 |
|
41 |
+#define ARRAY_THRESHOLD 20 |
|
42 |
+#define INITIAL_READLENGTH 75 |
|
43 |
+#define MAX_QUALITY_SCORE 40 |
|
44 |
+#define MAX_MAPQ_SCORE 40 |
|
45 |
+ |
|
46 |
+ |
|
41 | 47 |
/* Alloc and block control structure */ |
42 | 48 |
#ifdef DEBUG0 |
43 | 49 |
#define debug0(x) x |
... | ... |
@@ -619,9 +625,21 @@ struct T { |
619 | 625 |
long int n_fromleft_plus; /* Used for reference count for insertions */ |
620 | 626 |
long int n_fromleft_minus; /* Used for reference count for insertions */ |
621 | 627 |
|
622 |
- List_T matches_byshift; |
|
623 |
- List_T matches_byquality; |
|
624 |
- List_T matches_bymapq; |
|
628 |
+ bool use_array_p; |
|
629 |
+ List_T list_matches_byshift; |
|
630 |
+ List_T list_matches_byquality; |
|
631 |
+ List_T list_matches_bymapq; |
|
632 |
+ |
|
633 |
+ int n_matches_byshift_plus; |
|
634 |
+ int *matches_byshift_plus; |
|
< |