git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqbias@52193 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,13 +1,13 @@ |
1 | 1 |
Package: seqbias |
2 |
-Version: 0.99.1 |
|
2 |
+Version: 0.99.2 |
|
3 | 3 |
Date: 25-12-2010 |
4 | 4 |
Title: Estimation of per-position bias in high-throughput sequencing data |
5 |
-Description: This package implements a model of per-position sequencing bias i |
|
5 |
+Description: This package implements a model of per-position sequencing bias in |
|
6 | 6 |
high-throughput sequencing data using a simple Bayesian network, the |
7 | 7 |
structure and parameters of which are trained on a set of aligned reads |
8 | 8 |
and a reference genome sequence. |
9 | 9 |
Author: Daniel Jones <dcjones@cs.washington.edu> |
10 |
-Maintainer: Dainel Jones <dcjones@cs.washington.edu> |
|
10 |
+Maintainer: Daniel Jones <dcjones@cs.washington.edu> |
|
11 | 11 |
Depends: R (>= 2.13.0), GenomicRanges (>= 0.1.0), |
12 | 12 |
Biostrings (>= 2.15.0), methods |
13 | 13 |
Suggests: Rsamtools (>= 1.3.18) |
... | ... |
@@ -10,16 +10,16 @@ generated by GNU Autoconf 2.67. Invocation command line was |
10 | 10 |
## Platform. ## |
11 | 11 |
## --------- ## |
12 | 12 |
|
13 |
-hostname = alpaca |
|
14 |
-uname -m = x86_64 |
|
15 |
-uname -r = 2.6.31.12-0.2-desktop |
|
13 |
+hostname = dtp2 |
|
14 |
+uname -m = i686 |
|
15 |
+uname -r = 2.6.35-22-generic-pae |
|
16 | 16 |
uname -s = Linux |
17 |
-uname -v = #1 SMP PREEMPT 2010-03-16 21:25:39 +0100 |
|
17 |
+uname -v = #34-Ubuntu SMP Sun Oct 10 11:03:48 UTC 2010 |
|
18 | 18 |
|
19 | 19 |
/usr/bin/uname -p = unknown |
20 | 20 |
/bin/uname -X = unknown |
21 | 21 |
|
22 |
-/bin/arch = x86_64 |
|
22 |
+/bin/arch = unknown |
|
23 | 23 |
/usr/bin/arch -k = unknown |
24 | 24 |
/usr/convex/getsysinfo = unknown |
25 | 25 |
/usr/bin/hostinfo = unknown |
... | ... |
@@ -27,18 +27,17 @@ uname -v = #1 SMP PREEMPT 2010-03-16 21:25:39 +0100 |
27 | 27 |
/usr/bin/oslevel = unknown |
28 | 28 |
/bin/universe = unknown |
29 | 29 |
|
30 |
-PATH: /usr/lib64/mpi/gcc/openmpi/bin |
|
31 |
-PATH: /home/cwon2/bin |
|
30 |
+PATH: /home/dcjones/bin |
|
31 |
+PATH: /home/dcjones/bin/i686 |
|
32 |
+PATH: /home/dcjones/src/kent/built/bin/i386 |
|
33 |
+PATH: /home/dcjones/.cabal/bin |
|
34 |
+PATH: /usr/local/sbin |
|
32 | 35 |
PATH: /usr/local/bin |
36 |
+PATH: /usr/sbin |
|
33 | 37 |
PATH: /usr/bin |
38 |
+PATH: /sbin |
|
34 | 39 |
PATH: /bin |
35 |
-PATH: /usr/bin/X11 |
|
36 |
-PATH: /usr/X11R6/bin |
|
37 | 40 |
PATH: /usr/games |
38 |
-PATH: /opt/kde3/bin |
|
39 |
-PATH: /usr/lib/mit/bin |
|
40 |
-PATH: /usr/lib/mit/sbin |
|
41 |
-PATH: /usr/lib/qt3/bin |
|
42 | 41 |
|
43 | 42 |
|
44 | 43 |
## ----------- ## |
... | ... |
@@ -50,18 +49,18 @@ configure:2170: found /usr/bin/gcc |
50 | 49 |
configure:2181: result: gcc |
51 | 50 |
configure:2410: checking for C compiler version |
52 | 51 |
configure:2419: gcc --version >&5 |
53 |
-gcc (SUSE Linux) 4.4.1 [gcc-4_4-branch revision 150839] |
|
54 |
-Copyright (C) 2009 Free Software Foundation, Inc. |
|
52 |
+gcc (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5 |
|
53 |
+Copyright (C) 2010 Free Software Foundation, Inc. |
|
55 | 54 |
This is free software; see the source for copying conditions. There is NO |
56 | 55 |
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
57 | 56 |
|
58 | 57 |
configure:2430: $? = 0 |
59 | 58 |
configure:2419: gcc -v >&5 |
60 | 59 |
Using built-in specs. |
61 |
-Target: x86_64-suse-linux |
|
62 |
-Configured with: ../configure --prefix=/usr --infodir=/usr/share/info --mandir=/usr/share/man --libdir=/usr/lib64 --libexecdir=/usr/lib64 --enable-languages=c,c++,objc,fortran,obj-c++,java,ada --enable-checking=release --with-gxx-include-dir=/usr/include/c++/4.4 --enable-ssp --disable-libssp --with-bugurl=http://bugs.opensuse.org/ --with-pkgversion='SUSE Linux' --disable-libgcj --disable-libmudflap --with-slibdir=/lib64 --with-system-zlib --enable-__cxa_atexit --enable-libstdcxx-allocator=new --disable-libstdcxx-pch --enable-version-specific-runtime-libs --program-suffix=-4.4 --enable-linux-futex --without-system-libunwind --with-arch-32=i586 --with-tune=generic --build=x86_64-suse-linux |
|
60 |
+Target: i686-linux-gnu |
|
61 |
+Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro 4.4.4-14ubuntu5' --with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.4 --enable-shared --enable-multiarch --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4 --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc --enable-targets=all --disable-werror --with-arch-32=i686 --with-tune=generic --enable-checking=release --build=i686-linux-gnu --host=i686-linux-gnu --target=i686-linux-gnu |
|
63 | 62 |
Thread model: posix |
64 |
-gcc version 4.4.1 [gcc-4_4-branch revision 150839] (SUSE Linux) |
|
63 |
+gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5) |
|
65 | 64 |
configure:2430: $? = 0 |
66 | 65 |
configure:2419: gcc -V >&5 |
67 | 66 |
gcc: '-V' option must have argument |
... | ... |
@@ -119,8 +118,8 @@ configure:2948: result: yes |
119 | 118 |
configure:2948: checking for pow |
120 | 119 |
configure:2948: gcc -o conftest -g -O2 conftest.c -lz >&5 |
121 | 120 |
conftest.c:35: warning: conflicting types for built-in function 'pow' |
122 |
-/tmp/ccYE2dt3.o: In function `main': |
|
123 |
-/home/cwon2/Download/seqbias/conftest.c:46: undefined reference to `pow' |
|
121 |
+/tmp/ccxSNas5.o: In function `main': |
|
122 |
+/home/dcjones/bio/seqbias/conftest.c:46: undefined reference to `pow' |
|
124 | 123 |
collect2: ld returned 1 exit status |
125 | 124 |
configure:2948: $? = 1 |
126 | 125 |
configure: failed program was: |
... | ... |
@@ -177,8 +176,8 @@ configure:2948: result: no |
177 | 176 |
configure:2948: checking for sqrt |
178 | 177 |
configure:2948: gcc -o conftest -g -O2 conftest.c -lz >&5 |
179 | 178 |
conftest.c:35: warning: conflicting types for built-in function 'sqrt' |
180 |
-/tmp/ccKTGxDe.o: In function `main': |
|
181 |
-/home/cwon2/Download/seqbias/conftest.c:46: undefined reference to `sqrt' |
|
179 |
+/tmp/ccFZLqEa.o: In function `main': |
|
180 |
+/home/dcjones/bio/seqbias/conftest.c:46: undefined reference to `sqrt' |
|
182 | 181 |
collect2: ld returned 1 exit status |
183 | 182 |
configure:2948: $? = 1 |
184 | 183 |
configure: failed program was: |
... | ... |
@@ -255,7 +254,8 @@ configure:2962: checking how to run the C preprocessor |
255 | 254 |
configure:2993: gcc -E conftest.c |
256 | 255 |
configure:2993: $? = 0 |
257 | 256 |
configure:3007: gcc -E conftest.c |
258 |
-conftest.c:16:28: error: ac_nonexistent.h: No such file or directory |
|
257 |
+conftest.c:16: fatal error: ac_nonexistent.h: No such file or directory |
|
258 |
+compilation terminated. |
|
259 | 259 |
configure:3007: $? = 1 |
260 | 260 |
configure: failed program was: |
261 | 261 |
| /* confdefs.h */ |
... | ... |
@@ -278,7 +278,8 @@ configure:3032: result: gcc -E |
278 | 278 |
configure:3052: gcc -E conftest.c |
279 | 279 |
configure:3052: $? = 0 |
280 | 280 |
configure:3066: gcc -E conftest.c |
281 |
-conftest.c:16:28: error: ac_nonexistent.h: No such file or directory |
|
281 |
+conftest.c:16: fatal error: ac_nonexistent.h: No such file or directory |
|
282 |
+compilation terminated. |
|
282 | 283 |
configure:3066: $? = 1 |
283 | 284 |
configure: failed program was: |
284 | 285 |
| /* confdefs.h */ |
... | ... |
@@ -298,9 +299,9 @@ configure: failed program was: |
298 | 299 |
| /* end confdefs.h. */ |
299 | 300 |
| #include <ac_nonexistent.h> |
300 | 301 |
configure:3095: checking for grep that handles long lines and -e |
301 |
-configure:3153: result: /usr/bin/grep |
|
302 |
+configure:3153: result: /bin/grep |
|
302 | 303 |
configure:3158: checking for egrep |
303 |
-configure:3220: result: /usr/bin/grep -E |
|
304 |
+configure:3220: result: /bin/grep -E |
|
304 | 305 |
configure:3225: checking for ANSI C header files |
305 | 306 |
configure:3245: gcc -c -g -O2 conftest.c >&5 |
306 | 307 |
configure:3245: $? = 0 |
... | ... |
@@ -776,7 +777,7 @@ generated by GNU Autoconf 2.67. Invocation command line was |
776 | 777 |
CONFIG_COMMANDS = |
777 | 778 |
$ ./config.status |
778 | 779 |
|
779 |
-on alpaca |
|
780 |
+on dtp2 |
|
780 | 781 |
|
781 | 782 |
config.status:734: creating src/Makevars |
782 | 783 |
|
... | ... |
@@ -833,8 +834,8 @@ ac_cv_header_unistd_h=yes |
833 | 834 |
ac_cv_header_zlib_h=yes |
834 | 835 |
ac_cv_lib_z_gzeof=yes |
835 | 836 |
ac_cv_objext=o |
836 |
-ac_cv_path_EGREP='/usr/bin/grep -E' |
|
837 |
-ac_cv_path_GREP=/usr/bin/grep |
|
837 |
+ac_cv_path_EGREP='/bin/grep -E' |
|
838 |
+ac_cv_path_GREP=/bin/grep |
|
838 | 839 |
ac_cv_prog_CPP='gcc -E' |
839 | 840 |
ac_cv_prog_ac_ct_CC=gcc |
840 | 841 |
ac_cv_prog_cc_c89= |
... | ... |
@@ -857,9 +858,9 @@ DEFS='-DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAG |
857 | 858 |
ECHO_C='' |
858 | 859 |
ECHO_N='-n' |
859 | 860 |
ECHO_T='' |
860 |
-EGREP='/usr/bin/grep -E' |
|
861 |
+EGREP='/bin/grep -E' |
|
861 | 862 |
EXEEXT='' |
862 |
-GREP='/usr/bin/grep' |
|
863 |
+GREP='/bin/grep' |
|
863 | 864 |
LDFLAGS='' |
864 | 865 |
LIBOBJS='' |
865 | 866 |
LIBS='-lz ' |
... | ... |
@@ -872,7 +873,7 @@ PACKAGE_TARNAME='' |
872 | 873 |
PACKAGE_URL='' |
873 | 874 |
PACKAGE_VERSION='' |
874 | 875 |
PATH_SEPARATOR=':' |
875 |
-SHELL='/bin/sh' |
|
876 |
+SHELL='/bin/bash' |
|
876 | 877 |
ac_ct_CC='gcc' |
877 | 878 |
bindir='${exec_prefix}/bin' |
878 | 879 |
build_alias='' |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-#! /bin/sh |
|
1 |
+#! /bin/bash |
|
2 | 2 |
# Generated by configure. |
3 | 3 |
# Run this file to recreate the current configuration. |
4 | 4 |
# Compiler output produced by configure, useful for debugging |
... | ... |
@@ -8,7 +8,7 @@ debug=false |
8 | 8 |
ac_cs_recheck=false |
9 | 9 |
ac_cs_silent=false |
10 | 10 |
|
11 |
-SHELL=${CONFIG_SHELL-/bin/sh} |
|
11 |
+SHELL=${CONFIG_SHELL-/bin/bash} |
|
12 | 12 |
export SHELL |
13 | 13 |
## -------------------- ## |
14 | 14 |
## M4sh Initialization. ## |
... | ... |
@@ -438,7 +438,7 @@ Copyright (C) 2010 Free Software Foundation, Inc. |
438 | 438 |
This config.status script is free software; the Free Software Foundation |
439 | 439 |
gives unlimited permission to copy, distribute and modify it." |
440 | 440 |
|
441 |
-ac_pwd='/home/cwon2/Download/seqbias' |
|
441 |
+ac_pwd='/home/dcjones/bio/seqbias' |
|
442 | 442 |
srcdir='.' |
443 | 443 |
test -n "$AWK" || AWK=awk |
444 | 444 |
# The default lists apply if the user does not specify any file. |
... | ... |
@@ -506,10 +506,10 @@ if $ac_cs_silent; then |
506 | 506 |
fi |
507 | 507 |
|
508 | 508 |
if $ac_cs_recheck; then |
509 |
- set X '/bin/sh' './configure' $ac_configure_extra_args --no-create --no-recursion |
|
509 |
+ set X '/bin/bash' './configure' $ac_configure_extra_args --no-create --no-recursion |
|
510 | 510 |
shift |
511 |
- $as_echo "running CONFIG_SHELL=/bin/sh $*" >&6 |
|
512 |
- CONFIG_SHELL='/bin/sh' |
|
511 |
+ $as_echo "running CONFIG_SHELL=/bin/bash $*" >&6 |
|
512 |
+ CONFIG_SHELL='/bin/bash' |
|
513 | 513 |
export CONFIG_SHELL |
514 | 514 |
exec "$@" |
515 | 515 |
fi |
... | ... |
@@ -592,8 +592,8 @@ echo 'BEGIN {' >"$tmp/subs1.awk" && |
592 | 592 |
cat >>"$tmp/subs1.awk" <<\_ACAWK && |
593 | 593 |
S["LTLIBOBJS"]="" |
594 | 594 |
S["LIBOBJS"]="" |
595 |
-S["EGREP"]="/usr/bin/grep -E" |
|
596 |
-S["GREP"]="/usr/bin/grep" |
|
595 |
+S["EGREP"]="/bin/grep -E" |
|
596 |
+S["GREP"]="/bin/grep" |
|
597 | 597 |
S["CPP"]="gcc -E" |
598 | 598 |
S["OBJEXT"]="o" |
599 | 599 |
S["EXEEXT"]="" |
... | ... |
@@ -643,7 +643,7 @@ S["PACKAGE_VERSION"]="" |
643 | 643 |
S["PACKAGE_TARNAME"]="" |
644 | 644 |
S["PACKAGE_NAME"]="" |
645 | 645 |
S["PATH_SEPARATOR"]=":" |
646 |
-S["SHELL"]="/bin/sh" |
|
646 |
+S["SHELL"]="/bin/bash" |
|
647 | 647 |
_ACAWK |
648 | 648 |
cat >>"$tmp/subs1.awk" <<_ACAWK && |
649 | 649 |
for (key in S) S_is_set[key] = 1 |
... | ... |
@@ -9,10 +9,11 @@ YAML_CPP_OBJ = aliascontent.o conversion.o emitter.o emitterstate.o \ |
9 | 9 |
stream.o tag.o |
10 | 10 |
|
11 | 11 |
SEQBIAS_OBJ = sequencing_bias.o kmers.o miscmath.o common.o \ |
12 |
- table.o superfasthash.o logger.o seqbias.o |
|
12 |
+ table.o superfasthash.o logger.o seqbias.o \ |
|
13 |
+ snprintf.o |
|
13 | 14 |
|
14 |
-DFLAGS = -D_USE_KNETFILE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE |
|
15 |
-PKG_CFLAGS += $(DFLAGS) -Wno-unused-result -Wno-format |
|
15 |
+DFLAGS = -D_USE_KNETFILE -D_FILE_OFFSET_BITS=64 |
|
16 |
+PKG_CFLAGS += $(DFLAGS) |
|
16 | 17 |
PKG_CPPFLAGS = $(PKG_CFLAGS) |
17 | 18 |
|
18 | 19 |
PKG_LIBS+=-lz |
... | ... |
@@ -9,10 +9,11 @@ YAML_CPP_OBJ = aliascontent.o conversion.o emitter.o emitterstate.o \ |
9 | 9 |
stream.o tag.o |
10 | 10 |
|
11 | 11 |
SEQBIAS_OBJ = sequencing_bias.o kmers.o miscmath.o common.o \ |
12 |
- table.o superfasthash.o logger.o seqbias.o |
|
12 |
+ table.o superfasthash.o logger.o seqbias.o \ |
|
13 |
+ snprintf.o |
|
13 | 14 |
|
14 |
-DFLAGS = -D_USE_KNETFILE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE |
|
15 |
-PKG_CFLAGS += $(DFLAGS) -Wno-unused-result -Wno-format |
|
15 |
+DFLAGS = -D_USE_KNETFILE -D_FILE_OFFSET_BITS=64 |
|
16 |
+PKG_CFLAGS += $(DFLAGS) |
|
16 | 17 |
PKG_CPPFLAGS = $(PKG_CFLAGS) |
17 | 18 |
|
18 | 19 |
PKG_LIBS+=@LIBS@ |
... | ... |
@@ -596,7 +596,8 @@ double conditional_likelihood( size_t n, const double* l0, const double* l1, con |
596 | 596 |
|
597 | 597 |
l = 0.0; |
598 | 598 |
for( i = 0; i < n; i++ ) { |
599 |
- l += (c[i] == 0 ? l0[i] : l1[i]) - logaddexp( l0[i], l1[i] ); |
|
599 |
+ l += (c[i] == 0 ? l0[i] : l1[i]) |
|
600 |
+ - logaddexp( l0[i], l1[i] ); |
|
600 | 601 |
} |
601 | 602 |
|
602 | 603 |
return l; |
... | ... |
@@ -715,7 +716,7 @@ void train_motifs( motif& M0, motif& M1, |
715 | 716 |
while( true ) { |
716 | 717 |
round_num++; |
717 | 718 |
|
718 |
- log_printf( LOG_MSG, "round_num %4d (ic = %0.4e) ", round_num, ic_curr ); |
|
719 |
+ log_printf( LOG_MSG, "round %4d (ic = %0.4e) ", round_num, ic_curr ); |
|
719 | 720 |
|
720 | 721 |
ic_best = -HUGE_VAL; |
721 | 722 |
j_best = i_best = 0; |
... | ... |
@@ -730,7 +731,6 @@ void train_motifs( motif& M0, motif& M1, |
730 | 731 |
|
731 | 732 |
|
732 | 733 |
for( i = i_start; i <= j; i++ ) { |
733 |
- log_puts( LOG_MSG, "." ); |
|
734 | 734 |
|
735 | 735 |
if( M0.has_edge( i, j ) ) { |
736 | 736 |
continue; |
... | ... |
@@ -740,6 +740,8 @@ void train_motifs( motif& M0, motif& M1, |
740 | 740 |
continue; |
741 | 741 |
} |
742 | 742 |
|
743 |
+ log_puts( LOG_MSG, "." ); |
|
744 |
+ |
|
743 | 745 |
/* keep track of the old parameters to avoid retraining */ |
744 | 746 |
M0.store_row(j); |
745 | 747 |
M1.store_row(j); |
... | ... |
@@ -917,10 +919,11 @@ void train_motifs_backwards( motif& M0, motif& M1, |
917 | 919 |
i_last = M0.num_parents(j) > 1 ? j-1 : j; |
918 | 920 |
|
919 | 921 |
for( i = 0; i <= i_last; i++ ) { |
920 |
- log_puts( LOG_MSG, "." ); |
|
921 | 922 |
|
922 | 923 |
if( !M0.has_edge( i, j ) ) continue; |
923 | 924 |
|
925 |
+ log_puts( LOG_MSG, "." ); |
|
926 |
+ |
|
924 | 927 |
/* keep track of the old parameters to avoid retraining */ |
925 | 928 |
M0.store_row(j); |
926 | 929 |
M1.store_row(j); |
... | ... |
@@ -2,6 +2,7 @@ |
2 | 2 |
#include "sequencing_bias.hpp" |
3 | 3 |
#include "logger.h" |
4 | 4 |
#include "common.hpp" |
5 |
+#include "miscmath.hpp" |
|
5 | 6 |
|
6 | 7 |
#include <cmath> |
7 | 8 |
#include <cctype> |
... | ... |
@@ -14,11 +15,31 @@ using namespace std; |
14 | 15 |
|
15 | 16 |
|
16 | 17 |
/* simply uniform random numbers */ |
17 |
-double rand_uniform( double a, double b ) |
|
18 |
+static double rand_uniform( double a, double b ) |
|
18 | 19 |
{ |
19 | 20 |
return a + b * (double)rand() / (double)RAND_MAX; |
20 | 21 |
} |
21 | 22 |
|
23 |
+/* random gaussians (copied from GSL, to avoid dependency) */ |
|
24 |
+static double rand_gauss( double sigma ) |
|
25 |
+{ |
|
26 |
+ double x, y, r2; |
|
27 |
+ |
|
28 |
+ do |
|
29 |
+ { |
|
30 |
+ /* choose x,y in uniform square (-1,-1) to (+1,+1) */ |
|
31 |
+ x = -1 + 2 * rand_uniform(0.0,1.0); |
|
32 |
+ y = -1 + 2 * rand_uniform(0.0,1.0); |
|
33 |
+ |
|
34 |
+ /* see if it is in the unit circle */ |
|
35 |
+ r2 = x * x + y * y; |
|
36 |
+ } |
|
37 |
+ while (r2 > 1.0 || r2 == 0); |
|
38 |
+ |
|
39 |
+ /* Box-Muller transform */ |
|
40 |
+ return sigma * y * sqrt (-2.0 * log (r2) / r2); |
|
41 |
+} |
|
42 |
+ |
|
22 | 43 |
|
23 | 44 |
/* pseudocount used when sampling foreground and background nucleotide * frequencies */ |
24 | 45 |
const double sequencing_bias::pseudocount = 1; |
... | ... |
@@ -172,14 +193,14 @@ void sequencing_bias::build( const char* ref_fn, |
172 | 193 |
failf( "Can't open bam file '%s'.\n", reads_fn ); |
173 | 194 |
} |
174 | 195 |
|
196 |
+ /* find the first n unique reads by hashing */ |
|
175 | 197 |
table T; |
176 |
- hash_reads( &T, reads_f, n ); |
|
177 |
- n = T.m; |
|
198 |
+ hash_reads( &T, reads_f, 0 ); |
|
178 | 199 |
|
179 |
- /* resort the remaining (1-q)*n by position */ |
|
180 |
- log_puts( LOG_MSG, "sorting by position ... " ); |
|
200 |
+ /* sort by position */ |
|
201 |
+ log_puts( LOG_MSG, "shuffling ... " ); |
|
181 | 202 |
struct hashed_value** S; |
182 |
- table_sort_by_position( &T, &S ); |
|
203 |
+ table_sort_by_seq_rand( &T, &S ); |
|
183 | 204 |
log_puts( LOG_MSG, "done.\n" ); |
184 | 205 |
|
185 | 206 |
|
... | ... |
@@ -196,13 +217,12 @@ void sequencing_bias::build( const char* ref_fn, |
196 | 217 |
std::deque<sequence*> training_seqs; |
197 | 218 |
|
198 | 219 |
|
199 |
- /* sample this many background sequence for each foreground sequence */ |
|
200 |
- const int bg_count = 1; |
|
220 |
+ /* background sampling */ |
|
221 |
+ const size_t bg_samples = 1; // make this many samples for each read |
|
222 |
+ int bg_sample_num; // keep track of the number of samples made |
|
223 |
+ struct read_pos bg; // background position being considered |
|
201 | 224 |
|
202 |
- int j; |
|
203 |
- pos bg_pos; |
|
204 |
- pos bg_offset; |
|
205 |
- |
|
225 |
+ int b; |
|
206 | 226 |
char* seqname = NULL; |
207 | 227 |
int seqlen = 0; |
208 | 228 |
int curr_tid = -1; |
... | ... |
@@ -213,7 +233,7 @@ void sequencing_bias::build( const char* ref_fn, |
213 | 233 |
local_seq[L+R+1] = '\0'; |
214 | 234 |
|
215 | 235 |
|
216 |
- for( i = 0; i < n; i++ ) { |
|
236 |
+ for( i = 0; i < n && i < T.m; i++ ) { |
|
217 | 237 |
|
218 | 238 |
/* Load/switch sequences (chromosomes) as they are encountered in the |
219 | 239 |
* read stream. The idea here is to avoid thrashing by loading a large |
... | ... |
@@ -241,7 +261,8 @@ void sequencing_bias::build( const char* ref_fn, |
241 | 261 |
} |
242 | 262 |
|
243 | 263 |
if( seq == NULL ) continue; |
244 |
- |
|
264 |
+ |
|
265 |
+ |
|
245 | 266 |
/* add a foreground sequence */ |
246 | 267 |
if( S[i]->pos.strand ) { |
247 | 268 |
if( S[i]->pos.pos < R ) continue; |
... | ... |
@@ -258,31 +279,29 @@ void sequencing_bias::build( const char* ref_fn, |
258 | 279 |
|
259 | 280 |
/* add a background sequence */ |
260 | 281 |
/* adjust the current read position randomly, and sample */ |
261 |
- for( j = 0; j < bg_count; j++ ) { |
|
282 |
+ for( bg_sample_num = 0; bg_sample_num < bg_samples; bg_sample_num++ ) { |
|
262 | 283 |
|
263 |
- /* make things a bit more robust by sampling the background from a |
|
264 |
- * random offset */ |
|
265 |
- bg_offset = (pos)rand_uniform( 100.0, 200.0 ); |
|
266 |
- if( rand_uniform( -1.0, 1.0 ) < 0.0 ) bg_offset = -bg_offset; |
|
284 |
+ /* attempt to sample a position near the current read, with no reads |
|
285 |
+ * itself. */ |
|
286 |
+ memcpy( (void*)&bg, (void*)&S[i]->pos, sizeof(struct read_pos) ); |
|
267 | 287 |
|
268 |
- bg_pos = S[i]->pos.pos + bg_offset; |
|
288 |
+ bg.pos = S[i]->pos.pos + (pos)ceil( rand_gauss( 10 ) ); |
|
269 | 289 |
|
270 |
- if( S[i]->pos.strand ) { |
|
271 |
- if( bg_pos < R ) continue; |
|
272 |
- memcpy( local_seq, seq + bg_pos - R, (L+1+R)*sizeof(char) ); |
|
290 |
+ if( bg.strand ) { |
|
291 |
+ if( bg.pos < R ) continue; |
|
292 |
+ memcpy( local_seq, seq + bg.pos - R, (L+1+R)*sizeof(char) ); |
|
273 | 293 |
seqrc( local_seq, L+1+R ); |
274 | 294 |
} |
275 | 295 |
else { |
276 |
- if( bg_pos < L ) continue; |
|
277 |
- memcpy( local_seq, seq + (bg_pos-L), (L+1+R)*sizeof(char) ); |
|
296 |
+ if( bg.pos < L ) continue; |
|
297 |
+ memcpy( local_seq, seq + (bg.pos-L), (L+1+R)*sizeof(char) ); |
|
278 | 298 |
} |
279 | 299 |
|
280 | 300 |
training_seqs.push_back( new sequence( local_seq, 0 ) ); |
281 | 301 |
} |
282 | 302 |
} |
283 | 303 |
|
284 |
- |
|
285 |
- size_t max_k = 5; |
|
304 |
+ size_t max_k = 3; |
|
286 | 305 |
size_t max_dep_dist = 5; |
287 | 306 |
M0 = new motif( L+1+R, max_k, 0 ); |
288 | 307 |
M1 = new motif( L+1+R, max_k, 1 ); |
289 | 308 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1025 @@ |
1 |
+/* |
|
2 |
+ * snprintf.c - a portable implementation of snprintf |
|
3 |
+ * |
|
4 |
+ * AUTHOR |
|
5 |
+ * Mark Martinec <mark.martinec@ijs.si>, April 1999. |
|
6 |
+ * |
|
7 |
+ * Copyright 1999, Mark Martinec. All rights reserved. |
|
8 |
+ * |
|
9 |
+ * TERMS AND CONDITIONS |
|
10 |
+ * This program is free software; you can redistribute it and/or modify |
|
11 |
+ * it under the terms of the "Frontier Artistic License" which comes |
|
12 |
+ * with this Kit. |
|
13 |
+ * |
|
14 |
+ * This program is distributed in the hope that it will be useful, |
|
15 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty |
|
16 |
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
|
17 |
+ * See the Frontier Artistic License for more details. |
|
18 |
+ * |
|
19 |
+ * You should have received a copy of the Frontier Artistic License |
|
20 |
+ * with this Kit in the file named LICENSE.txt . |
|
21 |
+ * If not, I'll be glad to provide one. |
|
22 |
+ * |
|
23 |
+ * FEATURES |
|
24 |
+ * - careful adherence to specs regarding flags, field width and precision; |
|
25 |
+ * - good performance for large string handling (large format, large |
|
26 |
+ * argument or large paddings). Performance is similar to system's sprintf |
|
27 |
+ * and in several cases significantly better (make sure you compile with |
|
28 |
+ * optimizations turned on, tell the compiler the code is strict ANSI |
|
29 |
+ * if necessary to give it more freedom for optimizations); |
|
30 |
+ * - return value semantics per ISO/IEC 9899:1999 ("ISO C99"); |
|
31 |
+ * - written in standard ISO/ANSI C - requires an ANSI C compiler. |
|
32 |
+ * |
|
33 |
+ * SUPPORTED CONVERSION SPECIFIERS AND DATA TYPES |
|
34 |
+ * |
|
35 |
+ * This snprintf only supports the following conversion specifiers: |
|
36 |
+ * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below) |
|
37 |
+ * with flags: '-', '+', ' ', '0' and '#'. |
|
38 |
+ * An asterisk is supported for field width as well as precision. |
|
39 |
+ * |
|
40 |
+ * Length modifiers 'h' (short int), 'l' (long int), |
|
41 |
+ * and 'll' (long long int) are supported. |
|
42 |
+ * NOTE: |
|
43 |
+ * If macro SNPRINTF_LONGLONG_SUPPORT is not defined (default) the |
|
44 |
+ * length modifier 'll' is recognized but treated the same as 'l', |
|
45 |
+ * which may cause argument value truncation! Defining |
|
46 |
+ * SNPRINTF_LONGLONG_SUPPORT requires that your system's sprintf also |
|
47 |
+ * handles length modifier 'll'. long long int is a language extension |
|
48 |
+ * which may not be portable. |
|
49 |
+ * |
|
50 |
+ * Conversion of numeric data (conversion specifiers d, u, o, x, X, p) |
|
51 |
+ * with length modifiers (none or h, l, ll) is left to the system routine |
|
52 |
+ * sprintf, but all handling of flags, field width and precision as well as |
|
53 |
+ * c and s conversions is done very carefully by this portable routine. |
|
54 |
+ * If a string precision (truncation) is specified (e.g. %.8s) it is |
|
55 |
+ * guaranteed the string beyond the specified precision will not be referenced. |
|
56 |
+ * |
|
57 |
+ * Length modifiers h, l and ll are ignored for c and s conversions (data |
|
58 |
+ * types wint_t and wchar_t are not supported). |
|
59 |
+ * |
|
60 |
+ * The following common synonyms for conversion characters are supported: |
|
61 |
+ * - i is a synonym for d |
|
62 |
+ * - D is a synonym for ld, explicit length modifiers are ignored |
|
63 |
+ * - U is a synonym for lu, explicit length modifiers are ignored |
|
64 |
+ * - O is a synonym for lo, explicit length modifiers are ignored |
|
65 |
+ * The D, O and U conversion characters are nonstandard, they are supported |
|
66 |
+ * for backward compatibility only, and should not be used for new code. |
|
67 |
+ * |
|
68 |
+ * The following is specifically NOT supported: |
|
69 |
+ * - flag ' (thousands' grouping character) is recognized but ignored |
|
70 |
+ * - numeric conversion specifiers: f, e, E, g, G and synonym F, |
|
71 |
+ * as well as the new a and A conversion specifiers |
|
72 |
+ * - length modifier 'L' (long double) and 'q' (quad - use 'll' instead) |
|
73 |
+ * - wide character/string conversions: lc, ls, and nonstandard |
|
74 |
+ * synonyms C and S |
|
75 |
+ * - writeback of converted string length: conversion character n |
|
76 |
+ * - the n$ specification for direct reference to n-th argument |
|
77 |
+ * - locales |
|
78 |
+ * |
|
79 |
+ * It is permitted for str_m to be zero, and it is permitted to specify NULL |
|
80 |
+ * pointer for resulting string argument if str_m is zero (as per ISO C99). |
|
81 |
+ * |
|
82 |
+ * The return value is the number of characters which would be generated |
|
83 |
+ * for the given input, excluding the trailing null. If this value |
|
84 |
+ * is greater or equal to str_m, not all characters from the result |
|
85 |
+ * have been stored in str, output bytes beyond the (str_m-1) -th character |
|
86 |
+ * are discarded. If str_m is greater than zero it is guaranteed |
|
87 |
+ * the resulting string will be null-terminated. |
|
88 |
+ * |
|
89 |
+ * NOTE that this matches the ISO C99, OpenBSD, and GNU C library 2.1, |
|
90 |
+ * but is different from some older and vendor implementations, |
|
91 |
+ * and is also different from XPG, XSH5, SUSv2 specifications. |
|
92 |
+ * For historical discussion on changes in the semantics and standards |
|
93 |
+ * of snprintf see printf(3) man page in the Linux programmers manual. |
|
94 |
+ * |
|
95 |
+ * Routines asprintf and vasprintf return a pointer (in the ptr argument) |
|
96 |
+ * to a buffer sufficiently large to hold the resulting string. This pointer |
|
97 |
+ * should be passed to free(3) to release the allocated storage when it is |
|
98 |
+ * no longer needed. If sufficient space cannot be allocated, these functions |
|
99 |
+ * will return -1 and set ptr to be a NULL pointer. These two routines are a |
|
100 |
+ * GNU C library extensions (glibc). |
|
101 |
+ * |
|
102 |
+ * Routines asnprintf and vasnprintf are similar to asprintf and vasprintf, |
|
103 |
+ * yet, like snprintf and vsnprintf counterparts, will write at most str_m-1 |
|
104 |
+ * characters into the allocated output string, the last character in the |
|
105 |
+ * allocated buffer then gets the terminating null. If the formatted string |
|
106 |
+ * length (the return value) is greater than or equal to the str_m argument, |
|
107 |
+ * the resulting string was truncated and some of the formatted characters |
|
108 |
+ * were discarded. These routines present a handy way to limit the amount |
|
109 |
+ * of allocated memory to some sane value. |
|
110 |
+ * |
|
111 |
+ * AVAILABILITY |
|
112 |
+ * http://www.ijs.si/software/snprintf/ |
|
113 |
+ * |
|
114 |
+ * REVISION HISTORY |
|
115 |
+ * 1999-04 V0.9 Mark Martinec |
|
116 |
+ * - initial version, some modifications after comparing printf |
|
117 |
+ * man pages for Digital Unix 4.0, Solaris 2.6 and HPUX 10, |
|
118 |
+ * and checking how Perl handles sprintf (differently!); |
|
119 |
+ * 1999-04-09 V1.0 Mark Martinec <mark.martinec@ijs.si> |
|
120 |
+ * - added main test program, fixed remaining inconsistencies, |
|
121 |
+ * added optional (long long int) support; |
|
122 |
+ * 1999-04-12 V1.1 Mark Martinec <mark.martinec@ijs.si> |
|
123 |
+ * - support the 'p' conversion (pointer to void); |
|
124 |
+ * - if a string precision is specified |
|
125 |
+ * make sure the string beyond the specified precision |
|
126 |
+ * will not be referenced (e.g. by strlen); |
|
127 |
+ * 1999-04-13 V1.2 Mark Martinec <mark.martinec@ijs.si> |
|
128 |
+ * - support synonyms %D=%ld, %U=%lu, %O=%lo; |
|
129 |
+ * - speed up the case of long format string with few conversions; |
|
130 |
+ * 1999-06-30 V1.3 Mark Martinec <mark.martinec@ijs.si> |
|
131 |
+ * - fixed runaway loop (eventually crashing when str_l wraps |
|
132 |
+ * beyond 2^31) while copying format string without |
|
133 |
+ * conversion specifiers to a buffer that is too short |
|
134 |
+ * (thanks to Edwin Young <edwiny@autonomy.com> for |
|
135 |
+ * spotting the problem); |
|
136 |
+ * - added macros PORTABLE_SNPRINTF_VERSION_(MAJOR|MINOR) |
|
137 |
+ * to snprintf.h |
|
138 |
+ * 2000-02-14 V2.0 (never released) Mark Martinec <mark.martinec@ijs.si> |
|
139 |
+ * - relaxed license terms: The Artistic License now applies. |
|
140 |
+ * You may still apply the GNU GENERAL PUBLIC LICENSE |
|
141 |
+ * as was distributed with previous versions, if you prefer; |
|
142 |
+ * - changed REVISION HISTORY dates to use ISO 8601 date format; |
|
143 |
+ * - added vsnprintf (patch also independently proposed by |
|
144 |
+ * Caolan McNamara 2000-05-04, and Keith M Willenson 2000-06-01) |
|
145 |
+ * 2000-06-27 V2.1 Mark Martinec <mark.martinec@ijs.si> |
|
146 |
+ * - removed POSIX check for str_m<1; value 0 for str_m is |
|
147 |
+ * allowed by ISO C99 (and GNU C library 2.1) - (pointed out |
|
148 |
+ * on 2000-05-04 by Caolan McNamara, caolan@ csn dot ul dot ie). |
|
149 |
+ * Besides relaxed license this change in standards adherence |
|
150 |
+ * is the main reason to bump up the major version number; |
|
151 |
+ * - added nonstandard routines asnprintf, vasnprintf, asprintf, |
|
152 |
+ * vasprintf that dynamically allocate storage for the |
|
153 |
+ * resulting string; these routines are not compiled by default, |
|
154 |
+ * see comments where NEED_V?ASN?PRINTF macros are defined; |
|
155 |
+ * - autoconf contributed by Caolan McNamara |
|
156 |
+ * 2000-10-06 V2.2 Mark Martinec <mark.martinec@ijs.si> |
|
157 |
+ * - BUG FIX: the %c conversion used a temporary variable |
|
158 |
+ * that was no longer in scope when referenced, |
|
159 |
+ * possibly causing incorrect resulting character; |
|
160 |
+ * - BUG FIX: make precision and minimal field width unsigned |
|
161 |
+ * to handle huge values (2^31 <= n < 2^32) correctly; |
|
162 |
+ * also be more careful in the use of signed/unsigned/size_t |
|
163 |
+ * internal variables - probably more careful than many |
|
164 |
+ * vendor implementations, but there may still be a case |
|
165 |
+ * where huge values of str_m, precision or minimal field |
|
166 |
+ * could cause incorrect behaviour; |
|
167 |
+ * - use separate variables for signed/unsigned arguments, |
|
168 |
+ * and for short/int, long, and long long argument lengths |
|
169 |
+ * to avoid possible incompatibilities on certain |
|
170 |
+ * computer architectures. Also use separate variable |
|
171 |
+ * arg_sign to hold sign of a numeric argument, |
|
172 |
+ * to make code more transparent; |
|
173 |
+ * - some fiddling with zero padding and "0x" to make it |
|
174 |
+ * Linux compatible; |
|
175 |
+ * - systematically use macros fast_memcpy and fast_memset |
|
176 |
+ * instead of case-by-case hand optimization; determine some |
|
177 |
+ * breakeven string lengths for different architectures; |
|
178 |
+ * - terminology change: 'format' -> 'conversion specifier', |
|
179 |
+ * 'C9x' -> 'ISO/IEC 9899:1999 ("ISO C99")', |
|
180 |
+ * 'alternative form' -> 'alternate form', |
|
181 |
+ * 'data type modifier' -> 'length modifier'; |
|
182 |
+ * - several comments rephrased and new ones added; |
|
183 |
+ * - make compiler not complain about 'credits' defined but |
|
184 |
+ * not used; |
|
185 |
+ */ |
|
186 |
+ |
|
187 |
+ |
|
188 |
+/* Define HAVE_SNPRINTF if your system already has snprintf and vsnprintf. |
|
189 |
+ * |
|
190 |
+ * If HAVE_SNPRINTF is defined this module will not produce code for |
|
191 |
+ * snprintf and vsnprintf, unless PREFER_PORTABLE_SNPRINTF is defined as well, |
|
192 |
+ * causing this portable version of snprintf to be called portable_snprintf |
|
193 |
+ * (and portable_vsnprintf). |
|
194 |
+ */ |
|
195 |
+/* #define HAVE_SNPRINTF */ |
|
196 |
+ |
|
197 |
+/* Define PREFER_PORTABLE_SNPRINTF if your system does have snprintf and |
|
198 |
+ * vsnprintf but you would prefer to use the portable routine(s) instead. |
|
199 |
+ * In this case the portable routine is declared as portable_snprintf |
|
200 |
+ * (and portable_vsnprintf) and a macro 'snprintf' (and 'vsnprintf') |
|
201 |
+ * is defined to expand to 'portable_v?snprintf' - see file snprintf.h . |
|
202 |
+ * Defining this macro is only useful if HAVE_SNPRINTF is also defined, |
|
203 |
+ * but does does no harm if defined nevertheless. |
|
204 |
+ */ |
|
205 |
+/* #define PREFER_PORTABLE_SNPRINTF */ |
|
206 |
+ |
|
207 |
+/* Define SNPRINTF_LONGLONG_SUPPORT if you want to support |
|
208 |
+ * data type (long long int) and length modifier 'll' (e.g. %lld). |
|
209 |
+ * If undefined, 'll' is recognized but treated as a single 'l'. |
|
210 |
+ * |
|
211 |
+ * If the system's sprintf does not handle 'll' |
|
212 |
+ * the SNPRINTF_LONGLONG_SUPPORT must not be defined! |
|
213 |
+ * |
|
214 |
+ * This is off by default as (long long int) is a language extension. |
|
215 |
+ */ |
|
216 |
+/* #define SNPRINTF_LONGLONG_SUPPORT */ |
|
217 |
+ |
|
218 |
+/* Define NEED_SNPRINTF_ONLY if you only need snprintf, and not vsnprintf. |
|
219 |
+ * If NEED_SNPRINTF_ONLY is defined, the snprintf will be defined directly, |
|
220 |
+ * otherwise both snprintf and vsnprintf routines will be defined |
|
221 |
+ * and snprintf will be a simple wrapper around vsnprintf, at the expense |
|
222 |
+ * of an extra procedure call. |
|
223 |
+ */ |
|
224 |
+/* #define NEED_SNPRINTF_ONLY */ |
|
225 |
+ |
|
226 |
+/* Define NEED_V?ASN?PRINTF macros if you need library extension |
|
227 |
+ * routines asprintf, vasprintf, asnprintf, vasnprintf respectively, |
|
228 |
+ * and your system library does not provide them. They are all small |
|
229 |
+ * wrapper routines around portable_vsnprintf. Defining any of the four |
|
230 |
+ * NEED_V?ASN?PRINTF macros automatically turns off NEED_SNPRINTF_ONLY |
|
231 |
+ * and turns on PREFER_PORTABLE_SNPRINTF. |
|
232 |
+ * |
|
233 |
+ * Watch for name conflicts with the system library if these routines |
|
234 |
+ * are already present there. |
|
235 |
+ * |
|
236 |
+ * NOTE: vasprintf and vasnprintf routines need va_copy() from stdarg.h, as |
|
237 |
+ * specified by C99, to be able to traverse the same list of arguments twice. |
|
238 |
+ * I don't know of any other standard and portable way of achieving the same. |
|
239 |
+ * With some versions of gcc you may use __va_copy(). You might even get away |
|
240 |
+ * with "ap2 = ap", in this case you must not call va_end(ap2) ! |
|
241 |
+ * #define va_copy(ap2,ap) ap2 = ap |
|
242 |
+ */ |
|
243 |
+/* #define NEED_ASPRINTF */ |
|
244 |
+/* #define NEED_ASNPRINTF */ |
|
245 |
+/* #define NEED_VASPRINTF */ |
|
246 |
+/* #define NEED_VASNPRINTF */ |
|
247 |
+ |
|
248 |
+ |
|
249 |
+/* Define the following macros if desired: |
|
250 |
+ * SOLARIS_COMPATIBLE, SOLARIS_BUG_COMPATIBLE, |
|
251 |
+ * HPUX_COMPATIBLE, HPUX_BUG_COMPATIBLE, LINUX_COMPATIBLE, |
|
252 |
+ * DIGITAL_UNIX_COMPATIBLE, DIGITAL_UNIX_BUG_COMPATIBLE, |
|
253 |
+ * PERL_COMPATIBLE, PERL_BUG_COMPATIBLE, |
|
254 |
+ * |
|
255 |
+ * - For portable applications it is best not to rely on peculiarities |
|
256 |
+ * of a given implementation so it may be best not to define any |
|
257 |
+ * of the macros that select compatibility and to avoid features |
|
258 |
+ * that vary among the systems. |
|
259 |
+ * |
|
260 |
+ * - Selecting compatibility with more than one operating system |
|
261 |
+ * is not strictly forbidden but is not recommended. |
|
262 |
+ * |
|
263 |
+ * - 'x'_BUG_COMPATIBLE implies 'x'_COMPATIBLE . |
|
264 |
+ * |
|
265 |
+ * - 'x'_COMPATIBLE refers to (and enables) a behaviour that is |
|
266 |
+ * documented in a sprintf man page on a given operating system |
|
267 |
+ * and actually adhered to by the system's sprintf (but not on |
|
268 |
+ * most other operating systems). It may also refer to and enable |
|
269 |
+ * a behaviour that is declared 'undefined' or 'implementation specific' |
|
270 |
+ * in the man page but a given implementation behaves predictably |
|
271 |
+ * in a certain way. |
|
272 |
+ * |
|
273 |
+ * - 'x'_BUG_COMPATIBLE refers to (and enables) a behaviour of system's sprintf |
|
274 |
+ * that contradicts the sprintf man page on the same operating system. |
|
275 |
+ * |
|
276 |
+ * - I do not claim that the 'x'_COMPATIBLE and 'x'_BUG_COMPATIBLE |
|
277 |
+ * conditionals take into account all idiosyncrasies of a particular |
|
278 |
+ * implementation, there may be other incompatibilities. |
|
279 |
+ */ |
|
280 |
+ |
|
281 |
+ |
|
282 |
+ |
|
283 |
+/* ============================================= */ |
|
284 |
+/* NO USER SERVICABLE PARTS FOLLOWING THIS POINT */ |
|
285 |
+/* ============================================= */ |
|
286 |
+ |
|
287 |
+#define PORTABLE_SNPRINTF_VERSION_MAJOR 2 |
|
288 |
+#define PORTABLE_SNPRINTF_VERSION_MINOR 2 |
|
289 |
+ |
|
290 |
+#if defined(NEED_ASPRINTF) || defined(NEED_ASNPRINTF) || defined(NEED_VASPRINTF) || defined(NEED_VASNPRINTF) |
|
291 |
+# if defined(NEED_SNPRINTF_ONLY) |
|
292 |
+# undef NEED_SNPRINTF_ONLY |
|
293 |
+# endif |
|
294 |
+# if !defined(PREFER_PORTABLE_SNPRINTF) |
|
295 |
+# define PREFER_PORTABLE_SNPRINTF |
|
296 |
+# endif |
|
297 |
+#endif |
|
298 |
+ |
|
299 |
+#if defined(SOLARIS_BUG_COMPATIBLE) && !defined(SOLARIS_COMPATIBLE) |
|
300 |
+#define SOLARIS_COMPATIBLE |
|
301 |
+#endif |
|
302 |
+ |
|
303 |
+#if defined(HPUX_BUG_COMPATIBLE) && !defined(HPUX_COMPATIBLE) |
|
304 |
+#define HPUX_COMPATIBLE |
|
305 |
+#endif |
|
306 |
+ |
|
307 |
+#if defined(DIGITAL_UNIX_BUG_COMPATIBLE) && !defined(DIGITAL_UNIX_COMPATIBLE) |
|
308 |
+#define DIGITAL_UNIX_COMPATIBLE |
|
309 |
+#endif |
|
310 |
+ |
|
311 |
+#if defined(PERL_BUG_COMPATIBLE) && !defined(PERL_COMPATIBLE) |
|
312 |
+#define PERL_COMPATIBLE |
|
313 |
+#endif |
|
314 |
+ |
|
315 |
+#if defined(LINUX_BUG_COMPATIBLE) && !defined(LINUX_COMPATIBLE) |
|
316 |
+#define LINUX_COMPATIBLE |
|
317 |
+#endif |
|
318 |
+ |
|
319 |
+#include <sys/types.h> |
|
320 |
+#include <string.h> |
|
321 |
+#include <stdlib.h> |
|
322 |
+#include <stdio.h> |
|
323 |
+#include <stdarg.h> |
|
324 |
+#include <assert.h> |
|
325 |
+#include <errno.h> |
|
326 |
+ |
|
327 |
+#ifdef isdigit |
|
328 |
+#undef isdigit |
|
329 |
+#endif |
|
330 |
+#define isdigit(c) ((c) >= '0' && (c) <= '9') |
|
331 |
+ |
|
332 |
+/* For copying strings longer or equal to 'breakeven_point' |
|
333 |
+ * it is more efficient to call memcpy() than to do it inline. |
|
334 |
+ * The value depends mostly on the processor architecture, |
|
335 |
+ * but also on the compiler and its optimization capabilities. |
|
336 |
+ * The value is not critical, some small value greater than zero |
|
337 |
+ * will be just fine if you don't care to squeeze every drop |
|
338 |
+ * of performance out of the code. |
|
339 |
+ * |
|
340 |
+ * Small values favor memcpy, large values favor inline code. |
|
341 |
+ */ |
|
342 |
+#if defined(__alpha__) || defined(__alpha) |
|
343 |
+# define breakeven_point 2 /* AXP (DEC Alpha) - gcc or cc or egcs */ |
|
344 |
+#endif |
|
345 |
+#if defined(__i386__) || defined(__i386) |
|
346 |
+# define breakeven_point 12 /* Intel Pentium/Linux - gcc 2.96 */ |
|
347 |
+#endif |
|
348 |
+#if defined(__hppa) |
|
349 |
+# define breakeven_point 10 /* HP-PA - gcc */ |
|
350 |
+#endif |
|
351 |
+#if defined(__sparc__) || defined(__sparc) |
|
352 |
+# define breakeven_point 33 /* Sun Sparc 5 - gcc 2.8.1 */ |
|
353 |
+#endif |
|
354 |
+ |
|
355 |
+/* some other values of possible interest: */ |
|
356 |
+/* #define breakeven_point 8 */ /* VAX 4000 - vaxc */ |
|
357 |
+/* #define breakeven_point 19 */ /* VAX 4000 - gcc 2.7.0 */ |
|
358 |
+ |
|
359 |
+#ifndef breakeven_point |
|
360 |
+# define breakeven_point 6 /* some reasonable one-size-fits-all value */ |
|
361 |
+#endif |
|
362 |
+ |
|
363 |
+#define fast_memcpy(d,s,n) \ |
|
364 |
+ { register size_t nn = (size_t)(n); \ |
|
365 |
+ if (nn >= breakeven_point) memcpy((d), (s), nn); \ |
|
366 |
+ else if (nn > 0) { /* proc call overhead is worth only for large strings*/\ |
|
367 |
+ register char *dd; register const char *ss; \ |
|
368 |
+ for (ss=(s), dd=(d); nn>0; nn--) *dd++ = *ss++; } } |
|
369 |
+ |
|
370 |
+#define fast_memset(d,c,n) \ |
|
371 |
+ { register size_t nn = (size_t)(n); \ |
|
372 |
+ if (nn >= breakeven_point) memset((d), (int)(c), nn); \ |
|
373 |
+ else if (nn > 0) { /* proc call overhead is worth only for large strings*/\ |
|
374 |
+ register char *dd; register const int cc=(int)(c); \ |
|
375 |
+ for (dd=(d); nn>0; nn--) *dd++ = cc; } } |
|
376 |
+ |
|
377 |
+/* prototypes */ |
|
378 |
+ |
|
379 |
+#if defined(NEED_ASPRINTF) |
|
380 |
+int asprintf (char **ptr, const char *fmt, /*args*/ ...); |
|
381 |
+#endif |
|
382 |
+#if defined(NEED_VASPRINTF) |
|
383 |
+int vasprintf (char **ptr, const char *fmt, va_list ap); |
|
384 |
+#endif |
|
385 |
+#if defined(NEED_ASNPRINTF) |
|
386 |
+int asnprintf (char **ptr, size_t str_m, const char *fmt, /*args*/ ...); |
|
387 |
+#endif |
|
388 |
+#if defined(NEED_VASNPRINTF) |
|
389 |
+int vasnprintf (char **ptr, size_t str_m, const char *fmt, va_list ap); |
|
390 |
+#endif |
|
391 |
+ |
|
392 |
+#if defined(HAVE_SNPRINTF) |
|
393 |
+/* declare our portable snprintf routine under name portable_snprintf */ |
|
394 |
+/* declare our portable vsnprintf routine under name portable_vsnprintf */ |
|
395 |
+#else |
|
396 |
+/* declare our portable routines under names snprintf and vsnprintf */ |
|
397 |
+#define portable_snprintf snprintf |
|
398 |
+#if !defined(NEED_SNPRINTF_ONLY) |
|
399 |
+#define portable_vsnprintf vsnprintf |
|
400 |
+#endif |
|
401 |
+#endif |
|
402 |
+ |
|
403 |
+#if !defined(HAVE_SNPRINTF) || defined(PREFER_PORTABLE_SNPRINTF) |
|
404 |
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...); |
|
405 |
+#if !defined(NEED_SNPRINTF_ONLY) |
|
406 |
+int portable_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap); |
|
407 |
+#endif |
|
408 |
+#endif |
|
409 |
+ |
|
410 |
+/* declarations */ |
|
411 |
+ |
|
412 |
+static char credits[] = "\n\ |
|
413 |
+@(#)snprintf.c, v2.2: Mark Martinec, <mark.martinec@ijs.si>\n\ |
|
414 |
+@(#)snprintf.c, v2.2: Copyright 1999, Mark Martinec. Frontier Artistic License applies.\n\ |
|
415 |
+@(#)snprintf.c, v2.2: http://www.ijs.si/software/snprintf/\n"; |
|
416 |
+ |
|
417 |
+#if defined(NEED_ASPRINTF) |
|
418 |
+int asprintf(char **ptr, const char *fmt, /*args*/ ...) { |
|
419 |
+ va_list ap; |
|
420 |
+ size_t str_m; |
|
421 |
+ int str_l; |
|
422 |
+ |
|
423 |
+ *ptr = NULL; |
|
424 |
+ va_start(ap, fmt); /* measure the required size */ |
|
425 |
+ str_l = portable_vsnprintf(NULL, (size_t)0, fmt, ap); |
|
426 |
+ va_end(ap); |
|
427 |
+ assert(str_l >= 0); /* possible integer overflow if str_m > INT_MAX */ |
|
428 |
+ *ptr = (char *) malloc(str_m = (size_t)str_l + 1); |
|
429 |
+ if (*ptr == NULL) { errno = ENOMEM; str_l = -1; } |
|
430 |
+ else { |
|
431 |
+ int str_l2; |
|
432 |
+ va_start(ap, fmt); |
|
433 |
+ str_l2 = portable_vsnprintf(*ptr, str_m, fmt, ap); |
|
434 |
+ va_end(ap); |
|
435 |
+ assert(str_l2 == str_l); |
|
436 |
+ } |
|
437 |
+ return str_l; |
|
438 |
+} |
|
439 |
+#endif |
|
440 |
+ |
|
441 |
+#if defined(NEED_VASPRINTF) |
|
442 |
+int vasprintf(char **ptr, const char *fmt, va_list ap) { |
|
443 |
+ size_t str_m; |
|
444 |
+ int str_l; |
|
445 |
+ |
|
446 |
+ *ptr = NULL; |
|
447 |
+ { va_list ap2; |
|
448 |
+ va_copy(ap2, ap); /* don't consume the original ap, we'll need it again */ |
|
449 |
+ str_l = portable_vsnprintf(NULL, (size_t)0, fmt, ap2);/*get required size*/ |
|
450 |
+ va_end(ap2); |
|
451 |
+ } |
|
452 |
+ assert(str_l >= 0); /* possible integer overflow if str_m > INT_MAX */ |
|
453 |
+ *ptr = (char *) malloc(str_m = (size_t)str_l + 1); |
|
454 |
+ if (*ptr == NULL) { errno = ENOMEM; str_l = -1; } |
|
455 |
+ else { |
|
456 |
+ int str_l2 = portable_vsnprintf(*ptr, str_m, fmt, ap); |
|
457 |
+ assert(str_l2 == str_l); |
|
458 |
+ } |
|
459 |
+ return str_l; |
|
460 |
+} |
|
461 |
+#endif |
|
462 |
+ |
|
463 |
+#if defined(NEED_ASNPRINTF) |
|
464 |
+int asnprintf (char **ptr, size_t str_m, const char *fmt, /*args*/ ...) { |
|
465 |
+ va_list ap; |
|
466 |
+ int str_l; |
|
467 |
+ |
|
468 |
+ *ptr = NULL; |
|
469 |
+ va_start(ap, fmt); /* measure the required size */ |
|
470 |
+ str_l = portable_vsnprintf(NULL, (size_t)0, fmt, ap); |
|
471 |
+ va_end(ap); |
|
472 |
+ assert(str_l >= 0); /* possible integer overflow if str_m > INT_MAX */ |
|
473 |
+ if ((size_t)str_l + 1 < str_m) str_m = (size_t)str_l + 1; /* truncate */ |
|
474 |
+ /* if str_m is 0, no buffer is allocated, just set *ptr to NULL */ |
|
475 |
+ if (str_m == 0) { /* not interested in resulting string, just return size */ |
|
476 |
+ } else { |
|
477 |
+ *ptr = (char *) malloc(str_m); |
|
478 |
+ if (*ptr == NULL) { errno = ENOMEM; str_l = -1; } |
|
479 |
+ else { |
|
480 |
+ int str_l2; |
|
481 |
+ va_start(ap, fmt); |
|
482 |
+ str_l2 = portable_vsnprintf(*ptr, str_m, fmt, ap); |
|
483 |
+ va_end(ap); |
|
484 |
+ assert(str_l2 == str_l); |
|
485 |
+ } |
|
486 |
+ } |
|
487 |
+ return str_l; |
|
488 |
+} |
|
489 |
+#endif |
|
490 |
+ |
|
491 |
+#if defined(NEED_VASNPRINTF) |
|
492 |
+int vasnprintf (char **ptr, size_t str_m, const char *fmt, va_list ap) { |
|
493 |
+ int str_l; |
|
494 |
+ |
|
495 |
+ *ptr = NULL; |
|
496 |
+ { va_list ap2; |
|
497 |
+ va_copy(ap2, ap); /* don't consume the original ap, we'll need it again */ |
|
498 |
+ str_l = portable_vsnprintf(NULL, (size_t)0, fmt, ap2);/*get required size*/ |
|
499 |
+ va_end(ap2); |
|
500 |
+ } |
|
501 |
+ assert(str_l >= 0); /* possible integer overflow if str_m > INT_MAX */ |
|
502 |
+ if ((size_t)str_l + 1 < str_m) str_m = (size_t)str_l + 1; /* truncate */ |
|
503 |
+ /* if str_m is 0, no buffer is allocated, just set *ptr to NULL */ |
|
504 |
+ if (str_m == 0) { /* not interested in resulting string, just return size */ |
|
505 |
+ } else { |
|
506 |
+ *ptr = (char *) malloc(str_m); |
|
507 |
+ if (*ptr == NULL) { errno = ENOMEM; str_l = -1; } |
|
508 |
+ else { |
|
509 |
+ int str_l2 = portable_vsnprintf(*ptr, str_m, fmt, ap); |
|
510 |
+ assert(str_l2 == str_l); |
|
511 |
+ } |
|
512 |
+ } |
|
513 |
+ return str_l; |
|
514 |
+} |
|
515 |
+#endif |
|
516 |
+ |
|
517 |
+/* |
|
518 |
+ * If the system does have snprintf and the portable routine is not |
|
519 |
+ * specifically required, this module produces no code for snprintf/vsnprintf. |
|
520 |
+ */ |
|
521 |
+#if !defined(HAVE_SNPRINTF) || defined(PREFER_PORTABLE_SNPRINTF) |
|
522 |
+ |
|
523 |
+#if !defined(NEED_SNPRINTF_ONLY) |
|
524 |
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...) { |
|
525 |
+ va_list ap; |
|
526 |
+ int str_l; |
|
527 |
+ |
|
528 |
+ va_start(ap, fmt); |
|
529 |
+ str_l = portable_vsnprintf(str, str_m, fmt, ap); |
|
530 |
+ va_end(ap); |
|
531 |
+ return str_l; |
|
532 |
+} |
|
533 |
+#endif |
|
534 |
+ |
|
535 |
+#if defined(NEED_SNPRINTF_ONLY) |
|
536 |
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...) { |
|
537 |
+#else |
|
538 |
+int portable_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap) { |
|
539 |
+#endif |
|
540 |
+ |
|
541 |
+#if defined(NEED_SNPRINTF_ONLY) |
|
542 |
+ va_list ap; |
|
543 |
+#endif |
|
544 |
+ size_t str_l = 0; |
|
545 |
+ const char *p = fmt; |
|
546 |
+ |
|
547 |
+/* In contrast with POSIX, the ISO C99 now says |
|
548 |
+ * that str can be NULL and str_m can be 0. |
|
549 |
+ * This is more useful than the old: if (str_m < 1) return -1; */ |
|
550 |
+ |
|
551 |
+#if defined(NEED_SNPRINTF_ONLY) |
|
552 |
+ va_start(ap, fmt); |
|
553 |
+#endif |
|
554 |
+ if (!p) p = ""; |
|
555 |
+ while (*p) { |
|
556 |
+ if (*p != '%') { |
|
557 |
+ /* if (str_l < str_m) str[str_l++] = *p++; -- this would be sufficient */ |
|
558 |
+ /* but the following code achieves better performance for cases |
|
559 |
+ * where format string is long and contains few conversions */ |
|
560 |
+ const char *q = strchr(p+1,'%'); |
|
561 |
+ size_t n = !q ? strlen(p) : (q-p); |
|
562 |
+ if (str_l < str_m) { |
|
563 |
+ size_t avail = str_m-str_l; |
|
564 |
+ fast_memcpy(str+str_l, p, (n>avail?avail:n)); |
|
565 |
+ } |
|
566 |
+ p += n; str_l += n; |
|
567 |
+ } else { |
|
568 |
+ const char *starting_p; |
|
569 |
+ size_t min_field_width = 0, precision = 0; |
|
570 |
+ int zero_padding = 0, precision_specified = 0, justify_left = 0; |
|
571 |
+ int alternate_form = 0, force_sign = 0; |
|
572 |
+ int space_for_positive = 1; /* If both the ' ' and '+' flags appear, |
|
573 |
+ the ' ' flag should be ignored. */ |
|
574 |
+ char length_modifier = '\0'; /* allowed values: \0, h, l, L */ |
|
575 |
+ char tmp[32];/* temporary buffer for simple numeric->string conversion */ |
|
576 |
+ |
|
577 |
+ const char *str_arg; /* string address in case of string argument */ |
|
578 |
+ size_t str_arg_l; /* natural field width of arg without padding |
|
579 |
+ and sign */ |
|
580 |
+ unsigned char uchar_arg; |
|
581 |
+ /* unsigned char argument value - only defined for c conversion. |
|
582 |
+ N.B. standard explicitly states the char argument for |
|
583 |
+ the c conversion is unsigned */ |
|
584 |
+ |
|
585 |
+ size_t number_of_zeros_to_pad = 0; |
|
586 |
+ /* number of zeros to be inserted for numeric conversions |
|
587 |
+ as required by the precision or minimal field width */ |
|
588 |
+ |
|
589 |
+ size_t zero_padding_insertion_ind = 0; |
|
590 |
+ /* index into tmp where zero padding is to be inserted */ |
|
591 |
+ |
|
592 |
+ char fmt_spec = '\0'; |
|
593 |
+ /* current conversion specifier character */ |
|
594 |
+ |
|
595 |
+ str_arg = credits;/* just to make compiler happy (defined but not used)*/ |
|
596 |
+ str_arg = NULL; |
|
597 |
+ starting_p = p; p++; /* skip '%' */ |
|
598 |
+ /* parse flags */ |
|
599 |
+ while (*p == '0' || *p == '-' || *p == '+' || |
|
600 |
+ *p == ' ' || *p == '#' || *p == '\'') { |
|
601 |
+ switch (*p) { |
|
602 |
+ case '0': zero_padding = 1; break; |
|
603 |
+ case '-': justify_left = 1; break; |
|
604 |
+ case '+': force_sign = 1; space_for_positive = 0; break; |
|
605 |
+ case ' ': force_sign = 1; |
|
606 |
+ /* If both the ' ' and '+' flags appear, the ' ' flag should be ignored */ |
|
607 |
+#ifdef PERL_COMPATIBLE |
|
608 |
+ /* ... but in Perl the last of ' ' and '+' applies */ |
|
609 |
+ space_for_positive = 1; |
|
610 |
+#endif |
|
611 |
+ break; |
|
612 |
+ case '#': alternate_form = 1; break; |
|
613 |
+ case '\'': break; |
|
614 |
+ } |
|
615 |
+ p++; |
|
616 |
+ } |
|
617 |
+ /* If the '0' and '-' flags both appear, the '0' flag should be ignored. */ |
|
618 |
+ |
|
619 |
+ /* parse field width */ |
|
620 |
+ if (*p == '*') { |
|
621 |
+ int j; |
|
622 |
+ p++; j = va_arg(ap, int); |
|
623 |
+ if (j >= 0) min_field_width = j; |
|
624 |
+ else { min_field_width = -j; justify_left = 1; } |
|
625 |
+ } else if (isdigit((int)(*p))) { |
|
626 |
+ /* size_t could be wider than unsigned int; |
|
627 |
+ make sure we treat argument like common implementations do */ |
|
628 |
+ unsigned int uj = *p++ - '0'; |
|
629 |
+ while (isdigit((int)(*p))) uj = 10*uj + (unsigned int)(*p++ - '0'); |
|
630 |
+ min_field_width = uj; |
|
631 |
+ } |
|
632 |
+ /* parse precision */ |
|
633 |
+ if (*p == '.') { |
|
634 |
+ p++; precision_specified = 1; |
|
635 |
+ if (*p == '*') { |
|
636 |
+ int j = va_arg(ap, int); |
|
637 |
+ p++; |
|
638 |
+ if (j >= 0) precision = j; |
|
639 |
+ else { |
|
640 |
+ precision_specified = 0; precision = 0; |
|
641 |
+ /* NOTE: |
|
642 |
+ * Solaris 2.6 man page claims that in this case the precision |
|
643 |
+ * should be set to 0. Digital Unix 4.0, HPUX 10 and BSD man page |
|
644 |
+ * claim that this case should be treated as unspecified precision, |
|
645 |
+ * which is what we do here. |
|
646 |
+ */ |
|
647 |
+ } |
|
648 |
+ } else if (isdigit((int)(*p))) { |
|
649 |
+ /* size_t could be wider than unsigned int; |
|
650 |
+ make sure we treat argument like common implementations do */ |
|
651 |
+ unsigned int uj = *p++ - '0'; |
|
652 |
+ while (isdigit((int)(*p))) uj = 10*uj + (unsigned int)(*p++ - '0'); |
|
653 |
+ precision = uj; |
|
654 |
+ } |
|
655 |
+ } |
|
656 |
+ /* parse 'h', 'l' and 'll' length modifiers */ |
|
657 |
+ if (*p == 'h' || *p == 'l') { |
|
658 |
+ length_modifier = *p; p++; |
|
659 |
+ if (length_modifier == 'l' && *p == 'l') { /* double l = long long */ |
|
660 |
+#ifdef SNPRINTF_LONGLONG_SUPPORT |
|
661 |
+ length_modifier = '2'; /* double l encoded as '2' */ |
|
662 |
+#else |
|
663 |
+ length_modifier = 'l'; /* treat it as a single 'l' */ |
|
664 |
+#endif |
|
665 |
+ p++; |
|
666 |
+ } |
|
667 |
+ } |
|
668 |
+ fmt_spec = *p; |
|
669 |
+ /* common synonyms: */ |
|
670 |
+ switch (fmt_spec) { |
|
671 |
+ case 'i': fmt_spec = 'd'; break; |
|
672 |
+ case 'D': fmt_spec = 'd'; length_modifier = 'l'; break; |
|
673 |
+ case 'U': fmt_spec = 'u'; length_modifier = 'l'; break; |
|
674 |
+ case 'O': fmt_spec = 'o'; length_modifier = 'l'; break; |
|
675 |
+ default: break; |
|
676 |
+ } |
|
677 |
+ /* get parameter value, do initial processing */ |
|
678 |
+ switch (fmt_spec) { |
|
679 |
+ case '%': /* % behaves similar to 's' regarding flags and field widths */ |
|
680 |
+ case 'c': /* c behaves similar to 's' regarding flags and field widths */ |
|
681 |
+ case 's': |
|
682 |
+ length_modifier = '\0'; /* wint_t and wchar_t not supported */ |
|
683 |
+ /* the result of zero padding flag with non-numeric conversion specifier*/ |
|
684 |
+ /* is undefined. Solaris and HPUX 10 does zero padding in this case, */ |
|
685 |
+ /* Digital Unix and Linux does not. */ |
|
686 |
+#if !defined(SOLARIS_COMPATIBLE) && !defined(HPUX_COMPATIBLE) |
|
687 |
+ zero_padding = 0; /* turn zero padding off for string conversions */ |
|
688 |
+#endif |
|
689 |
+ str_arg_l = 1; |
|
690 |
+ switch (fmt_spec) { |
|
691 |
+ case '%': |
|
692 |
+ str_arg = p; break; |
|
693 |
+ case 'c': { |
|
694 |
+ int j = va_arg(ap, int); |
|
695 |
+ uchar_arg = (unsigned char) j; /* standard demands unsigned char */ |
|
696 |
+ str_arg = (const char *) &uchar_arg; |
|
697 |
+ break; |
|
698 |
+ } |
|
699 |
+ case 's': |
|
700 |
+ str_arg = va_arg(ap, const char *); |
|
701 |
+ if (!str_arg) str_arg_l = 0; |
|
702 |
+ /* make sure not to address string beyond the specified precision !!! */ |
|
703 |
+ else if (!precision_specified) str_arg_l = strlen(str_arg); |
|
704 |
+ /* truncate string if necessary as requested by precision */ |
|
705 |
+ else if (precision == 0) str_arg_l = 0; |
|
706 |
+ else { |
|
707 |
+ /* memchr on HP does not like n > 2^31 !!! */ |
|
708 |
+ const char *q = memchr(str_arg, '\0', |
|
709 |
+ precision <= 0x7fffffff ? precision : 0x7fffffff); |
|
710 |
+ str_arg_l = !q ? precision : (q-str_arg); |
|
711 |
+ } |
|
712 |
+ break; |
|
713 |
+ default: break; |
|
714 |
+ } |
|
715 |
+ break; |
|
716 |
+ case 'd': case 'u': case 'o': case 'x': case 'X': case 'p': { |
|
717 |
+ /* NOTE: the u, o, x, X and p conversion specifiers imply |
|
718 |
+ the value is unsigned; d implies a signed value */ |
|
719 |
+ |
|
720 |
+ int arg_sign = 0; |
|
721 |
+ /* 0 if numeric argument is zero (or if pointer is NULL for 'p'), |
|
722 |
+ +1 if greater than zero (or nonzero for unsigned arguments), |
|
723 |
+ -1 if negative (unsigned argument is never negative) */ |
|
724 |
+ |
|
725 |
+ int int_arg = 0; unsigned int uint_arg = 0; |
|
726 |
+ /* only defined for length modifier h, or for no length modifiers */ |
|
727 |
+ |
|
728 |
+ long int long_arg = 0; unsigned long int ulong_arg = 0; |
|
729 |
+ /* only defined for length modifier l */ |
|
730 |
+ |
|
731 |
+ void *ptr_arg = NULL; |
|
732 |
+ /* pointer argument value -only defined for p conversion */ |
|
733 |
+ |
|
734 |
+#ifdef SNPRINTF_LONGLONG_SUPPORT |
|
735 |
+ long long int long_long_arg = 0; |
|
736 |
+ unsigned long long int ulong_long_arg = 0; |
|
737 |
+ /* only defined for length modifier ll */ |
|
738 |
+#endif |
|
739 |
+ if (fmt_spec == 'p') { |
|
740 |
+ /* HPUX 10: An l, h, ll or L before any other conversion character |
|
741 |
+ * (other than d, i, u, o, x, or X) is ignored. |
|
742 |
+ * Digital Unix: |
|
743 |
+ * not specified, but seems to behave as HPUX does. |
|
744 |
+ * Solaris: If an h, l, or L appears before any other conversion |
|
745 |
+ * specifier (other than d, i, u, o, x, or X), the behavior |
|
746 |
+ * is undefined. (Actually %hp converts only 16-bits of address |
|
747 |
+ * and %llp treats address as 64-bit data which is incompatible |
|
748 |
+ * with (void *) argument on a 32-bit system). |
|
749 |
+ */ |
|
750 |
+#ifdef SOLARIS_COMPATIBLE |
|
751 |
+# ifdef SOLARIS_BUG_COMPATIBLE |
|
752 |
+ /* keep length modifiers even if it represents 'll' */ |
|
753 |
+# else |
|
754 |
+ if (length_modifier == '2') length_modifier = '\0'; |
|
755 |
+# endif |
|
756 |
+#else |
|
757 |
+ length_modifier = '\0'; |
|
758 |
+#endif |
|
759 |
+ ptr_arg = va_arg(ap, void *); |
|
760 |
+ if (ptr_arg != NULL) arg_sign = 1; |
|
761 |
+ } else if (fmt_spec == 'd') { /* signed */ |
|
762 |
+ switch (length_modifier) { |
|
763 |
+ case '\0': |
|
764 |
+ case 'h': |
|
765 |
+ /* It is non-portable to specify a second argument of char or short |
|
766 |
+ * to va_arg, because arguments seen by the called function |
|
767 |
+ * are not char or short. C converts char and short arguments |
|
768 |
+ * to int before passing them to a function. |
|
769 |
+ */ |
|
770 |
+ int_arg = va_arg(ap, int); |
|
771 |
+ if (int_arg > 0) arg_sign = 1; |
|
772 |
+ else if (int_arg < 0) arg_sign = -1; |
|
773 |
+ break; |
|
774 |
+ case 'l': |
|
775 |
+ long_arg = va_arg(ap, long int); |
|
776 |
+ if (long_arg > 0) arg_sign = 1; |
|
777 |
+ else if (long_arg < 0) arg_sign = -1; |
|
778 |
+ break; |
|
779 |
+#ifdef SNPRINTF_LONGLONG_SUPPORT |
|
780 |
+ case '2': |
|
781 |
+ long_long_arg = va_arg(ap, long long int); |
|
782 |
+ if (long_long_arg > 0) arg_sign = 1; |
|
783 |
+ else if (long_long_arg < 0) arg_sign = -1; |
|
784 |
+ break; |
|
785 |
+#endif |
|
786 |
+ } |
|
787 |
+ } else { /* unsigned */ |
|
788 |
+ switch (length_modifier) { |
|
789 |
+ case '\0': |
|
790 |
+ case 'h': |
|
791 |
+ uint_arg = va_arg(ap, unsigned int); |
|
792 |
+ if (uint_arg) arg_sign = 1; |
|
793 |
+ break; |
|
794 |
+ case 'l': |
|
795 |
+ ulong_arg = va_arg(ap, unsigned long int); |
|
796 |
+ if (ulong_arg) arg_sign = 1; |
|
797 |
+ break; |
|
798 |
+#ifdef SNPRINTF_LONGLONG_SUPPORT |
|
799 |
+ case '2': |
|
800 |
+ ulong_long_arg = va_arg(ap, unsigned long long int); |
|
801 |
+ if (ulong_long_arg) arg_sign = 1; |
|
802 |
+ break; |
|
803 |
+#endif |
|
804 |
+ } |
|
805 |
+ } |
|
806 |
+ str_arg = tmp; str_arg_l = 0; |
|
807 |
+ /* NOTE: |
|
808 |
+ * For d, i, u, o, x, and X conversions, if precision is specified, |
|
809 |
+ * the '0' flag should be ignored. This is so with Solaris 2.6, |
|
810 |
+ * Digital UNIX 4.0, HPUX 10, Linux, FreeBSD, NetBSD; but not with Perl. |
|
811 |
+ */ |
|
812 |
+#ifndef PERL_COMPATIBLE |
|
813 |
+ if (precision_specified) zero_padding = 0; |
|
814 |
+#endif |
|
815 |
+ if (fmt_spec == 'd') { |
|
816 |
+ if (force_sign && arg_sign >= 0) |
|
817 |
+ tmp[str_arg_l++] = space_for_positive ? ' ' : '+'; |
|
818 |
+ /* leave negative numbers for sprintf to handle, |
|
819 |
+ to avoid handling tricky cases like (short int)(-32768) */ |
|
820 |
+#ifdef LINUX_COMPATIBLE |
|
821 |
+ } else if (fmt_spec == 'p' && force_sign && arg_sign > 0) { |
|
822 |
+ tmp[str_arg_l++] = space_for_positive ? ' ' : '+'; |
|
823 |
+#endif |
|
824 |
+ } else if (alternate_form) { |
|
825 |
+ if (arg_sign != 0 && (fmt_spec == 'x' || fmt_spec == 'X') ) |
|
826 |
+ { tmp[str_arg_l++] = '0'; tmp[str_arg_l++] = fmt_spec; } |
|
827 |
+ /* alternate form should have no effect for p conversion, but ... */ |
|
828 |
+#ifdef HPUX_COMPATIBLE |
|
829 |
+ else if (fmt_spec == 'p' |
|
830 |
+ /* HPUX 10: for an alternate form of p conversion, |
|
831 |
+ * a nonzero result is prefixed by 0x. */ |
|
832 |
+#ifndef HPUX_BUG_COMPATIBLE |
|
833 |
+ /* Actually it uses 0x prefix even for a zero value. */ |
|
834 |
+ && arg_sign != 0 |
|
835 |
+#endif |
|
836 |
+ ) { tmp[str_arg_l++] = '0'; tmp[str_arg_l++] = 'x'; } |
|
837 |
+#endif |
|
838 |
+ } |
|
839 |
+ zero_padding_insertion_ind = str_arg_l; |
|
840 |
+ if (!precision_specified) precision = 1; /* default precision is 1 */ |
|
841 |
+ if (precision == 0 && arg_sign == 0 |
|
842 |
+#if defined(HPUX_BUG_COMPATIBLE) || defined(LINUX_COMPATIBLE) |
|
843 |
+ && fmt_spec != 'p' |
|
844 |
+ /* HPUX 10 man page claims: With conversion character p the result of |
|
845 |
+ * converting a zero value with a precision of zero is a null string. |
|
846 |
+ * Actually HP returns all zeroes, and Linux returns "(nil)". */ |
|
847 |
+#endif |
|
848 |
+ ) { |
|
849 |
+ /* converted to null string */ |
|
850 |
+ /* When zero value is formatted with an explicit precision 0, |
|
851 |
+ the resulting formatted string is empty (d, i, u, o, x, X, p). */ |
|
852 |
+ } else { |
|
853 |
+ char f[5]; int f_l = 0; |
|
854 |
+ f[f_l++] = '%'; /* construct a simple format string for sprintf */ |
|
855 |
+ if (!length_modifier) { } |
|
856 |
+ else if (length_modifier=='2') { f[f_l++] = 'l'; f[f_l++] = 'l'; } |
|
857 |
+ else f[f_l++] = length_modifier; |
|
858 |
+ f[f_l++] = fmt_spec; f[f_l++] = '\0'; |
|
859 |
+ if (fmt_spec == 'p') str_arg_l += sprintf(tmp+str_arg_l, f, ptr_arg); |
|
860 |
+ else if (fmt_spec == 'd') { /* signed */ |
|
861 |
+ switch (length_modifier) { |
|
862 |
+ case '\0': |
|
863 |
+ case 'h': str_arg_l+=sprintf(tmp+str_arg_l, f, int_arg); break; |
|
864 |
+ case 'l': str_arg_l+=sprintf(tmp+str_arg_l, f, long_arg); break; |
|
865 |
+#ifdef SNPRINTF_LONGLONG_SUPPORT |
|
866 |
+ case '2': str_arg_l+=sprintf(tmp+str_arg_l,f,long_long_arg); break; |
|
867 |
+#endif |
|
868 |
+ } |
|
869 |
+ } else { /* unsigned */ |
|
870 |
+ switch (length_modifier) { |
|
871 |
+ case '\0': |
|
872 |
+ case 'h': str_arg_l+=sprintf(tmp+str_arg_l, f, uint_arg); break; |
|
873 |
+ case 'l': str_arg_l+=sprintf(tmp+str_arg_l, f, ulong_arg); break; |
|
874 |
+#ifdef SNPRINTF_LONGLONG_SUPPORT |
|
875 |
+ case '2': str_arg_l+=sprintf(tmp+str_arg_l,f,ulong_long_arg);break; |
|
876 |
+#endif |
|
877 |
+ } |
|
878 |
+ } |
|
879 |
+ /* include the optional minus sign and possible "0x" |
|
880 |
+ in the region before the zero padding insertion point */ |
|
881 |
+ if (zero_padding_insertion_ind < str_arg_l && |
|
882 |
+ tmp[zero_padding_insertion_ind] == '-') { |
|
883 |
+ zero_padding_insertion_ind++; |
|
884 |
+ } |
|
885 |
+ if (zero_padding_insertion_ind+1 < str_arg_l && |
|
886 |
+ tmp[zero_padding_insertion_ind] == '0' && |
|
887 |
+ (tmp[zero_padding_insertion_ind+1] == 'x' || |
|
888 |
+ tmp[zero_padding_insertion_ind+1] == 'X') ) { |
|
889 |
+ zero_padding_insertion_ind += 2; |
|
890 |
+ } |
|
891 |
+ } |
|
892 |
+ { size_t num_of_digits = str_arg_l - zero_padding_insertion_ind; |
|
893 |
+ if (alternate_form && fmt_spec == 'o' |
|
894 |
+#ifdef HPUX_COMPATIBLE /* ("%#.o",0) -> "" */ |
|
895 |
+ && (str_arg_l > 0) |
|
896 |
+#endif |
|
897 |
+#ifdef DIGITAL_UNIX_BUG_COMPATIBLE /* ("%#o",0) -> "00" */ |
|
898 |
+#else |
|
899 |
+ /* unless zero is already the first character */ |
|
900 |
+ && !(zero_padding_insertion_ind < str_arg_l |
|
901 |
+ && tmp[zero_padding_insertion_ind] == '0') |
|
902 |
+#endif |
|
903 |
+ ) { /* assure leading zero for alternate-form octal numbers */ |
|
904 |
+ if (!precision_specified || precision < num_of_digits+1) { |
|
905 |
+ /* precision is increased to force the first character to be zero, |
|
906 |
+ except if a zero value is formatted with an explicit precision |
|
907 |
+ of zero */ |
|
908 |
+ precision = num_of_digits+1; precision_specified = 1; |
|
909 |
+ } |
|
910 |
+ } |
|
911 |
+ /* zero padding to specified precision? */ |
|
912 |
+ if (num_of_digits < precision) |
|
913 |
+ number_of_zeros_to_pad = precision - num_of_digits; |
|
914 |
+ } |
|
915 |
+ /* zero padding to specified minimal field width? */ |
|
916 |
+ if (!justify_left && zero_padding) { |
|
917 |
+ int n = min_field_width - (str_arg_l+number_of_zeros_to_pad); |
|
918 |
+ if (n > 0) number_of_zeros_to_pad += n; |
|
919 |
+ } |
|
920 |
+ break; |
|
921 |
+ } |
|
922 |
+ default: /* unrecognized conversion specifier, keep format string as-is*/ |
|
923 |
+ zero_padding = 0; /* turn zero padding off for non-numeric convers. */ |
|
924 |
+#ifndef DIGITAL_UNIX_COMPATIBLE |
|
925 |
+ justify_left = 1; min_field_width = 0; /* reset flags */ |
|
926 |
+#endif |
|
927 |
+#if defined(PERL_COMPATIBLE) || defined(LINUX_COMPATIBLE) |
|
928 |
+ /* keep the entire format string unchanged */ |
|
929 |
+ str_arg = starting_p; str_arg_l = p - starting_p; |
|
930 |
+ /* well, not exactly so for Linux, which does something inbetween, |
|
931 |
+ * and I don't feel an urge to imitate it: "%+++++hy" -> "%+y" */ |
|
932 |
+#else |
|