Browse code

Removed non-portable code. Minor algorithmic improvements.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/seqbias@52193 bc3139a8-67e5-0310-9ffc-ced21a209358

Daniel Jones authored on 30/01/2011 05:41:26
Showing 11 changed files

... ...
@@ -1,13 +1,13 @@
1 1
 Package: seqbias
2
-Version: 0.99.1
2
+Version: 0.99.2
3 3
 Date: 25-12-2010
4 4
 Title: Estimation of per-position bias in high-throughput sequencing data
5
-Description: This package implements a model of per-position sequencing bias i
5
+Description: This package implements a model of per-position sequencing bias in
6 6
     high-throughput sequencing data using a simple Bayesian network, the
7 7
     structure and parameters of which are trained on a set of aligned reads
8 8
     and a reference genome sequence.
9 9
 Author: Daniel Jones <dcjones@cs.washington.edu>
10
-Maintainer: Dainel Jones <dcjones@cs.washington.edu>
10
+Maintainer: Daniel Jones <dcjones@cs.washington.edu>
11 11
 Depends: R (>= 2.13.0), GenomicRanges (>= 0.1.0),
12 12
          Biostrings (>= 2.15.0), methods 
13 13
 Suggests: Rsamtools (>= 1.3.18)
... ...
@@ -10,16 +10,16 @@ generated by GNU Autoconf 2.67.  Invocation command line was
10 10
 ## Platform. ##
11 11
 ## --------- ##
12 12
 
13
-hostname = alpaca
14
-uname -m = x86_64
15
-uname -r = 2.6.31.12-0.2-desktop
13
+hostname = dtp2
14
+uname -m = i686
15
+uname -r = 2.6.35-22-generic-pae
16 16
 uname -s = Linux
17
-uname -v = #1 SMP PREEMPT 2010-03-16 21:25:39 +0100
17
+uname -v = #34-Ubuntu SMP Sun Oct 10 11:03:48 UTC 2010
18 18
 
19 19
 /usr/bin/uname -p = unknown
20 20
 /bin/uname -X     = unknown
21 21
 
22
-/bin/arch              = x86_64
22
+/bin/arch              = unknown
23 23
 /usr/bin/arch -k       = unknown
24 24
 /usr/convex/getsysinfo = unknown
25 25
 /usr/bin/hostinfo      = unknown
... ...
@@ -27,18 +27,17 @@ uname -v = #1 SMP PREEMPT 2010-03-16 21:25:39 +0100
27 27
 /usr/bin/oslevel       = unknown
28 28
 /bin/universe          = unknown
29 29
 
30
-PATH: /usr/lib64/mpi/gcc/openmpi/bin
31
-PATH: /home/cwon2/bin
30
+PATH: /home/dcjones/bin
31
+PATH: /home/dcjones/bin/i686
32
+PATH: /home/dcjones/src/kent/built/bin/i386
33
+PATH: /home/dcjones/.cabal/bin
34
+PATH: /usr/local/sbin
32 35
 PATH: /usr/local/bin
36
+PATH: /usr/sbin
33 37
 PATH: /usr/bin
38
+PATH: /sbin
34 39
 PATH: /bin
35
-PATH: /usr/bin/X11
36
-PATH: /usr/X11R6/bin
37 40
 PATH: /usr/games
38
-PATH: /opt/kde3/bin
39
-PATH: /usr/lib/mit/bin
40
-PATH: /usr/lib/mit/sbin
41
-PATH: /usr/lib/qt3/bin
42 41
 
43 42
 
44 43
 ## ----------- ##
... ...
@@ -50,18 +49,18 @@ configure:2170: found /usr/bin/gcc
50 49
 configure:2181: result: gcc
51 50
 configure:2410: checking for C compiler version
52 51
 configure:2419: gcc --version >&5
53
-gcc (SUSE Linux) 4.4.1 [gcc-4_4-branch revision 150839]
54
-Copyright (C) 2009 Free Software Foundation, Inc.
52
+gcc (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5
53
+Copyright (C) 2010 Free Software Foundation, Inc.
55 54
 This is free software; see the source for copying conditions.  There is NO
56 55
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
57 56
 
58 57
 configure:2430: $? = 0
59 58
 configure:2419: gcc -v >&5
60 59
 Using built-in specs.
61
-Target: x86_64-suse-linux
62
-Configured with: ../configure --prefix=/usr --infodir=/usr/share/info --mandir=/usr/share/man --libdir=/usr/lib64 --libexecdir=/usr/lib64 --enable-languages=c,c++,objc,fortran,obj-c++,java,ada --enable-checking=release --with-gxx-include-dir=/usr/include/c++/4.4 --enable-ssp --disable-libssp --with-bugurl=http://bugs.opensuse.org/ --with-pkgversion='SUSE Linux' --disable-libgcj --disable-libmudflap --with-slibdir=/lib64 --with-system-zlib --enable-__cxa_atexit --enable-libstdcxx-allocator=new --disable-libstdcxx-pch --enable-version-specific-runtime-libs --program-suffix=-4.4 --enable-linux-futex --without-system-libunwind --with-arch-32=i586 --with-tune=generic --build=x86_64-suse-linux
60
+Target: i686-linux-gnu
61
+Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro 4.4.4-14ubuntu5' --with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.4 --enable-shared --enable-multiarch --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4 --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc --enable-targets=all --disable-werror --with-arch-32=i686 --with-tune=generic --enable-checking=release --build=i686-linux-gnu --host=i686-linux-gnu --target=i686-linux-gnu
63 62
 Thread model: posix
64
-gcc version 4.4.1 [gcc-4_4-branch revision 150839] (SUSE Linux) 
63
+gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5) 
65 64
 configure:2430: $? = 0
66 65
 configure:2419: gcc -V >&5
67 66
 gcc: '-V' option must have argument
... ...
@@ -119,8 +118,8 @@ configure:2948: result: yes
119 118
 configure:2948: checking for pow
120 119
 configure:2948: gcc -o conftest -g -O2   conftest.c -lz  >&5
121 120
 conftest.c:35: warning: conflicting types for built-in function 'pow'
122
-/tmp/ccYE2dt3.o: In function `main':
123
-/home/cwon2/Download/seqbias/conftest.c:46: undefined reference to `pow'
121
+/tmp/ccxSNas5.o: In function `main':
122
+/home/dcjones/bio/seqbias/conftest.c:46: undefined reference to `pow'
124 123
 collect2: ld returned 1 exit status
125 124
 configure:2948: $? = 1
126 125
 configure: failed program was:
... ...
@@ -177,8 +176,8 @@ configure:2948: result: no
177 176
 configure:2948: checking for sqrt
178 177
 configure:2948: gcc -o conftest -g -O2   conftest.c -lz  >&5
179 178
 conftest.c:35: warning: conflicting types for built-in function 'sqrt'
180
-/tmp/ccKTGxDe.o: In function `main':
181
-/home/cwon2/Download/seqbias/conftest.c:46: undefined reference to `sqrt'
179
+/tmp/ccFZLqEa.o: In function `main':
180
+/home/dcjones/bio/seqbias/conftest.c:46: undefined reference to `sqrt'
182 181
 collect2: ld returned 1 exit status
183 182
 configure:2948: $? = 1
184 183
 configure: failed program was:
... ...
@@ -255,7 +254,8 @@ configure:2962: checking how to run the C preprocessor
255 254
 configure:2993: gcc -E  conftest.c
256 255
 configure:2993: $? = 0
257 256
 configure:3007: gcc -E  conftest.c
258
-conftest.c:16:28: error: ac_nonexistent.h: No such file or directory
257
+conftest.c:16: fatal error: ac_nonexistent.h: No such file or directory
258
+compilation terminated.
259 259
 configure:3007: $? = 1
260 260
 configure: failed program was:
261 261
 | /* confdefs.h */
... ...
@@ -278,7 +278,8 @@ configure:3032: result: gcc -E
278 278
 configure:3052: gcc -E  conftest.c
279 279
 configure:3052: $? = 0
280 280
 configure:3066: gcc -E  conftest.c
281
-conftest.c:16:28: error: ac_nonexistent.h: No such file or directory
281
+conftest.c:16: fatal error: ac_nonexistent.h: No such file or directory
282
+compilation terminated.
282 283
 configure:3066: $? = 1
283 284
 configure: failed program was:
284 285
 | /* confdefs.h */
... ...
@@ -298,9 +299,9 @@ configure: failed program was:
298 299
 | /* end confdefs.h.  */
299 300
 | #include <ac_nonexistent.h>
300 301
 configure:3095: checking for grep that handles long lines and -e
301
-configure:3153: result: /usr/bin/grep
302
+configure:3153: result: /bin/grep
302 303
 configure:3158: checking for egrep
303
-configure:3220: result: /usr/bin/grep -E
304
+configure:3220: result: /bin/grep -E
304 305
 configure:3225: checking for ANSI C header files
305 306
 configure:3245: gcc -c -g -O2  conftest.c >&5
306 307
 configure:3245: $? = 0
... ...
@@ -776,7 +777,7 @@ generated by GNU Autoconf 2.67.  Invocation command line was
776 777
   CONFIG_COMMANDS = 
777 778
   $ ./config.status 
778 779
 
779
-on alpaca
780
+on dtp2
780 781
 
781 782
 config.status:734: creating src/Makevars
782 783
 
... ...
@@ -833,8 +834,8 @@ ac_cv_header_unistd_h=yes
833 834
 ac_cv_header_zlib_h=yes
834 835
 ac_cv_lib_z_gzeof=yes
835 836
 ac_cv_objext=o
836
-ac_cv_path_EGREP='/usr/bin/grep -E'
837
-ac_cv_path_GREP=/usr/bin/grep
837
+ac_cv_path_EGREP='/bin/grep -E'
838
+ac_cv_path_GREP=/bin/grep
838 839
 ac_cv_prog_CPP='gcc -E'
839 840
 ac_cv_prog_ac_ct_CC=gcc
840 841
 ac_cv_prog_cc_c89=
... ...
@@ -857,9 +858,9 @@ DEFS='-DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAG
857 858
 ECHO_C=''
858 859
 ECHO_N='-n'
859 860
 ECHO_T=''
860
-EGREP='/usr/bin/grep -E'
861
+EGREP='/bin/grep -E'
861 862
 EXEEXT=''
862
-GREP='/usr/bin/grep'
863
+GREP='/bin/grep'
863 864
 LDFLAGS=''
864 865
 LIBOBJS=''
865 866
 LIBS='-lz '
... ...
@@ -872,7 +873,7 @@ PACKAGE_TARNAME=''
872 873
 PACKAGE_URL=''
873 874
 PACKAGE_VERSION=''
874 875
 PATH_SEPARATOR=':'
875
-SHELL='/bin/sh'
876
+SHELL='/bin/bash'
876 877
 ac_ct_CC='gcc'
877 878
 bindir='${exec_prefix}/bin'
878 879
 build_alias=''
... ...
@@ -1,4 +1,4 @@
1
-#! /bin/sh
1
+#! /bin/bash
2 2
 # Generated by configure.
3 3
 # Run this file to recreate the current configuration.
4 4
 # Compiler output produced by configure, useful for debugging
... ...
@@ -8,7 +8,7 @@ debug=false
8 8
 ac_cs_recheck=false
9 9
 ac_cs_silent=false
10 10
 
11
-SHELL=${CONFIG_SHELL-/bin/sh}
11
+SHELL=${CONFIG_SHELL-/bin/bash}
12 12
 export SHELL
13 13
 ## -------------------- ##
14 14
 ## M4sh Initialization. ##
... ...
@@ -438,7 +438,7 @@ Copyright (C) 2010 Free Software Foundation, Inc.
438 438
 This config.status script is free software; the Free Software Foundation
439 439
 gives unlimited permission to copy, distribute and modify it."
440 440
 
441
-ac_pwd='/home/cwon2/Download/seqbias'
441
+ac_pwd='/home/dcjones/bio/seqbias'
442 442
 srcdir='.'
443 443
 test -n "$AWK" || AWK=awk
444 444
 # The default lists apply if the user does not specify any file.
... ...
@@ -506,10 +506,10 @@ if $ac_cs_silent; then
506 506
 fi
507 507
 
508 508
 if $ac_cs_recheck; then
509
-  set X '/bin/sh' './configure'  $ac_configure_extra_args --no-create --no-recursion
509
+  set X '/bin/bash' './configure'  $ac_configure_extra_args --no-create --no-recursion
510 510
   shift
511
-  $as_echo "running CONFIG_SHELL=/bin/sh $*" >&6
512
-  CONFIG_SHELL='/bin/sh'
511
+  $as_echo "running CONFIG_SHELL=/bin/bash $*" >&6
512
+  CONFIG_SHELL='/bin/bash'
513 513
   export CONFIG_SHELL
514 514
   exec "$@"
515 515
 fi
... ...
@@ -592,8 +592,8 @@ echo 'BEGIN {' >"$tmp/subs1.awk" &&
592 592
 cat >>"$tmp/subs1.awk" <<\_ACAWK &&
593 593
 S["LTLIBOBJS"]=""
594 594
 S["LIBOBJS"]=""
595
-S["EGREP"]="/usr/bin/grep -E"
596
-S["GREP"]="/usr/bin/grep"
595
+S["EGREP"]="/bin/grep -E"
596
+S["GREP"]="/bin/grep"
597 597
 S["CPP"]="gcc -E"
598 598
 S["OBJEXT"]="o"
599 599
 S["EXEEXT"]=""
... ...
@@ -643,7 +643,7 @@ S["PACKAGE_VERSION"]=""
643 643
 S["PACKAGE_TARNAME"]=""
644 644
 S["PACKAGE_NAME"]=""
645 645
 S["PATH_SEPARATOR"]=":"
646
-S["SHELL"]="/bin/sh"
646
+S["SHELL"]="/bin/bash"
647 647
 _ACAWK
648 648
 cat >>"$tmp/subs1.awk" <<_ACAWK &&
649 649
   for (key in S) S_is_set[key] = 1
... ...
@@ -9,10 +9,11 @@ YAML_CPP_OBJ = aliascontent.o conversion.o emitter.o emitterstate.o \
9 9
                stream.o tag.o
10 10
 
11 11
 SEQBIAS_OBJ = sequencing_bias.o kmers.o miscmath.o common.o \
12
-			  table.o superfasthash.o logger.o seqbias.o
12
+			  table.o superfasthash.o logger.o seqbias.o \
13
+			  snprintf.o
13 14
 
14
-DFLAGS = -D_USE_KNETFILE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
15
-PKG_CFLAGS  += $(DFLAGS) -Wno-unused-result -Wno-format
15
+DFLAGS = -D_USE_KNETFILE -D_FILE_OFFSET_BITS=64
16
+PKG_CFLAGS  += $(DFLAGS)
16 17
 PKG_CPPFLAGS = $(PKG_CFLAGS)
17 18
 
18 19
 PKG_LIBS+=-lz 
... ...
@@ -9,10 +9,11 @@ YAML_CPP_OBJ = aliascontent.o conversion.o emitter.o emitterstate.o \
9 9
                stream.o tag.o
10 10
 
11 11
 SEQBIAS_OBJ = sequencing_bias.o kmers.o miscmath.o common.o \
12
-			  table.o superfasthash.o logger.o seqbias.o
12
+			  table.o superfasthash.o logger.o seqbias.o \
13
+			  snprintf.o
13 14
 
14
-DFLAGS = -D_USE_KNETFILE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
15
-PKG_CFLAGS  += $(DFLAGS) -Wno-unused-result -Wno-format
15
+DFLAGS = -D_USE_KNETFILE -D_FILE_OFFSET_BITS=64
16
+PKG_CFLAGS  += $(DFLAGS)
16 17
 PKG_CPPFLAGS = $(PKG_CFLAGS)
17 18
 
18 19
 PKG_LIBS+=@LIBS@
... ...
@@ -596,7 +596,8 @@ double conditional_likelihood( size_t n, const double* l0, const double* l1, con
596 596
 
597 597
     l = 0.0;
598 598
     for( i = 0; i < n; i++ ) {
599
-        l += (c[i] == 0 ? l0[i] : l1[i]) - logaddexp( l0[i], l1[i] );
599
+        l += (c[i] == 0 ? l0[i] : l1[i])
600
+                    - logaddexp( l0[i], l1[i] );
600 601
     }
601 602
 
602 603
     return l;
... ...
@@ -715,7 +716,7 @@ void train_motifs( motif& M0, motif& M1,
715 716
     while( true ) {
716 717
         round_num++;
717 718
 
718
-        log_printf( LOG_MSG, "round_num %4d (ic = %0.4e) ", round_num, ic_curr );
719
+        log_printf( LOG_MSG, "round %4d (ic = %0.4e) ", round_num, ic_curr );
719 720
 
720 721
         ic_best = -HUGE_VAL;
721 722
         j_best = i_best = 0;
... ...
@@ -730,7 +731,6 @@ void train_motifs( motif& M0, motif& M1,
730 731
 
731 732
 
732 733
             for( i = i_start; i <= j; i++ ) {
733
-                log_puts( LOG_MSG, "." );
734 734
 
735 735
                 if( M0.has_edge( i, j ) ) {
736 736
                     continue;
... ...
@@ -740,6 +740,8 @@ void train_motifs( motif& M0, motif& M1,
740 740
                     continue;
741 741
                 }
742 742
 
743
+                log_puts( LOG_MSG, "." );
744
+
743 745
                 /* keep track of the old parameters to avoid retraining */
744 746
                 M0.store_row(j);
745 747
                 M1.store_row(j);
... ...
@@ -917,10 +919,11 @@ void train_motifs_backwards( motif& M0, motif& M1,
917 919
             i_last = M0.num_parents(j) > 1 ? j-1 : j;
918 920
 
919 921
             for( i = 0; i <= i_last; i++ ) {
920
-                log_puts( LOG_MSG, "." );
921 922
 
922 923
                 if( !M0.has_edge( i, j ) ) continue;
923 924
 
925
+                log_puts( LOG_MSG, "." );
926
+
924 927
                 /* keep track of the old parameters to avoid retraining */
925 928
                 M0.store_row(j);
926 929
                 M1.store_row(j);
... ...
@@ -2,6 +2,7 @@
2 2
 #include "sequencing_bias.hpp"
3 3
 #include "logger.h"
4 4
 #include "common.hpp"
5
+#include "miscmath.hpp"
5 6
 
6 7
 #include <cmath>
7 8
 #include <cctype>
... ...
@@ -14,11 +15,31 @@ using namespace std;
14 15
 
15 16
 
16 17
 /* simply uniform random numbers */
17
-double rand_uniform( double a, double b )
18
+static double rand_uniform( double a, double b )
18 19
 {
19 20
     return a + b * (double)rand() / (double)RAND_MAX;
20 21
 }
21 22
 
23
+/* random gaussians (copied from GSL, to avoid dependency) */
24
+static double rand_gauss( double sigma )
25
+{
26
+    double x, y, r2;
27
+
28
+    do
29
+    {
30
+        /* choose x,y in uniform square (-1,-1) to (+1,+1) */
31
+        x = -1 + 2 * rand_uniform(0.0,1.0);
32
+        y = -1 + 2 * rand_uniform(0.0,1.0);
33
+
34
+        /* see if it is in the unit circle */
35
+        r2 = x * x + y * y;
36
+    }
37
+    while (r2 > 1.0 || r2 == 0);
38
+
39
+    /* Box-Muller transform */
40
+    return sigma * y * sqrt (-2.0 * log (r2) / r2);
41
+}
42
+
22 43
 
23 44
 /* pseudocount used when sampling foreground and background nucleotide * frequencies */
24 45
 const double sequencing_bias::pseudocount = 1;
... ...
@@ -172,14 +193,14 @@ void sequencing_bias::build( const char* ref_fn,
172 193
         failf( "Can't open bam file '%s'.\n", reads_fn );
173 194
     }
174 195
 
196
+    /* find the first n unique reads by hashing */
175 197
     table T;
176
-    hash_reads( &T, reads_f, n );
177
-    n = T.m;
198
+    hash_reads( &T, reads_f, 0 );
178 199
 
179
-    /* resort the remaining (1-q)*n by position */
180
-    log_puts( LOG_MSG, "sorting by position ... " );
200
+    /* sort by position */
201
+    log_puts( LOG_MSG, "shuffling ... " );
181 202
     struct hashed_value** S;
182
-    table_sort_by_position( &T, &S );
203
+    table_sort_by_seq_rand( &T, &S );
183 204
     log_puts( LOG_MSG, "done.\n" );
184 205
 
185 206
 
... ...
@@ -196,13 +217,12 @@ void sequencing_bias::build( const char* ref_fn,
196 217
     std::deque<sequence*> training_seqs;
197 218
 
198 219
 
199
-    /* sample this many background sequence for each foreground sequence */
200
-    const int bg_count = 1;
220
+    /* background sampling */
221
+    const size_t bg_samples = 1; // make this many samples for each read
222
+    int bg_sample_num;           // keep track of the number of samples made
223
+    struct read_pos bg;          // background position being considered
201 224
     
202
-    int j;
203
-    pos bg_pos;
204
-    pos bg_offset;
205
-
225
+    int b;
206 226
     char*          seqname   = NULL;
207 227
     int            seqlen    = 0;
208 228
     int            curr_tid  = -1;
... ...
@@ -213,7 +233,7 @@ void sequencing_bias::build( const char* ref_fn,
213 233
     local_seq[L+R+1] = '\0';
214 234
 
215 235
 
216
-    for( i = 0; i < n; i++ ) {
236
+    for( i = 0; i < n && i < T.m; i++ ) {
217 237
 
218 238
         /* Load/switch sequences (chromosomes) as they are encountered in the
219 239
          * read stream. The idea here is to avoid thrashing by loading a large
... ...
@@ -241,7 +261,8 @@ void sequencing_bias::build( const char* ref_fn,
241 261
         }
242 262
 
243 263
         if( seq == NULL ) continue;
244
-        
264
+
265
+
245 266
         /* add a foreground sequence */
246 267
         if( S[i]->pos.strand ) {
247 268
             if( S[i]->pos.pos < R ) continue;
... ...
@@ -258,31 +279,29 @@ void sequencing_bias::build( const char* ref_fn,
258 279
 
259 280
         /* add a background sequence */
260 281
         /* adjust the current read position randomly, and sample */
261
-        for( j = 0; j < bg_count; j++ ) {
282
+        for( bg_sample_num = 0; bg_sample_num < bg_samples; bg_sample_num++ ) {
262 283
 
263
-            /* make things a bit more robust by sampling the background from a
264
-             * random offset */
265
-            bg_offset = (pos)rand_uniform( 100.0, 200.0 );
266
-            if( rand_uniform( -1.0, 1.0 ) < 0.0 ) bg_offset = -bg_offset;
284
+            /* attempt to sample a position near the current read, with no reads
285
+             * itself. */
286
+            memcpy( (void*)&bg, (void*)&S[i]->pos, sizeof(struct read_pos) );
267 287
 
268
-            bg_pos = S[i]->pos.pos + bg_offset;
288
+            bg.pos = S[i]->pos.pos + (pos)ceil( rand_gauss( 10 ) );
269 289
 
270
-            if( S[i]->pos.strand ) {
271
-                if( bg_pos < R ) continue;
272
-                memcpy( local_seq, seq + bg_pos - R, (L+1+R)*sizeof(char) );
290
+            if( bg.strand ) {
291
+                if( bg.pos < R ) continue;
292
+                memcpy( local_seq, seq + bg.pos - R, (L+1+R)*sizeof(char) );
273 293
                 seqrc( local_seq, L+1+R );
274 294
             }
275 295
             else {
276
-                if( bg_pos < L ) continue;
277
-                memcpy( local_seq, seq + (bg_pos-L), (L+1+R)*sizeof(char) );
296
+                if( bg.pos < L ) continue;
297
+                memcpy( local_seq, seq + (bg.pos-L), (L+1+R)*sizeof(char) );
278 298
             }
279 299
 
280 300
             training_seqs.push_back( new sequence( local_seq, 0 ) );
281 301
         }
282 302
     }
283 303
 
284
-
285
-    size_t max_k = 5;
304
+    size_t max_k = 3;
286 305
     size_t max_dep_dist = 5;
287 306
     M0 = new motif( L+1+R, max_k, 0 );
288 307
     M1 = new motif( L+1+R, max_k, 1 );
289 308
new file mode 100644
... ...
@@ -0,0 +1,1025 @@
1
+/*
2
+ * snprintf.c - a portable implementation of snprintf
3
+ *
4
+ * AUTHOR
5
+ *   Mark Martinec <mark.martinec@ijs.si>, April 1999.
6
+ *
7
+ *   Copyright 1999, Mark Martinec. All rights reserved.
8
+ *
9
+ * TERMS AND CONDITIONS
10
+ *   This program is free software; you can redistribute it and/or modify
11
+ *   it under the terms of the "Frontier Artistic License" which comes
12
+ *   with this Kit.
13
+ *
14
+ *   This program is distributed in the hope that it will be useful,
15
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty
16
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
17
+ *   See the Frontier Artistic License for more details.
18
+ *
19
+ *   You should have received a copy of the Frontier Artistic License
20
+ *   with this Kit in the file named LICENSE.txt .
21
+ *   If not, I'll be glad to provide one.
22
+ *
23
+ * FEATURES
24
+ * - careful adherence to specs regarding flags, field width and precision;
25
+ * - good performance for large string handling (large format, large
26
+ *   argument or large paddings). Performance is similar to system's sprintf
27
+ *   and in several cases significantly better (make sure you compile with
28
+ *   optimizations turned on, tell the compiler the code is strict ANSI
29
+ *   if necessary to give it more freedom for optimizations);
30
+ * - return value semantics per ISO/IEC 9899:1999 ("ISO C99");
31
+ * - written in standard ISO/ANSI C - requires an ANSI C compiler.
32
+ *
33
+ * SUPPORTED CONVERSION SPECIFIERS AND DATA TYPES
34
+ *
35
+ * This snprintf only supports the following conversion specifiers:
36
+ * s, c, d, u, o, x, X, p  (and synonyms: i, D, U, O - see below)
37
+ * with flags: '-', '+', ' ', '0' and '#'.
38
+ * An asterisk is supported for field width as well as precision.
39
+ *
40
+ * Length modifiers 'h' (short int), 'l' (long int),
41
+ * and 'll' (long long int) are supported.
42
+ * NOTE:
43
+ *   If macro SNPRINTF_LONGLONG_SUPPORT is not defined (default) the
44
+ *   length modifier 'll' is recognized but treated the same as 'l',
45
+ *   which may cause argument value truncation! Defining
46
+ *   SNPRINTF_LONGLONG_SUPPORT requires that your system's sprintf also
47
+ *   handles length modifier 'll'.  long long int is a language extension
48
+ *   which may not be portable.
49
+ *
50
+ * Conversion of numeric data (conversion specifiers d, u, o, x, X, p)
51
+ * with length modifiers (none or h, l, ll) is left to the system routine
52
+ * sprintf, but all handling of flags, field width and precision as well as
53
+ * c and s conversions is done very carefully by this portable routine.
54
+ * If a string precision (truncation) is specified (e.g. %.8s) it is
55
+ * guaranteed the string beyond the specified precision will not be referenced.
56
+ *
57
+ * Length modifiers h, l and ll are ignored for c and s conversions (data
58
+ * types wint_t and wchar_t are not supported).
59
+ *
60
+ * The following common synonyms for conversion characters are supported:
61
+ *   - i is a synonym for d
62
+ *   - D is a synonym for ld, explicit length modifiers are ignored
63
+ *   - U is a synonym for lu, explicit length modifiers are ignored
64
+ *   - O is a synonym for lo, explicit length modifiers are ignored
65
+ * The D, O and U conversion characters are nonstandard, they are supported
66
+ * for backward compatibility only, and should not be used for new code.
67
+ *
68
+ * The following is specifically NOT supported:
69
+ *   - flag ' (thousands' grouping character) is recognized but ignored
70
+ *   - numeric conversion specifiers: f, e, E, g, G and synonym F,
71
+ *     as well as the new a and A conversion specifiers
72
+ *   - length modifier 'L' (long double) and 'q' (quad - use 'll' instead)
73
+ *   - wide character/string conversions: lc, ls, and nonstandard
74
+ *     synonyms C and S
75
+ *   - writeback of converted string length: conversion character n
76
+ *   - the n$ specification for direct reference to n-th argument
77
+ *   - locales
78
+ *
79
+ * It is permitted for str_m to be zero, and it is permitted to specify NULL
80
+ * pointer for resulting string argument if str_m is zero (as per ISO C99).
81
+ *
82
+ * The return value is the number of characters which would be generated
83
+ * for the given input, excluding the trailing null. If this value
84
+ * is greater or equal to str_m, not all characters from the result
85
+ * have been stored in str, output bytes beyond the (str_m-1) -th character
86
+ * are discarded. If str_m is greater than zero it is guaranteed
87
+ * the resulting string will be null-terminated.
88
+ *
89
+ * NOTE that this matches the ISO C99, OpenBSD, and GNU C library 2.1,
90
+ * but is different from some older and vendor implementations,
91
+ * and is also different from XPG, XSH5, SUSv2 specifications.
92
+ * For historical discussion on changes in the semantics and standards
93
+ * of snprintf see printf(3) man page in the Linux programmers manual.
94
+ *
95
+ * Routines asprintf and vasprintf return a pointer (in the ptr argument)
96
+ * to a buffer sufficiently large to hold the resulting string. This pointer
97
+ * should be passed to free(3) to release the allocated storage when it is
98
+ * no longer needed. If sufficient space cannot be allocated, these functions
99
+ * will return -1 and set ptr to be a NULL pointer. These two routines are a
100
+ * GNU C library extensions (glibc).
101
+ *
102
+ * Routines asnprintf and vasnprintf are similar to asprintf and vasprintf,
103
+ * yet, like snprintf and vsnprintf counterparts, will write at most str_m-1
104
+ * characters into the allocated output string, the last character in the
105
+ * allocated buffer then gets the terminating null. If the formatted string
106
+ * length (the return value) is greater than or equal to the str_m argument,
107
+ * the resulting string was truncated and some of the formatted characters
108
+ * were discarded. These routines present a handy way to limit the amount
109
+ * of allocated memory to some sane value.
110
+ *
111
+ * AVAILABILITY
112
+ *   http://www.ijs.si/software/snprintf/
113
+ *
114
+ * REVISION HISTORY
115
+ * 1999-04	V0.9  Mark Martinec
116
+ *		- initial version, some modifications after comparing printf
117
+ *		  man pages for Digital Unix 4.0, Solaris 2.6 and HPUX 10,
118
+ *		  and checking how Perl handles sprintf (differently!);
119
+ * 1999-04-09	V1.0  Mark Martinec <mark.martinec@ijs.si>
120
+ *		- added main test program, fixed remaining inconsistencies,
121
+ *		  added optional (long long int) support;
122
+ * 1999-04-12	V1.1  Mark Martinec <mark.martinec@ijs.si>
123
+ *		- support the 'p' conversion (pointer to void);
124
+ *		- if a string precision is specified
125
+ *		  make sure the string beyond the specified precision
126
+ *		  will not be referenced (e.g. by strlen);
127
+ * 1999-04-13	V1.2  Mark Martinec <mark.martinec@ijs.si>
128
+ *		- support synonyms %D=%ld, %U=%lu, %O=%lo;
129
+ *		- speed up the case of long format string with few conversions;
130
+ * 1999-06-30	V1.3  Mark Martinec <mark.martinec@ijs.si>
131
+ *		- fixed runaway loop (eventually crashing when str_l wraps
132
+ *		  beyond 2^31) while copying format string without
133
+ *		  conversion specifiers to a buffer that is too short
134
+ *		  (thanks to Edwin Young <edwiny@autonomy.com> for
135
+ *		  spotting the problem);
136
+ *		- added macros PORTABLE_SNPRINTF_VERSION_(MAJOR|MINOR)
137
+ *		  to snprintf.h
138
+ * 2000-02-14	V2.0 (never released) Mark Martinec <mark.martinec@ijs.si>
139
+ *		- relaxed license terms: The Artistic License now applies.
140
+ *		  You may still apply the GNU GENERAL PUBLIC LICENSE
141
+ *		  as was distributed with previous versions, if you prefer;
142
+ *		- changed REVISION HISTORY dates to use ISO 8601 date format;
143
+ *		- added vsnprintf (patch also independently proposed by
144
+ *		  Caolan McNamara 2000-05-04, and Keith M Willenson 2000-06-01)
145
+ * 2000-06-27	V2.1  Mark Martinec <mark.martinec@ijs.si>
146
+ *		- removed POSIX check for str_m<1; value 0 for str_m is
147
+ *		  allowed by ISO C99 (and GNU C library 2.1) - (pointed out
148
+ *		  on 2000-05-04 by Caolan McNamara, caolan@ csn dot ul dot ie).
149
+ *		  Besides relaxed license this change in standards adherence
150
+ *		  is the main reason to bump up the major version number;
151
+ *		- added nonstandard routines asnprintf, vasnprintf, asprintf,
152
+ *		  vasprintf that dynamically allocate storage for the
153
+ *		  resulting string; these routines are not compiled by default,
154
+ *		  see comments where NEED_V?ASN?PRINTF macros are defined;
155
+ *		- autoconf contributed by Caolan McNamara
156
+ * 2000-10-06	V2.2  Mark Martinec <mark.martinec@ijs.si>
157
+ *		- BUG FIX: the %c conversion used a temporary variable
158
+ *		  that was no longer in scope when referenced,
159
+ *		  possibly causing incorrect resulting character;
160
+ *		- BUG FIX: make precision and minimal field width unsigned
161
+ *		  to handle huge values (2^31 <= n < 2^32) correctly;
162
+ *		  also be more careful in the use of signed/unsigned/size_t
163
+ *		  internal variables - probably more careful than many
164
+ *		  vendor implementations, but there may still be a case
165
+ *		  where huge values of str_m, precision or minimal field
166
+ *		  could cause incorrect behaviour;
167
+ *		- use separate variables for signed/unsigned arguments,
168
+ *		  and for short/int, long, and long long argument lengths
169
+ *		  to avoid possible incompatibilities on certain
170
+ *		  computer architectures. Also use separate variable
171
+ *		  arg_sign to hold sign of a numeric argument,
172
+ *		  to make code more transparent;
173
+ *		- some fiddling with zero padding and "0x" to make it
174
+ *		  Linux compatible;
175
+ *		- systematically use macros fast_memcpy and fast_memset
176
+ *		  instead of case-by-case hand optimization; determine some
177
+ *		  breakeven string lengths for different architectures;
178
+ *		- terminology change: 'format' -> 'conversion specifier',
179
+ *		  'C9x' -> 'ISO/IEC 9899:1999 ("ISO C99")',
180
+ *		  'alternative form' -> 'alternate form',
181
+ *		  'data type modifier' -> 'length modifier';
182
+ *		- several comments rephrased and new ones added;
183
+ *		- make compiler not complain about 'credits' defined but
184
+ *		  not used;
185
+ */
186
+
187
+
188
+/* Define HAVE_SNPRINTF if your system already has snprintf and vsnprintf.
189
+ *
190
+ * If HAVE_SNPRINTF is defined this module will not produce code for
191
+ * snprintf and vsnprintf, unless PREFER_PORTABLE_SNPRINTF is defined as well,
192
+ * causing this portable version of snprintf to be called portable_snprintf
193
+ * (and portable_vsnprintf).
194
+ */
195
+/* #define HAVE_SNPRINTF */
196
+
197
+/* Define PREFER_PORTABLE_SNPRINTF if your system does have snprintf and
198
+ * vsnprintf but you would prefer to use the portable routine(s) instead.
199
+ * In this case the portable routine is declared as portable_snprintf
200
+ * (and portable_vsnprintf) and a macro 'snprintf' (and 'vsnprintf')
201
+ * is defined to expand to 'portable_v?snprintf' - see file snprintf.h .
202
+ * Defining this macro is only useful if HAVE_SNPRINTF is also defined,
203
+ * but does does no harm if defined nevertheless.
204
+ */
205
+/* #define PREFER_PORTABLE_SNPRINTF */
206
+
207
+/* Define SNPRINTF_LONGLONG_SUPPORT if you want to support
208
+ * data type (long long int) and length modifier 'll' (e.g. %lld).
209
+ * If undefined, 'll' is recognized but treated as a single 'l'.
210
+ *
211
+ * If the system's sprintf does not handle 'll'
212
+ * the SNPRINTF_LONGLONG_SUPPORT must not be defined!
213
+ *
214
+ * This is off by default as (long long int) is a language extension.
215
+ */
216
+/* #define SNPRINTF_LONGLONG_SUPPORT */
217
+
218
+/* Define NEED_SNPRINTF_ONLY if you only need snprintf, and not vsnprintf.
219
+ * If NEED_SNPRINTF_ONLY is defined, the snprintf will be defined directly,
220
+ * otherwise both snprintf and vsnprintf routines will be defined
221
+ * and snprintf will be a simple wrapper around vsnprintf, at the expense
222
+ * of an extra procedure call.
223
+ */
224
+/* #define NEED_SNPRINTF_ONLY */
225
+
226
+/* Define NEED_V?ASN?PRINTF macros if you need library extension
227
+ * routines asprintf, vasprintf, asnprintf, vasnprintf respectively,
228
+ * and your system library does not provide them. They are all small
229
+ * wrapper routines around portable_vsnprintf. Defining any of the four
230
+ * NEED_V?ASN?PRINTF macros automatically turns off NEED_SNPRINTF_ONLY
231
+ * and turns on PREFER_PORTABLE_SNPRINTF.
232
+ *
233
+ * Watch for name conflicts with the system library if these routines
234
+ * are already present there.
235
+ *
236
+ * NOTE: vasprintf and vasnprintf routines need va_copy() from stdarg.h, as
237
+ * specified by C99, to be able to traverse the same list of arguments twice.
238
+ * I don't know of any other standard and portable way of achieving the same.
239
+ * With some versions of gcc you may use __va_copy(). You might even get away
240
+ * with "ap2 = ap", in this case you must not call va_end(ap2) !
241
+ *   #define va_copy(ap2,ap) ap2 = ap
242
+ */
243
+/* #define NEED_ASPRINTF   */
244
+/* #define NEED_ASNPRINTF  */
245
+/* #define NEED_VASPRINTF  */
246
+/* #define NEED_VASNPRINTF */
247
+
248
+
249
+/* Define the following macros if desired:
250
+ *   SOLARIS_COMPATIBLE, SOLARIS_BUG_COMPATIBLE,
251
+ *   HPUX_COMPATIBLE, HPUX_BUG_COMPATIBLE, LINUX_COMPATIBLE,
252
+ *   DIGITAL_UNIX_COMPATIBLE, DIGITAL_UNIX_BUG_COMPATIBLE,
253
+ *   PERL_COMPATIBLE, PERL_BUG_COMPATIBLE,
254
+ *
255
+ * - For portable applications it is best not to rely on peculiarities
256
+ *   of a given implementation so it may be best not to define any
257
+ *   of the macros that select compatibility and to avoid features
258
+ *   that vary among the systems.
259
+ *
260
+ * - Selecting compatibility with more than one operating system
261
+ *   is not strictly forbidden but is not recommended.
262
+ *
263
+ * - 'x'_BUG_COMPATIBLE implies 'x'_COMPATIBLE .
264
+ *
265
+ * - 'x'_COMPATIBLE refers to (and enables) a behaviour that is
266
+ *   documented in a sprintf man page on a given operating system
267
+ *   and actually adhered to by the system's sprintf (but not on
268
+ *   most other operating systems). It may also refer to and enable
269
+ *   a behaviour that is declared 'undefined' or 'implementation specific'
270
+ *   in the man page but a given implementation behaves predictably
271
+ *   in a certain way.
272
+ *
273
+ * - 'x'_BUG_COMPATIBLE refers to (and enables) a behaviour of system's sprintf
274
+ *   that contradicts the sprintf man page on the same operating system.
275
+ *
276
+ * - I do not claim that the 'x'_COMPATIBLE and 'x'_BUG_COMPATIBLE
277
+ *   conditionals take into account all idiosyncrasies of a particular
278
+ *   implementation, there may be other incompatibilities.
279
+ */
280
+
281
+
282
+
283
+/* ============================================= */
284
+/* NO USER SERVICABLE PARTS FOLLOWING THIS POINT */
285
+/* ============================================= */
286
+
287
+#define PORTABLE_SNPRINTF_VERSION_MAJOR 2
288
+#define PORTABLE_SNPRINTF_VERSION_MINOR 2
289
+
290
+#if defined(NEED_ASPRINTF) || defined(NEED_ASNPRINTF) || defined(NEED_VASPRINTF) || defined(NEED_VASNPRINTF)
291
+# if defined(NEED_SNPRINTF_ONLY)
292
+# undef NEED_SNPRINTF_ONLY
293
+# endif
294
+# if !defined(PREFER_PORTABLE_SNPRINTF)
295
+# define PREFER_PORTABLE_SNPRINTF
296
+# endif
297
+#endif
298
+
299
+#if defined(SOLARIS_BUG_COMPATIBLE) && !defined(SOLARIS_COMPATIBLE)
300
+#define SOLARIS_COMPATIBLE
301
+#endif
302
+
303
+#if defined(HPUX_BUG_COMPATIBLE) && !defined(HPUX_COMPATIBLE)
304
+#define HPUX_COMPATIBLE
305
+#endif
306
+
307
+#if defined(DIGITAL_UNIX_BUG_COMPATIBLE) && !defined(DIGITAL_UNIX_COMPATIBLE)
308
+#define DIGITAL_UNIX_COMPATIBLE
309
+#endif
310
+
311
+#if defined(PERL_BUG_COMPATIBLE) && !defined(PERL_COMPATIBLE)
312
+#define PERL_COMPATIBLE
313
+#endif
314
+
315
+#if defined(LINUX_BUG_COMPATIBLE) && !defined(LINUX_COMPATIBLE)
316
+#define LINUX_COMPATIBLE
317
+#endif
318
+
319
+#include <sys/types.h>
320
+#include <string.h>
321
+#include <stdlib.h>
322
+#include <stdio.h>
323
+#include <stdarg.h>
324
+#include <assert.h>
325
+#include <errno.h>
326
+
327
+#ifdef isdigit
328
+#undef isdigit
329
+#endif
330
+#define isdigit(c) ((c) >= '0' && (c) <= '9')
331
+
332
+/* For copying strings longer or equal to 'breakeven_point'
333
+ * it is more efficient to call memcpy() than to do it inline.
334
+ * The value depends mostly on the processor architecture,
335
+ * but also on the compiler and its optimization capabilities.
336
+ * The value is not critical, some small value greater than zero
337
+ * will be just fine if you don't care to squeeze every drop
338
+ * of performance out of the code.
339
+ *
340
+ * Small values favor memcpy, large values favor inline code.
341
+ */
342
+#if defined(__alpha__) || defined(__alpha)
343
+#  define breakeven_point   2	/* AXP (DEC Alpha)     - gcc or cc or egcs */
344
+#endif
345
+#if defined(__i386__)  || defined(__i386)
346
+#  define breakeven_point  12	/* Intel Pentium/Linux - gcc 2.96 */
347
+#endif
348
+#if defined(__hppa)
349
+#  define breakeven_point  10	/* HP-PA               - gcc */
350
+#endif
351
+#if defined(__sparc__) || defined(__sparc)
352
+#  define breakeven_point  33	/* Sun Sparc 5         - gcc 2.8.1 */
353
+#endif
354
+
355
+/* some other values of possible interest: */
356
+/* #define breakeven_point  8 */  /* VAX 4000          - vaxc */
357
+/* #define breakeven_point 19 */  /* VAX 4000          - gcc 2.7.0 */
358
+
359
+#ifndef breakeven_point
360
+#  define breakeven_point   6	/* some reasonable one-size-fits-all value */
361
+#endif
362
+
363
+#define fast_memcpy(d,s,n) \
364
+  { register size_t nn = (size_t)(n); \
365
+    if (nn >= breakeven_point) memcpy((d), (s), nn); \
366
+    else if (nn > 0) { /* proc call overhead is worth only for large strings*/\
367
+      register char *dd; register const char *ss; \
368
+      for (ss=(s), dd=(d); nn>0; nn--) *dd++ = *ss++; } }
369
+
370
+#define fast_memset(d,c,n) \
371
+  { register size_t nn = (size_t)(n); \
372
+    if (nn >= breakeven_point) memset((d), (int)(c), nn); \
373
+    else if (nn > 0) { /* proc call overhead is worth only for large strings*/\
374
+      register char *dd; register const int cc=(int)(c); \
375
+      for (dd=(d); nn>0; nn--) *dd++ = cc; } }
376
+
377
+/* prototypes */
378
+
379
+#if defined(NEED_ASPRINTF)
380
+int asprintf   (char **ptr, const char *fmt, /*args*/ ...);
381
+#endif
382
+#if defined(NEED_VASPRINTF)
383
+int vasprintf  (char **ptr, const char *fmt, va_list ap);
384
+#endif
385
+#if defined(NEED_ASNPRINTF)
386
+int asnprintf  (char **ptr, size_t str_m, const char *fmt, /*args*/ ...);
387
+#endif
388
+#if defined(NEED_VASNPRINTF)
389
+int vasnprintf (char **ptr, size_t str_m, const char *fmt, va_list ap);
390
+#endif
391
+
392
+#if defined(HAVE_SNPRINTF)
393
+/* declare our portable snprintf  routine under name portable_snprintf  */
394
+/* declare our portable vsnprintf routine under name portable_vsnprintf */
395
+#else
396
+/* declare our portable routines under names snprintf and vsnprintf */
397
+#define portable_snprintf snprintf
398
+#if !defined(NEED_SNPRINTF_ONLY)
399
+#define portable_vsnprintf vsnprintf
400
+#endif
401
+#endif
402
+
403
+#if !defined(HAVE_SNPRINTF) || defined(PREFER_PORTABLE_SNPRINTF)
404
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...);
405
+#if !defined(NEED_SNPRINTF_ONLY)
406
+int portable_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap);
407
+#endif
408
+#endif
409
+
410
+/* declarations */
411
+
412
+static char credits[] = "\n\
413
+@(#)snprintf.c, v2.2: Mark Martinec, <mark.martinec@ijs.si>\n\
414
+@(#)snprintf.c, v2.2: Copyright 1999, Mark Martinec. Frontier Artistic License applies.\n\
415
+@(#)snprintf.c, v2.2: http://www.ijs.si/software/snprintf/\n";
416
+
417
+#if defined(NEED_ASPRINTF)
418
+int asprintf(char **ptr, const char *fmt, /*args*/ ...) {
419
+  va_list ap;
420
+  size_t str_m;
421
+  int str_l;
422
+
423
+  *ptr = NULL;
424
+  va_start(ap, fmt);                            /* measure the required size */
425
+  str_l = portable_vsnprintf(NULL, (size_t)0, fmt, ap);
426
+  va_end(ap);
427
+  assert(str_l >= 0);        /* possible integer overflow if str_m > INT_MAX */
428
+  *ptr = (char *) malloc(str_m = (size_t)str_l + 1);
429
+  if (*ptr == NULL) { errno = ENOMEM; str_l = -1; }
430
+  else {
431
+    int str_l2;
432
+    va_start(ap, fmt);
433
+    str_l2 = portable_vsnprintf(*ptr, str_m, fmt, ap);
434
+    va_end(ap);
435
+    assert(str_l2 == str_l);
436
+  }
437
+  return str_l;
438
+}
439
+#endif
440
+
441
+#if defined(NEED_VASPRINTF)
442
+int vasprintf(char **ptr, const char *fmt, va_list ap) {
443
+  size_t str_m;
444
+  int str_l;
445
+
446
+  *ptr = NULL;
447
+  { va_list ap2;
448
+    va_copy(ap2, ap);  /* don't consume the original ap, we'll need it again */
449
+    str_l = portable_vsnprintf(NULL, (size_t)0, fmt, ap2);/*get required size*/
450
+    va_end(ap2);
451
+  }
452
+  assert(str_l >= 0);        /* possible integer overflow if str_m > INT_MAX */
453
+  *ptr = (char *) malloc(str_m = (size_t)str_l + 1);
454
+  if (*ptr == NULL) { errno = ENOMEM; str_l = -1; }
455
+  else {
456
+    int str_l2 = portable_vsnprintf(*ptr, str_m, fmt, ap);
457
+    assert(str_l2 == str_l);
458
+  }
459
+  return str_l;
460
+}
461
+#endif
462
+
463
+#if defined(NEED_ASNPRINTF)
464
+int asnprintf (char **ptr, size_t str_m, const char *fmt, /*args*/ ...) {
465
+  va_list ap;
466
+  int str_l;
467
+
468
+  *ptr = NULL;
469
+  va_start(ap, fmt);                            /* measure the required size */
470
+  str_l = portable_vsnprintf(NULL, (size_t)0, fmt, ap);
471
+  va_end(ap);
472
+  assert(str_l >= 0);        /* possible integer overflow if str_m > INT_MAX */
473
+  if ((size_t)str_l + 1 < str_m) str_m = (size_t)str_l + 1;      /* truncate */
474
+  /* if str_m is 0, no buffer is allocated, just set *ptr to NULL */
475
+  if (str_m == 0) {  /* not interested in resulting string, just return size */
476
+  } else {
477
+    *ptr = (char *) malloc(str_m);
478
+    if (*ptr == NULL) { errno = ENOMEM; str_l = -1; }
479
+    else {
480
+      int str_l2;
481
+      va_start(ap, fmt);
482
+      str_l2 = portable_vsnprintf(*ptr, str_m, fmt, ap);
483
+      va_end(ap);
484
+      assert(str_l2 == str_l);
485
+    }
486
+  }
487
+  return str_l;
488
+}
489
+#endif
490
+
491
+#if defined(NEED_VASNPRINTF)
492
+int vasnprintf (char **ptr, size_t str_m, const char *fmt, va_list ap) {
493
+  int str_l;
494
+
495
+  *ptr = NULL;
496
+  { va_list ap2;
497
+    va_copy(ap2, ap);  /* don't consume the original ap, we'll need it again */
498
+    str_l = portable_vsnprintf(NULL, (size_t)0, fmt, ap2);/*get required size*/
499
+    va_end(ap2);
500
+  }
501
+  assert(str_l >= 0);        /* possible integer overflow if str_m > INT_MAX */
502
+  if ((size_t)str_l + 1 < str_m) str_m = (size_t)str_l + 1;      /* truncate */
503
+  /* if str_m is 0, no buffer is allocated, just set *ptr to NULL */
504
+  if (str_m == 0) {  /* not interested in resulting string, just return size */
505
+  } else {
506
+    *ptr = (char *) malloc(str_m);
507
+    if (*ptr == NULL) { errno = ENOMEM; str_l = -1; }
508
+    else {
509
+      int str_l2 = portable_vsnprintf(*ptr, str_m, fmt, ap);
510
+      assert(str_l2 == str_l);
511
+    }
512
+  }
513
+  return str_l;
514
+}
515
+#endif
516
+
517
+/*
518
+ * If the system does have snprintf and the portable routine is not
519
+ * specifically required, this module produces no code for snprintf/vsnprintf.
520
+ */
521
+#if !defined(HAVE_SNPRINTF) || defined(PREFER_PORTABLE_SNPRINTF)
522
+
523
+#if !defined(NEED_SNPRINTF_ONLY)
524
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...) {
525
+  va_list ap;
526
+  int str_l;
527
+
528
+  va_start(ap, fmt);
529
+  str_l = portable_vsnprintf(str, str_m, fmt, ap);
530
+  va_end(ap);
531
+  return str_l;
532
+}
533
+#endif
534
+
535
+#if defined(NEED_SNPRINTF_ONLY)
536
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...) {
537
+#else
538
+int portable_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap) {
539
+#endif
540
+
541
+#if defined(NEED_SNPRINTF_ONLY)
542
+  va_list ap;
543
+#endif
544
+  size_t str_l = 0;
545
+  const char *p = fmt;
546
+
547
+/* In contrast with POSIX, the ISO C99 now says
548
+ * that str can be NULL and str_m can be 0.
549
+ * This is more useful than the old:  if (str_m < 1) return -1; */
550
+
551
+#if defined(NEED_SNPRINTF_ONLY)
552
+  va_start(ap, fmt);
553
+#endif
554
+  if (!p) p = "";
555
+  while (*p) {
556
+    if (*p != '%') {
557
+   /* if (str_l < str_m) str[str_l++] = *p++;    -- this would be sufficient */
558
+   /* but the following code achieves better performance for cases
559
+    * where format string is long and contains few conversions */
560
+      const char *q = strchr(p+1,'%');
561
+      size_t n = !q ? strlen(p) : (q-p);
562
+      if (str_l < str_m) {
563
+        size_t avail = str_m-str_l;
564
+        fast_memcpy(str+str_l, p, (n>avail?avail:n));
565
+      }
566
+      p += n; str_l += n;
567
+    } else {
568
+      const char *starting_p;
569
+      size_t min_field_width = 0, precision = 0;
570
+      int zero_padding = 0, precision_specified = 0, justify_left = 0;
571
+      int alternate_form = 0, force_sign = 0;
572
+      int space_for_positive = 1; /* If both the ' ' and '+' flags appear,
573
+                                     the ' ' flag should be ignored. */
574
+      char length_modifier = '\0';            /* allowed values: \0, h, l, L */
575
+      char tmp[32];/* temporary buffer for simple numeric->string conversion */
576
+
577
+      const char *str_arg;      /* string address in case of string argument */
578
+      size_t str_arg_l;         /* natural field width of arg without padding
579
+                                   and sign */
580
+      unsigned char uchar_arg;
581
+        /* unsigned char argument value - only defined for c conversion.
582
+           N.B. standard explicitly states the char argument for
583
+           the c conversion is unsigned */
584
+
585
+      size_t number_of_zeros_to_pad = 0;
586
+        /* number of zeros to be inserted for numeric conversions
587
+           as required by the precision or minimal field width */
588
+
589
+      size_t zero_padding_insertion_ind = 0;
590
+        /* index into tmp where zero padding is to be inserted */
591
+
592
+      char fmt_spec = '\0';
593
+        /* current conversion specifier character */
594
+
595
+      str_arg = credits;/* just to make compiler happy (defined but not used)*/
596
+      str_arg = NULL;
597
+      starting_p = p; p++;  /* skip '%' */
598
+   /* parse flags */
599
+      while (*p == '0' || *p == '-' || *p == '+' ||
600
+             *p == ' ' || *p == '#' || *p == '\'') {
601
+        switch (*p) {
602
+        case '0': zero_padding = 1; break;
603
+        case '-': justify_left = 1; break;
604
+        case '+': force_sign = 1; space_for_positive = 0; break;
605
+        case ' ': force_sign = 1;
606
+     /* If both the ' ' and '+' flags appear, the ' ' flag should be ignored */
607
+#ifdef PERL_COMPATIBLE
608
+     /* ... but in Perl the last of ' ' and '+' applies */
609
+                  space_for_positive = 1;
610
+#endif
611
+                  break;
612
+        case '#': alternate_form = 1; break;
613
+        case '\'': break;
614
+        }
615
+        p++;
616
+      }
617
+   /* If the '0' and '-' flags both appear, the '0' flag should be ignored. */
618
+
619
+   /* parse field width */
620
+      if (*p == '*') {
621
+        int j;
622
+        p++; j = va_arg(ap, int);
623
+        if (j >= 0) min_field_width = j;
624
+        else { min_field_width = -j; justify_left = 1; }
625
+      } else if (isdigit((int)(*p))) {
626
+        /* size_t could be wider than unsigned int;
627
+           make sure we treat argument like common implementations do */
628
+        unsigned int uj = *p++ - '0';
629
+        while (isdigit((int)(*p))) uj = 10*uj + (unsigned int)(*p++ - '0');
630
+        min_field_width = uj;
631
+      }
632
+   /* parse precision */
633
+      if (*p == '.') {
634
+        p++; precision_specified = 1;
635
+        if (*p == '*') {
636
+          int j = va_arg(ap, int);
637
+          p++;
638
+          if (j >= 0) precision = j;
639
+          else {
640
+            precision_specified = 0; precision = 0;
641
+         /* NOTE:
642
+          *   Solaris 2.6 man page claims that in this case the precision
643
+          *   should be set to 0.  Digital Unix 4.0, HPUX 10 and BSD man page
644
+          *   claim that this case should be treated as unspecified precision,
645
+          *   which is what we do here.
646
+          */
647
+          }
648
+        } else if (isdigit((int)(*p))) {
649
+          /* size_t could be wider than unsigned int;
650
+             make sure we treat argument like common implementations do */
651
+          unsigned int uj = *p++ - '0';
652
+          while (isdigit((int)(*p))) uj = 10*uj + (unsigned int)(*p++ - '0');
653
+          precision = uj;
654
+        }
655
+      }
656
+   /* parse 'h', 'l' and 'll' length modifiers */
657
+      if (*p == 'h' || *p == 'l') {
658
+        length_modifier = *p; p++;
659
+        if (length_modifier == 'l' && *p == 'l') {   /* double l = long long */
660
+#ifdef SNPRINTF_LONGLONG_SUPPORT
661
+          length_modifier = '2';                  /* double l encoded as '2' */
662
+#else
663
+          length_modifier = 'l';                 /* treat it as a single 'l' */
664
+#endif
665
+          p++;
666
+        }
667
+      }
668
+      fmt_spec = *p;
669
+   /* common synonyms: */
670
+      switch (fmt_spec) {
671
+      case 'i': fmt_spec = 'd'; break;
672
+      case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
673
+      case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
674
+      case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
675
+      default: break;
676
+      }
677
+   /* get parameter value, do initial processing */
678
+      switch (fmt_spec) {
679
+      case '%': /* % behaves similar to 's' regarding flags and field widths */
680
+      case 'c': /* c behaves similar to 's' regarding flags and field widths */
681
+      case 's':
682
+        length_modifier = '\0';          /* wint_t and wchar_t not supported */
683
+     /* the result of zero padding flag with non-numeric conversion specifier*/
684
+     /* is undefined. Solaris and HPUX 10 does zero padding in this case,    */
685
+     /* Digital Unix and Linux does not. */
686
+#if !defined(SOLARIS_COMPATIBLE) && !defined(HPUX_COMPATIBLE)
687
+        zero_padding = 0;    /* turn zero padding off for string conversions */
688
+#endif
689
+        str_arg_l = 1;
690
+        switch (fmt_spec) {
691
+        case '%':
692
+          str_arg = p; break;
693
+        case 'c': {
694
+          int j = va_arg(ap, int);
695
+          uchar_arg = (unsigned char) j;   /* standard demands unsigned char */
696
+          str_arg = (const char *) &uchar_arg;
697
+          break;
698
+        }
699
+        case 's':
700
+          str_arg = va_arg(ap, const char *);
701
+          if (!str_arg) str_arg_l = 0;
702
+       /* make sure not to address string beyond the specified precision !!! */
703
+          else if (!precision_specified) str_arg_l = strlen(str_arg);
704
+       /* truncate string if necessary as requested by precision */
705
+          else if (precision == 0) str_arg_l = 0;
706
+          else {
707
+       /* memchr on HP does not like n > 2^31  !!! */
708
+            const char *q = memchr(str_arg, '\0',
709
+                             precision <= 0x7fffffff ? precision : 0x7fffffff);
710
+            str_arg_l = !q ? precision : (q-str_arg);
711
+          }
712
+          break;
713
+        default: break;
714
+        }
715
+        break;
716
+      case 'd': case 'u': case 'o': case 'x': case 'X': case 'p': {
717
+        /* NOTE: the u, o, x, X and p conversion specifiers imply
718
+                 the value is unsigned;  d implies a signed value */
719
+
720
+        int arg_sign = 0;
721
+          /* 0 if numeric argument is zero (or if pointer is NULL for 'p'),
722
+            +1 if greater than zero (or nonzero for unsigned arguments),
723
+            -1 if negative (unsigned argument is never negative) */
724
+
725
+        int int_arg = 0;  unsigned int uint_arg = 0;
726
+          /* only defined for length modifier h, or for no length modifiers */
727
+
728
+        long int long_arg = 0;  unsigned long int ulong_arg = 0;
729
+          /* only defined for length modifier l */
730
+
731
+        void *ptr_arg = NULL;
732
+          /* pointer argument value -only defined for p conversion */
733
+
734
+#ifdef SNPRINTF_LONGLONG_SUPPORT
735
+        long long int long_long_arg = 0;
736
+        unsigned long long int ulong_long_arg = 0;
737
+          /* only defined for length modifier ll */
738
+#endif
739
+        if (fmt_spec == 'p') {
740
+        /* HPUX 10: An l, h, ll or L before any other conversion character
741
+         *   (other than d, i, u, o, x, or X) is ignored.
742
+         * Digital Unix:
743
+         *   not specified, but seems to behave as HPUX does.
744
+         * Solaris: If an h, l, or L appears before any other conversion
745
+         *   specifier (other than d, i, u, o, x, or X), the behavior
746
+         *   is undefined. (Actually %hp converts only 16-bits of address
747
+         *   and %llp treats address as 64-bit data which is incompatible
748
+         *   with (void *) argument on a 32-bit system).
749
+         */
750
+#ifdef SOLARIS_COMPATIBLE
751
+#  ifdef SOLARIS_BUG_COMPATIBLE
752
+          /* keep length modifiers even if it represents 'll' */
753
+#  else
754
+          if (length_modifier == '2') length_modifier = '\0';
755
+#  endif
756
+#else
757
+          length_modifier = '\0';
758
+#endif
759
+          ptr_arg = va_arg(ap, void *);
760
+          if (ptr_arg != NULL) arg_sign = 1;
761
+        } else if (fmt_spec == 'd') {  /* signed */
762
+          switch (length_modifier) {
763
+          case '\0':
764
+          case 'h':
765
+         /* It is non-portable to specify a second argument of char or short
766
+          * to va_arg, because arguments seen by the called function
767
+          * are not char or short.  C converts char and short arguments
768
+          * to int before passing them to a function.
769
+          */
770
+            int_arg = va_arg(ap, int);
771
+            if      (int_arg > 0) arg_sign =  1;
772
+            else if (int_arg < 0) arg_sign = -1;
773
+            break;
774
+          case 'l':
775
+            long_arg = va_arg(ap, long int);
776
+            if      (long_arg > 0) arg_sign =  1;
777
+            else if (long_arg < 0) arg_sign = -1;
778
+            break;
779
+#ifdef SNPRINTF_LONGLONG_SUPPORT
780
+          case '2':
781
+            long_long_arg = va_arg(ap, long long int);
782
+            if      (long_long_arg > 0) arg_sign =  1;
783
+            else if (long_long_arg < 0) arg_sign = -1;
784
+            break;
785
+#endif
786
+          }
787
+        } else {  /* unsigned */
788
+          switch (length_modifier) {
789
+          case '\0':
790
+          case 'h':
791
+            uint_arg = va_arg(ap, unsigned int);
792
+            if (uint_arg) arg_sign = 1;
793
+            break;
794
+          case 'l':
795
+            ulong_arg = va_arg(ap, unsigned long int);
796
+            if (ulong_arg) arg_sign = 1;
797
+            break;
798
+#ifdef SNPRINTF_LONGLONG_SUPPORT
799
+          case '2':
800
+            ulong_long_arg = va_arg(ap, unsigned long long int);
801
+            if (ulong_long_arg) arg_sign = 1;
802
+            break;
803
+#endif
804
+          }
805
+        }
806
+        str_arg = tmp; str_arg_l = 0;
807
+     /* NOTE:
808
+      *   For d, i, u, o, x, and X conversions, if precision is specified,
809
+      *   the '0' flag should be ignored. This is so with Solaris 2.6,
810
+      *   Digital UNIX 4.0, HPUX 10, Linux, FreeBSD, NetBSD; but not with Perl.
811
+      */
812
+#ifndef PERL_COMPATIBLE
813
+        if (precision_specified) zero_padding = 0;
814
+#endif
815
+        if (fmt_spec == 'd') {
816
+          if (force_sign && arg_sign >= 0)
817
+            tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
818
+         /* leave negative numbers for sprintf to handle,
819
+            to avoid handling tricky cases like (short int)(-32768) */
820
+#ifdef LINUX_COMPATIBLE
821
+        } else if (fmt_spec == 'p' && force_sign && arg_sign > 0) {
822
+          tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
823
+#endif
824
+        } else if (alternate_form) {
825
+          if (arg_sign != 0 && (fmt_spec == 'x' || fmt_spec == 'X') )
826
+            { tmp[str_arg_l++] = '0'; tmp[str_arg_l++] = fmt_spec; }
827
+         /* alternate form should have no effect for p conversion, but ... */
828
+#ifdef HPUX_COMPATIBLE
829
+          else if (fmt_spec == 'p'
830
+         /* HPUX 10: for an alternate form of p conversion,
831
+          *          a nonzero result is prefixed by 0x. */
832
+#ifndef HPUX_BUG_COMPATIBLE
833
+         /* Actually it uses 0x prefix even for a zero value. */
834
+                   && arg_sign != 0
835
+#endif
836
+                  ) { tmp[str_arg_l++] = '0'; tmp[str_arg_l++] = 'x'; }
837
+#endif
838
+        }
839
+        zero_padding_insertion_ind = str_arg_l;
840
+        if (!precision_specified) precision = 1;   /* default precision is 1 */
841
+        if (precision == 0 && arg_sign == 0
842
+#if defined(HPUX_BUG_COMPATIBLE) || defined(LINUX_COMPATIBLE)
843
+            && fmt_spec != 'p'
844
+         /* HPUX 10 man page claims: With conversion character p the result of
845
+          * converting a zero value with a precision of zero is a null string.
846
+          * Actually HP returns all zeroes, and Linux returns "(nil)". */
847
+#endif
848
+        ) {
849
+         /* converted to null string */
850
+         /* When zero value is formatted with an explicit precision 0,
851
+            the resulting formatted string is empty (d, i, u, o, x, X, p).   */
852
+        } else {
853
+          char f[5]; int f_l = 0;
854
+          f[f_l++] = '%';    /* construct a simple format string for sprintf */
855
+          if (!length_modifier) { }
856
+          else if (length_modifier=='2') { f[f_l++] = 'l'; f[f_l++] = 'l'; }
857
+          else f[f_l++] = length_modifier;
858
+          f[f_l++] = fmt_spec; f[f_l++] = '\0';
859
+          if (fmt_spec == 'p') str_arg_l += sprintf(tmp+str_arg_l, f, ptr_arg);
860
+          else if (fmt_spec == 'd') {  /* signed */
861
+            switch (length_modifier) {
862
+            case '\0':
863
+            case 'h': str_arg_l+=sprintf(tmp+str_arg_l, f, int_arg);  break;
864
+            case 'l': str_arg_l+=sprintf(tmp+str_arg_l, f, long_arg); break;
865
+#ifdef SNPRINTF_LONGLONG_SUPPORT
866
+            case '2': str_arg_l+=sprintf(tmp+str_arg_l,f,long_long_arg); break;
867
+#endif
868
+            }
869
+          } else {  /* unsigned */
870
+            switch (length_modifier) {
871
+            case '\0':
872
+            case 'h': str_arg_l+=sprintf(tmp+str_arg_l, f, uint_arg);  break;
873
+            case 'l': str_arg_l+=sprintf(tmp+str_arg_l, f, ulong_arg); break;
874
+#ifdef SNPRINTF_LONGLONG_SUPPORT
875
+            case '2': str_arg_l+=sprintf(tmp+str_arg_l,f,ulong_long_arg);break;
876
+#endif
877
+            }
878
+          }
879
+         /* include the optional minus sign and possible "0x"
880
+            in the region before the zero padding insertion point */
881
+          if (zero_padding_insertion_ind < str_arg_l &&
882
+              tmp[zero_padding_insertion_ind] == '-') {
883
+            zero_padding_insertion_ind++;
884
+          }
885
+          if (zero_padding_insertion_ind+1 < str_arg_l &&
886
+              tmp[zero_padding_insertion_ind]   == '0' &&
887
+             (tmp[zero_padding_insertion_ind+1] == 'x' ||
888
+              tmp[zero_padding_insertion_ind+1] == 'X') ) {
889
+            zero_padding_insertion_ind += 2;
890
+          }
891
+        }
892
+        { size_t num_of_digits = str_arg_l - zero_padding_insertion_ind;
893
+          if (alternate_form && fmt_spec == 'o'
894
+#ifdef HPUX_COMPATIBLE                                  /* ("%#.o",0) -> ""  */
895
+              && (str_arg_l > 0)
896
+#endif
897
+#ifdef DIGITAL_UNIX_BUG_COMPATIBLE                      /* ("%#o",0) -> "00" */
898
+#else
899
+              /* unless zero is already the first character */
900
+              && !(zero_padding_insertion_ind < str_arg_l
901
+                   && tmp[zero_padding_insertion_ind] == '0')
902
+#endif
903
+          ) {        /* assure leading zero for alternate-form octal numbers */
904
+            if (!precision_specified || precision < num_of_digits+1) {
905
+             /* precision is increased to force the first character to be zero,
906
+                except if a zero value is formatted with an explicit precision
907
+                of zero */
908
+              precision = num_of_digits+1; precision_specified = 1;
909
+            }
910
+          }
911
+       /* zero padding to specified precision? */
912
+          if (num_of_digits < precision) 
913
+            number_of_zeros_to_pad = precision - num_of_digits;
914
+        }
915
+     /* zero padding to specified minimal field width? */
916
+        if (!justify_left && zero_padding) {
917
+          int n = min_field_width - (str_arg_l+number_of_zeros_to_pad);
918
+          if (n > 0) number_of_zeros_to_pad += n;
919
+        }
920
+        break;
921
+      }
922
+      default: /* unrecognized conversion specifier, keep format string as-is*/
923
+        zero_padding = 0;  /* turn zero padding off for non-numeric convers. */
924
+#ifndef DIGITAL_UNIX_COMPATIBLE
925
+        justify_left = 1; min_field_width = 0;                /* reset flags */
926
+#endif
927
+#if defined(PERL_COMPATIBLE) || defined(LINUX_COMPATIBLE)
928
+     /* keep the entire format string unchanged */
929
+        str_arg = starting_p; str_arg_l = p - starting_p;
930
+     /* well, not exactly so for Linux, which does something inbetween,
931
+      * and I don't feel an urge to imitate it: "%+++++hy" -> "%+y"  */
932
+#else