Browse code

add package to the repository

msa


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a8-67e5-0310-9ffc-ced21a209358

Sonali Arora authored on 10/04/2015 00:12:33
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,1435 @@
1
+/* -*- mode: c; tab-width: 4; c-basic-offset: 4;  indent-tabs-mode: nil -*- */
2
+
3
+/*********************************************************************
4
+ * Clustal Omega - Multiple sequence alignment
5
+ *
6
+ * Copyright (C) 2010 University College Dublin
7
+ *
8
+ * Clustal-Omega is free software; you can redistribute it and/or
9
+ * modify it under the terms of the GNU General Public License as
10
+ * published by the Free Software Foundation; either version 2 of the
11
+ * License, or (at your option) any later version.
12
+ *
13
+ * This file is part of Clustal-Omega.
14
+ *
15
+ ********************************************************************/
16
+
17
+/*
18
+ *  RCS $Id: clustal-omega.c 284 2013-06-12 10:10:11Z fabian $
19
+ */
20
+
21
+#ifdef HAVE_CONFIG_H
22
+#include "config.h"
23
+#endif
24
+
25
+#include <assert.h>
26
+
27
+#include "clustal-omega.h"
28
+#include "hhalign/general.h"
29
+#include "clustal/hhalign_wrapper.h"
30
+
31
+/* The following comment block contains the frontpage/mainpage of the doxygen
32
+ *  documentation. Please add some more info. FIXME add more
33
+ */
34
+
35
+/**
36
+ *
37
+ * @mainpage Clustal-Omega Documentation
38
+ *
39
+ * @section intro_sec Introduction
40
+ *
41
+ * For more information see http://www.clustal.org/
42
+ *
43
+ * @section api_section API
44
+ *
45
+ * @subsection example_prog_subsection An Example Program
46
+ *
47
+ * To use libclustalo you will have to include the clustal-omega.h header and
48
+ * link against libclustalo. For linking against libclustalo you will have to
49
+ * use a C++ compiler, no matter if your program was written in C or C++. See
50
+ * below (\ref pkgconfig_subsubsec)) on how to figure out compiler flags with
51
+ * pkg-config.
52
+ *
53
+ * Assuming Clustal Omega was installed in system-wide default directory (e.g.
54
+ * /usr), first compile (don't link yet) your source (for example code see
55
+ * section \ref example_src_subsubsec) and then link against libclustalo:
56
+ *
57
+ * @code
58
+ * $ gcc -c -ansi -Wall clustalo-api-test.c
59
+ * $ g++  -ansi -Wall -o clustalo-api-test clustalo-api-test.o  -lclustalo
60
+ * @endcode
61
+ *
62
+ * Voila! Now you have your own alignment program based on Clustal Omega which
63
+ * can be run with
64
+ *
65
+ * @code
66
+ * $ ./clustalo-api-test <your-sequence-input>
67
+ * @endcode
68
+ *  
69
+ * It's best to use the same compiler that you used for compiling libclustal.
70
+ * If libclustal was compiled with OpenMP support, you will have to use OpenMP
71
+ * flags for you program as well.
72
+ *
73
+ *
74
+ * @subsubsection pkgconfig_subsubsec Using pkg-config / Figuring out compiler flags
75
+ *
76
+ * Clustal Omega comes with support for <a
77
+ * href="http://pkg-config.freedesktop.org">pkg-config</a>, which means you
78
+ * can run
79
+ *
80
+ * @code
81
+ * $ pkg-config --cflags --libs clustalo
82
+ * @endcode
83
+ *
84
+ * to figure out cflags and library flags needed to compile and link against
85
+ * libclustalo. This is especially handy if Clustal Omega was installed to a
86
+ * non-standard directory.
87
+ *  
88
+ * You might have to change PKG_CONFIG_PATH. For example, if you used the prefix $HOME/local/ for
89
+ * installation then you will first need to set PKG_CONFIG_PATH:
90
+ *
91
+ * @code
92
+ * $ export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig
93
+ * $ pkg-config --cflags --libs clustalo
94
+ * @endcode
95
+ *  
96
+ *  
97
+ * To compile your source use as above but this time using proper flags:
98
+ *  
99
+ * @code
100
+ * $ export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig
101
+ * $ gcc -c -ansi -Wall $(pkg-config --cflags clustalo) clustalo-api-test.c  
102
+ * $ g++  -ansi -Wall -o clustalo-api-test $(pkg-config --libs clustalo) clustalo-api-test.o 
103
+ * @endcode
104
+ *
105
+ *
106
+ * @subsubsection example_src_subsubsec Example Source Code
107
+ *
108
+ * @include "clustalo-api-test.c"
109
+ *
110
+ *
111
+ */
112
+
113
+
114
+/* the following are temporary flags while the code is still under construction;
115
+   had problems internalising hhmake, so as temporary crutch 
116
+   write alignment to file and get external hmmer/hhmake via system call 
117
+   to read alignment and convert into HMM
118
+   All this will go, once hhmake is properly internalised */
119
+#define INDIRECT_HMM 0 /* temp flag: (1) write aln to file, use system(hmmer/hhmake), (0) internal hhmake */
120
+#define USEHMMER 1 /* temp flag: use system(hmmer) to build HMM */
121
+#define USEHHMAKE (!USEHMMER) /* temp flag: use system(hhmake) to build HMM */
122
+
123
+
124
+/* shuffle order of input sequences */
125
+#define SHUFFLE_INPUT_SEQ_ORDER 0
126
+
127
+/* sort input sequences by length */
128
+#define SORT_INPUT_SEQS 0
129
+
130
+
131
+int iNumberOfThreads;
132
+
133
+/* broken, unused and lonely */
134
+static const int ITERATION_SCORE_IMPROVEMENT_THRESHOLD = 0.01;
135
+
136
+
137
+/**
138
+ * @brief Print Long version information to pre-allocated char. 
139
+ *
140
+ * @note short version
141
+ * information is equivalent to PACKAGE_VERSION
142
+ *
143
+ * @param[out] pcStr
144
+ * char pointer to write to preallocated to hold iSize chars.
145
+ * @param[in] iSize
146
+ * size of pcStr
147
+ */
148
+void 
149
+PrintLongVersion(char *pcStr, int iSize)
150
+{
151
+    snprintf(pcStr, iSize, "version %s; code-name '%s'; build date %s",
152
+             PACKAGE_VERSION, PACKAGE_CODENAME, __DATE__);
153
+}
154
+/* end of PrintLongVersion() */
155
+
156
+
157
+
158
+/**
159
+ * @brief free aln opts members
160
+ *
161
+ */
162
+void
163
+FreeAlnOpts(opts_t *prAlnOpts) {
164
+    if (NULL != prAlnOpts->pcGuidetreeInfile) {
165
+        CKFREE(prAlnOpts->pcGuidetreeInfile);
166
+    }
167
+    if (NULL != prAlnOpts->pcGuidetreeOutfile) {
168
+        CKFREE(prAlnOpts->pcGuidetreeOutfile);
169
+    }
170
+    if (NULL  != prAlnOpts->pcDistmatOutfile) {
171
+        CKFREE(prAlnOpts->pcDistmatOutfile);
172
+    }
173
+    if (NULL != prAlnOpts->pcDistmatInfile) {
174
+        CKFREE(prAlnOpts->pcDistmatInfile);
175
+    }
176
+}
177
+/* end of FreeAlnOpts() */
178
+
179
+
180
+
181
+/**
182
+ * @brief Sets members of given user opts struct to default values
183
+ *
184
+ * @param[out] prOpts
185
+ * User opt struct to initialise
186
+ *
187
+ */
188
+void
189
+SetDefaultAlnOpts(opts_t *prOpts) {
190
+    prOpts->bAutoOptions = FALSE;
191
+    
192
+    prOpts->pcDistmatInfile = NULL;
193
+    prOpts->pcDistmatOutfile = NULL;
194
+
195
+    prOpts->iClustersizes = 100; /* FS, r274 -> */
196
+    prOpts->pcClustfile = NULL; /* FS, r274 -> */
197
+    prOpts->iClusteringType = CLUSTERING_UPGMA;
198
+    prOpts->iPairDistType = PAIRDIST_KTUPLE;
199
+    prOpts->bUseMbed = TRUE; /* FS, r250 -> */
200
+    prOpts->bUseMbedForIteration = TRUE; /* FS, r250 -> */
201
+    prOpts->pcGuidetreeOutfile = NULL;
202
+    prOpts->pcGuidetreeInfile = NULL;
203
+    prOpts->bPercID = FALSE;
204
+
205
+    prOpts->ppcHMMInput = NULL;
206
+    prOpts->iHMMInputFiles = 0;
207
+		
208
+    prOpts->iNumIterations = 0;
209
+    prOpts->bIterationsAuto = FALSE;
210
+    prOpts->iMaxGuidetreeIterations = INT_MAX;
211
+    prOpts->iMaxHMMIterations = INT_MAX;
212
+
213
+    SetDefaultHhalignPara(& prOpts->rHhalignPara);
214
+ }
215
+/* end of SetDefaultAlnOpts() */
216
+
217
+
218
+
219
+/**
220
+ * @brief Check logic of parsed user options. Will exit (call Log(&rLog, LOG_FATAL, ))
221
+ * on Fatal logic error
222
+ *
223
+ * @param[in] prOpts
224
+ * Already parsed user options
225
+ * 
226
+ */    
227
+void
228
+AlnOptsLogicCheck(opts_t *prOpts)
229
+{
230
+    /* guide-tree & distmat
231
+     *
232
+     */
233
+    if (prOpts->pcDistmatInfile && prOpts->pcGuidetreeInfile) {
234
+        Log(&rLog, LOG_FATAL, "Read distances *and* guide-tree from file doesn't make sense.");
235
+    }
236
+
237
+    if (prOpts->pcDistmatOutfile && prOpts->pcGuidetreeInfile) {
238
+        Log(&rLog, LOG_FATAL, "Won't be able to save distances to file, because I got a guide-tree as input.");
239
+    }
240
+
241
+    /* combination of options that don't make sense when not iterating
242
+     */
243
+    if (prOpts->iNumIterations==0 && prOpts->bIterationsAuto != TRUE)  {
244
+        
245
+        if (prOpts->pcGuidetreeInfile && prOpts->pcGuidetreeOutfile) {
246
+            Log(&rLog, LOG_FATAL, "Got a guide-tree as input and output which doesn't make sense when not iterating.");            
247
+        }
248
+        /*
249
+          if (prOpts->pcGuidetreeInfile && prOpts->bUseMbed > 0) {
250
+          Log(&rLog, LOG_FATAL, "Got a guide-tree as input and was requested to cluster with mBed, which doesn't make sense when not iterating.");            
251
+          }
252
+        */
253
+        /*
254
+          AW: bUseMbedForIteration default since at least R252
255
+          if (prOpts->bUseMbedForIteration > 0) {
256
+            Log(&rLog, LOG_FATAL, "No iteration requested, but mbed for iteration was set. Paranoia exit.");
257
+          }        
258
+        */
259
+    }
260
+    
261
+    if (prOpts->rHhalignPara.iMacRamMB < 512) {
262
+        Log(&rLog, LOG_WARN, "Memory for MAC Algorithm quite low, Viterbi Algorithm may be triggered.");
263
+        if (prOpts->rHhalignPara.iMacRamMB < 1) {
264
+            Log(&rLog, LOG_WARN, "Viterbi Algorithm always turned on, increase MAC-RAM to turn on MAC.");
265
+        }
266
+    }
267
+
268
+    return; 
269
+}
270
+/* end of AlnOptsLogicCheck() */
271
+
272
+
273
+/**
274
+ * @brief FIXME doc
275
+ */
276
+void
277
+PrintAlnOpts(FILE *prFile, opts_t *prOpts)
278
+{
279
+    int iAux;
280
+
281
+
282
+    /* keep in same order as struct */
283
+    fprintf(prFile, "option: auto-options = %d\n", prOpts->bAutoOptions);
284
+    fprintf(prFile, "option: distmat-infile = %s\n", 
285
+            NULL != prOpts->pcDistmatInfile? prOpts->pcDistmatInfile: "(null)");
286
+	fprintf(prFile, "option: distmat-outfile = %s\n",
287
+            NULL != prOpts->pcDistmatOutfile? prOpts->pcDistmatOutfile: "(null)");
288
+	fprintf(prFile, "option: clustering-type = %d\n", prOpts->iClusteringType);
289
+	fprintf(prFile, "option: pair-dist-type = %d\n", prOpts->iPairDistType);
290
+	fprintf(prFile, "option: use-mbed = %d\n", prOpts->bUseMbed);
291
+	fprintf(prFile, "option: use-mbed-for-iteration = %d\n", prOpts->bUseMbedForIteration);
292
+	fprintf(prFile, "option: guidetree-outfile = %s\n", 
293
+            NULL != prOpts->pcGuidetreeOutfile? prOpts->pcGuidetreeOutfile: "(null)");
294
+	fprintf(prFile, "option: guidetree-infile = %s\n",
295
+            NULL != prOpts->pcGuidetreeInfile? prOpts->pcGuidetreeInfile: "(null)");
296
+    for (iAux=0; iAux<prOpts->iHMMInputFiles; iAux++) {
297
+        fprintf(prFile, "option: hmm-input no %d = %s\n", iAux, prOpts->ppcHMMInput[iAux]);
298
+    }
299
+	fprintf(prFile, "option: hmm-input-files = %d\n", prOpts->iHMMInputFiles);
300
+	fprintf(prFile, "option: num-iterations = %d\n", prOpts->iNumIterations);
301
+	fprintf(prFile, "option: iterations-auto = %d\n", prOpts->bIterationsAuto);
302
+	fprintf(prFile, "option: max-hmm-iterations = %d\n", prOpts->iMaxHMMIterations);
303
+	fprintf(prFile, "option: max-guidetree-iterations = %d\n", prOpts->iMaxGuidetreeIterations);
304
+    fprintf(prFile, "option: iMacRamMB = %d\n", prOpts->rHhalignPara.iMacRamMB);
305
+    fprintf(prFile, "option: percent-id = %d\n", prOpts->bPercID);
306
+    fprintf(prFile, "option: use-kimura = %d\n", prOpts->bUseKimura);
307
+
308
+}
309
+/* end of PrintAlnOpts() */
310
+
311
+
312
+
313
+/**
314
+ * @brief Returns major version of HMMER. Whichever hmmbuild version
315
+ * is found first in your PATH will be used
316
+ *
317
+ * @return -1 on error, major hmmer version otherwise
318
+ *
319
+ */
320
+int
321
+HmmerVersion()
322
+{
323
+    char zcHmmerTestCall[] = "hmmbuild -h";
324
+    FILE *fp = NULL;
325
+    int iMajorVersion = 0;
326
+    char zcLine[16384];
327
+
328
+    if (NULL == (fp = popen(zcHmmerTestCall, "r"))) {
329
+        Log(&rLog, LOG_ERROR, "Couldn't exec %s", zcHmmerTestCall);
330
+        return -1;
331
+    }
332
+    while (fgets(zcLine, sizeof(zcLine), fp)) {
333
+        char *pcLocate;
334
+        if ((pcLocate = strstr(zcLine, "HMMER "))) {
335
+            iMajorVersion = atoi(&pcLocate[6]);
336
+            break;
337
+        }
338
+    }
339
+    pclose(fp);
340
+
341
+    return iMajorVersion;
342
+}
343
+/* end of HmmerVersion() */
344
+
345
+
346
+
347
+/**
348
+ * @brief Create a HHM file from aligned sequences
349
+ *
350
+ * @warning Should be eliminated in the future 
351
+ * as building routine should not create intermediate files
352
+ *
353
+ * @param[in] prMSeq
354
+ * Aligned mseq_t
355
+ * @param[in] pcHMMOut
356
+ * HMM output file name
357
+ *
358
+ * @return Non-zero on error
359
+ *
360
+ */
361
+int
362
+AlnToHHMFile(mseq_t *prMSeq, char *pcHMMOut)
363
+{
364
+    char *tmp_aln = NULL;
365
+    int retcode = OK;
366
+
367
+    int msa_c = 0;
368
+    char **msa_v = NULL;
369
+
370
+    assert(NULL!=prMSeq);
371
+    assert(NULL!=pcHMMOut);
372
+
373
+    if (FALSE == prMSeq->aligned) {
374
+        Log(&rLog, LOG_ERROR, "Sequences need to be aligned to create an HMM");
375
+        return FAILURE;
376
+    }
377
+
378
+    /* Convert alignment to a2m, and call hhmake
379
+     *
380
+     * can't be static templates, or mktemp fails (at least on os x
381
+     * (with a bus error))
382
+     * 
383
+     * gcc says we should use mkstemp to avoid race conditions, 
384
+     * but that returns a file descriptor, which is of no use to 
385
+     * us  
386
+     */
387
+    /* NOTE: the following won't work on windows: missing /tmp/ */
388
+    tmp_aln = CkStrdup("/tmp/clustalo_tmpaln_XXXXXX");
389
+    if (NULL == mktemp(tmp_aln)) {
390
+        Log(&rLog, LOG_ERROR, "Could not create temporary alignment filename");
391
+        retcode = FAILURE;
392
+        goto cleanup_and_return;
393
+    }
394
+#define LINE_WRAP 60
395
+
396
+    if (WriteAlignment(prMSeq, tmp_aln, MSAFILE_A2M, LINE_WRAP, FALSE, &msa_c, &msa_v)) {
397
+        Log(&rLog, LOG_ERROR, "Could not save alignment to %s", tmp_aln);
398
+        retcode = FAILURE;
399
+        goto cleanup_and_return;
400
+    }
401
+
402
+    if (HHMake_Wrapper(tmp_aln, pcHMMOut)){
403
+        Log(&rLog, LOG_ERROR, "Could not convert alignment %s into HHM", tmp_aln);
404
+        retcode = FAILURE;
405
+        goto cleanup_and_return;
406
+    }
407
+
408
+
409
+ cleanup_and_return:
410
+
411
+    if (NULL != tmp_aln) {
412
+        if (FileExists(tmp_aln)) {
413
+            if (remove(tmp_aln)) {
414
+                Log(&rLog, LOG_WARN, "Removing %s failed. Continuing anyway", tmp_aln);
415
+            }
416
+        }
417
+        CKFREE(tmp_aln);
418
+    }
419
+
420
+    return retcode; 
421
+
422
+} /* end of AlnToHHMFile() */
423
+
424
+
425
+
426
+/**
427
+ * @brief Create a HMM file from aligned sequences
428
+ *
429
+ * @warning Should be replaced in the future by some internal HMM
430
+ * building routine that does not call external programs
431
+ *
432
+ * @param[in] prMSeq
433
+ * Aligned mseq_t
434
+ * @param[in] pcHMMOut
435
+ * HMM output file name
436
+ *
437
+ * @return Non-zero on error
438
+ *
439
+
440
+ */
441
+int
442
+AlnToHMMFile(mseq_t *prMSeq, const char *pcHMMOut)
443
+{
444
+    char *tmp_aln = NULL;
445
+    char *tmp_hmm = NULL; /* only needed for hmmer3 to hmmer2 conversion */
446
+    char cmdbuf[16384];
447
+    int iHmmerVersion = 0;
448
+    int retcode = OK;
449
+    
450
+    int msa_c = 0;
451
+    char **msa_v = NULL;
452
+
453
+    assert(NULL!=prMSeq);
454
+    assert(NULL!=pcHMMOut);
455
+
456
+    if (FALSE == prMSeq->aligned) {
457
+        Log(&rLog, LOG_ERROR, "Sequences need to be aligned to create an HMM");
458
+        return FAILURE;
459
+    }
460
+
461
+    iHmmerVersion = HmmerVersion();
462
+    if (2 != iHmmerVersion && 3 != iHmmerVersion) {
463
+        Log(&rLog, LOG_ERROR, "Could not find suitable HMMER binaries");
464
+        return FAILURE;
465
+    }
466
+
467
+    /* Convert alignment to stockholm, call hmmbuild and then
468
+     * either hmmconvert (hmmer3) or hmmcalibrate (hmmer2)
469
+     *
470
+     * can't be static templates, or mktemp fails (at least on os x
471
+     * (with a bus error))
472
+     *
473
+     * gcc says we should use mkstemp to avoid race conditions,
474
+     * but that returns a file descriptor, which is of no use to
475
+     * us
476
+     */
477
+    /* NOTE: the following won't work on windows: missing /tmp/ */
478
+    tmp_aln = CkStrdup("/tmp/clustalo_tmpaln_XXXXXX");
479
+    if (NULL == mktemp(tmp_aln)) {
480
+        Log(&rLog, LOG_ERROR, "Could not create temporary alignment filename");
481
+        retcode = FAILURE;
482
+        goto cleanup_and_return;
483
+    }
484
+#define LINE_WRAP 60
485
+    if (WriteAlignment(prMSeq, tmp_aln, MSAFILE_STOCKHOLM, LINE_WRAP, FALSE, &msa_c, &msa_v)) {
486
+        Log(&rLog, LOG_ERROR, "Could not save alignment to %s", tmp_aln);
487
+        retcode = FAILURE;
488
+        goto cleanup_and_return;
489
+    }
490
+
491
+    if (2 == iHmmerVersion) {
492
+        sprintf(cmdbuf, "hmmbuild %s %s >/dev/null && hmmcalibrate %s >/dev/null",
493
+                pcHMMOut, tmp_aln, pcHMMOut);
494
+        if (system(cmdbuf)) {
495
+            Log(&rLog, LOG_ERROR, "Command '%s' failed", cmdbuf);
496
+            retcode = FAILURE;
497
+            goto cleanup_and_return;
498
+        }
499
+    } else if (3 == iHmmerVersion) {
500
+        /* NOTE: the following won't work on windows: missing /tmp/ */
501
+        tmp_hmm = CkStrdup("/tmp/clustalo_tmphmm2_XXXXXX");
502
+        if (NULL == mktemp(tmp_hmm)) {
503
+            Log(&rLog, LOG_ERROR, "Could not create temporary hmm filename");
504
+            retcode = FAILURE;
505
+            goto cleanup_and_return;
506
+        }
507
+        sprintf(cmdbuf, "hmmbuild %s %s >/dev/null && hmmconvert -2 %s > %s",
508
+                tmp_hmm, tmp_aln, tmp_hmm, pcHMMOut);
509
+        if (system(cmdbuf)) {
510
+            Log(&rLog, LOG_ERROR, "Command '%s' failed", cmdbuf);
511
+            retcode = FAILURE;
512
+            goto cleanup_and_return;
513
+        }
514
+    } else {
515
+        CKFREE(tmp_aln);
516
+        Log(&rLog, LOG_FATAL, "Internal error: Unknown Hmmer version %d", iHmmerVersion);
517
+    }
518
+
519
+
520
+ cleanup_and_return:
521
+
522
+    if (NULL != tmp_aln) {
523
+        if (FileExists(tmp_aln)) {
524
+            if (remove(tmp_aln)) {
525
+                Log(&rLog, LOG_WARN, "Removing %s failed. Continuing anyway", tmp_aln);
526
+            }
527
+        }
528
+        CKFREE(tmp_aln);
529
+    }
530
+    if (NULL != tmp_hmm) {
531
+        if (FileExists(tmp_hmm)) {
532
+            if (remove(tmp_hmm)) {
533
+                Log(&rLog, LOG_WARN, "Removing %s failed. Continuing anyway", tmp_hmm);
534
+            }
535
+        }
536
+        CKFREE(tmp_hmm);
537
+    }
538
+
539
+    return retcode;
540
+}
541
+/* end of AlnToHMMFile() */
542
+
543
+
544
+
545
+/**
546
+ * @brief Convert a multiple sequence structure into a HMM
547
+ *
548
+ * @param[out] prHMM
549
+ * Pointer to preallocted HMM which will be set here
550
+ * @param[in] prMSeq
551
+ * Pointer to an alignment
552
+ *
553
+ * @return 0 on error, non-0 otherwise
554
+ *
555
+ * @see AlnToHMMFile()
556
+ * 
557
+ */    
558
+int
559
+AlnToHMM(hmm_light *prHMM, mseq_t *prMSeq)
560
+{
561
+    char *pcHMM; /* temp hmm file */
562
+
563
+    Log(&rLog, LOG_INFO,
564
+        "Using HMMER version %d to calculate a new HMM.",
565
+         HmmerVersion());
566
+    /* FIXME replace all this with internal HMM computation (HHmake) */
567
+
568
+    /**
569
+     * @warning the following probably won't work on windows: missing
570
+     * /tmp/. Should be ok on Cygwin though
571
+     */
572
+    pcHMM = CkStrdup("/tmp/clustalo-hmm-iter_XXXXXX");
573
+    if (NULL == mktemp(pcHMM)) {
574
+        Log(&rLog, LOG_ERROR, "Could not create temporary hmm filename");
575
+        CKFREE(pcHMM);
576
+        return FAILURE;
577
+    }
578
+    
579
+    /* Create a HMM representing the current alignment
580
+     */
581
+#if USEHMMER
582
+    if (AlnToHMMFile(prMSeq, pcHMM)) {
583
+        Log(&rLog, LOG_ERROR, "AlnToHMMFile() on %s failed.", pcHMM);
584
+        CKFREE(pcHMM);
585
+        return FAILURE;
586
+    }
587
+#elif USEHHMAKE
588
+    if (AlnToHHMFile(prMSeq, pcHMM)) {
589
+        Log(&rLog, LOG_ERROR, "AlnToHHMFile() on %s failed.", pcHMM);
590
+        CKFREE(pcHMM);
591
+        return FAILURE;
592
+    }
593
+    /* Log(&rLog, LOG_FATAL, "Method to create HHM (HMM using hhmake) not installed yet"); */
594
+#else
595
+    Log(&rLog, LOG_FATAL, "Unknown method to create temporary HMM");
596
+#endif
597
+    
598
+    /* Read HMM information
599
+     */
600
+    if (OK != readHMMWrapper(prHMM, pcHMM)){
601
+        Log(&rLog, LOG_ERROR, "Processing of HMM file %s failed", pcHMM);
602
+        CKFREE(pcHMM);
603
+        return FAILURE;
604
+    }
605
+
606
+    if (remove(pcHMM)) {
607
+        Log(&rLog, LOG_WARN, "Removing %s failed. Continuing anyway", pcHMM);
608
+    }
609
+    CKFREE(pcHMM);
610
+        
611
+    return OK; 
612
+}
613
+/* end of AlnToHMM() */
614
+
615
+
616
+
617
+/**
618
+ * @brief FIXME
619
+ *
620
+ */
621
+void
622
+InitClustalOmega(int iNumThreadsRequested)
623
+{
624
+
625
+#ifdef WIN32
626
+    if (iNumThreadsRequested>1) {
627
+        Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.",
628
+              iNumThreadsRequested, PACKAGE_NAME);
629
+    }
630
+    iNumberOfThreads = 1; /* need to set this, even if build without support */
631
+#elif HAVE_OPENMP
632
+    iNumberOfThreads = iNumThreadsRequested;
633
+    omp_set_num_threads(iNumberOfThreads);
634
+#else
635
+	if (iNumThreadsRequested>1) {
636
+        Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.",
637
+              iNumThreadsRequested, PACKAGE_NAME);
638
+    }
639
+    iNumberOfThreads = 1; /* need to set this, even if build without support */
640
+#endif
641
+
642
+    Log(&rLog, LOG_INFO, "Using %d threads",
643
+         iNumberOfThreads);
644
+
645
+}
646
+/* end of InitClustalOmega() */
647
+
648
+
649
+
650
+/**
651
+ * @brief Defines an alignment order, which adds sequences
652
+ * sequentially, i.e. one at a time starting with seq 1 & 2
653
+ *
654
+ * @param[out] piOrderLR_p
655
+ * order in which nodes/profiles are to be merged/aligned
656
+ * @param[in] iNumSeq
657
+ * Number of sequences
658
+ *
659
+ * @see TraverseTree()
660
+ *
661
+ */
662
+void
663
+SequentialAlignmentOrder(int **piOrderLR_p, int iNumSeq)
664
+{
665
+    unsigned int uNodes = iNumSeq*2-1;
666
+    unsigned int uNodeCounter = 0;
667
+    unsigned int uSeqCounter = 0;
668
+
669
+    Log(&rLog, LOG_FATAL, "FIXME: Untested...");
670
+    
671
+    (*piOrderLR_p) = (int *)CKCALLOC(DIFF_NODE * uNodes, sizeof(int));
672
+    /* loop over merge nodes, which have per definition even indices
673
+     * and set up children which have odd indices
674
+     */
675
+    uSeqCounter = 0;
676
+    for (uNodeCounter=iNumSeq; uNodeCounter<uNodes; uNodeCounter+=1) {
677
+         unsigned int uLeftChildNodeIndex = uNodeCounter-1;
678
+         unsigned int uRightChildNodeIndex = uNodeCounter-iNumSeq+1;
679
+         unsigned int uParentNodeIndex = uNodeCounter+1;
680
+
681
+         /* merge node setup */
682
+        (*piOrderLR_p)[DIFF_NODE*uNodeCounter+LEFT_NODE] = uLeftChildNodeIndex;
683
+        (*piOrderLR_p)[DIFF_NODE*uNodeCounter+RGHT_NODE] = uRightChildNodeIndex;
684
+        (*piOrderLR_p)[DIFF_NODE*uNodeCounter+PRNT_NODE] = uParentNodeIndex;
685
+        /* only setup left child if at first merge node, all other left childs
686
+         * should be merge nodes that are already set up. also correct
687
+         * left node number here.
688
+         */
689
+        if (uNodeCounter==iNumSeq) {
690
+            (*piOrderLR_p)[DIFF_NODE*uNodeCounter+LEFT_NODE] = 0;
691
+
692
+            (*piOrderLR_p)[0+LEFT_NODE] = 0;
693
+            (*piOrderLR_p)[0+RGHT_NODE] = 0;
694
+            (*piOrderLR_p)[0+PRNT_NODE] = uNodeCounter;
695
+            uSeqCounter++;
696
+
697
+            Log(&rLog, LOG_FORCED_DEBUG, "Set up first leaf with node counter %d: left=%d right=%d parent=%d",
698
+                      0,
699
+                      (*piOrderLR_p)[DIFF_NODE*uLeftChildNodeIndex+LEFT_NODE],
700
+                      (*piOrderLR_p)[DIFF_NODE*uLeftChildNodeIndex+RGHT_NODE],
701
+                      (*piOrderLR_p)[DIFF_NODE*uLeftChildNodeIndex+PRNT_NODE]);
702
+        }
703
+        Log(&rLog, LOG_FORCED_DEBUG, "Set up merge node with node counter %d: left=%d right=%d parent=%d",
704
+                  uNodeCounter, (*piOrderLR_p)[DIFF_NODE*uNodeCounter+LEFT_NODE],
705
+                  (*piOrderLR_p)[DIFF_NODE*uNodeCounter+RGHT_NODE],
706
+                  (*piOrderLR_p)[DIFF_NODE*uNodeCounter+PRNT_NODE]);
707
+        
708
+        /* right child */
709
+        (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+LEFT_NODE] = uSeqCounter;
710
+        (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+RGHT_NODE] = uSeqCounter;
711
+        (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+PRNT_NODE] = uNodeCounter;
712
+        uSeqCounter++;
713
+
714
+        Log(&rLog, LOG_FORCED_DEBUG, "Set up leaf with node counter %d: left=%d right=%d parent=%d",
715
+                  uRightChildNodeIndex, (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+LEFT_NODE],
716
+                  (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+RGHT_NODE],
717
+                  (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+PRNT_NODE]);
718
+    }
719
+}
720
+/* end of SequentialAlignmentOrder() */
721
+
722
+
723
+
724
+/**
725
+ * @brief Defines the alignment order by calculating a guide tree. In
726
+ * a first-step pairwise distances will be calculated (or read from a
727
+ * file). In a second step those distances will be clustered and a
728
+ * guide-tree created. Steps 1 and 2 will be skipped if a guide-tree
729
+ * file was given, in which case the guide-tree will be just read from
730
+ * the file.
731
+ *
732
+ * @param[out] piOrderLR_p
733
+ * order in which nodes/profiles are to be merged/aligned
734
+ * @param[out] pdSeqWeights_p
735
+ * Sequence weights
736
+ * @param[out] pdSeqWeights_p
737
+ * Sequence weights
738
+ * @param[in] prMSeq
739
+ * The sequences from which the alignment order is to be calculated
740
+ * @param[in] iPairDistType
741
+ * Method of pairwise distance comparison
742
+ * @param[in] pcDistmatInfile
743
+ * If not NULL distances will be read from this file instead of being
744
+ * calculated
745
+ * @param[in] pcDistmatOutfile
746
+ * If not NULL computed pairwise distances will be written to this file
747
+ * @param[in] iClusteringType
748
+ * Clustering method to be used to cluster the pairwise distances
749
+ * @param[in] pcGuidetreeInfile
750
+ * If not NULL guidetree will be read from this file. Skips pairwise
751
+ * distance and guidetree computation
752
+ * @param[in] pcGuidetreeOutfile
753
+ * If not NULL computed guidetree will be written to this file
754
+ * @param[in] bUseMbed
755
+ * If TRUE, fast mBed guidetree computation will be employed
756
+ *
757
+ * @return Non-zero on error
758
+ *
759
+ */
760
+int
761
+AlignmentOrder(int **piOrderLR_p, double **pdSeqWeights_p, mseq_t *prMSeq,
762
+               int iPairDistType, char *pcDistmatInfile, char *pcDistmatOutfile,
763
+               int iClusteringType, int iClustersizes, 
764
+               char *pcGuidetreeInfile, char *pcGuidetreeOutfile, char *pcClusterFile, 
765
+               bool bUseMbed, bool bPercID)
766
+{
767
+    /* pairwise distance matrix (tmat in 1.83) */
768
+    symmatrix_t *distmat = NULL;
769
+    /* guide tree */
770
+    tree_t *prTree = NULL;
771
+    int i = 0;
772
+
773
+    
774
+    /* Shortcut for only two sequences: Do not compute k-tuple
775
+     * distances. Use the same logic as in TraverseTree() to setup
776
+     * piOrderLR_p. Changes there will have to be reflected here as
777
+     * well. */
778
+    if (2==prMSeq->nseqs) {
779
+        Log(&rLog, LOG_VERBOSE,
780
+            "Have only two sequences: No need to compute pairwise score and compute a tree.");
781
+
782
+        if (NULL != pcDistmatOutfile){
783
+            Log(&rLog, LOG_WARN, "Have only two sequences: Will not calculate/print distance matrix.");
784
+        }
785
+
786
+        (*piOrderLR_p) = (int*) CKMALLOC(DIFF_NODE * 3 * sizeof(int));
787
+        (*piOrderLR_p)[DIFF_NODE*0+LEFT_NODE] = 0;
788
+        (*piOrderLR_p)[DIFF_NODE*0+RGHT_NODE] = 0;
789
+        (*piOrderLR_p)[DIFF_NODE*0+PRNT_NODE] = 0;
790
+
791
+        (*piOrderLR_p)[DIFF_NODE*1+LEFT_NODE] = 1;
792
+        (*piOrderLR_p)[DIFF_NODE*1+RGHT_NODE] = 1;
793
+        (*piOrderLR_p)[DIFF_NODE*1+PRNT_NODE] = 1;
794
+
795
+        /* root */
796
+        (*piOrderLR_p)[DIFF_NODE*2+LEFT_NODE] = 0;
797
+        (*piOrderLR_p)[DIFF_NODE*2+RGHT_NODE] = 1;
798
+        (*piOrderLR_p)[DIFF_NODE*2+PRNT_NODE] = 2;
799
+
800
+        /* Same logic as CalcClustalWeights(). Changes there will
801
+           have to be reflected here as well. */
802
+#if USE_WEIGHTS
803
+         (*pdWeights_p) = (double *) CKMALLOC(uNodeCount * sizeof(double));
804
+         (*pdWeights_p)[0] = 0.5;
805
+         (*pdWeights_p)[1] = 0.5;
806
+#endif
807
+        
808
+        return OK;
809
+    }
810
+
811
+        
812
+    /* compute distance & guide tree, alternatively read distances or
813
+     * guide tree from file
814
+     *
815
+     */
816
+    if (NULL != pcGuidetreeInfile) {
817
+        Log(&rLog, LOG_INFO, "Reading guide-tree from %s", pcGuidetreeInfile);
818
+        if (GuideTreeFromFile(&prTree, prMSeq, pcGuidetreeInfile)) {
819
+            Log(&rLog, LOG_ERROR, "Reading of guide tree %s failed.", pcGuidetreeInfile);
820
+            return FAILURE;
821
+        }
822
+
823
+    } else {
824
+
825
+        if (bUseMbed) {
826
+            if (Mbed(&prTree, prMSeq, iPairDistType, pcGuidetreeOutfile, iClustersizes, pcClusterFile)) {
827
+                Log(&rLog, LOG_ERROR, "mbed execution failed.");
828
+                return FAILURE;
829
+            }
830
+            Log(&rLog, LOG_INFO, "Guide-tree computation (mBed) done.");
831
+            if (NULL != pcDistmatOutfile) {
832
+                Log(&rLog, LOG_INFO,
833
+                    "Ignoring request to write distance matrix (am in mBed mode)");
834
+            }
835
+        } else {
836
+
837
+            if (PairDistances(&distmat, prMSeq, iPairDistType, bPercID, 
838
+                              0, prMSeq->nseqs, 0, prMSeq->nseqs,
839
+                              pcDistmatInfile, pcDistmatOutfile)) {
840
+                Log(&rLog, LOG_ERROR, "Couldn't compute pair distances");
841
+                return FAILURE;
842
+            }
843
+
844
+            /* clustering of distances to get guide tree
845
+             */
846
+            if (CLUSTERING_UPGMA == iClusteringType) {
847
+                char **labels;
848
+                labels = (char**) CKMALLOC(prMSeq->nseqs * sizeof(char*));
849
+                for (i=0; i<prMSeq->nseqs; i++) {
850
+                    labels[i] = prMSeq->sqinfo[i].name;
851
+                }
852
+
853
+                GuideTreeUpgma(&prTree, labels, distmat, pcGuidetreeOutfile);
854
+                Log(&rLog, LOG_INFO, "Guide-tree computation done.");
855
+
856
+                CKFREE(labels);
857
+            } else {
858
+                Log(&rLog, LOG_FATAL, "INTERNAL ERROR %s",
859
+                      "clustering method should have been checked before");
860
+            }
861
+        } /* did not use mBed */
862
+    } /* had to calculate tree (did not read from file) */
863
+
864
+
865
+#if USE_WEIGHTS
866
+    /* derive sequence weights from tree
867
+     *
868
+     */
869
+    Log(&rLog, LOG_INFO, "Calculating sequence weights");
870
+    CalcClustalWeights(pdSeqWeights_p, prTree);
871
+    for (i = 0; i < GetLeafCount(prTree); i++) {
872
+        Log(&rLog, LOG_VERBOSE,
873
+            "Weight for seq no %d: %s = %f",
874
+             i, prMSeq->sqinfo[i].name, (*pdSeqWeights_p)[i]);
875
+    }
876
+#else
877
+    Log(&rLog, LOG_DEBUG, "Not using weights");
878
+#endif
879
+
880
+    
881
+    /* define traversing order of tree
882
+     *
883
+     */
884
+    TraverseTree(piOrderLR_p, prTree, prMSeq);
885
+    if (rLog.iLogLevelEnabled <= LOG_DEBUG) {
886
+        /* FIXME: debug only, FS */
887
+        uint uNodeIndex;
888
+        FILE *fp = LogGetFP(&rLog, LOG_INFO);
889
+        Log(&rLog, LOG_DEBUG, "left/right order after tree traversal");
890
+        for (uNodeIndex = 0; uNodeIndex < GetNodeCount(prTree); uNodeIndex++) {
891
+            fprintf(fp, "%3d:\t%2d/%2d -> %d\n", i,
892
+                   (*piOrderLR_p)[DIFF_NODE*uNodeIndex+LEFT_NODE],
893
+                   (*piOrderLR_p)[DIFF_NODE*uNodeIndex+RGHT_NODE],
894
+                   (*piOrderLR_p)[DIFF_NODE*uNodeIndex+PRNT_NODE]);
895
+        }
896
+    }
897
+
898
+    FreeMuscleTree(prTree);
899
+    FreeSymMatrix(&distmat);
900
+
901
+#if 0
902
+    Log(&rLog, LOG_FATAL, "DEBUG EXIT before leaving %s", __FUNCTION__);
903
+#endif
904
+    return OK;
905
+}
906
+/* end of AlignmentOrder() */
907
+
908
+
909
+
910
+/**
911
+ * @brief Set some options automatically based on number of sequences. Might
912
+ * overwrite some user-set options.
913
+ *
914
+ * @param[out] prOpts
915
+ * Pointer to alignment options structure
916
+ * @param[in] iNumSeq
917
+ * Number of sequences to align
918
+ */
919
+void
920
+SetAutoOptions(opts_t *prOpts, int iNumSeq) {
921
+
922
+    Log(&rLog, LOG_INFO,
923
+        "Setting options automatically based on input sequence characteristics (might overwrite some of your options).");
924
+
925
+    /* AW: new version of mbed is always good (uses subclusters) */
926
+    if (FALSE == prOpts->bUseMbed) {
927
+        Log(&rLog, LOG_INFO, "Auto settings: Enabling mBed.");
928
+        prOpts->bUseMbed = TRUE;
929
+    }
930
+    
931
+    if (iNumSeq >= 1000) {
932
+        if (0 != prOpts->iNumIterations) {
933
+            Log(&rLog, LOG_INFO, "Auto settings: Disabling iterations.");
934
+            prOpts->iNumIterations = 0;
935
+        }
936
+        
937
+    } else if (iNumSeq < 1000) {
938
+        if (1 != prOpts->iNumIterations) {
939
+            Log(&rLog, LOG_INFO, "Auto settings: Setting iteration to 1.");
940
+            prOpts->iNumIterations = 1;
941
+        }        
942
+    }
943
+}
944
+/* end of */
945
+
946
+
947
+
948
+/**
949
+ * @brief The main alignment function which wraps everything else.
950
+ *
951
+ * @param[out] prMSeq
952
+ * *the* multiple sequences structure
953
+ * @param[in] prMSeqProfile
954
+ * optional profile to align against
955
+ * @param[in] prOpts
956
+ * alignment options to use
957
+ *
958
+ * @return 0 on success, -1 on failure
959
+ *
960
+ */
961
+int
962
+Align(mseq_t *prMSeq, 
963
+      mseq_t *prMSeqProfile,
964
+      opts_t *prOpts) {
965
+   
966
+    /* HMM
967
+     */
968
+    /* structs with pseudocounts etc; one for each HMM infile, i.e.
969
+     * index range: 0..iHMMInputFiles */
970
+    hmm_light *prHMMs = NULL;
971
+
972
+    /* MSA order in which nodes/profiles are to be merged/aligned
973
+       (order of nodes in guide tree (left/right)*/
974
+    int *piOrderLR = NULL;
975
+
976
+    /* weights per sequence */
977
+    double *pdSeqWeights = NULL;
978
+
979
+    /* Iteration
980
+     */
981
+    int iIterationCounter = 0;
982
+    double dAlnScore;
983
+    /* last dAlnScore for iteration */
984
+    double dLastAlnScore = -666.666;
985
+
986
+    int i, j; /* aux */
987
+
988
+    assert(NULL != prMSeq);
989
+    if (NULL != prMSeqProfile) {
990
+        assert(TRUE == prMSeqProfile->aligned);
991
+    }
992
+
993
+
994
+    /* automatic setting of options
995
+     *
996
+     */
997
+    if (prOpts->bAutoOptions) {
998
+        SetAutoOptions(prOpts, prMSeq->nseqs);
999
+    }
1000
+
1001
+
1002
+#if SHUFFLE_INPUT_SEQ_ORDER
1003
+    /* 
1004
+     * shuffle input:  only useful for testing/debugging 
1005
+     */
1006
+    Log(&rLog, LOG_WARN, "Shuffling input sequences! (Will also change output order)");
1007
+    ShuffleMSeq(prMSeq);
1008
+#endif
1009
+        
1010
+
1011
+#if SORT_INPUT_SEQS
1012
+    /* 
1013
+     * sort input:
1014
+     *
1015
+     * would ensure we *always* (unless we get into the mbed k-means stage)
1016
+     * get the same answer. usually you don't, because most pairwise alignment
1017
+     * scores are in theory not symmetric, therefore sequence ordering might
1018
+     * have an effect on the guide-tree. Sorting by length should get rid of
1019
+     * this (and takes no time even for 100k seqs). Benchmark results on
1020
+     * Balibase show almost no difference after sorting.
1021
+     */
1022
+    Log(&rLog, LOG_WARN, "Sorting input seq by length! This will also change the output order");
1023
+    SortMSeqByLength(prMSeq, 'd');
1024
+
1025
+#endif
1026
+
1027
+
1028
+    /* Read backgrounds HMMs and store in prHMMs
1029
+     *
1030
+     */
1031
+    if (0 < prOpts->iHMMInputFiles) {
1032
+        int iHMMInfileIndex;
1033
+        
1034
+        /**
1035
+         * @warning old structure used to be initialised like this:
1036
+         * hmm_light rHMM = {0};
1037
+         */
1038
+        prHMMs = (hmm_light *) CKMALLOC(prOpts->iHMMInputFiles * sizeof(hmm_light));
1039
+        
1040
+        for (iHMMInfileIndex=0; iHMMInfileIndex<prOpts->iHMMInputFiles; iHMMInfileIndex++) {
1041
+            char *pcHMMInput = prOpts->ppcHMMInput[iHMMInfileIndex];
1042
+            if (OK != readHMMWrapper(&prHMMs[iHMMInfileIndex], pcHMMInput)){
1043
+                Log(&rLog, LOG_ERROR, "Processing of HMM file %s failed", pcHMMInput);
1044
+                return -1;
1045
+            }
1046
+            
1047
+#if 0
1048
+            Log(&rLog, LOG_FORCED_DEBUG, "HMM length is %d", prHMMs[iHMMInfileIndex].L);
1049
+            Log(&rLog, LOG_FORCED_DEBUG, "n-display  is %d", prHMMs[iHMMInfileIndex].n_display);
1050
+            for (i = 0; NULL != prHMMs[prOpts->iHMMInputFiles].seq[i]; i++){
1051
+                printf("seq[%d]: %s\n", i, prHMMs[iHMMInfileIndex].seq[i]);
1052
+            }
1053
+            Log(&rLog, LOG_FORCED_DEBUG, "Neff_HMM   is %f", prHMMs[iHMMInfileIndex].Neff_HMM);
1054
+#endif
1055
+            if (rLog.iLogLevelEnabled <= LOG_DEBUG){
1056
+                Log(&rLog, LOG_DEBUG, "print frequencies");
1057
+                for (i = 0; i < prHMMs[iHMMInfileIndex].L; i++){
1058
+#define PRINT_TAIL 5
1059
+                    if ( (PRINT_TAIL+1 == i) && (prHMMs[iHMMInfileIndex].L-PRINT_TAIL != i) ){
1060
+                        printf("....\n");
1061
+                    }
1062
+                    if ( (i > PRINT_TAIL) && (i < prHMMs[iHMMInfileIndex].L-PRINT_TAIL) ){
1063
+                        continue;
1064
+                    }
1065
+                    printf("%3d:", i);
1066
+                    for (j = 0; j < 20; j++){
1067
+                        printf("\t%1.3f", prHMMs[iHMMInfileIndex].f[i][j]);
1068
+                    }
1069
+                    printf("\n");
1070
+                }
1071
+            } /* debug print block */
1072
+            
1073
+            CKFREE(prOpts->ppcHMMInput[iHMMInfileIndex]);
1074
+        } /* for each background HMM file */
1075
+        CKFREE(prOpts->ppcHMMInput);
1076
+    } /* there were background HMM files */
1077
+    
1078
+
1079
+    
1080
+    /* If the input ("non-profile") sequences are aligned, then turn
1081
+     * the alignment into a HMM and add to the list of background HMMs
1082
+     *
1083
+     */
1084
+    if (TRUE == prMSeq->aligned) {
1085
+        /* FIXME: gcc warns about missing initialiser here (-Wall -Wextra -pedantic) */
1086
+        hmm_light rHMMLocal = {0};
1087
+        
1088
+        Log(&rLog, LOG_INFO,
1089
+            "Input sequences are aligned. Will turn alignment into HMM and add it to the user provided background HMMs.");
1090
+        /* certain gap parameters ('~' MSF) cause problems, 
1091
+           sanitise them; FS, r258 -> r259 */
1092
+        SanitiseUnknown(prMSeq);
1093
+        if (OK !=
1094
+#if INDIRECT_HMM 
1095
+            AlnToHMM(&rHMMLocal, prMSeq)
1096
+#else
1097
+            AlnToHMM2(&rHMMLocal, prMSeq->seq, prMSeq->nseqs)
1098
+#endif
1099
+            ) {
1100
+            Log(&rLog, LOG_ERROR, "Couldn't convert aligned input sequences to HMM. Will try to continue");
1101
+        } else {
1102
+            prHMMs = (hmm_light *) CKREALLOC(prHMMs, ((prOpts->iHMMInputFiles+1) * sizeof(hmm_light)));
1103
+            memcpy(&(prHMMs[prOpts->iHMMInputFiles]), &rHMMLocal, sizeof(hmm_light));
1104
+            prOpts->iHMMInputFiles++;
1105
+        }
1106
+    }
1107
+
1108
+    
1109
+    /* If we have a profile turn it into a HMM and add to
1110
+     * the list of background HMMs.
1111
+     *
1112
+     */
1113
+    if (NULL != prMSeqProfile) {
1114
+        /* FIXME: gcc warns about missing initialiser here (-Wall -Wextra -pedantic) */
1115
+        hmm_light rHMMLocal = {0};
1116
+        Log(&rLog, LOG_INFO,
1117
+            "Turning profile1 into HMM and will use it during progressive alignment.");
1118
+        if (OK !=
1119
+#if INDIRECT_HMM 
1120
+            AlnToHMM(&rHMMLocal, prMSeqProfile)
1121
+#else 
1122
+            AlnToHMM2(&rHMMLocal, prMSeqProfile->seq, prMSeqProfile->nseqs)
1123
+#endif
1124
+            ) {
1125
+            Log(&rLog, LOG_ERROR, "Couldn't convert profile1 to HMM. Will try to continue");
1126
+        } else {
1127
+            prHMMs = (hmm_light *) CKREALLOC(prHMMs, ((prOpts->iHMMInputFiles+1) * sizeof(hmm_light)));
1128
+            memcpy(&(prHMMs[prOpts->iHMMInputFiles]), &rHMMLocal, sizeof(hmm_light));
1129
+            prOpts->iHMMInputFiles++;
1130
+        }
1131
+    }
1132
+
1133
+
1134
+    /* Now do a first alignment of the input sequences (prMSeq) adding
1135
+     * all collected background HMMs
1136
+     *
1137
+     */
1138
+    /* Determine progressive alignment order
1139
+     */
1140
+    if (TRUE == prMSeq->aligned) {
1141
+        if ( (SEQTYPE_PROTEIN == prMSeq->seqtype) && (TRUE == prOpts->bUseKimura) ){
1142
+            Log(&rLog, LOG_INFO, "%s %s",
1143
+                "Input sequences are aligned.",
1144
+                "Will use Kimura distances of aligned sequences.");
1145
+            prOpts->iPairDistType = PAIRDIST_SQUIDID_KIMURA;
1146
+        }
1147
+        else {
1148
+            prOpts->iPairDistType = PAIRDIST_SQUIDID;
1149
+        }
1150
+    }
1151
+
1152
+#if 0
1153
+    Log(&rLog, LOG_WARN, "Using a sequential alignment order.");
1154
+    SequentialAlignmentOrder(&piOrderLR, prMSeq->nseqs);
1155
+#else
1156
+    if (OK != AlignmentOrder(&piOrderLR, &pdSeqWeights, prMSeq,
1157
+                             prOpts->iPairDistType,
1158
+                             prOpts->pcDistmatInfile, prOpts->pcDistmatOutfile,
1159
+                             prOpts->iClusteringType, prOpts->iClustersizes, 
1160
+                             prOpts->pcGuidetreeInfile, prOpts->pcGuidetreeOutfile, prOpts->pcClustfile, 
1161
+                             prOpts->bUseMbed, prOpts->bPercID)) {
1162
+        Log(&rLog, LOG_ERROR, "AlignmentOrder() failed. Cannot continue");
1163
+        return -1;
1164
+    }
1165
+#endif
1166
+
1167
+    /* if max-hmm-iter is set < 0 then do not perform alignment 
1168
+     * there is a problem/feature(?) that the un-aligned sequences are output 
1169
+     */
1170
+    if (prOpts->iMaxHMMIterations < 0){
1171
+        Log(&rLog, LOG_VERBOSE,
1172
+            "iMaxHMMIterations < 0 (%d), will not perform alignment", prOpts->iMaxHMMIterations);
1173
+        return 0;
1174
+    }
1175
+
1176
+
1177
+    /* Progressive alignment of input sequences. Order defined by
1178
+     * branching of guide tree (piOrderLR). Use optional
1179
+     * background HMM information (prHMMs[0..prOpts->iHMMInputFiles-1])
1180
+     *
1181
+     */
1182
+    dAlnScore = HHalignWrapper(prMSeq, piOrderLR, pdSeqWeights,
1183
+                               2*prMSeq->nseqs -1/* nodes */,
1184
+                               prHMMs, prOpts->iHMMInputFiles, -1, prOpts->rHhalignPara);
1185
+    dLastAlnScore = dAlnScore;
1186
+    Log(&rLog, LOG_VERBOSE,
1187
+        "Alignment score for first alignment = %f", dAlnScore);        
1188
+
1189
+
1190
+
1191
+
1192
+    /* ------------------------------------------------------------
1193
+     *
1194
+     * prMSeq is aligned now. Now start iterations if requested and save the
1195
+     * alignment at the very end.
1196
+     *
1197
+     * @note We discard the background HMM information at this point,
1198
+     * because it was already used. Could consider to make this choice
1199
+     * optional.  FIXME
1200
+     *
1201
+     * ------------------------------------------------------------ */
1202
+
1203
+    
1204
+    /* iteration after first alignment was computed (if not profile-profile
1205
+     * alignment)
1206
+     *
1207
+     */
1208
+    for (iIterationCounter=0;
1209
+         (iIterationCounter < prOpts->iNumIterations || prOpts->bIterationsAuto);
1210
+         iIterationCounter++) {
1211
+
1212
+        hmm_light rHMMLocal = {0};
1213
+        /* FIXME Keep copy of old alignment in case new one sucks? */
1214
+
1215
+
1216
+        if (iIterationCounter >= prOpts->iMaxHMMIterations
1217
+            &&
1218
+            iIterationCounter >= prOpts->iMaxGuidetreeIterations) {
1219
+            Log(&rLog, LOG_VERBOSE, "Reached maximum number of HMM and guide-tree iterations");
1220
+            break;
1221
+        }
1222
+        
1223
+        if (! prOpts->bIterationsAuto) {
1224
+            Log(&rLog, LOG_INFO, "Iteration step %d out of %d", 
1225
+                 iIterationCounter+1, prOpts->iNumIterations);
1226
+        } else {
1227
+            Log(&rLog, LOG_INFO, "Iteration step %d out of <auto>", 
1228
+                 iIterationCounter+1);
1229
+        }
1230
+#if 0
1231
+        if (rLog.iLogLevelEnabled <= LOG_VERBOSE) {
1232
+            char zcIntermediate[1000] = {0};
1233
+            char *pcFormat = "fasta";
1234
+            sprintf(zcIntermediate, "clustalo-aln-iter~%d~", iIterationCounter);
1235
+#define LINE_WRAP 60
1236
+            if (WriteAlignment(prMSeq, zcIntermediate, MSAFILE_A2M, LINE_WRAP, &msa_c, &msa_v)) {
1237
+                Log(&rLog, LOG_ERROR, "Could not save alignment to %s", zcIntermediate);
1238
+                return -1;
1239
+            }
1240
+        }
1241
+#endif
1242
+
1243
+
1244
+        /* new guide-tree
1245
+         *
1246
+         */
1247
+        if (iIterationCounter < prOpts->iMaxGuidetreeIterations) {
1248
+            /* determine progressive alignment order
1249
+             *
1250
+             * few things are different now when calling AlignmentOrder:
1251
+             * - we have to ignore prOpts->pcDistmatInfile and pcGuidetreeInfile
1252
+             *   as they were used before
1253
+             * - the corresponding outfiles are still valid though
1254
+             */
1255
+            /* Free stuff that has already been allocated by or further
1256
+             * downstream of AlignmentOrder()
1257
+             */
1258
+            if (NULL != piOrderLR)
1259
+                CKFREE(piOrderLR);
1260
+            if (NULL != pdSeqWeights)
1261
+                CKFREE(pdSeqWeights);
1262
+            Log(&rLog, LOG_INFO, "Computing new guide tree (iteration step %d)");
1263
+            if (AlignmentOrder(&piOrderLR, &pdSeqWeights, prMSeq,
1264
+                               ((SEQTYPE_PROTEIN == prMSeq->seqtype) && (TRUE == prOpts->bUseKimura)) ? PAIRDIST_SQUIDID_KIMURA : PAIRDIST_SQUIDID, 
1265
+                               NULL, prOpts->pcDistmatOutfile,
1266
+                               prOpts->iClusteringType, prOpts->iClustersizes,
1267
+                               NULL, prOpts->pcGuidetreeOutfile, prOpts->pcClustfile, 
1268
+                               prOpts->bUseMbedForIteration, prOpts->bPercID)) {
1269
+                Log(&rLog, LOG_ERROR, "AlignmentOrder() failed. Cannot continue");
1270
+                return -1;
1271
+            }
1272
+        } else {
1273
+            Log(&rLog, LOG_INFO, "Skipping guide-tree iteration at iteration step %d (reached maximum)", 
1274
+                iIterationCounter);
1275
+        }
1276
+
1277
+
1278
+        /* new local hmm iteration
1279
+         *
1280
+         */
1281
+        /* non-residue/gap characters will crash AlnToHMM2(), 
1282
+           therefore sanitise unknown characters, FS, r259 -> r260 */
1283
+        SanitiseUnknown(prMSeq);
1284
+        if (iIterationCounter < prOpts->iMaxHMMIterations) {
1285
+            Log(&rLog, LOG_INFO, "Computing HMM from alignment");
1286
+            
1287
+            if (OK !=
1288
+#if INDIRECT_HMM 
1289
+                AlnToHMM(&rHMMLocal, prMSeq)
1290
+#else
1291
+                AlnToHMM2(&rHMMLocal, prMSeq->seq, prMSeq->nseqs)
1292
+#endif
1293
+                ) {
1294
+                Log(&rLog, LOG_ERROR, "Couldn't convert alignment to HMM. Will stop iterating now...");
1295
+                break;
1296
+            }
1297
+        } else {
1298
+            Log(&rLog, LOG_INFO, "Skipping HMM iteration at iteration step %d (reached maximum)", 
1299
+                 iIterationCounter);
1300
+        }
1301
+
1302
+        
1303
+        /* align the sequences (again)
1304
+         */
1305
+        dAlnScore = HHalignWrapper(prMSeq, piOrderLR, pdSeqWeights,
1306
+                                   2*prMSeq->nseqs -1/* nodes */, &rHMMLocal, 1, -1, 
1307
+                                   prOpts->rHhalignPara);
1308
+        Log(&rLog, LOG_VERBOSE,
1309
+            "Alignment score for alignmnent in hmm-iteration no %d = %f (last score = %f)",
1310
+             iIterationCounter+1, dAlnScore, dLastAlnScore);
1311
+        
1312
+        
1313
+        FreeHMMstruct(&rHMMLocal);
1314
+
1315
+#if 0
1316
+        /* FIXME: need a better score for automatic iteration */
1317
+        if (prOpts->bIterationsAuto) {
1318
+            /* automatic iteration: break if score improvement was not
1319
+             * big enough
1320
+             */
1321
+            double dScoreImprovement = (dAlnScore-dLastAlnScore)/dLastAlnScore;
1322
+            if (dScoreImprovement < ITERATION_SCORE_IMPROVEMENT_THRESHOLD) {
1323
+                Log(&rLog, LOG_INFO,
1324
+                    "Stopping after %d guide-tree iterations. No further alignment score improvement achieved.",
1325
+                     iIterationCounter+1);
1326
+                /* use previous alignment */
1327
+                FreeMSeq(&prMSeq);
1328
+                Log(&rLog, LOG_FORCED_DEBUG, "FIXME: %s", "CopyMSeq breaks things in this context");
1329
+                CopyMSeq(&prMSeq, prMSeqCopy);
1330
+                /* FIXME: prOpts->pcDistmatOutfile and pcGuidetreeOutfile
1331
+                 * might have been updated, but then discarded here?
1332
+                 */
1333
+                break;
1334
+            } else {
1335
+                Log(&rLog, LOG_INFO,
1336
+                    "Got a %d%% better score in iteration step %d",
1337
+                     (int)dScoreImprovement*100, iIterationCounter+1);
1338
+                FreeMSeq(&prMSeqCopy);
1339
+            }
1340
+        }
1341
+        dLastAlnScore = dAlnScore;
1342
+#endif
1343
+
1344
+    }
1345
+    /* end of iterations */
1346
+
1347
+
1348
+
1349
+    /* Last step: if a profile was also provided then align now-aligned mseq
1350
+     * with this profile
1351
+     *
1352
+     * Don't use the backgrounds HMMs anymore and don't iterate.
1353
+     * (which was done before).
1354
+     *
1355
+     */
1356
+    if (NULL != prMSeqProfile) {
1357
+        if (AlignProfiles(prMSeq, prMSeqProfile, prOpts->rHhalignPara)) {
1358
+            Log(&rLog, LOG_ERROR, "An error occured during the profile/profile alignment");
1359
+            return -1;
1360
+        }
1361
+    }
1362
+
1363
+    
1364
+    if (NULL != piOrderLR) {
1365
+        CKFREE(piOrderLR);
1366
+    }
1367
+    if (NULL != pdSeqWeights) {
1368
+        CKFREE(pdSeqWeights);
1369
+    }
1370
+    if (0 < prOpts->iHMMInputFiles) {
1371
+        for (i=0; i<prOpts->iHMMInputFiles; i++) {
1372
+            FreeHMMstruct(&prHMMs[i]);
1373
+        }
1374
+        CKFREE(prHMMs);
1375
+    }
1376
+
1377
+    return 0;
1378
+}
1379
+/* end of Align() */
1380
+
1381
+
1382
+
1383
+
1384
+/**
1385
+ * @brief Align two profiles, ie two sets of prealigned sequences. Already
1386
+ * aligned columns won't be changed.
1387
+ *
1388
+ * @param[out] prMSeqProfile1
1389
+ * First profile/aligned set of sequences. Merged alignment will be found in
1390
+ * here.
1391
+ * @param[in] prMSeqProfile2
1392
+ * First profile/aligned set of sequences
1393
+ * @param[in] rHhalignPara
1394
+ * FIXME
1395
+ *
1396
+ * @return 0 on success, -1 on failure
1397
+ *