Browse code

add package to the repository

msa


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a8-67e5-0310-9ffc-ced21a209358

Sonali Arora authored on 10/04/2015 00:12:33
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,1229 @@
1
+/* -*- mode: c; tab-width: 4; c-basic-offset: 4;  indent-tabs-mode: nil -*- */
2
+
3
+/*********************************************************************
4
+ * Clustal Omega - Multiple sequence alignment
5
+ *
6
+ * Copyright (C) 2010 University College Dublin
7
+ *
8
+ * Clustal-Omega is free software; you can redistribute it and/or
9
+ * modify it under the terms of the GNU General Public License as
10
+ * published by the Free Software Foundation; either version 2 of the
11
+ * License, or (at your option) any later version.
12
+ *
13
+ * This file is part of Clustal-Omega.
14
+ *
15
+ ********************************************************************/
16
+
17
+/*
18
+ *  RCS $Id: mymain.c 285 2013-06-12 11:45:17Z fabian $
19
+ */
20
+
21
+#ifdef HAVE_CONFIG_H
22
+#include "config.h"
23
+#endif
24
+
25
+#include <stdio.h>
26
+#include <string.h>
27
+#include <assert.h>
28
+#include <unistd.h>
29
+#include "argtable2/argtable2.h"
30
+#include <ctype.h>
31
+#include <limits.h>
32
+#include <libgen.h> /* for basename only */
33
+
34
+/* clustal */
35
+#include "clustal-omega.h"
36
+
37
+#include "mymain.h"
38
+
39
+#include "exceptions4c/e4c_lite.h"
40
+
41
+typedef struct {
42
+
43
+    /* Sequence input
44
+     */
45
+    /** sequence type (from cmdline arg) */
46
+    int iSeqType;
47
+    /** sequence input file. not directly used by Align() */
48
+    char *pcSeqInfile;
49
+    /** Dealign sequences on input. Otherwise we use the alignment
50
+     * info for background-HMM creation */
51
+    bool bDealignInputSeqs;
52
+
53
+    /** Sequence input format
54
+     */
55
+    int iSeqInFormat;
56
+
57
+    /* profiles: : pre-aligned sequences, whose alignment will not be changed 
58
+     */
59
+    /** profile 1: pre-aligned sequence input. not directly used by Align() */
60
+    char *pcProfile1Infile ;
61
+    /** profile 2: pre-aligned sequence input. not directly used by Align() */
62
+    char *pcProfile2Infile;        
63
+    /** profiles that contain no gaps are rejected, force them */
64
+    bool bIsProfile; 
65
+    /** up to version 1.1.1 Kimura distance was default, change default, make Kimura optional */
66
+    //bool bUseKimura; 
67
+    /** distance matrix output is default, allow %-ID output */
68
+    bool bPercentID; 
69
+
70
+    /** Input limitations
71
+     */
72
+    /** maximum allowed number of input sequences */
73
+    int iMaxNumSeq;
74
+    /** maximum allowed input sequence length */
75
+    int iMaxSeqLen;
76
+
77
+    /* Alignment output
78
+     */
79
+    /** alignment output file */
80
+    char *pcAlnOutfile;
81
+    /** alignment output format */
82
+    int iAlnOutFormat;
83
+    /** force overwriting of files */
84
+    bool bForceFileOverwrite;
85
+    /** force sequence from R */
86
+    bool bRSequence;
87
+
88
+    /* line wrapping, FS, r274 -> */
89
+    int iWrap;
90
+    /* residue number in Clustal format, FS, r274 -> */
91
+    bool bResno;
92
+    /* output order, FS, r274 -> */
93
+    int iOutputOrder; 
94
+
95
+    /* multithreading
96
+     */
97
+    /** number of threads */
98
+    int iThreads;
99
+
100
+    /* logging 
101
+     */
102
+    char *pcLogFile;
103
+
104
+    opts_t aln_opts;
105
+
106
+    /* changes here will have to be reflected in SetDefaultUserOpts(),
107
+     * FreeUserOpts(), PrintUserOpts() and UserOptsLogicCheck() etc
108
+     */
109
+} cmdline_opts_t;
110
+
111
+
112
+
113
+/* log-file used for non-essential logging in prLog */
114
+FILE *prLogFile = NULL;
115
+
116
+const char *CITATION = " Sievers F, Wilm A, Dineen D, Gibson TJ, Karplus K, Li W, Lopez R, McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG."
117
+    "\n"
118
+    " Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega."
119
+    "\n"
120
+    " Mol Syst Biol. 2011 Oct 11;7:539. doi: 10.1038/msb.2011.75. PMID: 21988835.";
121
+
122
+
123
+
124
+/**
125
+ * @brief Sets default user/commandline options
126
+ *
127
+ * @param[out] opts
128
+ * The option structure to initialise
129
+ *
130
+ */
131
+void
132
+SetDefaultUserOpts(cmdline_opts_t *opts)
133
+{
134
+    assert(NULL != opts);
135
+
136
+    opts->iSeqType = SEQTYPE_UNKNOWN;
137
+    opts->pcSeqInfile = NULL;
138
+    opts->bDealignInputSeqs = FALSE;        
139
+    opts->pcProfile1Infile = NULL;
140
+    opts->pcProfile2Infile = NULL;
141
+    opts->bIsProfile = FALSE;
142
+    opts->aln_opts.bUseKimura = FALSE;
143
+	opts->aln_opts.bPercID = FALSE;
144
+
145
+    opts->iMaxNumSeq = INT_MAX;
146
+    opts->iMaxSeqLen = INT_MAX;
147
+    
148
+    opts->pcAlnOutfile = NULL;
149
+    opts->iAlnOutFormat =  MSAFILE_A2M;
150
+    opts->bForceFileOverwrite = FALSE;
151
+    opts->bRSequence = FALSE;
152
+    opts->iWrap = 60;
153
+    opts->bResno = FALSE;
154
+    opts->iOutputOrder = INPUT_ORDER;
155
+
156
+#ifdef WIN32
157
+    opts->iThreads = 1;
158
+#elif HAVE_OPENMP
159
+    /* defaults to # of CPUs */
160
+    opts->iThreads = omp_get_max_threads();
161
+#else
162
+	opts->iThreads = 1;
163
+#endif
164
+    
165
+    opts->pcLogFile = NULL;
166
+
167
+    SetDefaultAlnOpts(& opts->aln_opts);
168
+}
169
+/* end of SetDefaultUserOpts() */
170
+
171
+
172
+
173
+
174
+/**
175
+ * @brief FIXME add doc
176
+ *
177
+ */
178
+void
179
+PrintUserOpts(FILE *prFile, cmdline_opts_t *opts) {
180
+    
181
+    /* keep in same order as in struct. FIXME could this be derived from argtable?
182
+     */
183
+    fprintf(prFile, "seq-type = %s\n", SeqTypeToStr(opts->iSeqType));
184
+    fprintf(prFile, "seq-in-fmt = %s\n", SeqfileFormat2String(opts->iSeqInFormat));
185
+    fprintf(prFile, "option: seq-in = %s\n", 
186
+            NULL != opts->pcSeqInfile? opts->pcSeqInfile: "(null)");
187
+    fprintf(prFile, "option: dealign = %d\n", opts->bDealignInputSeqs);
188
+    fprintf(prFile, "option: profile1 = %s\n", 
189
+            NULL != opts->pcProfile1Infile? opts->pcProfile1Infile: "(null)");
190
+    fprintf(prFile, "option: profile2 = %s\n",
191
+            NULL != opts->pcProfile2Infile? opts->pcProfile2Infile: "(null)");
192
+    //fprintf(prFile, "option: percent-id = %d\n", opts->bPercID);
193
+    fprintf(prFile, "option: is-profile = %d\n", opts->bIsProfile);
194
+    //fprintf(prFile, "option: use-kimura = %d\n", opts->aln_opts.bUseKimura);
195
+
196
+    fprintf(prFile, "option: max-num-seq = %d\n", opts->iMaxNumSeq);
197
+    fprintf(prFile, "option: max-seq-len = %d\n", opts->iMaxSeqLen);
198
+    fprintf(prFile, "option: aln-out-file = %s\n", 
199
+            NULL != opts->pcAlnOutfile? opts->pcAlnOutfile: "(null)");
200
+    fprintf(prFile, "option: aln-out-format = %s\n", SeqfileFormat2String(opts->iAlnOutFormat));
201
+    fprintf(prFile, "option: force-file-overwrite = %d\n", opts->bForceFileOverwrite);
202
+    fprintf(prFile, "option: sequence from R = %d\n", opts->bRSequence);
203
+    fprintf(prFile, "option: line wrap = %d\n", opts->iWrap);
204
+    fprintf(prFile, "option: print residue numbers = %d\n", opts->bResno);
205
+    fprintf(prFile, "option: order alignment like input/tree = %d\n", opts->iOutputOrder);
206
+
207
+    fprintf(prFile, "option: threads = %d\n", opts->iThreads);
208
+    fprintf(prFile, "option: logFile = %s\n", opts->pcLogFile);
209
+}
210
+/* end of PrintUserOpts */
211
+
212
+
213
+
214
+/**
215
+ * @brief Frees user opt members allocated during parsing
216
+ *
217
+ * @param[out] user_opts
218
+ * user options whose members are to free
219
+ *
220
+ * @see ParseCommandLine()
221
+ *
222
+ */    
223
+void
224
+FreeUserOpts(cmdline_opts_t *user_opts)
225
+{
226
+
227
+    if (NULL != user_opts->pcSeqInfile) {
228
+        CKFREE(user_opts->pcSeqInfile);
229
+    }
230
+    if (NULL != user_opts->pcProfile1Infile) {
231
+        CKFREE(user_opts->pcProfile1Infile);
232
+    }
233
+    if (NULL != user_opts->pcProfile2Infile) {
234
+        CKFREE(user_opts->pcProfile2Infile);
235
+    }
236
+    if (NULL != user_opts->pcAlnOutfile) {
237
+        CKFREE(user_opts->pcAlnOutfile);
238
+    }
239
+    if (NULL != user_opts->pcLogFile) {
240
+        CKFREE(user_opts->pcLogFile);
241
+    }
242
+
243
+    FreeAlnOpts(& user_opts->aln_opts);
244
+
245
+    return; 
246
+}
247
+/* end of FreeUserOpts() */
248
+
249
+
250
+
251
+
252
+/**
253
+ * @brief Do quick&dirty logic check of used options and call Log(&rLog, LOG_FATAL, ) in case
254
+ * of any inconsistencies
255
+ *
256
+ * @param[in] opts
257
+ * option structure to check
258
+ *
259
+ */
260
+void
261
+UserOptsLogicCheck(cmdline_opts_t *opts)
262
+{
263
+    /* sequence input
264
+     *
265
+     */
266
+    if (NULL == opts->pcSeqInfile && !opts->bRSequence) {
267
+        if (NULL == opts->pcProfile1Infile && NULL == opts->pcProfile2Infile) {
268
+            Log(&rLog, LOG_FATAL, "No sequence input was provided. For more information try: --help");
269
+        }
270
+    } else {
271
+        if (NULL != opts->pcProfile1Infile && NULL != opts->pcProfile2Infile) {
272
+            Log(&rLog, LOG_FATAL, "Can't align two profile alignments AND a 'normal' sequence file");
273
+        }
274
+    }
275
+    /* if a profile was given it should always be no 1, not 2 */
276
+    if (NULL == opts->pcProfile1Infile && NULL != opts->pcProfile2Infile) {
277
+        Log(&rLog, LOG_FATAL, "Got a second profile, but no first one.");
278
+    }
279
+
280
+    /* alignment output
281
+     */
282
+    if (rLog.iLogLevelEnabled < LOG_WARN && NULL==opts->pcAlnOutfile && NULL==opts->pcLogFile) {
283
+        Log(&rLog, LOG_FATAL, "%s %s",
284
+              "You requested alignment output to stdout and verbose logging.",
285
+              " Alignment and log messages would get mixed up.");
286
+    }
287
+    /* distance matrix output impossible in mBed mode, only have clusters, FS, r254 ->  */
288
+#if 1
289
+    /* allow distance matrix output if initial mBed but subsequent full matrix during iteration, FS, r274 -> */
290
+    if (NULL != opts->aln_opts.pcDistmatOutfile){
291
+        if ( (TRUE == opts->aln_opts.bUseMbed) && (opts->aln_opts.iNumIterations < 1) ){
292
+            Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
293
+        }
294
+        if ( (TRUE == opts->aln_opts.bUseMbed) && (TRUE == opts->aln_opts.bUseMbedForIteration) ){
295
+            Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
296
+        }
297
+        if ( (TRUE == opts->aln_opts.bUseMbed) && (opts->aln_opts.iNumIterations > 0) && 
298
+             (opts->aln_opts.iMaxGuidetreeIterations < 1) ){
299
+            Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
300
+        }
301
+    }
302
+#else
303
+    if ( (NULL != opts->aln_opts.pcDistmatOutfile) && (TRUE == opts->aln_opts.bUseMbed) ) {
304
+        Log(&rLog, LOG_FATAL, "Distance Matrix output not possible in mBed mode.");
305
+    }
306
+#endif
307
+
308
+    /* percentage identity cannot be printed in Kimura mode */
309
+    if ( (TRUE == opts->aln_opts.bUseKimura) && (TRUE == opts->aln_opts.bPercID) ){
310
+        Log(&rLog, LOG_FATAL, "Percentage Identity cannot be calculated if Kimura Distances are used.");
311
+    }
312
+
313
+    AlnOptsLogicCheck(& opts->aln_opts);
314
+}
315
+/* end of UserOptsLogicCheck */
316
+
317
+
318
+
319
+/**
320
+ * @brief Parse command line parameters. Will exit if help/usage etc
321
+ * are called or or call Log(&rLog, LOG_FATAL, ) if an error was detected.
322
+ *
323
+ * @param[out] user_opts
324
+ * User parameter struct, with defaults already set.
325
+ * @param[in] argc
326
+ * mains argc
327
+ * @param[in] argv
328
+ * mains argv
329
+ * 
330
+ */    
331
+void
332
+ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv)
333
+{
334
+
335
+     /* argtable command line parsing:
336
+     * see
337
+     * http://argtable.sourceforge.net/doc/argtable2-intro.html
338
+     *
339
+     * basic structure is: arg_xxxN:
340
+     * xxx can be int, lit, db1, str, rex, file or date
341
+     * If N = 0, arguments may appear zero-or-once; N = 1 means
342
+     * exactly once, N = n means up to maxcount times
343
+     *
344
+     *
345
+     * @note: changes here, might also affect main.cpp:ConvertOldCmdLine()
346
+     *
347
+     */  
348
+   
349
+    struct arg_rem  *rem_seq_input  = arg_rem(NULL, "\nSequence Input:");
350
+    struct arg_file *opt_seqin = arg_file0("i", "in,infile",
351
+                                           "{<file>,-}",
352
+                                           "Multiple sequence input file (- for stdin)");
353
+    struct arg_file *opt_hmm_in = arg_filen(NULL, "hmm-in", "<file>",
354
+                                            /*min*/ 0, /*max*/ 128,
355
+                                            "HMM input files");
356
+    struct arg_lit *opt_dealign = arg_lit0(NULL, "dealign",
357
+                                           "Dealign input sequences");
358
+    struct arg_file *opt_profile1 = arg_file0(NULL, "profile1,p1",
359
+                                              "<file>",
360
+                                              "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
361
+    struct arg_file *opt_profile2 = arg_file0(NULL, "profile2,p2",
362
+                                              "<file>",
363
+                                              "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
364
+    struct arg_lit *opt_isprofile = arg_lit0(NULL, "is-profile",
365
+                                             "disable check if profile, force profile (default no)");
366
+
367
+    struct arg_str *opt_seqtype = arg_str0("t", "seqtype",
368
+                                           "{Protein, RNA, DNA}",
369
+                                           "Force a sequence type (default: auto)");
370
+/*    struct arg_lit *opt_force_protein = arg_lit0(NULL, "protein",
371
+                                         "Set sequence type to protein even if Clustal guessed nucleic acid"); */
372
+    struct arg_str *opt_infmt = arg_str0(NULL, "infmt",
373
+                                            "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
374
+                                            "Forced sequence input file format (default: auto)");
375
+    struct arg_lit *opt_resno = arg_lit0(NULL, "residuenumber,resno",
376
+                                         "in Clustal format print residue numbers (default no)");
377
+    
378
+    struct arg_rem  *rem_guidetree  = arg_rem(NULL, "\nClustering:");
379
+    struct arg_str *opt_pairdist = arg_str0("p", "pairdist",
380
+                                             "{ktuple}",
381
+                                             "Pairwise alignment distance measure");
382
+    struct arg_file *opt_distmat_in = arg_file0(NULL, "distmat-in",
383
+                                                "<file>",
384
+                                                "Pairwise distance matrix input file (skips distance computation)");
385
+    struct arg_file *opt_distmat_out = arg_file0(NULL, "distmat-out",
386
+                                                 "<file>",
387
+                                                 "Pairwise distance matrix output file");
388
+    struct arg_file *opt_guidetree_in = arg_file0(NULL, "guidetree-in",
389
+                                                  "<file>",
390
+                                                  "Guide tree input file (skips distance computation and guide-tree clustering step)");
391
+    struct arg_file *opt_guidetree_out = arg_file0(NULL, "guidetree-out",
392
+                                                   "<file>",
393
+                                                   "Guide tree output file");
394
+
395
+    struct arg_dbl *opt_gapopen = arg_dbl0(NULL, "gapopen", "<double>", "Gap opening, default: 6 (bit)");
396
+    struct arg_dbl *opt_gapext  = arg_dbl0(NULL, "gapext",  "<double>", "Gap extension, default: 1 (bit)");
397
+    /* AW: mbed is default since at least R253
398
+       struct arg_lit *opt_mbed = arg_lit0(NULL, "mbed",
399
+       "Fast, Mbed-like clustering for guide-tree calculation");
400
+       struct arg_lit *opt_mbed_iter = arg_lit0(NULL, "mbed-iter",
401
+       "Use Mbed-like clustering also during iteration");
402
+    */
403
+    /* Note: might be better to use arg_str (mbed=YES/NO) but I don't want to introduce an '=' into pipeline, FS, r250 -> */
404
+    struct arg_lit *opt_full = arg_lit0(NULL, "full",
405
+                                        "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)");
406
+    struct arg_lit *opt_full_iter = arg_lit0(NULL, "full-iter",
407
+                                        "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)");
408
+
409
+    struct arg_int *opt_clustersize = arg_int0(NULL, "cluster-size", "<n>", 
410
+                                               "soft maximum of sequences in sub-clusters"); /* FS, r274 -> */
411
+
412
+    struct arg_file *opt_clustfile = arg_file0(NULL, "clustering-out",
413
+                                               "<file>",
414
+                                               "Clustering output file"); /* FS, r274 -> */
415
+    struct arg_lit *opt_usekimura = arg_lit0(NULL, "use-kimura",
416
+                                             "use Kimura distance correction for aligned sequences (default no)");
417
+    struct arg_lit *opt_percentid = arg_lit0(NULL, "percent-id",
418
+                                             "convert distances into percent identities (default no)");
419
+
420
+    struct arg_str *opt_clustering = arg_str0("c", "clustering",
421
+                                              "{UPGMA}",
422
+                                              "Clustering method for guide tree");
423
+
424
+    
425
+    struct arg_rem *rem_aln_output  = arg_rem(NULL, "\nAlignment Output:");
426
+    struct arg_file *opt_outfile = arg_file0("o", "out,outfile",
427
+                                             "{file,-}",
428
+                                             "Multiple sequence alignment output file (default: stdout)");
429
+    struct arg_str *opt_outfmt = arg_str0(NULL, "outfmt",
430
+                                            "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
431
+                                            "MSA output file format (default: fasta)");
432
+    struct arg_int *opt_wrap = arg_int0(NULL, "wrap", "<n>", 
433
+                                        "number of residues before line-wrap in output");
434
+    struct arg_str *opt_output_order = arg_str0(NULL, "output-order",
435
+                                                "{input-order,tree-order}",
436
+                                                "MSA output orderlike in input/guide-tree");
437
+
438
+    
439
+    struct arg_rem *rem_iteration  = arg_rem(NULL, "\nIteration:");
440
+    struct arg_str *opt_num_iterations = arg_str0(NULL, "iterations,iter",
441
+                                                  /* FIXME "{<n>,auto}", "Number of combined guide-tree/HMM iterations"); */
442
+                                                  "<n>", "Number of (combined guide-tree/HMM) iterations");
443
+    struct arg_int *opt_max_guidetree_iterations = arg_int0(NULL, "max-guidetree-iterations",
444
+                                                            "<n>", "Maximum number guidetree iterations");
445
+    struct arg_int *opt_max_hmm_iterations = arg_int0(NULL, "max-hmm-iterations",
446
+                                                      "<n>", "Maximum number of HMM iterations");
447
+
448
+   
449
+    struct arg_rem *rem_limits  = arg_rem(NULL, "\nLimits (will exit early, if exceeded):");
450
+    struct arg_int *opt_max_seq = arg_int0(NULL, "maxnumseq", "<n>",
451
+                                           "Maximum allowed number of sequences");
452
+    struct arg_int *opt_max_seqlen = arg_int0(NULL, "maxseqlen", "<l>", 
453
+                                              "Maximum allowed sequence length");
454
+
455
+
456
+    struct arg_rem *rem_misc  = arg_rem(NULL, "\nMiscellaneous:");
457
+
458
+    struct arg_lit *opt_autooptions = arg_lit0(NULL, "auto",
459
+                                         "Set options automatically (might overwrite some of your options)");
460
+    struct arg_int *opt_threads = arg_int0(NULL, "threads", "<n>", 
461
+                                              "Number of processors to use");
462
+    struct arg_file *opt_logfile = arg_file0("l", "log",
463
+                                             "<file>",
464
+                                             "Log all non-essential output to this file");
465
+    struct arg_lit *opt_help = arg_lit0("h", "help",
466
+                                         "Print this help and exit");
467
+    struct arg_lit *opt_version = arg_lit0(NULL, "version",
468
+                                           "Print version information and exit");
469
+    struct arg_lit *opt_long_version = arg_lit0(NULL, "long-version",
470
+                                           "Print long version information and exit");
471
+    struct arg_lit *opt_verbose = arg_litn("v", "verbose",
472
+                                           0, 3,
473
+                                           "Verbose output (increases if given multiple times)");
474
+    struct arg_lit *opt_force = arg_lit0(NULL, "force",
475
+                                         "Force file overwriting");
476
+    struct arg_lit *opt_r_seq = arg_lit0(NULL, "R",
477
+                                             "Force sequence from R");
478
+    struct arg_int *opt_macram = arg_int0(NULL, "MAC-RAM", "<n>", /* keep this quiet for the moment, FS r240 -> */
479
+                                          NULL/*"maximum amount of RAM to use for MAC algorithm (in MB)"*/);
480
+
481
+
482
+    struct arg_end *opt_end = arg_end(10); /* maximum number of errors
483
+                                                * to store */
484
+
485
+
486
+    void *argtable[] = {rem_seq_input,
487
+                        opt_seqin,
488
+                        opt_hmm_in,
489
+                        opt_dealign,
490
+                        opt_profile1,
491
+                        opt_profile2,
492
+                        opt_isprofile, /* FS, r282 ->*/
493
+                        opt_seqtype,
494
+                        /* opt_force_protein, */
495
+                        opt_infmt,
496
+                        rem_guidetree,
497
+#if 0
498
+                        /* no other options then default available or not implemented */
499
+                        opt_pairdist,
500
+#endif
501
+                        opt_distmat_in,
502
+                        opt_distmat_out,
503
+                        opt_guidetree_in,
504
+                        opt_guidetree_out,
505
+                         //added for R
506
+                        opt_gapopen,
507
+                        opt_gapext,
508
+                        //added for R
509
+                        opt_full, /* FS, r250 -> */
510
+                        opt_full_iter, /* FS, r250 -> */
511
+                        opt_clustersize, /* FS, r274 -> */
512
+                        opt_clustfile, /* FS, r274 -> */
513
+                        opt_usekimura, /* FS, r282 ->*/
514
+                        opt_percentid, /* FS, r282 ->*/
515
+#if 0
516
+                        /* no other options then default available */
517
+                        opt_clustering,
518
+#endif
519
+                        rem_aln_output,
520
+                        opt_outfile,
521
+                        opt_outfmt,
522
+                        opt_resno,  /* FS, 274 -> */
523
+                        opt_wrap, /* FS, 274 -> */
524
+                        opt_output_order, /* FS, 274 -> */
525
+
526
+                        rem_iteration,
527
+                        opt_num_iterations,
528
+                        opt_max_guidetree_iterations,
529
+                        opt_max_hmm_iterations,
530
+
531
+                        rem_limits,
532
+                        opt_max_seq,
533
+                        opt_max_seqlen,
534
+
535
+                        rem_misc,
536
+                        opt_autooptions,
537
+                        opt_threads,
538
+                        opt_logfile,
539
+                        opt_help,
540
+                        opt_verbose,
541
+                        opt_version,
542
+                        opt_long_version,
543
+                        opt_force,
544
+                        opt_r_seq,
545
+                        opt_macram, /* FS, r240 -> r241 */
546
+
547
+                        opt_end};
548
+
549
+    int nerrors;
550
+
551
+
552
+    /* Verify the argtable[] entries were allocated sucessfully
553
+     */
554
+    if (arg_nullcheck(argtable)) {
555
+        Log(&rLog, LOG_FATAL, "insufficient memory (for argtable allocation)");
556
+    }
557
+
558
+    /* Parse the command line as defined by argtable[]
559
+     */
560
+    nerrors = arg_parse(argc, argv, argtable);
561
+
562
+    /* Special case: '--help' takes precedence over error reporting
563
+     */
564
+    if (opt_help->count > 0) {
565
+        printf("%s - %s (%s)\n", PACKAGE_NAME, PACKAGE_VERSION, PACKAGE_CODENAME);
566
+
567
+        printf("\n");
568
+        printf("If you like Clustal-Omega please cite:\n%s\n", CITATION);
569
+        printf("If you don't like Clustal-Omega, please let us know why (and cite us anyway).\n");
570
+        /* printf("You can contact reach us under %s\n", PACKAGE_BUGREPORT); */
571
+        printf("\n");
572
+        printf("Check http://www.clustal.org for more information and updates.\n");
573
+            
574
+        printf("\n");
575
+        printf("Usage: %s", basename(argv[0]));
576
+        arg_print_syntax(stdout,argtable, "\n");
577
+
578
+        printf("\n");
579
+        printf("A typical invocation would be: %s -i my-in-seqs.fa -o my-out-seqs.fa -v\n",
580
+               basename(argv[0]));
581
+        printf("See below for a list of all options.\n");
582
+
583
+        arg_print_glossary(stdout, argtable, "  %-25s %s\n");
584
+        arg_freetable(argtable, sizeof(argtable)/sizeof(argtable[0]));
585
+        throw(ClustalOmegaException, "0");
586
+    }
587
+
588
+    /* Special case: '--version' takes precedence over error reporting
589
+     */
590
+    if (opt_version->count > 0) {
591
+        printf("%s\n", PACKAGE_VERSION);
592
+        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
593
+        throw(ClustalOmegaException, "0");
594
+    }
595
+
596
+    /* Special case: '--long-version' takes precedence over error reporting
597
+     */
598
+    if (opt_long_version->count > 0) {
599
+        char zcLongVersion[1024];
600
+        PrintLongVersion(zcLongVersion, sizeof(zcLongVersion));
601
+        printf("%s\n", zcLongVersion);
602
+        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
603
+        throw(ClustalOmegaException, "0");
604
+    }
605
+
606
+    /* If the parser returned any errors then display them and exit
607
+     */
608
+    if (nerrors > 0) {
609
+        /* Display the error details contained in the arg_end struct.*/
610
+        arg_print_errors(stdout, opt_end, PACKAGE);
611
+        fprintf(stderr, "For more information try: %s --help\n", argv[0]);
612
+        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
613
+        throw(ClustalOmegaException, "1");
614
+    }
615
+
616
+    
617
+    /* ------------------------------------------------------------
618
+     *
619
+     * Command line successfully parsed. Now transfer values to
620
+     * user_opts. While doing so, make sure that given input files
621
+     * exist and given output files are writable do not exist, or if
622
+     * they do, should be overwritten.
623
+     *
624
+     * No logic checks here! They are done in a different function
625
+     *
626
+     * ------------------------------------------------------------*/
627
+        
628
+    
629
+    /* not part of user_opts because it declared in src/util.h */
630
+    if (0 == opt_verbose->count) {
631
+        rLog.iLogLevelEnabled = LOG_WARN;
632
+    } else if (1 == opt_verbose->count) {
633
+        rLog.iLogLevelEnabled = LOG_INFO;
634
+    } else if (2 == opt_verbose->count) {
635
+        rLog.iLogLevelEnabled = LOG_VERBOSE;
636
+    } else if (3 == opt_verbose->count) {
637
+        rLog.iLogLevelEnabled = LOG_DEBUG;
638
+    }
639
+
640
+    user_opts->aln_opts.bAutoOptions = opt_autooptions->count;
641
+
642
+    user_opts->bDealignInputSeqs = opt_dealign->count;
643
+
644
+    /* NOTE: full distance matrix used to be default - there was
645
+       --mbed flag but no --full flag after r250 decided that mBed
646
+       should be default - now need --full flag to turn off mBed.
647
+       wanted to retain mBed Boolean, so simply added --full flag. if
648
+       both flags set (erroneously) want --mbed to overwrite --full,
649
+       therefore do --full 1st, the --mbed, FS, r250 */
650
+    if (opt_full->count){
651
+        user_opts->aln_opts.bUseMbed = FALSE;
652
+    }
653
+
654
+    if (opt_full_iter->count){
655
+        user_opts->aln_opts.bUseMbedForIteration = FALSE;
656
+    }
657
+
658
+    user_opts->bForceFileOverwrite = opt_force->count;
659
+    if (opt_r_seq->count){
660
+    	user_opts->bRSequence = TRUE;
661
+    }
662
+
663
+    /* log-file
664
+     */
665
+    if (opt_logfile->count > 0) {
666
+        user_opts->pcLogFile = CkStrdup(opt_logfile->filename[0]);
667
+        
668
+        /* warn if already exists or not writable */
669
+        if (FileExists(user_opts->pcLogFile) && ! user_opts->bForceFileOverwrite) {
670
+            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
671
+                  "Cowardly refusing to overwrite already existing file",
672
+                  user_opts->pcLogFile,
673
+                  "Use --force to force overwriting.");
674
+        }
675
+        if (! FileIsWritable(user_opts->pcLogFile)) {
676
+            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
677
+                  user_opts->pcLogFile);
678
+        }
679
+    }
680
+
681
+
682
+    /* normal sequence input (no profile)
683
+     */
684
+    if (opt_seqin->count > 0) {
685
+        user_opts->pcSeqInfile = CkStrdup(opt_seqin->filename[0]);
686
+    }
687
+
688
+    /* Input limitations
689
+     */
690
+    /* maximum number of sequences */
691
+    if (opt_max_seq->count > 0) {
692
+        user_opts->iMaxNumSeq = opt_max_seq->ival[0];
693
+    }
694
+    
695
+    /* maximum sequence length */
696
+    if (opt_max_seqlen->count > 0) {
697
+        user_opts->iMaxSeqLen = opt_max_seqlen->ival[0];
698
+    }
699
+    
700
+    /* Output format
701
+     */
702
+    if (opt_infmt->count > 0) {
703
+        /* avoid gcc warning about discarded qualifier */
704
+        char *tmp = (char *)opt_infmt->sval[0];
705
+        user_opts->iSeqInFormat = String2SeqfileFormat(tmp);
706
+    } else {
707
+        user_opts->iSeqInFormat = SQFILE_UNKNOWN;
708
+    }
709
+
710
+
711
+    /* Sequence type
712
+     */
713
+    if (opt_seqtype->count > 0) {
714
+        if (STR_NC_EQ(opt_seqtype->sval[0], "protein")) {
715
+            user_opts->iSeqType = SEQTYPE_PROTEIN;
716
+        } else if (STR_NC_EQ(opt_seqtype->sval[0], "rna")) {
717
+            user_opts->iSeqType = SEQTYPE_RNA;
718
+        } else if  (STR_NC_EQ(opt_seqtype->sval[0], "dna")) {
719
+            user_opts->iSeqType = SEQTYPE_DNA;
720
+        } else {
721
+            Log(&rLog, LOG_FATAL, "Unknown sequence type '%s'", opt_seqtype->sval[0]);
722
+        }
723
+    }
724
+/*    if (opt_force_protein->count > 0) {
725
+        user_opts->iSeqType = SEQTYPE_PROTEIN;
726
+    } */
727
+
728
+    /* Profile input
729
+     */
730
+    if (opt_profile1->count > 0) {
731
+        user_opts->pcProfile1Infile = CkStrdup(opt_profile1->filename[0]);
732
+        if (! FileExists(user_opts->pcProfile1Infile)) {
733
+            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile1Infile);
734
+        }
735
+    }
736
+    
737
+    if (opt_profile2->count > 0) {
738
+        user_opts->pcProfile2Infile = CkStrdup(opt_profile2->filename[0]);
739
+        if (! FileExists(user_opts->pcProfile2Infile)) {
740
+            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile2Infile);
741
+        }
742
+    }
743
+    
744
+    if (opt_isprofile->count){
745
+        user_opts->bIsProfile = TRUE; 
746
+    }
747
+    if (opt_usekimura->count){
748
+        user_opts->aln_opts.bUseKimura = TRUE;
749
+    }
750
+    if (opt_percentid->count){
751
+        user_opts->aln_opts.bPercID = TRUE;
752
+    }
753
+
754
+    
755
+    /* HMM input
756
+     */
757
+    user_opts->aln_opts.iHMMInputFiles = 0;
758
+    user_opts->aln_opts.ppcHMMInput = NULL;
759
+    if (opt_hmm_in->count>0) {
760
+        int iAux;
761
+        user_opts->aln_opts.iHMMInputFiles = opt_hmm_in->count;
762
+        user_opts->aln_opts.ppcHMMInput = (char **) CKMALLOC(
763
+            user_opts->aln_opts.iHMMInputFiles * sizeof(char*));
764
+        for (iAux=0; iAux<opt_hmm_in->count; iAux++) {
765
+            user_opts->aln_opts.ppcHMMInput[iAux] = CkStrdup(opt_hmm_in->filename[iAux]);
766
+            if (! FileExists(user_opts->aln_opts.ppcHMMInput[iAux])) {
767
+                Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.ppcHMMInput[iAux]);
768
+            }
769
+        }
770
+    }
771
+
772
+
773
+    /* Pair distance method
774
+     */
775
+    if (opt_pairdist->count > 0) {
776
+        if (STR_NC_EQ(opt_pairdist->sval[0], "ktuple")) {
777
+            user_opts->aln_opts.iPairDistType = PAIRDIST_KTUPLE;
778
+        } else {
779
+            Log(&rLog, LOG_FATAL, "Unknown pairdist method '%s'", opt_pairdist->sval[0]);
780
+        }
781
+    }
782
+#if !0
783
+    free(opt_pairdist);
784
+#endif
785
+
786
+
787
+    /* Distance matrix input
788
+     */
789
+    if (opt_distmat_in->count > 0) {
790
+        user_opts->aln_opts.pcDistmatInfile = CkStrdup(opt_distmat_in->filename[0]);
791
+        if (! FileExists(user_opts->aln_opts.pcDistmatInfile)) {
792
+            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcDistmatInfile);
793
+        }
794
+    }
795
+
796
+
797
+    /* Distance matrix output
798
+     */
799
+    if (opt_distmat_out->count > 0) {
800
+        user_opts->aln_opts.pcDistmatOutfile = CkStrdup(opt_distmat_out->filename[0]);
801
+        
802
+        /* warn if already exists or not writable */
803
+        if (FileExists(user_opts->aln_opts.pcDistmatOutfile) && ! user_opts->bForceFileOverwrite) {
804
+            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
805
+                  "Cowardly refusing to overwrite already existing file",
806
+                  user_opts->aln_opts.pcDistmatOutfile,
807
+                  "Use --force to force overwriting.");
808
+        }
809
+        if (! FileIsWritable(user_opts->aln_opts.pcDistmatOutfile)) {
810
+            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
811
+                user_opts->aln_opts.pcDistmatOutfile);
812
+        }
813
+    }
814
+
815
+    /* Clustering
816
+     *
817
+     */
818
+    if (opt_clustering->count > 0) {
819
+        if (STR_NC_EQ(opt_clustering->sval[0], "upgma")) {
820
+            user_opts->aln_opts.iClusteringType = CLUSTERING_UPGMA;
821
+        } else {
822
+            Log(&rLog, LOG_FATAL, "Unknown guide-tree clustering method '%s'", opt_clustering->sval[0]);
823
+        }
824
+    }
825
+#if !0
826
+    free(opt_clustering);
827
+#endif
828
+
829
+    if (opt_clustersize->count > 0){ /* FS, r274 -> */
830
+        if (opt_clustersize->ival[0] > 0){
831
+            user_opts->aln_opts.iClustersizes = opt_clustersize->ival[0];
832
+        }
833
+    }
834
+
835
+    if (opt_clustfile->count > 0){ /* FS, r274 -> */
836
+        user_opts->aln_opts.pcClustfile = CkStrdup(opt_clustfile->filename[0]);
837
+        /*if (! FileExists(user_opts->aln_opts.pcClustfile)) {
838
+            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcClustfile);
839
+            }*/
840
+    }
841
+
842
+
843
+    /* Guidetree input
844
+     */
845
+    if (opt_guidetree_in->count > 0) {
846
+        user_opts->aln_opts.pcGuidetreeInfile = CkStrdup(opt_guidetree_in->filename[0]);
847
+        if (! FileExists(user_opts->aln_opts.pcGuidetreeInfile)) {
848
+            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcGuidetreeInfile);
849
+        }
850
+    }
851
+    
852
+    
853
+    /* Guidetree output
854
+     */
855
+    if (opt_guidetree_out->count > 0) {
856
+        user_opts->aln_opts.pcGuidetreeOutfile = CkStrdup(opt_guidetree_out->filename[0]);
857
+        
858
+        /* warn if already exists or not writable */
859
+        if (FileExists(user_opts->aln_opts.pcGuidetreeOutfile) && ! user_opts->bForceFileOverwrite) {
860
+            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
861
+                  "Cowardly refusing to overwrite already existing file",
862
+                  user_opts->aln_opts.pcGuidetreeOutfile,
863
+                  "Use --force to force overwriting.");
864
+        }
865
+        if (! FileIsWritable(user_opts->aln_opts.pcGuidetreeOutfile)) {
866
+            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
867
+                  user_opts->aln_opts.pcGuidetreeOutfile);
868
+        }
869
+    }
870
+    
871
+
872
+    /* max guidetree iterations
873
+     */
874
+    if (opt_max_guidetree_iterations->count > 0) {
875
+        user_opts->aln_opts.iMaxGuidetreeIterations = opt_max_guidetree_iterations->ival[0];
876
+    }
877
+
878
+
879
+    /* max guidetree iterations
880
+     */
881
+    if (opt_max_hmm_iterations->count > 0) {
882
+        user_opts->aln_opts.iMaxHMMIterations = opt_max_hmm_iterations->ival[0];
883
+    }
884
+
885
+     /* number of iterations
886
+      */
887
+     if (opt_num_iterations->count > 0) {
888
+        if (STR_NC_EQ(opt_num_iterations->sval[0], "auto")) {
889
+            Log(&rLog, LOG_FATAL, "Automatic iteration not supported at the moment.");
890
+            user_opts->aln_opts.bIterationsAuto = TRUE;
891
+
892
+        } else {
893
+            int iAux;
894
+            user_opts->aln_opts.bIterationsAuto = FALSE;
895
+            for (iAux=0; iAux<(int)strlen(opt_num_iterations->sval[0]); iAux++) {
896
+                if (! isdigit(opt_num_iterations->sval[0][iAux])) {
897
+                    Log(&rLog, LOG_FATAL, "Couldn't iteration parameter: %s",
898
+                          opt_num_iterations->sval[0]);
899
+                }
900
+            }
901
+            user_opts->aln_opts.iNumIterations = atoi(opt_num_iterations->sval[0]);
902
+        }
903
+    }
904
+
905
+    
906
+    /* Alignment output
907
+     */
908
+    if (opt_outfile->count > 0) {
909
+        user_opts->pcAlnOutfile = CkStrdup(opt_outfile->filename[0]);
910
+
911
+        /* warn if already exists or not writable */
912
+        if (FileExists(user_opts->pcAlnOutfile) && ! user_opts->bForceFileOverwrite) {
913
+            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
914
+                  "Cowardly refusing to overwrite already existing file",
915
+                  user_opts->pcAlnOutfile,
916
+                  "Use --force to force overwriting.");
917
+        }
918
+        if (! FileIsWritable(user_opts->pcAlnOutfile)) {
919
+            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
920
+                  user_opts->pcAlnOutfile);
921
+        }
922
+    }
923
+    
924
+
925
+    /* Output format
926
+     */
927
+    if (opt_outfmt->count > 0) {
928
+        /* avoid gcc warning about discarded qualifier */
929
+        char *tmp = (char *)opt_outfmt->sval[0];
930
+        user_opts->iAlnOutFormat = String2SeqfileFormat(tmp);
931
+        if (SQFILE_UNKNOWN == user_opts->iAlnOutFormat) {
932
+            Log(&rLog, LOG_FATAL, "Unknown output format '%s'", opt_outfmt->sval[0]);
933
+        }
934
+    }
935
+
936
+    /* Number of threads
937
+     */
938
+#ifdef HAVE_OPENMP
939
+    if (opt_threads->count > 0) {
940
+        if (opt_threads->ival[0] <= 0) {
941
+            Log(&rLog, LOG_FATAL, "Changing number of threads to %d doesn't make sense.", 
942
+                  opt_threads->ival[0]);    
943
+        }
944
+        user_opts->iThreads = opt_threads->ival[0];
945
+    }
946
+
947
+#else
948
+    if (opt_threads->count > 0) {
949
+        if (opt_threads->ival[0] > 1) {
950
+            Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.", 
951
+                  opt_threads->ival[0], PACKAGE_NAME);    
952
+        }
953
+    }
954
+#endif
955
+
956
+    if (opt_gapopen->count > 0) {
957
+        user_opts->aln_opts.rHhalignPara.gapOpening = (float)opt_gapopen->dval[0];
958
+    }
959
+
960
+	if (opt_gapext->count > 0) {
961
+        user_opts->aln_opts.rHhalignPara.gapExtension = (float)opt_gapext->dval[0];
962
+    }
963
+
964
+    /* max MAC RAM (maximum amount of RAM set aside for MAC algorithm)
965
+     */
966
+    if (opt_macram->count > 0) { /* FS, r240 -> r241 */
967
+        user_opts->aln_opts.rHhalignPara.iMacRamMB = opt_macram->ival[0];
968
+    }
969
+
970
+    /* Number of residues in output before line-wrap
971
+     */
972
+    if (opt_wrap->count > 0) { /* FS, r274 -> */
973
+        user_opts->iWrap = opt_wrap->ival[0];
974
+    }
975
+
976
+    user_opts->bResno = opt_resno->count;
977
+
978
+    /* output-order
979
+     * like input (INPUT_ORDER = 0) or tree (TREE_ORDER = 1)
980
+     * if output-order format not valid use INPUT_ORDER
981
+     */
982
+    if (opt_output_order->count > 0){
983
+        user_opts->iOutputOrder = (0 == strcmp(opt_output_order->sval[0], "input-order")) ? INPUT_ORDER : 
984
+            (0 == strcmp(opt_output_order->sval[0], "tree-order")) ? TREE_ORDER : INPUT_ORDER;
985
+    }
986
+
987
+
988
+    arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
989
+
990
+    UserOptsLogicCheck(user_opts);
991
+
992
+    return; 
993
+}
994
+/* end of ParseCommandLine() */
995
+
996
+
997
+
998
+
999
+/**
1000
+ *
1001
+ * @brief the 'real' main function
1002
+ *
1003
+ */
1004
+int
1005
+executeClustalOmega(int argc, char **argv, ClustalOmegaInput *msaInput, ClustalOmegaOutput *msaOutput)
1006
+{
1007
+    mseq_t *prMSeq = NULL;
1008
+    mseq_t *prMSeqProfile1 = NULL;
1009
+    mseq_t *prMSeqProfile2 = NULL;
1010
+    cmdline_opts_t cmdline_opts;
1011
+
1012
+    /* Must happen first: setup logger */
1013
+    LogDefaultSetup(&rLog);
1014
+
1015
+    /*Log(&rLog, LOG_WARN, "This is a non-public realase of %s. Please do not distribute.", PACKAGE_NAME);*/
1016
+    /*Log(&rLog, LOG_WARN, "This is a beta version of %s, for protein only.", PACKAGE_NAME);*/ /* FS, r237 -> 238 */
1017
+
1018
+    SetDefaultUserOpts(&(cmdline_opts));
1019
+
1020
+    ParseCommandLine(&cmdline_opts, argc, argv);
1021
+
1022
+    if (NULL != cmdline_opts.pcLogFile) {
1023
+        prLogFile = fopen(cmdline_opts.pcLogFile, "w");
1024
+        LogSetFP(&rLog, LOG_INFO, prLogFile);
1025
+        LogSetFP(&rLog, LOG_VERBOSE, prLogFile);
1026
+        LogSetFP(&rLog, LOG_DEBUG, prLogFile);
1027
+    }
1028
+
1029
+    InitClustalOmega(cmdline_opts.iThreads);
1030
+
1031
+    if (rLog.iLogLevelEnabled < LOG_INFO) {
1032
+        PrintUserOpts(LogGetFP(&rLog, LOG_INFO), & cmdline_opts);
1033
+        PrintAlnOpts(LogGetFP(&rLog, LOG_INFO), & (cmdline_opts.aln_opts));
1034
+    }
1035
+
1036
+    /* Read sequence file
1037
+     *
1038
+     */
1039
+    if (NULL != cmdline_opts.pcSeqInfile || cmdline_opts.bRSequence) {
1040
+        NewMSeq(&prMSeq);
1041
+        if (!cmdline_opts.bRSequence) {
1042
+        	Log(&rLog, LOG_INFO, "Reading sequence file from '%s'", cmdline_opts.pcSeqInfile);
1043
+			if (ReadSequences(prMSeq, cmdline_opts.pcSeqInfile,
1044
+							  cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat,
1045
+							  cmdline_opts.bIsProfile,
1046
+							  cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
1047
+				Log(&rLog, LOG_FATAL, "Reading sequence file '%s' failed", cmdline_opts.pcSeqInfile);
1048
+			}
1049
+        } else {
1050
+        	Log(&rLog, LOG_INFO, "Reading sequence file from R");
1051
+        	if (ReadSequencesFromR(prMSeq, msaInput->seqLength, msaInput->inputSeqs, msaInput->seqNames,
1052
+							  cmdline_opts.iSeqType, cmdline_opts.iSeqInFormat,
1053
+							  cmdline_opts.bIsProfile,
1054
+							  cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
1055
+				Log(&rLog, LOG_FATAL, "Reading sequence file '%s' failed", cmdline_opts.pcSeqInfile);
1056
+			}
1057
+        }
1058
+#if TRACE
1059
+        {
1060
+            int iAux;
1061
+            for (iAux=0; iAux<prMSeq->nseqs; iAux++) {
1062
+                Log(&rLog, LOG_FORCED_DEBUG, "seq no %d: seq = %s", iAux, prMSeq->seq[iAux]);
1063
+                LogSqInfo(&prMSeq->sqinfo[iAux]);
1064
+            }
1065
+        }
1066
+#endif
1067
+    }
1068
+    /* k-tuple pairwise distance calculation seg-faults if 
1069
+     * only one sequence, simply exit early.
1070
+     * note that for profile/profile alignment prMSeq is NULL 
1071
+     * FS, r222->r223 */
1072
+    if (prMSeq && (prMSeq->nseqs <= 1)){
1073
+        Log(&rLog, LOG_FATAL, "File '%s' contains %d sequence%s, nothing to align",
1074
+              cmdline_opts.pcSeqInfile, prMSeq->nseqs, 1==prMSeq->nseqs?"":"s");
1075
+    }
1076
+    /* if there are fewer sequences than target size of clusters,
1077
+     * then mBed is unnecessary, FS, r283-> */
1078
+    if ( (prMSeq) && (prMSeq->nseqs <= cmdline_opts.aln_opts.iClustersizes) ){
1079
+        cmdline_opts.aln_opts.bUseMbed = FALSE;
1080
+        cmdline_opts.aln_opts.bUseMbedForIteration = FALSE;
1081
+        Log(&rLog, LOG_INFO, "not more sequences (%d) than cluster-size (%d), turn off mBed", 
1082
+            prMSeq->nseqs, cmdline_opts.aln_opts.iClustersizes);
1083
+    }
1084
+
1085
+    /* Dealign if requested and neccessary
1086
+     */
1087
+    if (NULL != prMSeq) {
1088
+        if (TRUE == prMSeq->aligned && cmdline_opts.bDealignInputSeqs) {
1089
+            Log(&rLog, LOG_INFO, "Dealigning already aligned input sequences as requested.");
1090
+            DealignMSeq(prMSeq);
1091
+        }
1092
+    }
1093
+
1094
+    /* Read profile1
1095
+     *
1096
+     */
1097
+    if (NULL != cmdline_opts.pcProfile1Infile) {
1098
+        NewMSeq(&prMSeqProfile1);
1099
+        if (ReadSequences(prMSeqProfile1, cmdline_opts.pcProfile1Infile,
1100
+                          cmdline_opts.iSeqType,  cmdline_opts.iSeqInFormat,
1101
+                          cmdline_opts.bIsProfile,
1102
+                          cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
1103
+            Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
1104
+                  cmdline_opts.pcProfile1Infile);
1105
+        }
1106
+        /* FIXME: commented out. FS, r240 -> r241  
1107
+         * for explanation see below */
1108
+        /*if (1==prMSeqProfile1->nseqs) {
1109
+          Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
1110
+          cmdline_opts.pcProfile1Infile);
1111
+          }*/
1112
+        if (FALSE == prMSeqProfile1->aligned) {
1113
+            Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
1114
+                  cmdline_opts.pcProfile1Infile);
1115
+        }
1116
+    }
1117
+
1118
+    
1119
+
1120
+    /* Read profile2
1121
+     *
1122
+     */
1123
+    if (NULL != cmdline_opts.pcProfile2Infile) {
1124
+        NewMSeq(&prMSeqProfile2);
1125
+        if (ReadSequences(prMSeqProfile2, cmdline_opts.pcProfile2Infile,
1126
+                          cmdline_opts.iSeqType,  cmdline_opts.iSeqInFormat,
1127
+                          cmdline_opts.bIsProfile,
1128
+                          cmdline_opts.iMaxNumSeq, cmdline_opts.iMaxSeqLen)) {
1129
+            Log(&rLog, LOG_FATAL, "Reading sequences from profile file '%s' failed",
1130
+                  cmdline_opts.pcProfile2Infile);
1131
+        }
1132
+        /* FIXME: there is no (clean) way to align a single sequence to a profile. 
1133
+         * if we go down the -i route, it causes a seg-fault in the pair-wise 
1134
+         * k-tuple distance calculation. However, single sequences can be 
1135
+         * understood as 1-profiles. Therefore we have to allow for 1-profiles.
1136
+         * FS, r240 -> r241 
1137
+         */
1138
+        /*if (1==prMSeqProfile2->nseqs) {
1139
+          Log(&rLog, LOG_FATAL, "'%s' contains only one sequence and can therefore not be used as a profile",
1140
+          cmdline_opts.pcProfile2Infile);
1141
+          }*/
1142
+        if (FALSE == prMSeqProfile2->aligned) {
1143
+            Log(&rLog, LOG_FATAL, "Sequences in '%s' are not aligned, i.e. this is not a profile",
1144
+                  cmdline_opts.pcProfile2Infile);
1145
+        }
1146
+    }
1147
+
1148
+
1149
+    /* Depending on the input we got perform
1150
+     *
1151
+     * (i) normal alignment: seq + optional profile
1152
+     * or
1153
+     * (ii) profile profile alignment
1154
+     *
1155
+     */
1156
+
1157
+    cmdline_opts.aln_opts.rHhalignPara.substitutionMatrix = msaInput->substitutionMatrix;
1158
+
1159
+    if (NULL != prMSeq) {
1160
+
1161
+        if (2 == prMSeq->nseqs){
1162
+            /* if there are only 2 sequences then the order does not matter */
1163
+            /* this is important, because for pair-wise alignment we don't do 
1164
+             * tree indexing*, and trying to use tree-indexing during output 
1165
+             * will cause a segmentation fault */
1166
+            cmdline_opts.iOutputOrder = INPUT_ORDER;
1167
+        }
1168
+        if (TREE_ORDER == cmdline_opts.iOutputOrder){
1169
+            /* this is a crutch, if tree_order==NULL use input order, 
1170
+             * otherwise use guide-tree-order */
1171
+            prMSeq->tree_order = (int *)CKMALLOC(prMSeq->nseqs * sizeof(int));
1172
+        }
1173
+        if (Align(prMSeq, prMSeqProfile1, & cmdline_opts.aln_opts)) {
1174
+            Log(&rLog, LOG_FATAL, "An error occured during the alignment");
1175
+        }
1176
+
1177
+        if (cmdline_opts.aln_opts.iMaxHMMIterations >= 0){
1178
+            if (WriteAlignment(prMSeq, cmdline_opts.pcAlnOutfile,
1179
+                               cmdline_opts.iAlnOutFormat, cmdline_opts.iWrap, cmdline_opts.bResno,
1180
+                               &msaOutput->msa_c, &msaOutput->msa_v)) {
1181
+                Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
1182
+            }
1183
+        }
1184
+#if 0
1185
+        {
1186
+            bool bSampling = FALSE; /* better set to TRUE for many sequences */
1187
+            bool bReportAll = TRUE;
1188
+            AliStat(prMSeq, bSampling, bReportAll);
1189
+        }
1190
+#endif
1191
+        
1192
+
1193
+    } else if (NULL != prMSeqProfile1 && NULL != prMSeqProfile2) {
1194
+        if (AlignProfiles(prMSeqProfile1, prMSeqProfile2, 
1195
+                          cmdline_opts.aln_opts.rHhalignPara)) {
1196
+            Log(&rLog, LOG_FATAL, "An error occured during the alignment");
1197
+        }
1198
+        if (WriteAlignment(prMSeqProfile1, cmdline_opts.pcAlnOutfile,
1199
+                           cmdline_opts.iAlnOutFormat, cmdline_opts.iWrap, cmdline_opts.bResno,
1200
+                           &msaOutput->msa_c, &msaOutput->msa_v)) {
1201
+            Log(&rLog, LOG_FATAL, "Could not save alignment to %s", cmdline_opts.pcAlnOutfile);
1202
+        }
1203
+    }
1204
+
1205
+    /* cleanup
1206
+     */
1207
+    if (NULL != prMSeq) {
1208
+        FreeRSeq(&prMSeq, cmdline_opts.bRSequence);
1209
+    }
1210
+    if (NULL != prMSeqProfile1) {
1211
+        FreeMSeq(&prMSeqProfile1);
1212
+    }
1213
+    if (NULL != prMSeqProfile2) {
1214
+        FreeMSeq(&prMSeqProfile2);
1215
+    }
1216
+
1217
+    FreeUserOpts(&cmdline_opts);
1218
+
1219
+    Log(&rLog, LOG_DEBUG, "Successful program exit");
1220
+
1221
+    if (NULL != cmdline_opts.pcLogFile) {
1222
+        fclose(prLogFile);
1223
+    }
1224
+    free(prMSeq);
1225
+    return EXIT_SUCCESS;
1226
+}
1227
+/*  end of MyMain() */
1228
+
1229
+