Browse code

add package to the repository

msa


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a8-67e5-0310-9ffc-ced21a209358

Sonali Arora authored on 10/04/2015 00:12:33
Showing 1 changed file
1 1
new file mode 100644
... ...
@@ -0,0 +1,480 @@
1
+/*********************************************************************
2
+ * Clustal Omega - Multiple sequence alignment
3
+ *
4
+ * Copyright (C) 2010 University College Dublin
5
+ *
6
+ * Clustal-Omega is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU General Public License as
8
+ * published by the Free Software Foundation; either version 2 of the
9
+ * License, or (at your option) any later version.
10
+ *
11
+ * This file is part of Clustal-Omega.
12
+ *
13
+ ********************************************************************/
14
+
15
+/*
16
+ *  RCS $Id: main.cpp 234 2011-04-13 05:26:16Z andreas $
17
+ */
18
+
19
+/*
20
+ * We are using a mix of C and C++, which means that linking has to be
21
+ * done with a C++ compiler. By using this "fake" main c++ function,
22
+ * automake is convinced to use a C++ compiler for linking.
23
+ *
24
+ */
25
+
26
+#ifdef HAVE_CONFIG_H
27
+    #include "config.h"
28
+#endif
29
+
30
+#include <stdio.h>
31
+#include <stdlib.h>
32
+#include <string.h>
33
+
34
+extern "C" {
35
+#include "mymain.h"
36
+#include "clustal/util.h"
37
+//#include "squid/squid.h"
38
+}
39
+
40
+
41
+/**
42
+ * @brief Convert an old Clustal command line parameter in the form of
43
+ * [-/]param[=value] to new parameter if possible
44
+ *
45
+ * @param[out] iNewArgC_p
46
+ * "argc" which will be incremented for each successfully converted option
47
+ * @param[out] ppcNewArgV_p
48
+ * "argv" to which each successfully converted options will be added
49
+ * (caller has to free)
50
+ * @param[in] pcOldArg
51
+ * The old parameter and value command line option
52
+ *
53
+ */
54
+void
55
+ConvertOldCmdLineArg(int *iNewArgC_p, char ***ppcNewArgV_p, char *pcOldArg)
56
+{
57
+    char *pcOldParam, *pcOldValue, *pcOldArgCopy;
58
+    char zcNotImplementedMsg[] = "WARNING: Invalid old command line option";
59
+    
60
+    pcOldArgCopy = CkStrdup(pcOldArg); 
61
+    pcOldParam = strtok(pcOldArgCopy, "=");
62
+    pcOldValue = strtok(NULL, "=");
63
+
64
+
65
+    /* go through all options in order of appearance in clustalw2 -help
66
+     *
67
+     */
68
+     
69
+        /* data
70
+         *
71
+         */
72
+    if (STR_NC_EQ("INFILE", &pcOldParam[1])) {
73
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-i");
74
+        if (NULL != pcOldValue)
75
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
76
+
77
+    } else if (STR_NC_EQ("PROFILE1", &pcOldParam[1])) {
78
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("--profile1");
79
+        if (NULL != pcOldValue)
80
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
81
+
82
+    } else if (STR_NC_EQ("PROFILE2", &pcOldParam[1])) {
83
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("--profile2");
84
+        if (NULL != pcOldValue)
85
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
86
+
87
+        /* verbs
88
+         *
89
+         */
90
+
91
+        /* missing:
92
+         * OPTIONS
93
+         */
94
+        
95
+    } else if (STR_NC_EQ("HELP", &pcOldParam[1])
96
+               || STR_NC_EQ("CHECK", &pcOldParam[1])) {
97
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-h");
98
+        
99
+    } else if (STR_NC_EQ("FULLHELP", &pcOldParam[1])) {
100
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-h");
101
+        
102
+    } else if (STR_NC_EQ("ALIGN", &pcOldParam[1])) {
103
+        char msg[] = "WARNING: The ALIGN option is default in Clustal Omega";
104
+        fprintf(stderr, "%s\n", msg);
105
+
106
+        /* missing:
107
+         * TREE
108
+         * PIM
109
+         * BOOTSTRAP
110
+         * CONVERT
111
+         */
112
+        
113
+        /* parameters
114
+         *
115
+         */
116
+    } else if (STR_NC_EQ("INTERACTIVE", &pcOldParam[1])) {
117
+        char msg[] = "WARNING: There is no interactive command-line menu in Clustal Omega";
118
+        fprintf(stderr, "%s\n", msg);
119
+        /* trigger help */
120
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-h");
121
+        
122
+    } else if (STR_NC_EQ("QUICKTREE", &pcOldParam[1])) {
123
+        char msg[] = "WARNING: The QUICKTREE (i.e. k-tuple distance) option is default in Clustal Omega";
124
+        fprintf(stderr, "%s\n", msg);
125
+        
126
+    } else if (STR_NC_EQ("TYPE", &pcOldParam[1])) {
127
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-t");
128
+        if (NULL != pcOldValue)
129
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
130
+
131
+        /* NEGATIVE */
132
+        
133
+    } else if (STR_NC_EQ("OUTFILE", &pcOldParam[1])) {
134
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-o");
135
+        if (NULL != pcOldValue)
136
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
137
+
138
+        
139
+    } else if (STR_NC_EQ("OUTPUT", &pcOldParam[1])) {
140
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("--outfmt");
141
+        if (NULL != pcOldValue)
142
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
143
+
144
+        /* missing:
145
+         * OUTORDER
146
+         * CASE
147
+         * SEQNOS
148
+         * SEQNO_RANGE
149
+         * RANGE
150
+         */
151
+                   
152
+    } else if (STR_NC_EQ("MAXSEQLEN", &pcOldParam[1])) {
153
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("--maxseqlen");
154
+        if (NULL != pcOldValue)
155
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
156
+        
157
+    } else if (STR_NC_EQ("QUIET", &pcOldParam[1])) {
158
+        char msg[] = "WARNING: The QUIET option is default in Clustal Omega";
159
+        fprintf(stderr, "%s\n", msg);
160
+
161
+        /* missing:
162
+         * STATS
163
+         */
164
+        
165
+        /* fast pariwise alignment
166
+         *
167
+         */
168
+
169
+        /* missing:
170
+         * KTUPLE
171
+         * TOPDIAGS
172
+         * WINDOW
173
+         * PAIRGAP
174
+         * SCORE
175
+         */
176
+        
177
+        /* slow pairwise alignments 
178
+         *
179
+         */
180
+
181
+        /* missing:
182
+         * PWMATRIX
183
+         * PWDNAMATRIX
184
+         * PWGAPOPEN
185
+         * PWGAPEXT
186
+         */
187
+        
188
+        /* multiple alignments
189
+         *
190
+         */
191
+    } else if (STR_NC_EQ("NEWTREE", &pcOldParam[1])) {
192
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("--guidetree-out");
193
+        if (NULL != pcOldValue)
194
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
195
+        
196
+    } else if (STR_NC_EQ("USETREE", &pcOldParam[1])) {
197
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("--guidetree-in");
198
+        if (NULL != pcOldValue)
199
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
200
+
201
+        /* missing:
202
+         * MATRIX
203
+         * DNAMATRIX
204
+         * GAPOPEN
205
+         * GAPEXT
206
+         * ENDGAPS
207
+         * GAPDIST
208
+         * NOGAP
209
+         * NOHGAP
210
+         * HGAPRESIDUES
211
+         * MAXDIV
212
+         * TYPE already handled above
213
+         * TRANSWEIGHT
214
+         * ITERATION
215
+         * NUMITER
216
+         * NOWEIGHTS
217
+         */
218
+
219
+        /* profile alignments
220
+         *
221
+         */
222
+
223
+        /* missing:
224
+         * PROFILE
225
+         * NEWTREE1
226
+         * NEWTREE2
227
+         * USETREE1
228
+         * USETREE2
229
+         */
230
+        
231
+        /* sequence to profile alignments 
232
+         *
233
+         */
234
+    } else if (STR_NC_EQ("SEQUENCES", &pcOldParam[1])) {
235
+        fprintf(stderr, "WARNING: %s: %s\n", zcNotImplementedMsg, pcOldArg);
236
+
237
+        /* SEQUENCES and NEWTREE already handled above */
238
+        
239
+        /* structure alignments
240
+         *
241
+         */
242
+
243
+        /* missing:
244
+         * NOSECSTR1
245
+         * NOSECSTR2
246
+         * SECSTROUT
247
+         * HELIXGAP
248
+         * STRANDGAP
249
+         * LOOPGAP
250
+         * TERMINALGAP
251
+         * HELIXENDIN
252
+         * HELIXENDOUT
253
+         * STRANDENDIN
254
+         * STRANDENDOUT
255
+         */
256
+        
257
+        /* trees
258
+         *
259
+         */
260
+
261
+        /* missing:
262
+         * OUTPUTTREE
263
+         * SEED
264
+         * KIMURA
265
+         * TOSSGAPS
266
+         * BOOTLABELS
267
+         */
268
+#if 0        
269
+    } else if (STR_NC_EQ("CLUSTERING", &pcOldParam[1])) {
270
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-c");
271
+        if (NULL != pcOldValue)
272
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(pcOldValue);
273
+#endif
274
+        
275
+    } else {
276
+        fprintf(stderr,
277
+                "WARNING: Unsupported old command line option '%s' will be ignored\n",
278
+                pcOldArg);
279
+    }
280
+    
281
+	/* FIXME: if not outfile was given, than the old default was to create a
282
+	 * filename based on the input filename but with its extension replaced by
283
+	 * aln. If the input already had the extension aln then the input was
284
+	 * overwritten. What to do here? Strictly mimic the old behaviour?
285
+	 */
286
+	
287
+    free(pcOldArgCopy);
288
+}
289
+/* end ConvertOldCmdLineArg */
290
+
291
+
292
+
293
+/**
294
+ * @brief Convert old command line usage to new one. Mix of old and
295
+ * new style will be tolerated
296
+ *
297
+ * @param[out] iNewArgC_p
298
+ * The updated "argc"
299
+ * @param[out] ppcNewArgV_p
300
+ * The updated "argv". Caller has to free.
301
+ * @param[in] argc
302
+ * Original "argc"
303
+ * @param[in] argv
304
+ * Original argv
305
+ * 
306
+ * @note old style parameters look like this:
307
+ *  [/-]param[=value]
308
+ * new style parameters:
309
+ *  -p [value]
310
+ *  --param [value]
311
+ *
312
+ */
313
+void
314
+ConvertOldCmdline(int *iNewArgC_p, char ***ppcNewArgV_p, int argc, char **argv)
315
+{
316
+    bool bOldCmdlineDetected = false;
317
+    int i; /* aux */
318
+
319
+    /* we can have at most 2*argc converted arguments, plus the few
320
+     * that we set by default (.e.g --force)
321
+     */
322
+    (*ppcNewArgV_p) = (char **) CKCALLOC(argc*2+10, sizeof(char*));
323
+    
324
+    /* copy first arg which is program name */
325
+    (*ppcNewArgV_p)[0] = CkStrdup(argv[0]);
326
+    *iNewArgC_p = 1;
327
+        
328
+    for (i=1; i<argc; i++) {
329
+        bool bNewStyle = false;
330
+        
331
+        if (strlen(argv[i])<=2) {
332
+            /* e.g. -i (param) or just numbers (value) */
333
+            bNewStyle = true;
334
+            
335
+        } else if (strlen(argv[i])>2) {
336
+            if (argv[i][0] == '-' && argv[i][1] == '-') {
337
+                /* new style long opts */
338
+                bNewStyle = true;
339
+                
340
+            } else if (argv[i][0]=='/' && (NULL!=strchr(&argv[i][1], '/'))) {
341
+                /* Slash used to be a valid replacement for dash in
342
+                 * Clustal<=2, but could also be file in new style. If
343
+                 * we find at least two slashes, one at the beginning,
344
+                 * it should be a filename and therefore new style */
345
+                bNewStyle = true;
346
+                
347
+            } else if (argv[i][0] != '/' && argv[i][0] != '-') {
348
+                /* old style opts always start with slash or dash */
349
+                bNewStyle = true;
350
+                
351
+            }
352
+        }
353
+
354
+        /* copy and continue if new style arg or attempt to convert
355
+         * old style arg
356
+         */
357
+        if (bNewStyle) {
358
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup(argv[i]);
359
+        } else {
360
+            ConvertOldCmdLineArg(iNewArgC_p, ppcNewArgV_p, argv[i]);
361
+            /*LOG_DEBUG("old command line arg: %s", argv[i]);*/
362
+            bOldCmdlineDetected = true;
363
+        }
364
+    }
365
+    
366
+    if (bOldCmdlineDetected) {
367
+        bool bOutfileOptSet = FALSE;
368
+        bool bOutFormatOptSet = FALSE;
369
+        
370
+        
371
+        /* old clustal used to write to a file called in.aln by
372
+         *  default. set if not
373
+         * explicitely requested otherwisee
374
+         */
375
+        for (i=0; i<*iNewArgC_p; i++) {
376
+            const char *pcOpt = "-o";
377
+            if (strlen(pcOpt) <= strlen((*ppcNewArgV_p)[i])) {
378
+                if (0 == strncmp((*ppcNewArgV_p)[i], pcOpt, strlen(pcOpt))) {
379
+                    bOutfileOptSet = TRUE;
380
+                    break;
381
+                }
382
+            }
383
+        }
384
+        if (FALSE == bOutfileOptSet) {
385
+#ifdef TOO_LAZY_TO_IMPLEMENT_JUST_USING_DEFAULT_NAME_INSTEAD
386
+            char *pcDotPos = NULL;
387
+            char *pcInfileOpt = NULL;
388
+
389
+            /* get infile arg and find last dot in it. if found replace
390
+             * everything after with "aln", otherwise just add "aln"
391
+             */
392
+            for (i=0; i<*iNewArgC_p; i++) {
393
+                const char *pcOpt = "-i";
394
+                if (strlen(pcOpt) <= strlen((*ppcNewArgV_p)[i])) {
395
+                    if (0 == strncmp((*ppcNewArgV_p)[i], pcOpt, strlen(pcOpt))) {
396
+                        if (*iNewArgC_p<= i+1) {
397
+                            fprintf(stderr,
398
+                                    "Oups...error while trying to convert old commandline (%s).\n",
399
+                                    "No more arguments left after -i");
400
+                            throw 1;
401
+                        }
402
+                        pcInfileOpt = (*ppcNewArgV_p)[i+1];
403
+                        break;
404
+                    }
405
+                }
406
+            }
407
+            if (NULL == pcInfileOpt) {
408
+                fprintf(stderr,
409
+                        "Oups...error while trying to convert old commandline (%s)\n",
410
+                        "No infile opt found");
411
+                throw 1;
412
+            }
413
+
414
+            fprintf(stderr, "FIXME: unfinished\n");
415
+            throw 1;
416
+#endif
417
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-o");
418
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("clustal.aln");
419
+        }
420
+
421
+        
422
+        /* old clustal used the clustal format by default. set if not
423
+         * explicitely requested otherwisee
424
+         */
425
+        for (i=0; i<*iNewArgC_p; i++) {
426
+            const char *pcOpt = "--outfmt";
427
+            if (strlen(pcOpt) <= strlen((*ppcNewArgV_p)[i])) {
428
+                if (0 == strncmp((*ppcNewArgV_p)[i], pcOpt, strlen(pcOpt))) {
429
+                    bOutFormatOptSet = TRUE;
430
+                    break;
431
+                }
432
+            }
433
+        }
434
+        if (FALSE == bOutFormatOptSet) {
435
+            (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("--outfmt=clustal");
436
+        }
437
+
438
+        
439
+        /* old clustal was verbose by default
440
+         */
441
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("-v");
442
+        
443
+        /* old clustal used to overwrite files by default
444
+         */
445
+        (*ppcNewArgV_p)[(*iNewArgC_p)++] = CkStrdup("--force");
446
+        
447
+        fprintf(stderr,
448
+                "WARNING: Your old-style command-line options were converted to: ");
449
+        for (i=0; i<*iNewArgC_p; i++) {
450
+            fprintf(stderr, " %s", (*ppcNewArgV_p)[i]);
451
+        }
452
+        fprintf(stderr, "\n");
453
+    }
454
+
455
+}
456
+/* end ConvertOldCmdline */
457
+
458
+
459
+
460
+int
461
+main(int argc, char **argv)
462
+{
463
+    ClustalOmegaInput msaInput;
464
+    ClustalOmegaOutput msaOutput;
465
+
466
+    int i; /* aux */
467
+    int new_argc = 0;
468
+    char **new_argv = NULL;
469
+
470
+    ConvertOldCmdline(&new_argc, &new_argv, argc, argv);
471
+    
472
+    executeClustalOmega(new_argc, new_argv, &msaInput, &msaOutput);
473
+    
474
+    for (i=0; i<new_argc; i++) {
475
+        free(new_argv[i]);
476
+    }
477
+    free(new_argv);
478
+    
479
+    return 0;
480
+}