Browse code

Multiple fixes; final comit before release; version number bumped to 0.99.6

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102514 bc3139a8-67e5-0310-9ffc-ced21a209358

Ulrich Bodenhofer authored on 15/04/2015 13:20:09
Showing1 changed files
... ...
@@ -182,7 +182,6 @@ SEXP RClustalOmega(SEXP rInputSeqs,
182 182
             msaInput.seqLength = seqLength;
183 183
             msaInput.seqNames = seqNames;
184 184
             //hardcoded value, if sequence is an R sequence
185
-            //appendString(&argv, argc, "R");
186 185
             appendString(&argv, argc, "--R");
187 186
         }
188 187
         appendString(&argv, argc, "-o");
... ...
@@ -243,6 +242,10 @@ SEXP RClustalOmega(SEXP rInputSeqs,
243 242
 			appendStringValue(&argv, argc, "--distmat-out",
244 243
 					getListElement(rParams, "distMatOut"));
245 244
         }
245
+        if (hasClustalOmegaEntry(rParams, "outputOrder")) {
246
+			appendStringValue(&argv, argc, "--output-order",
247
+					getListElement(rParams, "outputOrder"));
248
+        }
246 249
         if (hasClustalOmegaEntry(rParams, "guideTreeIn")) {
247 250
 			appendStringValue(&argv, argc, "--guidetree-in",
248 251
 					getListElement(rParams, "guideTreeIn"));
Browse code

Various fixes and improvements; version number bumped to 0.99.3

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102303 bc3139a8-67e5-0310-9ffc-ced21a209358

Ulrich Bodenhofer authored on 10/04/2015 22:38:22
Showing1 changed files
... ...
@@ -31,11 +31,10 @@
31 31
 #include <stdlib.h>
32 32
 #include <string.h>
33 33
 
34
-#include <R.h>
35
-#include <Rinternals.h>
36
-
37 34
 #include "RClustalOmega.h"
38 35
 
36
+#include <R.h>
37
+#include <Rinternals.h>
39 38
 #include <Rdefines.h>
40 39
 
41 40
 extern "C" {
Browse code

add package to the repository

msa


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a8-67e5-0310-9ffc-ced21a209358

Sonali Arora authored on 10/04/2015 00:12:33
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,436 @@
1
+/*********************************************************************
2
+ * Clustal Omega - Multiple sequence alignment
3
+ *
4
+ * Copyright (C) 2010 University College Dublin
5
+ *
6
+ * Clustal-Omega is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU General Public License as
8
+ * published by the Free Software Foundation; either version 2 of the
9
+ * License, or (at your option) any later version.
10
+ *
11
+ * This file is part of Clustal-Omega.
12
+ *
13
+ ********************************************************************/
14
+
15
+/*
16
+ *  RCS $Id: main.cpp 234 2011-04-13 05:26:16Z andreas $
17
+ */
18
+
19
+/*
20
+ * We are using a mix of C and C++, which means that linking has to be
21
+ * done with a C++ compiler. By using this "fake" main c++ function,
22
+ * automake is convinced to use a C++ compiler for linking.
23
+ *
24
+ */
25
+
26
+#ifdef HAVE_CONFIG_H
27
+    #include "config.h"
28
+#endif
29
+
30
+#include <stdio.h>
31
+#include <stdlib.h>
32
+#include <string.h>
33
+
34
+#include <R.h>
35
+#include <Rinternals.h>
36
+
37
+#include "RClustalOmega.h"
38
+
39
+#include <Rdefines.h>
40
+
41
+extern "C" {
42
+#include "mymain.h"
43
+#include "clustal/util.h"
44
+#include "squid/squid.h"
45
+}
46
+
47
+using namespace std;
48
+using namespace Rcpp;
49
+
50
+bool hasClustalOmegaEntry(Rcpp::List params, const char* entry) {
51
+	Rcpp::CharacterVector namesCV = params.names();
52
+	int n = namesCV.size();
53
+	vector<string> namesVector = Rcpp::as< std::vector< std::string > >(namesCV);
54
+	for (int i = 0; i < n; i++) {
55
+		const char* vec = namesVector[i].c_str();
56
+		if (strcmp(vec, entry) == 0) {
57
+			return !Rf_isNull(params[entry]);
58
+		}
59
+	}
60
+	return false;
61
+}
62
+
63
+void appendDoubleToString(SEXP value, const char *str, char *buffer) {
64
+    double doubleValue = as<double>(value);
65
+    sprintf(buffer, "%s=%f", str, doubleValue);
66
+}
67
+
68
+void appendIntToString(SEXP value, const char *str, char *buffer) {
69
+    int intValue = as<int>(value);
70
+    sprintf(buffer, "%s=%i", str, intValue);
71
+}
72
+
73
+void appendStringToString(SEXP value, const char *str, char *buffer) {
74
+    string stringValue = as<string>(value);
75
+    sprintf(buffer, "%s=%s", str, stringValue.c_str());
76
+}
77
+
78
+void appendString(char ***argv, int &count, const char *str) {
79
+    count++;
80
+    *argv = (char**)realloc(*argv, count * sizeof(char*));
81
+    if(*argv == NULL)
82
+        Rprintf("Error (re)allocating memory\n");
83
+    else {
84
+        (*argv)[count - 1] = (char*)malloc(strlen(str)*sizeof(char) + 1);
85
+        strcpy((*argv)[count - 1], str);
86
+    }
87
+}
88
+
89
+void appendIntValue(char ***argv, int &count, const char *str, SEXP intValue) {
90
+    if (!intValue || Rf_isNull(intValue)) {
91
+        return;
92
+    }
93
+    char buffer[128];
94
+    appendIntToString(intValue, str, buffer);
95
+    appendString(argv, count, buffer);
96
+}
97
+
98
+void appendDoubleValue(char ***argv, int &count, const char *str, SEXP doubleValue) {
99
+    if (!doubleValue || Rf_isNull(doubleValue)) {
100
+        return;
101
+    }
102
+    char buffer[128];
103
+    appendDoubleToString(doubleValue, str, buffer);
104
+    appendString(argv, count, buffer);
105
+}
106
+
107
+void appendStringValue(char ***argv, int &count, const char *str, SEXP stringValue) {
108
+    if (!stringValue || Rf_isNull(stringValue)) {
109
+        return;
110
+    }
111
+    char buffer[128];
112
+    appendStringToString(stringValue, str, buffer);
113
+    appendString(argv, count, buffer);
114
+}
115
+
116
+/* get the list element named str, or return NULL */
117
+SEXP getListElement(SEXP list, const char *str) {
118
+    List lst(list);
119
+    SEXP element = NULL;
120
+    CharacterVector nam = lst.names();    
121
+    for (int i = 0; i < nam.size(); i++) {
122
+        string nameStr = as<string>(nam[i]);
123
+        if(strcmp(nameStr.c_str(), str) == 0) {
124
+           element = lst[str];
125
+           break;
126
+        }
127
+    }
128
+    return element;
129
+}
130
+
131
+char* getChar(string str) {
132
+    char * writable = new char[str.size() + 1];
133
+    std::copy(str.begin(), str.end(), writable);
134
+    writable[str.size()] = '\0';
135
+    return writable;
136
+}
137
+
138
+SEXP RClustalOmega(SEXP rInputSeqs,
139
+                   SEXP rCluster,
140
+                   SEXP rGapOpening,
141
+                   SEXP rGapExtension,
142
+                   SEXP rMaxiters,
143
+                   SEXP rSubstitutionMatrix,
144
+                   SEXP rType,
145
+                   SEXP rVerbose,
146
+                   SEXP rParams) {
147
+
148
+	Rcpp::List retList;
149
+    try {
150
+        Rcpp::List params(rParams);
151
+
152
+        bool inputSeqIsFileFlag = as<bool>(params["inputSeqIsFileFlag"]);
153
+
154
+        ClustalOmegaInput msaInput;
155
+
156
+        int argc = 0;
157
+        char **argv = NULL;
158
+
159
+        appendString(&argv, argc, "clustalo");
160
+        R_len_t n = Rf_length(rInputSeqs);
161
+        if (inputSeqIsFileFlag) {
162
+        	appendString(&argv, argc, "-i");  //input
163
+            string inputSeqFile = as<string>(rInputSeqs);
164
+            appendString(&argv, argc, inputSeqFile.c_str());
165
+        } else {
166
+            vector<string> myInputSeqs = Rcpp::as<vector<string> >(rInputSeqs);
167
+            CharacterVector myInputSeqsCV(rInputSeqs);
168
+            vector<string> mySeqNames =
169
+                    Rcpp::as<vector<string> >(myInputSeqsCV.attr("names"));
170
+
171
+            R_len_t i;
172
+            char **seqs = (char **)R_alloc(n, sizeof(char *));
173
+            char **seqNames = (char **)R_alloc(n, sizeof(char *));
174
+            int *seqLengths = (int *)R_alloc(n, sizeof(int));
175
+            int seqLength = n;
176
+
177
+            for (i = 0; i < n; i++) {
178
+                seqs[i] = getChar(myInputSeqs[i]);
179
+                seqNames[i] = getChar(mySeqNames[i]);
180
+                seqLengths[i] = strlen(seqs[i]);
181
+            }
182
+            msaInput.inputSeqs = seqs;
183
+            msaInput.seqLength = seqLength;
184
+            msaInput.seqNames = seqNames;
185
+            //hardcoded value, if sequence is an R sequence
186
+            //appendString(&argv, argc, "R");
187
+            appendString(&argv, argc, "--R");
188
+        }
189
+        appendString(&argv, argc, "-o");
190
+        //used in some cases ...
191
+        appendString(&argv, argc, "tempClustalOmega.aln");
192
+        //output format, we expect
193
+        appendString(&argv, argc, "--outfmt=clustal");
194
+        /*FIXME TODO in later version: activate other formats
195
+        appendStringValue(&argv, argc, "--outfmt",
196
+                getListElement(rParams, "outFmt"));*/
197
+        appendStringValue(&argv, argc, "--infmt",
198
+                getListElement(rParams, "inFmt"));
199
+        appendStringValue(&argv, argc, "--seqtype", rType);
200
+        //overwrite files ...
201
+        appendString(&argv, argc, "--force");
202
+        appendDoubleValue(&argv, argc, "--gapopen", rGapOpening);
203
+        appendDoubleValue(&argv, argc, "--gapext", rGapExtension);
204
+        appendIntValue(&argv, argc, "--cluster-size", rCluster);
205
+        appendIntValue(&argv, argc, "--iter", rMaxiters);
206
+
207
+        if (hasClustalOmegaEntry(rParams, "clusteringOut")) {
208
+			appendStringValue(&argv, argc, "--clustering-out",
209
+					getListElement(rParams, "clusteringOut"));
210
+        }
211
+        if (hasClustalOmegaEntry(rParams, "macRam")) {
212
+			appendIntValue(&argv, argc, "--MAC-RAM",
213
+					getListElement(rParams, "macRam"));
214
+        }
215
+        if (hasClustalOmegaEntry(rParams, "maxGuidetreeIterations")) {
216
+			appendIntValue(&argv, argc, "--max-guidetree-iterations",
217
+					getListElement(rParams, "maxGuidetreeIterations"));
218
+        }
219
+        if (hasClustalOmegaEntry(rParams, "maxHmmIterations")) {
220
+			appendIntValue(&argv, argc, "--max-hmm-iterations",
221
+					getListElement(rParams, "maxHmmIterations"));
222
+        }
223
+        if (hasClustalOmegaEntry(rParams, "maxNumSeq")) {
224
+			appendIntValue(&argv, argc, "--maxnumseq",
225
+					getListElement(rParams, "maxNumSeq"));
226
+        }
227
+        if (hasClustalOmegaEntry(rParams, "maxSeqLen")) {
228
+			appendIntValue(&argv, argc, "--maxseqlen",
229
+					getListElement(rParams, "maxSeqLen"));
230
+        }
231
+        if (hasClustalOmegaEntry(rParams, "threads")) {
232
+			appendIntValue(&argv, argc, "--threads",
233
+					getListElement(rParams, "threads"));
234
+        }
235
+        if (hasClustalOmegaEntry(rParams, "wrap")) {
236
+			appendIntValue(&argv, argc, "--wrap",
237
+					getListElement(rParams, "wrap"));
238
+        }
239
+        if (hasClustalOmegaEntry(rParams, "distMatIn")) {
240
+			appendStringValue(&argv, argc, "--distmat-in",
241
+					getListElement(rParams, "distMatIn"));
242
+        }
243
+        if (hasClustalOmegaEntry(rParams, "distMatOut")) {
244
+			appendStringValue(&argv, argc, "--distmat-out",
245
+					getListElement(rParams, "distMatOut"));
246
+        }
247
+        if (hasClustalOmegaEntry(rParams, "guideTreeIn")) {
248
+			appendStringValue(&argv, argc, "--guidetree-in",
249
+					getListElement(rParams, "guideTreeIn"));
250
+        }
251
+        if (hasClustalOmegaEntry(rParams, "guideTreeOut")) {
252
+			appendStringValue(&argv, argc, "--guidetree-out",
253
+					getListElement(rParams, "guideTreeOut"));
254
+        }
255
+        if (hasClustalOmegaEntry(rParams, "hmmIn")) {
256
+			appendStringValue(&argv, argc, "--hmm-in",
257
+					getListElement(rParams, "hmmIn"));
258
+        }
259
+        if (hasClustalOmegaEntry(rParams, "log")) {
260
+			appendStringValue(&argv, argc, "--log",
261
+					getListElement(rParams, "log"));
262
+        }
263
+        if (hasClustalOmegaEntry(rParams, "outfile")) {
264
+			appendStringValue(&argv, argc, "--outfile",
265
+					getListElement(rParams, "outfile"));
266
+        }
267
+        if (hasClustalOmegaEntry(rParams, "profile1")) {
268
+			appendStringValue(&argv, argc, "--profile1",
269
+					getListElement(rParams, "profile1"));
270
+        }
271
+        if (hasClustalOmegaEntry(rParams, "profile2")) {
272
+			appendStringValue(&argv, argc, "--profile2",
273
+					getListElement(rParams, "profile2"));
274
+        }
275
+        if (hasClustalOmegaEntry(rParams, "auto")) {
276
+			bool autoFlag = as<bool>(params["auto"]);
277
+			if (autoFlag) { //14
278
+				appendString(&argv, argc, "--auto");
279
+			}
280
+        }
281
+        if (hasClustalOmegaEntry(rParams, "dealign")) {
282
+			bool dealignFlag = as<bool>(params["dealign"]);
283
+			if (dealignFlag) { //15
284
+				//Rprintf("DEALIGN");
285
+				appendString(&argv, argc, "--dealign");
286
+			}
287
+        }
288
+        if (hasClustalOmegaEntry(rParams, "force")) {
289
+			bool forceFlag = as<bool>(params["force"]);
290
+			if (forceFlag) { //16
291
+				appendString(&argv, argc, "--force");
292
+			}
293
+        }
294
+        if (hasClustalOmegaEntry(rParams, "full")) {
295
+			bool fullFlag = as<bool>(params["full"]);
296
+			if (fullFlag) { //17
297
+				//Rprintf("FULL");
298
+				appendString(&argv, argc, "--full");
299
+			}
300
+        }
301
+        if (hasClustalOmegaEntry(rParams, "fullIter")) {
302
+			bool fullIterFlag = as<bool>(params["fullIter"]);
303
+			if (fullIterFlag) { //18
304
+				//Rprintf("FULLITER");
305
+				appendString(&argv, argc, "--full-iter");
306
+			}
307
+        }
308
+        if (hasClustalOmegaEntry(rParams, "help")) {
309
+			bool helpFlag = as<bool>(params["help"]);
310
+			if (helpFlag) { //19
311
+				//Rprintf("HELP");
312
+				appendString(&argv, argc, "--help");
313
+			}
314
+        }
315
+        if (hasClustalOmegaEntry(rParams, "isProfile")) {
316
+			bool isProfileFlag = as<bool>(params["isProfile"]);
317
+			if (isProfileFlag) { //20
318
+				//Rprintf("ISPROFILE");
319
+				appendString(&argv, argc, "--is-profile");
320
+			}
321
+        }
322
+        if (hasClustalOmegaEntry(rParams, "longVersion")) {
323
+			bool longVersionFlag = as<bool>(params["longVersion"]);
324
+			if (longVersionFlag) { //21
325
+				//Rprintf("longVersion");
326
+				appendString(&argv, argc, "--long-version");
327
+			}
328
+        }
329
+        if (hasClustalOmegaEntry(rParams, "percentId")) {
330
+			bool percentIdFlag = as<bool>(params["percentId"]);
331
+			if (percentIdFlag) { //22
332
+				//Rprintf("percentId");
333
+				appendString(&argv, argc, "--percent-id");
334
+			}
335
+        }
336
+        if (hasClustalOmegaEntry(rParams, "residueNumber")) {
337
+			bool residueNumberFlag = as<bool>(params["residueNumber"]);
338
+			if (residueNumberFlag) { //23
339
+				//Rprintf("residueNumber");
340
+				appendString(&argv, argc, "--residuenumber");
341
+			}
342
+        }
343
+        if (hasClustalOmegaEntry(rParams, "useKimura")) {
344
+			bool useKimuraFlag = as<bool>(params["useKimura"]);
345
+			if (useKimuraFlag) { //24
346
+				//Rprintf("useKimura");
347
+				appendString(&argv, argc, "--use-kimura");
348
+			}
349
+        }
350
+        if (hasClustalOmegaEntry(rParams, "version")) {
351
+			bool versionFlag = as<bool>(params["version"]);
352
+			if (versionFlag) { //25
353
+				//Rprintf("version");
354
+				appendString(&argv, argc, "--version");
355
+			}
356
+        }
357
+
358
+        bool verbose = as<bool>(rVerbose);
359
+		if (verbose) {
360
+			Rprintf("params:", argc);
361
+			int cnt;
362
+			for (cnt = 0; cnt < argc; cnt++) {
363
+				Rprintf(" %s", argv[cnt]);
364
+			}
365
+			Rprintf("\n");
366
+			appendString(&argv, argc, "-v");
367
+		}
368
+
369
+        if (!Rf_isNull(rSubstitutionMatrix)) {
370
+            string substitutionMatrix = as<string>(rSubstitutionMatrix);
371
+            const char* subMatrix = substitutionMatrix.c_str();
372
+            if (strcmp("BLOSUM30", subMatrix) == 0) {
373
+                msaInput.substitutionMatrix = 30;
374
+                Rprintf("using BLOSUM30\n");
375
+            } else if (strcmp("BLOSUM40", subMatrix) == 0) {
376
+                msaInput.substitutionMatrix = 40;
377
+                Rprintf("using BLOSUM40\n");
378
+            } else if (strcmp("BLOSUM50", subMatrix) == 0) {
379
+                msaInput.substitutionMatrix = 50;
380
+                Rprintf("using BLOSUM50\n");
381
+            } else if (strcmp("BLOSUM65", subMatrix) == 0) {
382
+                msaInput.substitutionMatrix = 65;
383
+                Rprintf("using BLOSUM65\n");
384
+            } else if (strcmp("BLOSUM80", subMatrix) == 0) {
385
+                msaInput.substitutionMatrix = 80;
386
+                Rprintf("using BLOSUM80\n");
387
+            } else {
388
+                msaInput.substitutionMatrix = 0; //Gonnet
389
+                Rprintf("using Gonnet\n");
390
+            }
391
+        } else {
392
+            msaInput.substitutionMatrix = 0; //Gonnet
393
+            Rprintf("using Gonnet\n");
394
+        }
395
+
396
+        ClustalOmegaOutput msaOutput;
397
+        executeClustalOmega(argc, argv, &msaInput, &msaOutput);
398
+
399
+        for (int i = 0; i < argc; i++) {
400
+            free(argv[i]);
401
+        }
402
+        free(argv);
403
+
404
+        /*for (int i = 0; i < n; i++) {
405
+        	delete[](msaInput.inputSeqs);
406
+        }*/
407
+        if (!inputSeqIsFileFlag) {
408
+        	delete[](*(msaInput.inputSeqs));
409
+        }
410
+
411
+
412
+        vector<string> result;
413
+        for (int i = 0; i < msaOutput.msa_c; i++) {
414
+            result.push_back(msaOutput.msa_v[i]);
415
+            //Rprintf("free %i - %s\n", i, msaOutput.msa_v[i]);
416
+            free(msaOutput.msa_v[i]);
417
+        }
418
+        free((msaOutput.msa_v));
419
+
420
+        retList = Rcpp::List::create(Rcpp::Named("msa") =
421
+                Rcpp::CharacterVector(result.begin(), result.end()));
422
+
423
+    } catch(int i) {
424
+        if (i == 0) {
425
+            Rprintf("ClustalOmega finished successfully");
426
+        } else {
427
+            Rf_error("ClustalOmega finished with errors");
428
+        }
429
+    } catch( std::exception &ex ) {
430
+    	Rf_error("ClustalOmega finished with errors");
431
+        forward_exception_to_r(ex);
432
+    } catch(...) {
433
+        Rf_error("ClustalOmega finished by an unknown reason");
434
+    }
435
+    return retList;
436
+}