Browse code

Multiple fixes; final comit before release; version number bumped to 0.99.6

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102514 bc3139a8-67e5-0310-9ffc-ced21a209358

Ulrich Bodenhofer authored on 15/04/2015 13:20:09
Showing 47 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: msa
2 2
 Type: Package
3 3
 Title: Multiple Sequence Alignment
4
-Version: 0.99.5
5
-Date: 2015-04-12
4
+Version: 0.99.6
5
+Date: 2015-04-15
6 6
 Author: Enrico Bonatesta, Christoph Horejs-Kainrath, Ulrich Bodenhofer
7 7
 Maintainer: Ulrich Bodenhofer <bodenhofer@bioinf.jku.at>
8 8
 Description: This package provides a unified R/Bioconductor interface to the
... ...
@@ -17,15 +17,18 @@ URL: http://www.bioinf.jku.at/software/msa/
17 17
 License: GPL (>= 2)
18 18
 Copyright: See file inst/COPYRIGHT
19 19
 Depends: R (>= 3.1.0), methods, Biostrings (>= 2.30.0)
20
-Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0), S4Vectors,
21
-	 tools
20
+Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0),
21
+        S4Vectors, tools
22 22
 Suggests: Biobase, knitr
23 23
 LinkingTo: Rcpp
24
-SystemRequirements: GNU make
24
+SystemRequirements:
25 25
 VignetteBuilder: knitr
26 26
 LazyLoad: yes
27
-Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R 
28
-         helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R
29
-	 print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R 
30
-         msaClustalOmega.R
31
-biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison, Sequencing
27
+Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R
28
+        helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R
29
+        print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R
30
+        msaClustalOmega.R
31
+biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison,
32
+        Sequencing
33
+NeedsCompilation: yes
34
+Packaged: 2015-04-15 13:16:43 UTC; bodenhof
... ...
@@ -4,7 +4,7 @@ convertAlnRows <- function(rows, type)
4 4
 
5 5
     if (length(rows) < 3 ||
6 6
         ##!identical(grep("^CLUSTAL", rows[1L]), 1L) ||
7
-        !identical(rows[2:3], c("","")))
7
+        !identical(sub("^\\s+$", "", rows[2:3]), c("", "")))
8 8
         stop("There is an invalid aln file!")
9 9
 
10 10
     rows <- tail(rows, -3)
... ...
@@ -43,7 +43,7 @@ getParamsList <-function(type,
43 43
 
44 44
 ##function, which tests a given input sequence
45 45
 ##whether it is a character string, or a XStringSet
46
-transformInputSeq <- function(inputSeq, isFile=FALSE) {
46
+transformInputSeq <- function(inputSeq) {
47 47
     if (!is(inputSeq, "character")) {
48 48
         if (is(inputSeq, "XStringSet")) {
49 49
             inputSeq <- as.character(inputSeq)
... ...
@@ -53,24 +53,6 @@ transformInputSeq <- function(inputSeq, isFile=FALSE) {
53 53
         }
54 54
     }
55 55
 
56
-    if (!isFile)
57
-    {
58
-        if (is.null(names(inputSeq))) {
59
-            warning("The input sequences are unnamed! \n",
60
-                    "Note that the order of sequences \n",
61
-                    "in the resulting multiple sequence alignment \n",
62
-                    "may not be preserved, so unique sequence names\n",
63
-                    "are necessary to recover the order!")
64
-        }
65
-        else if (length(unique(names(inputSeq))) != length(inputSeq)){
66
-            warning("The input sequences are not unique! \n",
67
-                    "Note that the order of sequences \n",
68
-                    "in the resulting multiple sequence alignment \n",
69
-                    "may not be preserved, so unique sequence names \n",
70
-                    "are necessary to recover the order!")
71
-        }
72
-    }
73
-
74 56
     return(inputSeq)
75 57
 }
76 58
 
... ...
@@ -72,9 +72,8 @@ checkType <- function(type, inputSeqs, msaName){
72 72
 ###############################################################################
73 73
 
74 74
 ##function, that tests the input of gapOpening.
75
-##If the value is negative, everything is ok and the function returns
76
-##the gapOpening parameter. If the input is positive, the value is transformed
77
-##and a warning is given. If the input is not numeric, an exception is thrown.
75
+##If the value is numeric, everything is ok and the function returns
76
+##the gapOpening parameter. If the input is not numeric, an exception is thrown.
78 77
 ##Same for missing substitutionMatrix
79 78
 checkGapOpening <- function(gapOpening, type, substitutionMatrix,
80 79
         defaultDNAValue,  defaultAAValue){
... ...
@@ -90,25 +89,18 @@ checkGapOpening <- function(gapOpening, type, substitutionMatrix,
90 89
     if (is.numeric(gapOpening)) {
91 90
         if (is.matrix(gapOpening)) {
92 91
             stop("The parameter gapOpening should be \n",
93
-                 "a negative numeric, not a matrix!")
92
+                 "a numeric, not a matrix!")
94 93
         }
95 94
         if (length(gapOpening) != 1) {
96 95
             stop("The parameter gapOpening should be \n",
97
-                 "a negative numeric, not a vector!")
96
+                 "a numeric, not a vector!")
98 97
         }
99 98
         if (is.nan(gapOpening)) {
100 99
             stop("The parameter gapOpening should be \n",
101
-                 "a negative numeric, not a NaN!")
102
-        }
103
-        ##check if gapOpening is positive
104
-        if (gapOpening > 0) {
105
-            warning("According to BioStrings, the parameter gapOpening \n",
106
-                    "should be negative! The value is transformed \n",
107
-                    "to a negative numeric!")
108
-            gapOpening <- gapOpening * -1;
100
+                 "a numeric, not a NaN!")
109 101
         }
110 102
     } else {
111
-        stop("The parameter gapOpening should be a negative numeric!")
103
+        stop("The parameter gapOpening should be a numeric!")
112 104
     }
113 105
     return(gapOpening)
114 106
 }
... ...
@@ -117,9 +109,8 @@ checkGapOpening <- function(gapOpening, type, substitutionMatrix,
117 109
 
118 110
 ##used in MUSCLE, analoguous to checkGapOpening, but only ONE defaut value
119 111
 ##function, that tests the input of gapOpening.
120
-##If the value is negative, everything is ok and the function returns
121
-##the gapOpening parameter. If the input is positive, the value is transformed
122
-##and a warning is given. If the input is not numeric, an exception is thrown.
112
+##If the value is numeric, everything is ok and the function returns
113
+##the gapOpening parameter. If the input is not numeric, an exception is thrown.
123 114
 checkGapOpening2 <- function(gapOpening, substitutionMatrix,
124 115
         defaultValue){
125 116
     ##set defaultValue
... ...
@@ -132,25 +123,18 @@ checkGapOpening2 <- function(gapOpening, substitutionMatrix,
132 123
     if (is.numeric(gapOpening)) {
133 124
         if (is.matrix(gapOpening)) {
134 125
             stop("The parameter gapOpening should be \n",
135
-                 "a negative numeric, not a matrix!")
126
+                 "a numeric, not a matrix!")
136 127
         }
137 128
         if (length(gapOpening) != 1) {
138 129
             stop("The parameter gapOpening should be \n",
139
-                 "a negative numeric, not a vector!")
130
+                 "a numeric, not a vector!")
140 131
         }
141 132
         if (is.nan(gapOpening)) {
142 133
             stop("The parameter gapOpening should be \n",
143
-                 "a negative numeric, not a NaN!")
144
-        }
145
-        ##check if gapOpening is positive
146
-        if (gapOpening > 0) {
147
-            warning("According to BioStrings, the parameter gapOpening \n",
148
-                    "should be negative! The value is transformed \n",
149
-                    "to a negative numeric!")
150
-            gapOpening <- gapOpening * -1;
134
+                 "a numeric, not a NaN!")
151 135
         }
152 136
     } else {
153
-        stop("The parameter gapOpening should be a negative numeric!")
137
+        stop("The parameter gapOpening should be a numeric!")
154 138
     }
155 139
     return(gapOpening)
156 140
 }
... ...
@@ -172,25 +156,18 @@ checkGapExtension <- function(gapExtension, type, substitutionMatrix,
172 156
     if (is.numeric(gapExtension)) {
173 157
         if (is.matrix(gapExtension)) {
174 158
            stop("The parameter gapExtension should be \n",
175
-                "a negative numeric, not a matrix!")
159
+                "a numeric, not a matrix!")
176 160
         }
177 161
         if (length(gapExtension) != 1) {
178 162
            stop("The parameter gapExtension should be \n",
179
-                "a negative numeric, not a vector!")
163
+                "a numeric, not a vector!")
180 164
         }
181 165
         if (is.nan(gapExtension)) {
182 166
             stop("The parameter gapExtension should be \n",
183
-                 "a negative numeric, not a NaN!")
184
-        }
185
-        ##check if gapExtension is positive
186
-        if (gapExtension > 0) {
187
-           warning("According to BioStrings, the parameter gapOpening \n",
188
-                   "should be negative! The value is transformed to a \n",
189
-                   "negative numeric.")
190
-           gapExtension <- gapExtension * -1
167
+                 "a numeric, not a NaN!")
191 168
         }
192 169
     } else {
193
-        stop("The parameter gapExtension should be a negative numeric!")
170
+        stop("The parameter gapExtension should be a numeric!")
194 171
     }
195 172
 
196 173
     return(gapExtension)
... ...
@@ -6,6 +6,7 @@ msa <- function(inputSeqs,
6 6
                 maxiters="default",
7 7
                 substitutionMatrix="default",
8 8
                 type="default",
9
+                order=c("aligned", "input"),
9 10
                 verbose=FALSE,
10 11
                 help=FALSE,
11 12
                 ...)
... ...
@@ -5,6 +5,7 @@ msaClustalOmega <- function(inputSeqs,
5 5
                             maxiters="default",
6 6
                             substitutionMatrix="default",
7 7
                             type="default",
8
+                            order=c("aligned", "input"),
8 9
                             verbose=FALSE,
9 10
                             help=FALSE,
10 11
                             ...)
... ...
@@ -53,7 +54,15 @@ msaClustalOmega <- function(inputSeqs,
53 54
     #############
54 55
     # inputSeqs #
55 56
     #############
56
-    inputSeqs <- transformInputSeq(inputSeqs, params[["inputSeqIsFileFlag"]])
57
+    inputSeqs <- transformInputSeq(inputSeqs)
58
+
59
+    #############
60
+    # order     #
61
+    #############
62
+    order <- match.arg(order)
63
+    params[["outputOrder"]] <- switch(order,
64
+                                      aligned="tree-order",
65
+                                      input="input-order")
57 66
 
58 67
     ###########
59 68
     # cluster #
... ...
@@ -103,9 +112,9 @@ msaClustalOmega <- function(inputSeqs,
103 112
             substitutionMatrix <- NULL
104 113
     } else if (is.character(substitutionMatrix) &&
105 114
                    !is.matrix(substitutionMatrix)) {
106
-            possibleValues <- c("blosum30","blosum40", "blosum50",
107
-                                "blosum65","blosum80","gonnet")
108
-            if (!(tolower(substitutionMatrix) %in% possibleValues)){
115
+            possibleValues <- c("BLOSUM30", "BLOSUM40", "BLOSUM50",
116
+                                "BLOSUM65", "BLOSUM80", "Gonnet")
117
+            if (!(substitutionMatrix %in% possibleValues)){
109 118
                 ##create a string with all possible Values named text
110 119
                 text <- ""
111 120
                 text <- paste(possibleValues, collapse=", ")
... ...
@@ -117,21 +126,16 @@ msaClustalOmega <- function(inputSeqs,
117 126
     ##############
118 127
     # gapOpening #
119 128
     ##############
120
-
121
-    gapOpening <- checkGapOpening(gapOpening, type,
122
-                                  substitutionMatrix, -6, -6)
123
-    # ClustalOmega uses positive values
124
-    gapOpening <- gapOpening * -1
125
-
129
+    if (!identical(gapOpening, "default"))
130
+        warning("msaClustalOmega currently does not support to set\n",
131
+                "gapOpening to a non-default value!\n")
126 132
 
127 133
     ################
128 134
     # gapExtension #
129 135
     ################
130
-
131
-    gapExtension <- checkGapExtension(gapExtension, type,
132
-                                      substitutionMatrix, -1, -1)
133
-    # ClustalOmega uses positive values
134
-    gapExtension <- gapExtension * -1
136
+    if (!identical(gapExtension, "default"))
137
+        warning("msaClustalOmega currently does not support to set\n",
138
+                "gapExtension to a non-default value!\n")
135 139
 
136 140
     ############
137 141
     # maxiters #
... ...
@@ -334,13 +338,13 @@ msaClustalOmega <- function(inputSeqs,
334 338
 
335 339
     ##delete param in copy
336 340
     paramsCopy[["isProfile"]] <- NULL
337
-    
341
+
338 342
     #######
339 343
     # log #
340 344
     #######
341 345
     ##Log all non-essential output to this file
342 346
     ##DEACTIVATED: All log-messages are print to R console
343
-    
347
+
344 348
     ##if(!is.null(params[["log"]])) {
345 349
     ##    tempList <- checkOutFile("log", params)
346 350
     ##    if (tempList$existingFile) {
... ...
@@ -349,7 +353,7 @@ msaClustalOmega <- function(inputSeqs,
349 353
     ##    }
350 354
     ##    params[["log"]] <- tempList$param
351 355
     ##}
352
-    
356
+
353 357
     ##delete param in copy
354 358
     ##paramsCopy[["log"]] <- NULL
355 359
 
... ...
@@ -468,7 +472,7 @@ msaClustalOmega <- function(inputSeqs,
468 472
         if (!identical (params[["outFmt"]], "clustal") &&
469 473
             !identical (params[["outFmt"]], "clu")) {
470 474
             stop("Until now, the parameter outFmt is only implemented ",
471
-                 "for value \"clustal\" \n", 
475
+                 "for value \"clustal\" \n",
472 476
                  "the other formats will be ",
473 477
                  "realized in a later version.")
474 478
         }
... ...
@@ -477,23 +481,6 @@ msaClustalOmega <- function(inputSeqs,
477 481
     ##delete param in copy
478 482
     paramsCopy[["outFmt"]] <- NULL
479 483
 
480
-    ################
481
-    # outputOrder #
482
-    ################
483
-    ##MSA output orderlike in input/guide-tree
484
-    ##default: output-order=input-order
485
-
486
-    ##possible Values
487
-    posVal <- c("input-order", "tree-order")
488
-    ##params[["outputOrder"]] <- checkSingleValParams("outputOrder", params,
489
-    ##                                                 "input-order", posVal)
490
-
491
-    params[["outputOrder"]] <- checkSingleValParamsNew(
492
-                                "outputOrder", params, posVal)
493
-
494
-    ##delete param in copy
495
-    paramsCopy[["outputOrder"]] <- NULL
496
-
497 484
     #############
498 485
     # percentId #
499 486
     #############
... ...
@@ -523,9 +510,9 @@ msaClustalOmega <- function(inputSeqs,
523 510
 
524 511
     if (!is.null(params[["profile2"]])) {
525 512
         if (is.null(params[["profile1"]])){
526
-            stop("The parameter profile1 is NULL, \n", 
513
+            stop("The parameter profile1 is NULL, \n",
527 514
                  "so the parameter profile2 can't have a value! \n",
528
-                 "Please insert file for parameter profile1 \n", 
515
+                 "Please insert file for parameter profile1 \n",
529 516
                  "or change the parameters profile1 and profile2!")
530 517
         }
531 518
         checkInFile("profile2", params)
... ...
@@ -570,7 +557,7 @@ msaClustalOmega <- function(inputSeqs,
570 557
 
571 558
     ##useKimura==TRUE AND percentId==TRUE is not allowed!!!
572 559
     if(params[["useKimura"]] & params[["percentId"]]){
573
-        stop("Percentage Identity cannot be calcuted \n", 
560
+        stop("Percentage Identity cannot be calcuted \n",
574 561
              "if Kimura Distances are used!",
575 562
              "You have to set either the parameter percentID or \n",
576 563
              "the parameter useKimura to FALSE!")
... ...
@@ -622,10 +609,10 @@ msaClustalOmega <- function(inputSeqs,
622 609
 
623 610
     inputSeqNames <- names(inputSeqs)
624 611
 
625
-    names(inputSeqs) <- paste0("seq", 1:length(inputSeqs))
612
+    names(inputSeqs) <- paste0("Seq", 1:length(inputSeqs))
626 613
 
627
-    result <- .Call("RClustalOmega", inputSeqs, cluster, gapOpening,
628
-                    gapExtension, maxiters, substitutionMatrix, type,
614
+    result <- .Call("RClustalOmega", inputSeqs, cluster, 6,
615
+                    1, maxiters, substitutionMatrix, type,
629 616
                     verbose, params, PACKAGE="msa");
630 617
 
631 618
     out <- convertAlnRows(result$msa, type)
... ...
@@ -638,7 +625,6 @@ msaClustalOmega <- function(inputSeqs,
638 625
     else
639 626
         names(out@unmasked) <- NULL
640 627
 
641
-
642 628
     standardParams <- list(gapOpening=gapOpening,
643 629
                            gapExtension=gapExtension,
644 630
                            maxiters=maxiters,
... ...
@@ -5,6 +5,7 @@ msaClustalW <- function(inputSeqs,
5 5
                         maxiters="default",
6 6
                         substitutionMatrix="default",
7 7
                         type="default",
8
+                        order=c("aligned", "input"),
8 9
                         verbose=FALSE,
9 10
                         help=FALSE,
10 11
                         ...)
... ...
@@ -53,7 +54,14 @@ msaClustalW <- function(inputSeqs,
53 54
     #############
54 55
     # inputSeqs #
55 56
     #############
56
-    inputSeqs <- transformInputSeq(inputSeqs, params[["inputSeqIsFileFlag"]])
57
+    inputSeqs <- transformInputSeq(inputSeqs)
58
+
59
+    #############
60
+    # order     #
61
+    #############
62
+    order <- match.arg(order)
63
+
64
+    params[["outorder"]] <- order
57 65
 
58 66
     ###########
59 67
     # cluster #
... ...
@@ -122,7 +130,7 @@ msaClustalW <- function(inputSeqs,
122 130
         !is.matrix(substitutionMatrix)) {
123 131
         ##check whether value is BLOSUM, PAM, GONNET, or ID;
124 132
             possibleValues <- c("blosum", "pam", "gonnet", "id")
125
-            if (!(tolower(substitutionMatrix) %in% possibleValues)){
133
+            if (!(substitutionMatrix %in% possibleValues)){
126 134
                 ##create a string with all possible Values named text
127 135
                 text <- ""
128 136
                 text <- paste(possibleValues, collapse=", ")
... ...
@@ -135,15 +143,13 @@ msaClustalW <- function(inputSeqs,
135 143
         if (isSymmetric(substitutionMatrix)) {
136 144
             if (nrow(substitutionMatrix) <=20 ||
137 145
                 nrow(substitutionMatrix) >26 ) {
138
-                    stop("The parameter substitutionMatrix ",
139
-                         "has wrong dimensions!")
146
+                    stop("substitutionMatrix has wrong dimensions!")
140 147
             }
141 148
         } else {
142
-            stop("The parameter substitutionMatrix should be symmetric!")
149
+            stop("substitutionMatrix should be a symmetric matrix!")
143 150
         }
144 151
     }
145 152
 
146
-
147 153
     ##############
148 154
     # gapOpening #
149 155
     ##############
... ...
@@ -351,17 +357,6 @@ msaClustalW <- function(inputSeqs,
351 357
     ##delete param in copy
352 358
     paramsCopy[["output"]] <- NULL
353 359
 
354
-    ############
355
-    # outorder #
356
-    ############
357
-
358
-    ##possible Values
359
-    posVal <- c("input", "aligned")
360
-    params[["outorder"]] <- checkSingleValParamsNew("outorder", params, posVal)
361
-
362
-    ##delete param in copy
363
-    paramsCopy[["outorder"]] <- NULL
364
-
365 360
     ########
366 361
     # case #
367 362
     ########
... ...
@@ -617,6 +612,9 @@ msaClustalW <- function(inputSeqs,
617 612
 
618 613
     params[["pwgapopen"]] <- checkNumericParamsNew("pwgapopen", params)
619 614
 
615
+    if (is.numeric(params[["pwgapopen"]]))
616
+        params[["pwgapopen"]] <- abs(params[["pwgapopen"]])
617
+
620 618
     ##delete param in copy
621 619
     paramsCopy[["pwgapopen"]] <- NULL
622 620
 
... ...
@@ -627,6 +625,8 @@ msaClustalW <- function(inputSeqs,
627 625
 
628 626
     params[["pwgapext"]] <- checkNumericParamsNew("pwgapext", params)
629 627
 
628
+    if (is.numeric(params[["pwgapext"]]))
629
+        params[["pwgapext"]] <- abs(params[["pwgapext"]])
630 630
 
631 631
     ##delete param in copy
632 632
     paramsCopy[["pwgapext"]] <- NULL
... ...
@@ -1116,11 +1116,11 @@ msaClustalW <- function(inputSeqs,
1116 1116
 
1117 1117
     inputSeqNames <- names(inputSeqs)
1118 1118
 
1119
-    names(inputSeqs) <- paste0("seq", 1:length(inputSeqs))
1119
+    names(inputSeqs) <- paste0("Seq", 1:length(inputSeqs))
1120 1120
 
1121
-    result <- .Call("RClustalW", inputSeqs, cluster, gapOpening, gapExtension,
1122
-                    maxiters, substitutionMatrix, type, verbose,
1123
-                    params, PACKAGE="msa")
1121
+    result <- .Call("RClustalW", inputSeqs, cluster, abs(gapOpening),
1122
+                    abs(gapExtension), maxiters, substitutionMatrix,
1123
+                    type, verbose, params, PACKAGE="msa")
1124 1124
 
1125 1125
     out <- convertAlnRows(result$msa, type)
1126 1126
 
... ...
@@ -5,6 +5,7 @@ msaMuscle <- function(inputSeqs,
5 5
                       maxiters="default",
6 6
                       substitutionMatrix="default",
7 7
                       type="default",
8
+                      order=c("aligned", "input"),
8 9
                       verbose=FALSE,
9 10
                       help=FALSE,
10 11
                       ...)
... ...
@@ -70,7 +71,27 @@ msaMuscle <- function(inputSeqs,
70 71
     # inputSeqs #
71 72
     #############
72 73
     ##transform the input Sequences to a string vector
73
-    inputSeqs <- transformInputSeq(inputSeqs, params[["inputSeqIsFileFlag"]])
74
+    inputSeqs <- transformInputSeq(inputSeqs)
75
+
76
+    #############
77
+    # order     #
78
+    #############
79
+    order <- match.arg(order)
80
+
81
+    if (order == "input")
82
+    {
83
+        if (params[["inputSeqIsFileFlag"]])
84
+            stop("msaMuscle does not support order=\"input\" for reading\n",
85
+                 "sequences directly from a FASTA file.")
86
+        else if (is.null(names(inputSeqs)) ||
87
+                 length(unique(names(inputSeqs))) != length(inputSeqs))
88
+        {
89
+            warning("order=\"input\" requires input sequences to be named\n",
90
+                    "uniquely! Assigning default names 'Seq1'..'Seqn'\n",
91
+                    "to sequences.")
92
+            names(inputSeqs) <- paste0("Seq", 1:length(inputSeqs))
93
+        }
94
+    }
74 95
 
75 96
     ###########
76 97
     # cluster #
... ...
@@ -1126,24 +1147,31 @@ msaMuscle <- function(inputSeqs,
1126 1147
 
1127 1148
     inputSeqNames <- names(inputSeqs)
1128 1149
 
1129
-    names(inputSeqs) <- paste0("seq", 1:length(inputSeqs))
1130
-
1150
+    names(inputSeqs) <- paste0("Seq", 1:length(inputSeqs))
1131 1151
 
1132
-    result <- .Call("RMuscle", inputSeqs, cluster, gapOpening,
1133
-                    gapExtension, maxiters, substitutionMatrix, type,
1152
+    result <- .Call("RMuscle", inputSeqs, cluster, -abs(gapOpening),
1153
+                    -abs(gapExtension), maxiters, substitutionMatrix, type,
1134 1154
                     verbose, params, PACKAGE="msa")
1135 1155
 
1136 1156
     out <- convertAlnRows(result$msa, type)
1137 1157
 
1138 1158
     if (length(inputSeqNames) > 0)
1139 1159
     {
1140
-        perm <- match(names(out@unmasked), names(inputSeqs))
1141
-        names(out@unmasked) <- inputSeqNames[perm]
1160
+        if (order == "aligned")
1161
+        {
1162
+            perm <- match(names(out@unmasked), names(inputSeqs))
1163
+            names(out@unmasked) <- inputSeqNames[perm]
1164
+        }
1165
+        else
1166
+        {
1167
+            perm <- match(names(inputSeqs), names(out@unmasked))
1168
+            out@unmasked <- out@unmasked[perm]
1169
+            names(out@unmasked) <- inputSeqNames
1170
+        }
1142 1171
     }
1143 1172
     else
1144 1173
         names(out@unmasked) <- NULL
1145 1174
 
1146
-
1147 1175
     standardParams <- list(gapOpening=gapOpening,
1148 1176
                            gapExtension=gapExtension,
1149 1177
                            maxiters=maxiters,
... ...
@@ -13,7 +13,7 @@ rm -rf src/gc-7.2/*.a
13 13
 ## -- Muscle
14 14
 rm -rf src/Muscle/muscle.make.std???.txt
15 15
 rm -rf src/Muscle/*.o
16
-rm -rf src/Muscle/*.dll
16
+rm -rf src/Muscle/*.a
17 17
 rm -rf src/Muscle/*.dll
18 18
 
19 19
 ## -- ClustalW
... ...
@@ -1,5 +1,5 @@
1 1
 Change history of package msa:
2 2
 ==============================
3 3
 
4
-Version 0.99.0:
5
-- first version submitted to Bioconductor
4
+Version 1.0.0:
5
+- first official release as part of Bioconductor 3.1
... ...
@@ -1,68 +1,68 @@
1
->HBA1 Homo sapiens
1
+>HBA1_Homo_sapiens
2 2
 VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
3 3
 KVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPA
4 4
 VHASLDKFLASVSTVLTSKYR
5
->HBA1 Macaca mulatta
5
+>HBA1_Macaca_mulatta
6 6
 VLSPADKSNVKAAWGKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
7 7
 KVADALTLAVGHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPA
8 8
 VHASLDKFLASVSTVLTSKYR
9
->HBA1 Ornithorhynchus anatinus
9
+>HBA1_Ornithorhynchus_anatinus
10 10
 MLTDAEKKEVTALWGKAAGHGEEYGAEALERLFQAFPTTKTYFSHFDLSHGSAQIKAHGK
11 11
 KVADALSTAAGHFDDMDSALSALSDLHAHKLRVDPVNFKLLAHCILVVLARHCPGEFTPS
12 12
 AHAAMDKFLSKVATVLTSKYR
13
->HBA1 Bos taurus
13
+>HBA1_Bos_taurus
14 14
 VLSAADKGNVKAAWGKVGGHAAEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGA
15 15
 KVAAALTKAVEHLDDLPGALSELSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPSDFTPA
16 16
 VHASLDKFLANVSTVLTSKYR
17
->HBA1 Monodelphis domestica
17
+>HBA1_Monodelphis_domestica
18 18
 VLSAADKTNVKAAWSKVGGNSGAYMGEALYRTFLSFPPTKTYFPHFEFSAGSAQIKGQGQ
19 19
 KIADAVSLAVAHMDDLATALSALSDLHAHNLKVDPVNFKFLCHNVLVTLASHLGKDFTPE
20 20
 IHASLDKFLALLSTVLTSKYR
21
->HBA1 Erinaceus europaeus
21
+>HBA1_Erinaceus_europaeus
22 22
 VLSATDKANVKTFWGKLGGHGGEYGGEALDRMFQAHPTTKTYFPHFDLNPGSAQVKGHGK
23 23
 KVADALTTAVNNLDDVPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLALHHPADFTPA
24 24
 VHASLDKFLATVATVLTSKYR
25
->HBA1 Loxodonta africana
25
+>HBA1_Loxodonta_africana
26 26
 VLSDNDKTNVKATWSKVGDHASDYVAEALERMFFSFPTTKTYFPHFDLGHGSGQVKAHGK
27 27
 KVGEALTQAVGHLDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLSSHQPTEFTPE
28 28
 VHASLDKFLSNVSTVLTSKYR
29
->HBA1 Mus musculus
29
+>HBA1_Mus_musculus
30 30
 VLSGEDKSNIKAAWGKIGGHGAEYGAEALERMFASFPTTKTYFPHFDVSHGSAQVKGHGK
31 31
 KVADALASAAGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPADFTPA
32 32
 VHASLDKFLASVSTVLTSKYR
33
->HBA1 Felis silvestris catus
33
+>HBA1_Felis_silvestris_catus
34 34
 VLSAADKSNVKACWGKIGSHAGEYGAEALERTFCSFPTTKTYFPHFDLSHGSAQVKAHGQ
35 35
 KVADALTQAVAHMDDLPTAMSALSDLHAYKLRVDPVNFKFLSHCLLVTLACHHPAEFTPA
36 36
 VHASLDKFFSAVSTVLTSKYR
37
->HBA1 Chrysocyon brachyurus
37
+>HBA1_Chrysocyon_brachyurus
38 38
 VLSPADKTNIKSTWDKIGGHAGDYGGEALDRTFQSFPTTKTYFPHFDLSPGSAQVKAHGK
39 39
 KVADALTTAVAHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPTEFTPA
40 40
 VHASLDKFFTAVSTVLTSKYR
41
->HBA1 Gallus gallus
41
+>HBA1_Gallus_gallus
42 42
 VLSAADKNNVKGIFTKIAGHAEEYGAETLERMFTTYPPTKTYFPHFDLSHGSAQIKGHGK
43 43
 KVVAALIEAANHIDDIAGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPAALTPE
44 44
 VHASLDKFLCAVGTVLTAKYR
45
->Zebra_fish (Brachydanio rerio)x
45
+>HBA1_Danio_rerio
46 46
 SLSDTDKAVVKAIWAKISPKADEIGAEALARMLTVYPQTKTYFSHWADLSPGSGPVKKHG
47 47
 KTIMGAVGEAISKIDDLVGGLAALSELHAFKLRVDPANFKILSHNVIVVIAMLFPADFTP
48 48
 EVHVSVDKFFNNLALALSEKYR
49
->HBA1 Tursiops truncatus
49
+>HBA1_Tursiops_truncatus
50 50
 VLSPADKTNVKGTWSKIGNHSAEYGAEALERMFINFPSTKTYFSHFDLGHGSAQIKGHGK
51 51
 KVADALTKAVGHIDNLPDALSELSDLHAHKLRVDPVNFKLLSHCLLVTLALHLPADFTPS
52 52
 VHASLDKFLASVSTVLTSKYR
53
->HBA1 Xenopus tropicalis
53
+>HBA1_Xenopus_tropicalis
54 54
 HLTADDKKHIKAIWPSVAAHGDKYGGEALHRMFMCAPKTKTYFPDFDFSEHSKHILAHGK
55 55
 KVSDALNEACNHLDNIAGCLSKLSDLHAYDLRVDPGNFPLLAHQILVVVAIHFPKQFDPA
56 56
 THKALDKFLVSVSNVLTSKYR
57
->HBA1 Microcephalophis gracilis
57
+>HBA1_Microcephalophis_gracilis
58 58
 VLTEEDKARVRVAWVPVSKNAELYGAETLTRLFAAHPTTKTYFPHFDLSPGSNDLKVHGK
59 59
 KVIDALTEAVNNLDDVAGALSKLSDLHAQKLRVDPDNFQFLGLCLEVTIAAHSGGPLKPE
60 60
 VLLSVDKFLGQISKVLASRYR
61
->HBA1 Pan troglodytes
61
+>HBA1_Pan_troglodytes
62 62
 MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG
63 63
 KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP
64 64
 AVHASLDKFLASVSTVLTSKYR
65
->HBA1 Rattus norvegicus
65
+>HBA1_Rattus_norvegicus
66 66
 MVLSADDKTNIKNCWGKIGGHGGEYGEEALQRMFAAFPTTKTYFSHIDVSPGSAQVKAHG
67 67
 KKVADALAKAADHVEDLPGALSTLSDLHAHKLRVDPVNFKFLSHCLLVTLACHHPGDFTP
68 68
 AMHASLDKFLASVSTVLTSKYR
... ...
@@ -1,4 +1,4 @@
1
->PH4H Homo sapiens
1
+>PH4H_Homo_sapiens
2 2
 MSTAVLENPGLGRKLSDFGQETSYIEDNCNQNGAISLIFSLKEEVGALAKVLRLFEENDV
3 3
 NLTHIESRPSRLKKDEYEFFTHLDKRSLPALTNIIKILRHDIGATVHELSRDKKKDTVPW
4 4
 FPRTIQELDRFANQILSYGAELDADHPGFKDPVYRARRKQFADIAYNYRHGQPIPRVEYM
... ...
@@ -7,7 +7,7 @@ RLRPVAGLLSSRDFLGGLAFRVFHCTQYIRHGSKPMYTPEPDICHELLGHVPLFSDRSFA
7 7
 QFSQEIGLASLGAPDEYIEKLATIYWFTVEFGLCKQGDSIKAYGAGLLSSFGELQYCLSE
8 8
 KPKLLPLELEKTAIQNYTVTEFQPLYYVAESFNDAKEKVRNFAATIPRPFSVRYDPYTQR
9 9
 IEVLDNTQQLKILADSINSEIGILCSALQKIK
10
->PH4H Rattus norvegicus
10
+>PH4H_Rattus_norvegicus
11 11
 MAAVVLENGVLSRKLSDFGQETSYIEDNSNQNGAISLIFSLKEEVGALAKVLRLFEENDI
12 12
 NLTHIESRPSRLNKDEYEFFTYLDKRTKPVLGSIIKSLRNDIGATVHELSRDKEKNTVPW
13 13
 FPRTIQELDRFANQILSYGAELDADHPGFKDPVYRARRKQFADIAYNYRHGQPIPRVEYT
... ...
@@ -16,7 +16,7 @@ RLRPVAGLLSSRDFLGGLAFRVFHCTQYIRHGSKPMYTPEPDICHELLGHVPLFSDRSFA
16 16
 QFSQEIGLASLGAPDEYIEKLATIYWFTVEFGLCKEGDSIKAYGAGLLSSFGELQYCLSD
17 17
 KPKLLPLELEKTACQEYSVTEFQPLYYVAESFSDAKEKVRTFAATIPRPFSVRYDPYTQR
18 18
 VEVLDNTQQLKILADSINSEVGILCNALQKIKS
19
->PH4H Mus musculus
19
+>PH4H_Mus_musculus
20 20
 MAAVVLENGVLSRKLSDFGQETSYIEDNSNQNGAVSLIFSLKEEVGALAKVLRLFEENEI
21 21
 NLTHIESRPSRLNKDEYEFFTYLDKRSKPVLGSIIKSLRNDIGATVHELSRDKEKNTVPW
22 22
 FPRTIQELDRFANQILSYGAELDADHPGFKDPVYRARRKQFADIAYNYRHGQPIPRVEYT
... ...
@@ -25,19 +25,19 @@ RLRPVAGLLSSRDFLGGLAFRVFHCTQYIRHGSKPMYTPEPDICHELLGHVPLFSDRSFA
25 25
 QFSQEIGLASLGAPDEYIEKLATIYWFTVEFGLCKEGDSIKAYGAGLLSSFGELQYCLSD
26 26
 KPKLLPLELEKTACQEYTVTEFQPLYYVAESFNDAKEKVRTFAATIPRPFSVRYDPYTQR
27 27
 VEVLDNTQQLKILADSINSEVGILCHALQKIKS
28
->PH4H Chromobacterium violaceum
28
+>PH4H_Chromobacterium_violaceum
29 29
 MNDRADFVVPDITTRKNVGLSHDANDFTLPQPLDRYSAEDHATWATLYQRQCKLLPGRAC
30 30
 DEFMEGLERLEVDADRVPDFNKLNQKLMAATGWKIVAVPGLIPDDVFFEHLANRRFPVTW
31 31
 WLREPHQLDYLQEPDVFHDLFGHVPLLINPVFADYLEAYGKGGVKAKALGALPMLARLYW
32 32
 YTVEFGLINTPAGMRIYGAGILSSKSESIYCLDSASPNRVGFDLMRIMNTRYRIDTFQKT
33 33
 YFVIDSFKQLFDATAPDFAPLYLQLADAQPWGAGDVAPDDLVLNAGDRQGWADTEDV
34
->PH4H Pseudomonas aeruginosa
34
+>PH4H_Pseudomonas_aeruginosa
35 35
 MKTTQYVARQPDDNGFIHYPETEHQVWNTLITRQLKVIEGRACQEYLDGIEQLGLPHERI
36 36
 PQLDEINRVLQATTGWRVARVPALIPFQTFFELLASQQFPVATFIRTPEELDYLQEPDIF
37 37
 HEIFGHCPLLTNPWFAEFTHTYGKLGLKASKEERVFLARLYWMTIEFGLVETDQGKRIYG
38 38
 GGILSSPKETVYSLSDEPLHQAFNPLEAMRTPYRIDILQPLYFVLPDLKRLFQLAQEDIM
39 39
 ALVHEAMRLGLHAPLFPPKQAA
40
->PH4H Bos taurus
40
+>PH4H_Bos_taurus
41 41
 MSALVLESRALGRKLSDFGQETSYIEGNSDQNAVSLIFSLKEEVGALARVLRLFEENDIN
42 42
 LTHIESRPSRLRKDEYEFFTNLDQRSVPALANIIKILRHDIGATVHELSRDKKKDTVPWF
43 43
 PRTIQELDNFANQVLSYGAELDADHPGFKDPVYRARRKQFADIAYNYRHGQPIPRVEYTE
... ...
@@ -46,20 +46,20 @@ LRPVAGLLSSRDFLGGLAFRVFHCTQYIRHGSKPMYTPEPDICHELLGHVPLFSDRSFAQ
46 46
 FSQEIGLASLGAPDEYIEKLATIYWFTVEFGLCKQGDSIKAYGAGLLSSFGELQYCLSDK
47 47
 PKLLPLELEKTAVQEYTITEFQPLYYVAESFNDAKEKVRNFAATIPRPFSVHYDPYTQRI
48 48
 EVLDNTQQLKILADSISSEVEILCSALQKLK
49
->PH4H Ralstonia solanacearum
49
+>PH4H_Ralstonia_solanacearum
50 50
 MAIATPTSAAPTPAPAGFTGTLTDKLREQFAEGLDGQTLRPDFTMEQPVHRYTAADHATW
51 51
 RTLYDRQEALLPGRACDEFLQGLSTLGMSREGVPSFDRLNETLMRATGWQIVAVPGLVPD
52 52
 EVFFEHLANRRFPASWWMRRPDQLDYLQEPDGFHDIFGHVPLLINPVFADYMQAYGQGGL
53 53
 KAARLGALDMLARLYWYTVEFGLIRTPAGLRIYGAGIVSSKSESVYALDSASPNRIGFDV
54 54
 HRIMRTRYRIDTFQKTYFVIDSFEQLFDATRPDFTPLYEALGTLPTFGAGDVVDGDAVLN
55 55
 AGTREGWADTADI
56
->PH4H Caulobacter crescentus
56
+>PH4H_Caulobacter_crescentus
57 57
 MSGDGLSNGPPPGARPDWTIDQGWETYTQAEHDVWITLYERQTDMLHGRACDEFMRGLDA
58 58
 LDLHRSGIPDFARINEELKRLTGWTVVAVPGLVPDDVFFDHLANRRFPAGQFIRKPHELD
59 59
 YLQEPDIFHDVFGHVPMLTDPVFADYMQAYGEGGRRALGLGRLANLARLYWYTVEFGLMN
60 60
 TPAGLRIYGAGIVSSRTESIFALDDPSPNRIGFDLERVMRTLYRIDDFQQVYFVIDSIQT
61 61
 LQEVTLRDFGAIYERLASVSDIGVAEIVPGDAVLTRGTQAYATAGGRLAGAAAG
62
->PH4H Rhizobium loti
62
+>PH4H_Rhizobium_loti
63 63
 MSVAEYARDCAAQGLRGDYSVCRADFTVAQDYDYSDEEQAVWRTLCDRQTKLTRKLAHHS
64 64
 YLDGVEKLGLLDRIPDFEDVSTKLRKLTGWEIIAVPGLIPAAPFFDHLANRRFPVTNWLR
65 65
 TRQELDYIVEPDMFHDFFGHVPVLSQPVFADFMQMYGKKAGDIIALGGDEMITRLYWYTA
... ...
@@ -1,4 +1,4 @@
1
->PAH Homo sapiens
1
+>PAH_Homo_sapiens
2 2
 CAGCTGGGGGTAAGGGGGGCGGATTATTCATATAATTGTTATACCAGACGGTCGCAGGCTTAGTCCAATT
3 3
 GCAGAGAACTCGCTTCCCAGGCTTCTGAGAGTCCCGGAAGTGCCTAAACCTGTCTAATCGACGGGGCTTG
4 4
 GGTGGCCCGTCGCTCCCTGGCTTCTTCCCTTTACCCAGGGCGGGCAGCGAAGTGGTGCCTCCTGCGTCCC
... ...
@@ -38,7 +38,7 @@ ACTCTGCACCTAATCCCCATAACTTCCAGTATCATTTTCCAATTAATTATCAAGTCTGTTTTGGGAAACA
38 38
 CTTTGAGGACATTTATGATGCAGCAGATGTTGACTAAAGGCTTGGTTGGTAGATATTCAGGAAATGTTCA
39 39
 CTGAATAAATAAGTAAATACATTATTGAAAAGCAAATCTGTATAAATGTGAAATTTTTATTTGTATTAGT
40 40
 AATAAAACATTAGTAGTTTA
41
->PAH Mus musculus
41
+>PAH_Mus_musculus
42 42
 AGTCCAGGACTGGAGTTGGGTATGGACTGTTCATGTCTATCCACTGCTACGTCAGGGCAACACCCACTGA
43 43
 GAGTGACCTTGTAGACTGCAGTGGGAGACACCCTTCAAAACCTCTCCTCTCCTGTCCTGAGAGCCAGGTT
44 44
 AAAACCATCAGCCCCGCATCCTGAGTGCAAACTTTTCCTAACCCTGCTGCTAAGCTAGACACCTCACTTA
... ...
@@ -70,7 +70,7 @@ CATTTGGGCTCTGTCTGTTCATTTTTAACCTCTCAGGTAAGCTCTGATGGAATACATATTGCTTAGTGTA
70 70
 AAATGTGAGACTGTCATCGGGAACAAATTATTCCATTCAGTACAGATTGTAATCTACAAAGCTTAGTTTC
71 71
 TACATTCATTCATTATGGCTCTGAGAACTACTTTGTTAGCCTGCTTACATAAATGTCCTCTGATCTAAGA
72 72
 CTCTATTAAGATAGTTACAAGCAATAAAGCATTATATAAAACAAAAAAAAAA
73
->PAH Rattus norvegicus
73
+>PAH_Rattus_norvegicus
74 74
 CAAGTTAAAACCATCAGCCCTCCCATCCTGAGTGCCTACCTTTCCCAACCCCGCTGCTAAGCTAGACACC
75 75
 TCACTCACTGAGAGCCAGCATGGCAGCTGTTGTCCTGGAGAATGGAGTCCTGAGCAGAAAACTCTCAGAC
76 76
 TTTGGGCAGGAAACAAGCTATATTGAAGACAACTCCAATCAAAATGGTGCCATATCTCTGATATTCTCAC
... ...
@@ -1,4 +1,4 @@
1
->PAH Homo sapiens
1
+>PAH_Homo_sapiens
2 2
 CAGCUGGGGGUAAGGGGGGCGGAUUAUUCAUAUAAUUGUUAUACCAGACGGUCGCAGGCUUAGUCCAAUU
3 3
 GCAGAGAACUCGCUUCCCAGGCUUCUGAGAGUCCCGGAAGUGCCUAAACCUGUCUAAUCGACGGGGCUUG
4 4
 GGUGGCCCGUCGCUCCCUGGCUUCUUCCCUUUACCCAGGGCGGGCAGCGAAGUGGUGCCUCCUGCGUCCC
... ...
@@ -38,7 +38,7 @@ ACUCUGCACCUAAUCCCCAUAACUUCCAGUAUCAUUUUCCAAUUAAUUAUCAAGUCUGUUUUGGGAAACA
38 38
 CUUUGAGGACAUUUAUGAUGCAGCAGAUGUUGACUAAAGGCUUGGUUGGUAGAUAUUCAGGAAAUGUUCA
39 39
 CUGAAUAAAUAAGUAAAUACAUUAUUGAAAAGCAAAUCUGUAUAAAUGUGAAAUUUUUAUUUGUAUUAGU
40 40
 AAUAAAACAUUAGUAGUUUA
41
->PAH Mus musculus
41
+>PAH_Mus_musculus
42 42
 AGUCCAGGACUGGAGUUGGGUAUGGACUGUUCAUGUCUAUCCACUGCUACGUCAGGGCAACACCCACUGA
43 43
 GAGUGACCUUGUAGACUGCAGUGGGAGACACCCUUCAAAACCUCUCCUCUCCUGUCCUGAGAGCCAGGUU
44 44
 AAAACCAUCAGCCCCGCAUCCUGAGUGCAAACUUUUCCUAACCCUGCUGCUAAGCUAGACACCUCACUUA
... ...
@@ -70,7 +70,7 @@ CAUUUGGGCUCUGUCUGUUCAUUUUUAACCUCUCAGGUAAGCUCUGAUGGAAUACAUAUUGCUUAGUGUA
70 70
 AAAUGUGAGACUGUCAUCGGGAACAAAUUAUUCCAUUCAGUACAGAUUGUAAUCUACAAAGCUUAGUUUC
71 71
 UACAUUCAUUCAUUAUGGCUCUGAGAACUACUUUGUUAGCCUGCUUACAUAAAUGUCCUCUGAUCUAAGA
72 72
 CUCUAUUAAGAUAGUUACAAGCAAUAAAGCAUUAUAUAAAACAAAAAAAAAA
73
->PAH Rattus norvegicus
73
+>PAH_Rattus_norvegicus
74 74
 CAAGUUAAAACCAUCAGCCCUCCCAUCCUGAGUGCCUACCUUUCCCAACCCCGCUGCUAAGCUAGACACC
75 75
 UCACUCACUGAGAGCCAGCAUGGCAGCUGUUGUCCUGGAGAAUGGAGUCCUGAGCAGAAAACUCUCAGAC
76 76
 UUUGGGCAGGAAACAAGCUAUAUUGAAGACAACUCCAAUCAAAAUGGUGCCAUAUCUCUGAUAUUCUCAC
... ...
@@ -12,8 +12,8 @@ distMatOut=                  [Filename]
12 12
 force=                       [Logical Value]
13 13
 full=                        [Logical Value]
14 14
 fullIter=                    [Logical Value]
15
-gapOpening=                  [Negative Numeric]
16
-gapExtension=                [Negative Numeric]
15
+gapOpening=                  [Numeric] (currently not supported)
16
+gapExtension=                [Numeric] (currently not supported)
17 17
 guideTreeIn=                 [Filename]
18 18
 guideTreeOut=                [Filename]
19 19
 help=                        [Logical Value]
... ...
@@ -28,9 +28,9 @@ maxHmmIterations=            [Int]
28 28
 maxiters=                    [Positive Int]
29 29
 maxNumSeq=                   [Int]
30 30
 maxSeqLen=                   [Int]
31
+order=                       [String]
31 32
 outfile=                     [Filename]
32 33
 outFmt=                      [String]
33
-outputOrder=                 [String]
34 34
 percentId=                   [Logical Value]
35 35
 profile1=                    [Filename]
36 36
 profile2=                    [Filename]
... ...
@@ -13,8 +13,8 @@ dnamatrix=                   [Filename] or [String]
13 13
 endgaps=                     [Logical Value]
14 14
 fullhelp=                    [Logical Value]
15 15
 gapdist=                     [Int]
16
-gapOpening=                  [Negative Numeric]
17
-gapExtension=                [Negative Numeric]
16
+gapOpening=                  [Numeric]
17
+gapExtension=                [Numeric]
18 18
 helixendin=                  [Int]
19 19
 helixendout=                 [Int]
20 20
 helixgap=                    [Int]
... ...
@@ -34,7 +34,7 @@ nosecstr2=                   [Logical Value]
34 34
 novgap=                      [Logical Value]
35 35
 noweights=                   [Logical Value]
36 36
 options=                     [Logical Value]
37
-outorder=                    [String]
37
+order=                       [String]
38 38
 output=                      [String]
39 39
 outputtree=                  [String]
40 40
 pairgap=                     [Int]
... ...
@@ -20,8 +20,8 @@ diags2=                      [Logical Value]
20 20
 dimer=                       [Logical Value]
21 21
 distance1=                   [String]
22 22
 distance2=                   [String]
23
-gapOpening=                  [Negative Numeric]
24
-gapExtension=                [Negative Numeric]
23
+gapOpening=                  [Numeric]
24
+gapExtension=                [Numeric]
25 25
 hydro=                       [Positive Int]
26 26
 hydrofactor=                 [Positive Numeric]
27 27
 in1=                         [Filename]
... ...
@@ -36,6 +36,7 @@ minsmoothscore=              [Positive Numeric]
36 36
 noanchors=                   [Logical Value]
37 37
 nocore=                      [Logical Value]
38 38
 objscore=                    [String]
39
+order=                       [String]
39 40
 profile=                     [Logical Value]
40 41
 refine=                      [Logical Value]
41 42
 refinew=                     [Logical Value]
... ...
@@ -11,7 +11,8 @@
11 11
         cluster="default", gapOpening="default",
12 12
         gapExtension="default", maxiters="default",
13 13
         substitutionMatrix="default", type="default",
14
-        verbose=FALSE, help=FALSE, ...)
14
+        order=c("aligned", "input"), verbose=FALSE, help=FALSE,
15
+        ...)
15 16
 }
16 17
 \arguments{
17 18
   \item{inputSeqs}{input sequences; this argument can be a character vector,
... ...
@@ -31,16 +32,16 @@
31 32
     \code{\link{msaMuscle}} for algorithm-specific information.}
32 33
   \item{gapOpening}{gap opening penalty; the defaults are
33 34
     specific to the algorithm (see \code{\link{msaClustalW}},
34
-    \code{\link{msaClustalOmega}}, or \code{\link{msaMuscle}});
35
-    In order to standardize interfaces,
36
-    all algorithms consistently use negative values for the gap opening
37
-    penalty.}
35
+    and \code{\link{msaMuscle}}). Note that the sign of
36
+    this parameter is ignored. The sign is automatically
37
+    adjusted such that the called algorithm penalizes gaps
38
+    instead of rewarding them.}
38 39
   \item{gapExtension}{gap extension penalty; the defaults are
39 40
     specific to the algorithm (see \code{\link{msaClustalW}},
40
-    \code{\link{msaClustalOmega}}, or \code{\link{msaMuscle}});
41
-    In order to standardize interfaces,
42
-    all algorithms consistently use negative values for the gap extension
43
-    penalty.}
41
+    and \code{\link{msaMuscle}}). Note that the sign of
42
+    this parameter is ignored. The sign is automatically
43
+    adjusted such that the called algorithm penalizes gaps
44
+    instead of rewarding them.}
44 45
   \item{maxiters}{maximum number of iterations; its
45 46
     interpretation and default value depends on the method;
46 47
     see \code{\link{msaClustalW}}, \code{\link{msaClustalOmega}}, or
... ...
@@ -63,6 +64,18 @@
63 64
     parameter is not necessary. If it is nevertheless specified and the
64 65
     type does not match the class of \code{inputSeqs}, the function
65 66
     stops with an error.}
67
+  \item{order}{how the sequences should be ordered in the output object;
68
+    if \code{"aligned"} is chosen, the sequences are ordered in the way
69
+    the multiple sequence alignment algorithm orders them. If
70
+    \code{"input"} is chosen, the sequences in the output object are
71
+    ordered in the same way as the input sequences. For MUSCLE, the
72
+    choice \code{"input"} is not available for sequence data that is
73
+    read directly from a FASTA file. Even if sequences are supplied
74
+    directly via R, the sequences must have unique names, otherwise
75
+    the input order cannot be recovered. If the sequences do not have
76
+    names or if the names are not unique, the \code{\link{msaMuscle}}
77
+    function assignes generic unique names \code{"Seq1"}-\code{Seqn}
78
+    to the sequences and issues a warning.}
66 79
   \item{verbose}{if \code{TRUE}, the algorithm displays detailed
67 80
     information and progress messages.}
68 81
   \item{help}{if \code{TRUE}, information about algorithm-specific
... ...
@@ -86,22 +99,24 @@
86 99
 
87 100
   Note that the input sequences may be reordered by the multiple
88 101
   sequence alignment algorithms in order to group together similar
89
-  sequences. Therefore, it is necessary to assign names to input
90
-  sequences in case the user wants to preserve a one-to-one assignment
91
-  between input sequences and the sequences in the multiple sequence
92
-  alignment. That is why all algorithms check input sequences for
93
-  unique names and issue warnings if the input sequences are unnamed or
94
-  if the names are not unique.
95
-  As noted in the description of the \code{inputSeqs} argument above,
96
-  all functions, \code{msa()}, \code{\link{msaClustalW}},
97
-  \code{\link{msaClustalOmega}}, and \code{\link{msaMuscle}}, also allow
102
+  sequences (see also description of argument \code{order} above).
103
+  So, if the input order should be preserved or if the input order
104
+  should be recovered later, we strongly recommend to always assign
105
+  unique names to the input sequences. As noted in the description
106
+  of the \code{inputSeqs} argument above, all functions, \code{msa()},
107
+  \code{\link{msaClustalW}}, \code{\link{msaClustalOmega}}, and
108
+  \code{\link{msaMuscle}}, also allow
98 109
   for direct reading from FASTA files. This is mainly for the reason of
99 110
   memory efficiency if the sequence data set is very large. Otherwise,
100 111
   we want to encourage users to first read the sequences into the R
101
-  workspace. In any case, if sequences are read from a FASTA file
102
-  directly, this is completely under the control of the respective
112
+  workspace. If sequences are read from a FASTA file
113
+  directly, the order of output sequences is completely under
114
+  the control of the respective
103 115
   algorithm and does not allow for checking whether the sequences are
104
-  named uniquely in the FASTA file.
116
+  named uniquely in the FASTA file. The preservation of the input order
117
+  works also for sequence data read from a FASTA file, but only for
118
+  ClustalW and ClustalOmega; MUSCLE does not support this (see also
119
+  argument \code{order} above and \code{\link{msaMuscle}}).
105 120
 }
106 121
 \value{
107 122
    Depending on the type of sequences for which it was called,
... ...
@@ -164,10 +179,12 @@ mySeqs <- readAAStringSet(filepath)
164 179
 msa(mySeqs)
165 180
 
166 181
 ## call ClustalOmega through unified interface
167
-msa(mySeqs, method="ClustalOmega", cluster=120, gapOpening=-3,
168
-    gapExtension=-1, maxGuidetreeIterations=20, outFmt="clustal",
169
-    outputOrder="input-order", substitutionMatrix="blosum30",
170
-    type="protein", verbose=FALSE)
182
+msa(mySeqs, method="ClustalOmega")
183
+
184
+## call MUSCLE through unified interface with some custom parameters
185
+msa(mySeqs, method="Muscle", gapOpening=12, gapExtension=3, maxiters=16,
186
+    cluster="upgmamax", SUEFF=0.4, brenner=FALSE,
187
+    order="input", verbose=FALSE)
171 188
 }
172 189
 \keyword{manip}
173 190
 
... ...
@@ -9,38 +9,36 @@
9 9
     msaClustalOmega(inputSeqs, cluster="default",
10 10
                     gapOpening="default", gapExtension="default",
11 11
                     maxiters="default",  substitutionMatrix="default",
12
-                    type="default", verbose=FALSE, help=FALSE,
13
-                    ...) 
12
+                    type="default", order=c("aligned", "input"),
13
+                    verbose=FALSE, help=FALSE, ...) 
14 14
 }
15 15
 \arguments{
16 16
   \item{inputSeqs}{input sequences; see \code{\link{msa}}.
17 17
     In the original ClustalOmega implementation, this
18
-    parameter is called \code{-infile}.}
18
+    parameter is called \code{infile}.}
19 19
   \item{cluster}{The cluster size which should be used. The default is 100.  
20 20
     In the original ClustalOmega implementation, this parameter is called 
21
-    \code{--cluster-size}.}
22
-  \item{gapOpening}{gap opening penalty; the default value is -6.0.
23
-    In order to standardize interfaces,
24
-    all algorithms consistently use negative values for the gap open
25
-    penalty. This parameter is a new feature - the original ClustalOmega
26
-    implementation does not allow for customizing gap penalties.}
27
-  \item{gapExtension}{gap extension penalty; the default value is -1.0.
28
-    In order to standardize interfaces,
29
-    all algorithms consistently use negative values for the gap open
30
-    penalty. This parameter is a new feature - the original ClustalOmega
31
-    implementation does not allow for customizing gap penalties.}
21
+    \code{cluster-size}.}
22
+  \item{gapOpening,gapExtension}{ClustalOmega currently does
23
+    not allow to adjust gap penalties; these arguments are only for
24
+    future extensions and consistency with the other algorithms
25
+    and \code{\link{msa}}. However, setting these parameters to values
26
+    other than \code{"default"} will result in a warning.}
32 27
   \item{maxiters}{maximum number of iterations; the default value is 0
33 28
     (no limitation). In the original ClustalOmega implementation, this
34
-    parameter is called \code{--iterations}.}
29
+    parameter is called \code{iterations}.}
35 30
   \item{substitutionMatrix}{substitution matrix for scoring matches and
36 31
     mismatches; can be a real matrix, a file name, or the name of a
37 32
     built-in substitution matrix. In the latter case, the choices
38
-    \code{"blosum30"}, \code{"blosum40"}, \code{"blosum50"},
39
-    \code{"blosum65"}, \code{"blosum80"}, and \code{"gonnet"} are
33
+    \code{"BLOSUM30"}, \code{"BLOSUM40"}, \code{"BLOSUM50"},
34
+    \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"} are
40 35
     supported.  This parameter is a new feature - the original ClustalOmega
41 36
     implementation does not allow for using a custom substitution matrix.}
42 37
   \item{type}{type of the input sequences \code{inputSeqs};
43 38
     see \code{\link{msa}}.}
39
+  \item{order}{how the sequences should be ordered in the output object
40
+    (see \code{\link{msa}}); in the original ClustalW implementation, this
41
+    parameter is called \code{output-order}.}
44 42
   \item{verbose}{if \code{TRUE}, the algorithm displays detailed
45 43
     information and progress messages.}
46 44
   \item{help}{if \code{TRUE}, information about algorithm-specific
... ...
@@ -102,8 +100,7 @@ mySeqs <- readAAStringSet(filepath)
102 100
 msaClustalOmega(mySeqs)
103 101
 
104 102
 ## call msaClustalOmega with custom parameters
105
-msaClustalOmega(mySeqs, gapOpening=-6, gapExtension=-1, auto=FALSE,
106
-                cluster=120, dealign=FALSE, useKimura=FALSE,
107
-                verbose=FALSE)
103
+msaClustalOmega(mySeqs, auto=FALSE, cluster=120, dealign=FALSE,
104
+                useKimura=FALSE, order="input", verbose=FALSE)
108 105
 }
109 106
 \keyword{manip}
... ...
@@ -9,31 +9,26 @@
9 9
     msaClustalW(inputSeqs, cluster="default", gapOpening="default", 
10 10
                 gapExtension="default", maxiters="default", 
11 11
                 substitutionMatrix="default", type="default", 
12
-                verbose=FALSE, help=FALSE, ...)
12
+                order=c("aligned", "input"), verbose=FALSE,
13
+                help=FALSE, ...)
13 14
 }
14 15
 \arguments{
15 16
   \item{inputSeqs}{input sequences; see \code{\link{msa}}.
16 17
     In the original ClustalW implementation, this
17
-    parameter is called \code{-infile}.}
18
+    parameter is called \code{infile}.}
18 19
   \item{cluster}{The clustering method which should be used.
19 20
     Possible values are \code{"nj"} (default) and \code{"upgma"}.
20 21
     In the original ClustalW implementation, this parameter is called 
21
-    \code{-clustering}.}
22
+    \code{clustering}.}
22 23
   \item{gapOpening}{gap opening penalty; the default value for
23
-    nucleotide sequences is -15.0, the default value for 
24
-    amino acid sequences is -10.0. In order to standardize interfaces,
25
-    all algorithms consistently use negative values for the gap open
26
-    penalty. In the original ClustalW implementation, this parameter is
27
-    called \code{-gapOpen}.}
24
+    nucleotide sequences is 15.0, the default value for 
25
+    amino acid sequences is 10.0.}
28 26
   \item{gapExtension}{gap extension penalty; the default value for
29
-    nucleotide sequences is -6.66, the default value for 
30
-    amino acid sequences is -0.2. In order to standardize interfaces,
31
-    all algorithms consistently use negative values for the gap extension
32
-    penalty. In the original ClustalW implementation, this parameter is
33
-    called \code{-gapExt}.}
27
+    nucleotide sequences is 6.66, the default value for 
28
+    amino acid sequences is 0.2.}
34 29
   \item{maxiters}{maximum number of iterations; the default value is 16.
35 30
     In the original ClustalW implementation, this parameter is called
36
-    \code{-numiters}.}
31
+    \code{numiters}.}
37 32
   \item{substitutionMatrix}{substitution matrix for scoring matches and
38 33
     mismatches; can be a real matrix, a file name, or the name of a
39 34
     built-in substitution matrix. In the latter case, the choices
... ...
@@ -42,9 +37,12 @@
42 37
     sequences, the parameter \code{dnamatrix} must be used instead.
43 38
     The valid choices for this parameter are \code{"iub"} and
44 39
     \code{"clustalw"}. In the original ClustalW implementation, this
45
-    parameter is called \code{-matrix}.}
40
+    parameter is called \code{matrix}.}
46 41
   \item{type}{type of the input sequences \code{inputSeqs};
47 42
     see \code{\link{msa}}.}
43
+  \item{order}{how the sequences should be ordered in the output object
44
+    (see \code{\link{msa}}); in the original ClustalW implementation, this
45
+    parameter is called \code{outorder}.}
48 46
   \item{verbose}{if \code{TRUE}, the algorithm displays detailed
49 47
     information and progress messages.}
50 48
   \item{help}{if \code{TRUE}, information about algorithm-specific
... ...
@@ -105,7 +103,7 @@ mySeqs <- readAAStringSet(filepath)
105 103
 msaClustalW(mySeqs)
106 104
 
107 105
 ## call msaClustalW with custom parameters
108
-msaClustalW(mySeqs, gapOpening=-1, gapExtension=-1, maxiters=16,
109
-            type="protein", cluster="upgma", kimura=FALSE, maxdiv=23)
106
+msaClustalW(mySeqs, gapOpening=1, gapExtension=1, maxiters=16,
107
+            cluster="upgma", kimura=FALSE, order="input", maxdiv=23)
110 108
 }
111 109
 \keyword{manip}
... ...
@@ -8,7 +8,8 @@
8 8
 \usage{
9 9
     msaMuscle(inputSeqs, cluster="default", gapOpening="default",
10 10
               gapExtension="default", maxiters="default",
11
-              substitutionMatrix="default", type="default",
11
+              substitutionMatrix="default",
12
+              type="default", order=c("aligned", "input"),
12 13
               verbose=FALSE, help=FALSE, ...)
13 14
 }
14 15
 \arguments{
... ...
@@ -19,16 +20,12 @@
19 20
     Possible values are \code{"upgma"}, \code{"upgmamax"},
20 21
     \code{"upgmamin"}, \code{"upgmb"}, and \code{"neighborjoining"}.}
21 22
   \item{gapOpening}{gap opening penalty; the default 
22
-    is -400 for DNA sequences and -420 for RNA sequences. The default
23
+    is 400 for DNA sequences and 420 for RNA sequences. The default
23 24
     for amino acid sequences depends on the profile score settings:
24
-    for the setting \code{le=TRUE}, the default is -2.9, for
25
-    \code{sp=TRUE}, the default is -1,439, and for \code{sv=TRUE},
26
-    the default is -300. In order to standardize interfaces,
27
-    all algorithms consistently use negative values for the gap opening
28
-    penalty.}
29
-  \item{gapExtension}{gap extension penalty; the default is 0.
30
-    In order to standardize interfaces, all algorithms consistently
31
-    use negative values for the gap extension penalty.}
25
+    for the setting \code{le=TRUE}, the default is 2.9, for
26
+    \code{sp=TRUE}, the default is 1,439, and for \code{sv=TRUE},
27
+    the default is 300.}
28
+  \item{gapExtension}{gap extension penalty; the default is 0.}
32 29
   \item{maxiters}{maximum number of iterations; the default is 16.
33 30
     In the original MUSCLE implementation, it is also possible
34 31
     to set \code{maxiters} to 0 which leads to an (out of memory) error. 
... ...
@@ -41,6 +38,17 @@
41 38
     this format is not supported by \code{msaMuscle}.}
42 39
   \item{type}{type of the input sequences \code{inputSeqs};
43 40
     see \code{\link{msa}}.}
41
+  \item{order}{how the sequences should be ordered in the output object
42
+    (see \code{\link{msa}} for more details); the original MUSCLE
43
+    implementation does not allow for preserving the order of input
44
+    sequences. The \code{msaMuscle} function realizes this functionality
45
+    by reverse matching of sequence names. Therefore, the sequences
46
+    need to have unique names. If the sequences do not have
47
+    names or if the names are not unique, the \code{\link{msaMuscle}}
48
+    function assignes generic unique names \code{"Seq1"}-\code{Seqn}
49
+    to the sequences and issues a warning. The choice \code{"input"}
50
+    is not available at all for sequence data that is
51
+    read directly from a FASTA file.}
44 52
   \item{verbose}{if \code{TRUE}, the algorithm displays detailed
45 53
     information and progress messages.}
46 54
   \item{help}{if \code{TRUE}, information about algorithm-specific
... ...
@@ -104,8 +112,8 @@ mySeqs <- readAAStringSet(filepath)
104 112
 msaMuscle(mySeqs)
105 113
 
106 114
 ## call msaMuscle with custom parameters
107
-msaMuscle(mySeqs, gapOpening=-12, gapExtension=-3, maxiters=16,
108
-          type="protein", cluster="upgmamax", SUEFF=0.4,
109
-          brenner=FALSE, verbose=FALSE)
115
+msaMuscle(mySeqs, gapOpening=12, gapExtension=3, maxiters=16,
116
+          cluster="upgmamax", SUEFF=0.4, brenner=FALSE,
117
+          order="input", verbose=FALSE)
110 118
 }
111 119
 \keyword{manip}
... ...
@@ -1,9 +1,9 @@
1
-OS := $(shell uname)
2
-ifeq ($(OS), Darwin)
3
-	CONFIGURE_FLAGS='--without-openmp'
4
-else 
5
-	CONFIGURE_FLAGS=''
6
-endif
1
+## Variant 1: deactivate OpenMP on Mac Platforms
2
+CONFIGURE_FLAGS=`Rscript -e "if (Sys.info()['sysname'] == 'Darwin') cat('--without-openmp')"`
3
+## Variant 2: deactivate OpenMP generally
4
+#CONFIGURE_FLAGS=--without-openmp 
5
+## Variant 3: let the configure script determine whether OpenMP is available
6
+#CONFIGURE_FLAGS=
7 7
 
8 8
 CPPNames=\
9 9
 exceptions4c/e4c_lite.c \
... ...
@@ -43,7 +43,7 @@ all: clustalomega
43 43
 
44 44
 clustalomega:
45 45
 	./configure $(CONFIGURE_FLAGS); \
46
-	export PKG_LIBS="$(PKG_LIBS) $(SHLIB_OPENMP_CFLAGS) `Rscript -e "Rcpp:::LdFlags()"`"; \
46
+	export PKG_LIBS="$(PKG_LIBS) $(SHLIB_OPENMP_CFLAGS)"; \
47 47
 	export PKG_CXXFLAGS="$(PKG_CXXFLAGS) $(SHLIB_OPENMP_CXXFLAGS) -fPIC -DHAVE_CONFIG_H -I. -DCLUSTALO -DCLUSTALO_NOFILE -DDEFAULT_FILTER=90 -I"../../gc-7.2/include" `Rscript -e "Rcpp:::CxxFlags()"`"; \
48 48
 	export PKG_CFLAGS="$(PKG_CFLAGS) $(SHLIB_OPENMP_CFLAGS) -fPIC -DHAVE_CONFIG_H -I. -DCLUSTALO -DCLUSTALO_NOFILE -DDEFAULT_FILTER=90 -I"../../gc-7.2/include" `Rscript -e "Rcpp:::CxxFlags()"`"; \
49 49
 	cd src; \
... ...
@@ -37,10 +37,10 @@ all: clustalomega
37 37
 clustalomega:
38 38
 	cp windows/src/config.h src/; \
39 39
 	cp windows/src/clustal-omega-config.h src/; \
40
-	export PKG_LIBS="$(PKG_LIBS) -L"../../gc-7.2" -lgccpp72 -lgc72 `${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "Rcpp:::LdFlags()"`"; \
40
+	export PKG_LIBS="$(PKG_LIBS) -L"../../gc-7.2" -lgccpp72 -lgc72"; \
41 41
 	export PKG_CXXFLAGS="$(PKG_CXXFLAGS) -DHAVE_CONFIG_H -I. -DCLUSTALO -DCLUSTALO_NOFILE -DDEFAULT_FILTER=90 -I"../../gc-7.2/include" `${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "Rcpp:::CxxFlags()"`"; \
42 42
 	export PKG_CFLAGS="$(PKG_CFLAGS) -DHAVE_CONFIG_H -I. -DCLUSTALO -DCLUSTALO_NOFILE -DDEFAULT_FILTER=90 -I"../../gc-7.2/include" -lgccpp -lgc `${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "Rcpp:::CxxFlags()"`"; \
43 43
 	cd src; \
44
-	${R_HOME}/bin${R_ARCH_BIN}/R.exe CMD SHLIB -o ClustalOmega.dll $(CPPNames); \
45
-	$(AR) rcs libClustalOmega.a $(OBJNames); \
44
+	${R_HOME}/bin${R_ARCH_BIN}/R.exe CMD SHLIB -o ClustalOmega.dll $(CPPNames) && \
45
+	$(AR) rcs libClustalOmega.a $(OBJNames) && \
46 46
 	cp libClustalOmega.a ../../
... ...
@@ -182,7 +182,6 @@ SEXP RClustalOmega(SEXP rInputSeqs,
182 182
             msaInput.seqLength = seqLength;
183 183
             msaInput.seqNames = seqNames;
184 184
             //hardcoded value, if sequence is an R sequence
185
-            //appendString(&argv, argc, "R");
186 185
             appendString(&argv, argc, "--R");
187 186
         }
188 187
         appendString(&argv, argc, "-o");
... ...
@@ -243,6 +242,10 @@ SEXP RClustalOmega(SEXP rInputSeqs,
243 242
 			appendStringValue(&argv, argc, "--distmat-out",
244 243
 					getListElement(rParams, "distMatOut"));
245 244
         }
245
+        if (hasClustalOmegaEntry(rParams, "outputOrder")) {
246
+			appendStringValue(&argv, argc, "--output-order",
247
+					getListElement(rParams, "outputOrder"));
248
+        }
246 249
         if (hasClustalOmegaEntry(rParams, "guideTreeIn")) {
247 250
 			appendStringValue(&argv, argc, "--guidetree-in",
248 251
 					getListElement(rParams, "guideTreeIn"));
... ...
@@ -244,7 +244,7 @@
244 244
 
245 245
 /* The size of `fpos_t', as computed by sizeof. */
246 246
 #ifndef CLUSTAL_OMEGA_SIZEOF_FPOS_T
247
-#define CLUSTAL_OMEGA_SIZEOF_FPOS_T 16
247
+#define CLUSTAL_OMEGA_SIZEOF_FPOS_T 12
248 248
 #endif
249 249
 
250 250
 /* The size of `unsigned int', as computed by sizeof. */
... ...
@@ -254,7 +254,7 @@
254 254
 
255 255
 /* The size of `unsigned long', as computed by sizeof. */
256 256
 #ifndef CLUSTAL_OMEGA_SIZEOF_UNSIGNED_LONG
257
-#define CLUSTAL_OMEGA_SIZEOF_UNSIGNED_LONG 8
257
+#define CLUSTAL_OMEGA_SIZEOF_UNSIGNED_LONG 4
258 258
 #endif
259 259
 
260 260
 /* The size of `unsigned long long', as computed by sizeof. */
... ...
@@ -1094,8 +1094,10 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR,
1094 1094
                 CKFREE(pcReprsnt2);
1095 1095
                 CKFREE(pcConsens2);
1096 1096
             }
1097
-            ppiLeafList[iL] = CKFREE(ppiLeafList[iL]);
1098
-            ppiLeafList[iR] = CKFREE(ppiLeafList[iR]);
1097
+            CKFREE(ppiLeafList[iL]);
1098
+            ppiLeafList[iL] = NULL;
1099
+            CKFREE(ppiLeafList[iR]);
1100
+            ppiLeafList[iR] = NULL;
1099 1101
             piLeafCount[iL] = piLeafCount[iR] = 0;
1100 1102
             
1101 1103
         } /* was a merge node */
... ...
@@ -940,7 +940,9 @@ FreeRSeq(mseq_t **mseq, bool bRSequence)
940 940
     }
941 941
         
942 942
 	if ((*mseq)->filename && !bRSequence) {
943
-        (*mseq)->filename = CKFREE((*mseq)->filename);
943
+        //(*mseq)->filename = CKFREE((*mseq)->filename);
944
+		CKFREE((*mseq)->filename);
945
+		(*mseq)->filename = NULL;
944 946
     }
945 947
 
946 948
     for (i=0; i<(*mseq)->nseqs; i++) {
... ...
@@ -134,7 +134,7 @@ ReadSequences(mseq_t *prMSeq_p, char *pcSeqFile,
134 134
               int iMaxNumSeq, int iMaxSeqLen);
135 135
 
136 136
 extern int
137
-ReadSequencesFrom(mseq_t *prMSeq_p, int seqLength, char **seq, char **seqNames,
137
+ReadSequencesFromR(mseq_t *prMSeq_p, int seqLength, char **seq, char **seqNames,
138 138
               int iSeqType,  int iSeqFmt, bool bIsProfile,
139 139
               int iMaxNumSeq, int iMaxSeqLen);
140 140
 
... ...
@@ -918,11 +918,10 @@ hhalign(char **ppcFirstProf, int iFirstCnt, double *pdWeightsL,
918 918
         par.gapg = par.gapgV;
919 919
         par.gaph = par.gaphV;
920 920
         par.gapi = par.gapiV;
921
-
922
-        par.gapExtension = rHhalignPara.gapExtension;
923
-        par.gapOpening   = rHhalignPara.gapOpening;
924
-
925 921
     }
922
+
923
+    par.gapExtension = rHhalignPara.gapExtension;
924
+    par.gapOpening   = rHhalignPara.gapOpening;
926 925
     
927 926
     // Read input file (HMM, HHM, or alignment format), and add pseudocounts etc.
928 927
     q.cQT = 'q';
... ...
@@ -77,15 +77,15 @@ Alignment::Alignment(int maxseq, int maxres)
77 77
 
78 78
     //printf(">>>>>>>>%s:%s:%d: maxseq=%d, maxres=%d\n", __FUNCTION__, __FILE__, __LINE__, maxseq, maxres); /* (FS) */
79 79
   longname = new(char[DESCLEN]);
80
-  sname = new(char*[maxseq+2]); /* MR1 */
81
-  seq = new(char*[maxseq+2]); /* MR1 */
82
-  l = new(int[maxres]);
83
-  X = new(char*[maxseq+2]);  /* MR1 */
84
-  I = new(short unsigned int*[maxseq+2]); /* MR1 */
85
-  keep = new(char[maxseq+2]); /* MR1 */
86
-  display = new(char[maxseq+2]); /* MR1 */
87
-  wg = new(float[maxseq+2]); /* MR1 */
88
-  nseqs = new(int[maxres+2]); /* MR1 */
80
+  sname = new char*[maxseq+2]; /* MR1 */
81
+  seq = new char*[maxseq+2]; /* MR1 */
82
+  l = new int[maxres];
83
+  X = new char*[maxseq+2];  /* MR1 */
84
+  I = new short unsigned int*[maxseq+2]; /* MR1 */
85
+  keep = new char[maxseq+2]; /* MR1 */
86
+  display = new char[maxseq+2]; /* MR1 */
87
+  wg = new float[maxseq+2]; /* MR1 */
88
+  nseqs = new int[maxres+2]; /* MR1 */
89 89
   N_in=L=0;
90 90
   nres=NULL; // number of residues per sequence k
91 91
   first=NULL; // first residue in sequence k
... ...
@@ -138,7 +138,7 @@ Alignment::Read(FILE* inf, char infile[], char* firstline)
138 138
   int k; // Index of sequence being read currently (first=0)
139 139
   char line[LINELEN]=""; // input line
140 140
   //char cur_seq[MAXCOL]; // Sequence currently read in
141
-  char *cur_seq=new(char[par.maxColCnt]);
141
+  char *cur_seq=new char[par.maxColCnt];
142 142
   char* cur_name; // Sequence currently read in
143 143
   int linenr=0; // current line number in input file
144 144
   char skip_sequence=0;
... ...
@@ -180,11 +180,11 @@ Alignment::Read(FILE* inf, char infile[], char* firstline)
180 180
                               }
181 181
 
182 182
                           // Create space for residues and paste new sequence in
183
-                          seq[k]=new(char[strlen(cur_seq)+2]);
183
+                          seq[k]=new char[strlen(cur_seq)+2];
184 184
                           if (!seq[k]) MemoryError("array for input sequences");
185
-                          X[k]=new(char[strlen(cur_seq)+2]);
185
+                          X[k]=new char[strlen(cur_seq)+2];
186 186
                           if (!X[k]) MemoryError("array for input sequences");
187
-                          I[k]=new(short unsigned int[strlen(cur_seq)+2]);
187
+                          I[k]=new short unsigned int[strlen(cur_seq)+2];
188 188
                           if (!I[k]) MemoryError("array for input sequences");
189 189
                           strcpy(seq[k],cur_seq);
190 190
                       }
... ...
@@ -233,7 +233,7 @@ Alignment::Read(FILE* inf, char infile[], char* firstline)
233 233
 
234 234
                   // store sequence name
235 235
                   if (v>=4) printf("Reading seq %-16.16s k=%3i n_displ=%3i display[k]=%i keep[k]=%i\n",cur_name,k,n_display,display[k],keep[k]);
236
-                  sname[k] = new(char[strlen(cur_name)+1]);
236
+                  sname[k] = new char[strlen(cur_name)+1];
237 237
                   if (!sname[k]) {MemoryError("array for sequence names");}
238 238
                   strcpy(sname[k],cur_name);
239 239
               } // end if(line contains sequence name)
... ...
@@ -348,11 +348,11 @@ Alignment::Read(FILE* inf, char infile[], char* firstline)
348 348
 
349 349
   if (k>=0) //if at least one sequence was read in
350 350
       {
351
-          seq[k]=new(char[strlen(cur_seq)+2]);
351
+          seq[k]=new char[strlen(cur_seq)+2];
352 352
           if (!seq[k]) MemoryError("array for input sequences");
353
-          X[k]=new(char[strlen(cur_seq)+2]);
353
+          X[k]=new char[strlen(cur_seq)+2];
354 354
           if (!X[k]) MemoryError("array for input sequences");
355
-          I[k]=new(short unsigned int[strlen(cur_seq)+2]);
355
+          I[k]=new short unsigned int[strlen(cur_seq)+2];
356 356
           if (!I[k]) MemoryError("array for input sequences");
357 357
           strcpy(seq[k],cur_seq);
358 358
       }
... ...
@@ -420,7 +420,7 @@ Alignment::Compress(const char infile[])
420 420
     /*static short unsigned int h[MAXSEQ];*/
421 421
     /*short*/ unsigned int *h = NULL; /* short may lead to overflow for long alignments, FS, r235 -> r236 */
422 422
 
423
-    h = new(/*short*/ unsigned int[N_in+2]); /* short -> overflow, FS, r235 -> r236 */
423
+    h = new /*short*/ unsigned int[N_in+2]; /* short -> overflow, FS, r235 -> r236 */
424 424
     float *percent_gaps = NULL; /* FS, 2010-Nov */
425 425
     char *match_state = NULL;  /* FS, 2010-Nov */
426 426
 
... ...
@@ -552,13 +552,13 @@ Alignment::Compress(const char infile[])
552 552
                had to move declaration of float *percent_gaps out of switch()
553 553
             */
554 554
             //float percent_gaps[MAXCOL]; //percentage of gaps in column k (with weighted sequences)
555
-            percent_gaps = new(float[par.maxColCnt]);
555
+            percent_gaps = new float[par.maxColCnt];
556 556
 
557 557
             //determine number of columns L in alignment
558 558
             L=strlen(seq[kfirst])-1;
559 559
 
560 560
             // Conversion to integer representation, checking for unequal lengths and initialization
561
-            if (nres==NULL) nres=new(int[N_in]);
561
+            if (nres==NULL) nres=new int[N_in];
562 562
             for (k=0; k<N_in; k++)
563 563
                 {
564 564
                     if (!keep[k]) continue;
... ...
@@ -780,7 +780,7 @@ Alignment::Compress(const char infile[])
780 780
                had to move declaration of float *percent_gaps out of switch()
781 781
             */
782 782
             //char match_state[MAXCOL]; //1: column assigned to match state 0: insert state
783
-            match_state = new(char[par.maxColCnt]);
783
+            match_state = new char[par.maxColCnt];
784 784
 
785 785
             // Determine number of columns L in alignment
786 786
             L=strlen(seq[0]+1);
... ...
@@ -942,7 +942,7 @@ Alignment::FilterForDisplay(int max_seqid, int coverage, int qid, float qsc, int
942 942
 
943 943
 
944 944
     if (par.mark) return n_display;
945
-    char *dummy = new(char[N_in+1]);
945
+    char *dummy = new char[N_in+1];
946 946
     int vtmp=v, seqid;
947 947
     v=0;
948 948
     n_display=0;
... ...
@@ -1005,12 +1005,12 @@ Alignment::Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, in
1005 1005
 {
1006 1006
     // In the beginnning, keep[k] is 1 for all regular amino acid sequences and 0 for all others (ss_conf, ss_pred,...)
1007 1007
     // In the end, keep[k] will be 1 for all regular representative sequences kept in the alignment, 0 for all others
1008
-    char* in=new(char[N_in+1]); // in[k]=1: seq k has been accepted; in[k]=0: seq k has not yet been accepted at current seqid
1009
-    char* inkk=new(char[N_in+1]); // inkk[k]=1 iff in[ksort[k]]=1 else 0;
1010
-    int* Nmax=new(int[L+2]); // position-dependent maximum-sequence-identity threshold for filtering /* MR1, used to be called idmax*/
1011
-    int* idmaxwin=new(int[L+2]); // minimum value of idmax[i-WFIL,i+WFIL]
1012
-    int* seqid_prev=new(int[N_in+1]); // maximum-sequence-identity threshold used in previous round of filtering (with lower seqid)
1013
-    int* N=new(int[L+2]); // N[i] number of already accepted sequences at position i
1008
+    char* in=new char[N_in+1]; // in[k]=1: seq k has been accepted; in[k]=0: seq k has not yet been accepted at current seqid
1009
+    char* inkk=new char[N_in+1]; // inkk[k]=1 iff in[ksort[k]]=1 else 0;
1010
+    int* Nmax=new int[L+2]; // position-dependent maximum-sequence-identity threshold for filtering /* MR1, used to be called idmax*/
1011
+    int* idmaxwin=new int[L+2]; // minimum value of idmax[i-WFIL,i+WFIL]
1012
+    int* seqid_prev=new int[N_in+1]; // maximum-sequence-identity threshold used in previous round of filtering (with lower seqid)
1013
+    int* N=new int[L+2]; // N[i] number of already accepted sequences at position i