git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102514 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: msa |
2 | 2 |
Type: Package |
3 | 3 |
Title: Multiple Sequence Alignment |
4 |
-Version: 0.99.5 |
|
5 |
-Date: 2015-04-12 |
|
4 |
+Version: 0.99.6 |
|
5 |
+Date: 2015-04-15 |
|
6 | 6 |
Author: Enrico Bonatesta, Christoph Horejs-Kainrath, Ulrich Bodenhofer |
7 | 7 |
Maintainer: Ulrich Bodenhofer <bodenhofer@bioinf.jku.at> |
8 | 8 |
Description: This package provides a unified R/Bioconductor interface to the |
... | ... |
@@ -17,15 +17,18 @@ URL: http://www.bioinf.jku.at/software/msa/ |
17 | 17 |
License: GPL (>= 2) |
18 | 18 |
Copyright: See file inst/COPYRIGHT |
19 | 19 |
Depends: R (>= 3.1.0), methods, Biostrings (>= 2.30.0) |
20 |
-Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0), S4Vectors, |
|
21 |
- tools |
|
20 |
+Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0), |
|
21 |
+ S4Vectors, tools |
|
22 | 22 |
Suggests: Biobase, knitr |
23 | 23 |
LinkingTo: Rcpp |
24 |
-SystemRequirements: GNU make |
|
24 |
+SystemRequirements: |
|
25 | 25 |
VignetteBuilder: knitr |
26 | 26 |
LazyLoad: yes |
27 |
-Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R |
|
28 |
- helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R |
|
29 |
- print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R |
|
30 |
- msaClustalOmega.R |
|
31 |
-biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison, Sequencing |
|
27 |
+Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R |
|
28 |
+ helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R |
|
29 |
+ print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R |
|
30 |
+ msaClustalOmega.R |
|
31 |
+biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison, |
|
32 |
+ Sequencing |
|
33 |
+NeedsCompilation: yes |
|
34 |
+Packaged: 2015-04-15 13:16:43 UTC; bodenhof |
... | ... |
@@ -4,7 +4,7 @@ convertAlnRows <- function(rows, type) |
4 | 4 |
|
5 | 5 |
if (length(rows) < 3 || |
6 | 6 |
##!identical(grep("^CLUSTAL", rows[1L]), 1L) || |
7 |
- !identical(rows[2:3], c("",""))) |
|
7 |
+ !identical(sub("^\\s+$", "", rows[2:3]), c("", ""))) |
|
8 | 8 |
stop("There is an invalid aln file!") |
9 | 9 |
|
10 | 10 |
rows <- tail(rows, -3) |
... | ... |
@@ -43,7 +43,7 @@ getParamsList <-function(type, |
43 | 43 |
|
44 | 44 |
##function, which tests a given input sequence |
45 | 45 |
##whether it is a character string, or a XStringSet |
46 |
-transformInputSeq <- function(inputSeq, isFile=FALSE) { |
|
46 |
+transformInputSeq <- function(inputSeq) { |
|
47 | 47 |
if (!is(inputSeq, "character")) { |
48 | 48 |
if (is(inputSeq, "XStringSet")) { |
49 | 49 |
inputSeq <- as.character(inputSeq) |
... | ... |
@@ -53,24 +53,6 @@ transformInputSeq <- function(inputSeq, isFile=FALSE) { |
53 | 53 |
} |
54 | 54 |
} |
55 | 55 |
|
56 |
- if (!isFile) |
|
57 |
- { |
|
58 |
- if (is.null(names(inputSeq))) { |
|
59 |
- warning("The input sequences are unnamed! \n", |
|
60 |
- "Note that the order of sequences \n", |
|
61 |
- "in the resulting multiple sequence alignment \n", |
|
62 |
- "may not be preserved, so unique sequence names\n", |
|
63 |
- "are necessary to recover the order!") |
|
64 |
- } |
|
65 |
- else if (length(unique(names(inputSeq))) != length(inputSeq)){ |
|
66 |
- warning("The input sequences are not unique! \n", |
|
67 |
- "Note that the order of sequences \n", |
|
68 |
- "in the resulting multiple sequence alignment \n", |
|
69 |
- "may not be preserved, so unique sequence names \n", |
|
70 |
- "are necessary to recover the order!") |
|
71 |
- } |
|
72 |
- } |
|
73 |
- |
|
74 | 56 |
return(inputSeq) |
75 | 57 |
} |
76 | 58 |
|
... | ... |
@@ -72,9 +72,8 @@ checkType <- function(type, inputSeqs, msaName){ |
72 | 72 |
############################################################################### |
73 | 73 |
|
74 | 74 |
##function, that tests the input of gapOpening. |
75 |
-##If the value is negative, everything is ok and the function returns |
|
76 |
-##the gapOpening parameter. If the input is positive, the value is transformed |
|
77 |
-##and a warning is given. If the input is not numeric, an exception is thrown. |
|
75 |
+##If the value is numeric, everything is ok and the function returns |
|
76 |
+##the gapOpening parameter. If the input is not numeric, an exception is thrown. |
|
78 | 77 |
##Same for missing substitutionMatrix |
79 | 78 |
checkGapOpening <- function(gapOpening, type, substitutionMatrix, |
80 | 79 |
defaultDNAValue, defaultAAValue){ |
... | ... |
@@ -90,25 +89,18 @@ checkGapOpening <- function(gapOpening, type, substitutionMatrix, |
90 | 89 |
if (is.numeric(gapOpening)) { |
91 | 90 |
if (is.matrix(gapOpening)) { |
92 | 91 |
stop("The parameter gapOpening should be \n", |
93 |
- "a negative numeric, not a matrix!") |
|
92 |
+ "a numeric, not a matrix!") |
|
94 | 93 |
} |
95 | 94 |
if (length(gapOpening) != 1) { |
96 | 95 |
stop("The parameter gapOpening should be \n", |
97 |
- "a negative numeric, not a vector!") |
|
96 |
+ "a numeric, not a vector!") |
|
98 | 97 |
} |
99 | 98 |
if (is.nan(gapOpening)) { |
100 | 99 |
stop("The parameter gapOpening should be \n", |
101 |
- "a negative numeric, not a NaN!") |
|
102 |
- } |
|
103 |
- ##check if gapOpening is positive |
|
104 |
- if (gapOpening > 0) { |
|
105 |
- warning("According to BioStrings, the parameter gapOpening \n", |
|
106 |
- "should be negative! The value is transformed \n", |
|
107 |
- "to a negative numeric!") |
|
108 |
- gapOpening <- gapOpening * -1; |
|
100 |
+ "a numeric, not a NaN!") |
|
109 | 101 |
} |
110 | 102 |
} else { |
111 |
- stop("The parameter gapOpening should be a negative numeric!") |
|
103 |
+ stop("The parameter gapOpening should be a numeric!") |
|
112 | 104 |
} |
113 | 105 |
return(gapOpening) |
114 | 106 |
} |
... | ... |
@@ -117,9 +109,8 @@ checkGapOpening <- function(gapOpening, type, substitutionMatrix, |
117 | 109 |
|
118 | 110 |
##used in MUSCLE, analoguous to checkGapOpening, but only ONE defaut value |
119 | 111 |
##function, that tests the input of gapOpening. |
120 |
-##If the value is negative, everything is ok and the function returns |
|
121 |
-##the gapOpening parameter. If the input is positive, the value is transformed |
|
122 |
-##and a warning is given. If the input is not numeric, an exception is thrown. |
|
112 |
+##If the value is numeric, everything is ok and the function returns |
|
113 |
+##the gapOpening parameter. If the input is not numeric, an exception is thrown. |
|
123 | 114 |
checkGapOpening2 <- function(gapOpening, substitutionMatrix, |
124 | 115 |
defaultValue){ |
125 | 116 |
##set defaultValue |
... | ... |
@@ -132,25 +123,18 @@ checkGapOpening2 <- function(gapOpening, substitutionMatrix, |
132 | 123 |
if (is.numeric(gapOpening)) { |
133 | 124 |
if (is.matrix(gapOpening)) { |
134 | 125 |
stop("The parameter gapOpening should be \n", |
135 |
- "a negative numeric, not a matrix!") |
|
126 |
+ "a numeric, not a matrix!") |
|
136 | 127 |
} |
137 | 128 |
if (length(gapOpening) != 1) { |
138 | 129 |
stop("The parameter gapOpening should be \n", |
139 |
- "a negative numeric, not a vector!") |
|
130 |
+ "a numeric, not a vector!") |
|
140 | 131 |
} |
141 | 132 |
if (is.nan(gapOpening)) { |
142 | 133 |
stop("The parameter gapOpening should be \n", |
143 |
- "a negative numeric, not a NaN!") |
|
144 |
- } |
|
145 |
- ##check if gapOpening is positive |
|
146 |
- if (gapOpening > 0) { |
|
147 |
- warning("According to BioStrings, the parameter gapOpening \n", |
|
148 |
- "should be negative! The value is transformed \n", |
|
149 |
- "to a negative numeric!") |
|
150 |
- gapOpening <- gapOpening * -1; |
|
134 |
+ "a numeric, not a NaN!") |
|
151 | 135 |
} |
152 | 136 |
} else { |
153 |
- stop("The parameter gapOpening should be a negative numeric!") |
|
137 |
+ stop("The parameter gapOpening should be a numeric!") |
|
154 | 138 |
} |
155 | 139 |
return(gapOpening) |
156 | 140 |
} |
... | ... |
@@ -172,25 +156,18 @@ checkGapExtension <- function(gapExtension, type, substitutionMatrix, |
172 | 156 |
if (is.numeric(gapExtension)) { |
173 | 157 |
if (is.matrix(gapExtension)) { |
174 | 158 |
stop("The parameter gapExtension should be \n", |
175 |
- "a negative numeric, not a matrix!") |
|
159 |
+ "a numeric, not a matrix!") |
|
176 | 160 |
} |
177 | 161 |
if (length(gapExtension) != 1) { |
178 | 162 |
stop("The parameter gapExtension should be \n", |
179 |
- "a negative numeric, not a vector!") |
|
163 |
+ "a numeric, not a vector!") |
|
180 | 164 |
} |
181 | 165 |
if (is.nan(gapExtension)) { |
182 | 166 |
stop("The parameter gapExtension should be \n", |
183 |
- "a negative numeric, not a NaN!") |
|
184 |
- } |
|
185 |
- ##check if gapExtension is positive |
|
186 |
- if (gapExtension > 0) { |
|
187 |
- warning("According to BioStrings, the parameter gapOpening \n", |
|
188 |
- "should be negative! The value is transformed to a \n", |
|
189 |
- "negative numeric.") |
|
190 |
- gapExtension <- gapExtension * -1 |
|
167 |
+ "a numeric, not a NaN!") |
|
191 | 168 |
} |
192 | 169 |
} else { |
193 |
- stop("The parameter gapExtension should be a negative numeric!") |
|
170 |
+ stop("The parameter gapExtension should be a numeric!") |
|
194 | 171 |
} |
195 | 172 |
|
196 | 173 |
return(gapExtension) |
... | ... |
@@ -5,6 +5,7 @@ msaClustalOmega <- function(inputSeqs, |
5 | 5 |
maxiters="default", |
6 | 6 |
substitutionMatrix="default", |
7 | 7 |
type="default", |
8 |
+ order=c("aligned", "input"), |
|
8 | 9 |
verbose=FALSE, |
9 | 10 |
help=FALSE, |
10 | 11 |
...) |
... | ... |
@@ -53,7 +54,15 @@ msaClustalOmega <- function(inputSeqs, |
53 | 54 |
############# |
54 | 55 |
# inputSeqs # |
55 | 56 |
############# |
56 |
- inputSeqs <- transformInputSeq(inputSeqs, params[["inputSeqIsFileFlag"]]) |
|
57 |
+ inputSeqs <- transformInputSeq(inputSeqs) |
|
58 |
+ |
|
59 |
+ ############# |
|
60 |
+ # order # |
|
61 |
+ ############# |
|
62 |
+ order <- match.arg(order) |
|
63 |
+ params[["outputOrder"]] <- switch(order, |
|
64 |
+ aligned="tree-order", |
|
65 |
+ input="input-order") |
|
57 | 66 |
|
58 | 67 |
########### |
59 | 68 |
# cluster # |
... | ... |
@@ -103,9 +112,9 @@ msaClustalOmega <- function(inputSeqs, |
103 | 112 |
substitutionMatrix <- NULL |
104 | 113 |
} else if (is.character(substitutionMatrix) && |
105 | 114 |
!is.matrix(substitutionMatrix)) { |
106 |
- possibleValues <- c("blosum30","blosum40", "blosum50", |
|
107 |
- "blosum65","blosum80","gonnet") |
|
108 |
- if (!(tolower(substitutionMatrix) %in% possibleValues)){ |
|
115 |
+ possibleValues <- c("BLOSUM30", "BLOSUM40", "BLOSUM50", |
|
116 |
+ "BLOSUM65", "BLOSUM80", "Gonnet") |
|
117 |
+ if (!(substitutionMatrix %in% possibleValues)){ |
|
109 | 118 |
##create a string with all possible Values named text |
110 | 119 |
text <- "" |
111 | 120 |
text <- paste(possibleValues, collapse=", ") |
... | ... |
@@ -117,21 +126,16 @@ msaClustalOmega <- function(inputSeqs, |
117 | 126 |
############## |
118 | 127 |
# gapOpening # |
119 | 128 |
############## |
120 |
- |
|
121 |
- gapOpening <- checkGapOpening(gapOpening, type, |
|
122 |
- substitutionMatrix, -6, -6) |
|
123 |
- # ClustalOmega uses positive values |
|
124 |
- gapOpening <- gapOpening * -1 |
|
125 |
- |
|
129 |
+ if (!identical(gapOpening, "default")) |
|
130 |
+ warning("msaClustalOmega currently does not support to set\n", |
|
131 |
+ "gapOpening to a non-default value!\n") |
|
126 | 132 |
|
127 | 133 |
################ |
128 | 134 |
# gapExtension # |
129 | 135 |
################ |
130 |
- |
|
131 |
- gapExtension <- checkGapExtension(gapExtension, type, |
|
132 |
- substitutionMatrix, -1, -1) |
|
133 |
- # ClustalOmega uses positive values |
|
134 |
- gapExtension <- gapExtension * -1 |
|
136 |
+ if (!identical(gapExtension, "default")) |
|
137 |
+ warning("msaClustalOmega currently does not support to set\n", |
|
138 |
+ "gapExtension to a non-default value!\n") |
|
135 | 139 |
|
136 | 140 |
############ |
137 | 141 |
# maxiters # |
... | ... |
@@ -334,13 +338,13 @@ msaClustalOmega <- function(inputSeqs, |
334 | 338 |
|
335 | 339 |
##delete param in copy |
336 | 340 |
paramsCopy[["isProfile"]] <- NULL |
337 |
- |
|
341 |
+ |
|
338 | 342 |
####### |
339 | 343 |
# log # |
340 | 344 |
####### |
341 | 345 |
##Log all non-essential output to this file |
342 | 346 |
##DEACTIVATED: All log-messages are print to R console |
343 |
- |
|
347 |
+ |
|
344 | 348 |
##if(!is.null(params[["log"]])) { |
345 | 349 |
## tempList <- checkOutFile("log", params) |
346 | 350 |
## if (tempList$existingFile) { |
... | ... |
@@ -349,7 +353,7 @@ msaClustalOmega <- function(inputSeqs, |
349 | 353 |
## } |
350 | 354 |
## params[["log"]] <- tempList$param |
351 | 355 |
##} |
352 |
- |
|
356 |
+ |
|
353 | 357 |
##delete param in copy |
354 | 358 |
##paramsCopy[["log"]] <- NULL |
355 | 359 |
|
... | ... |
@@ -468,7 +472,7 @@ msaClustalOmega <- function(inputSeqs, |
468 | 472 |
if (!identical (params[["outFmt"]], "clustal") && |
469 | 473 |
!identical (params[["outFmt"]], "clu")) { |
470 | 474 |
stop("Until now, the parameter outFmt is only implemented ", |
471 |
- "for value \"clustal\" \n", |
|
475 |
+ "for value \"clustal\" \n", |
|
472 | 476 |
"the other formats will be ", |
473 | 477 |
"realized in a later version.") |
474 | 478 |
} |
... | ... |
@@ -477,23 +481,6 @@ msaClustalOmega <- function(inputSeqs, |
477 | 481 |
##delete param in copy |
478 | 482 |
paramsCopy[["outFmt"]] <- NULL |
479 | 483 |
|
480 |
- ################ |
|
481 |
- # outputOrder # |
|
482 |
- ################ |
|
483 |
- ##MSA output orderlike in input/guide-tree |
|
484 |
- ##default: output-order=input-order |
|
485 |
- |
|
486 |
- ##possible Values |
|
487 |
- posVal <- c("input-order", "tree-order") |
|
488 |
- ##params[["outputOrder"]] <- checkSingleValParams("outputOrder", params, |
|
489 |
- ## "input-order", posVal) |
|
490 |
- |
|
491 |
- params[["outputOrder"]] <- checkSingleValParamsNew( |
|
492 |
- "outputOrder", params, posVal) |
|
493 |
- |
|
494 |
- ##delete param in copy |
|
495 |
- paramsCopy[["outputOrder"]] <- NULL |
|
496 |
- |
|
497 | 484 |
############# |
498 | 485 |
# percentId # |
499 | 486 |
############# |
... | ... |
@@ -523,9 +510,9 @@ msaClustalOmega <- function(inputSeqs, |
523 | 510 |
|
524 | 511 |
if (!is.null(params[["profile2"]])) { |
525 | 512 |
if (is.null(params[["profile1"]])){ |
526 |
- stop("The parameter profile1 is NULL, \n", |
|
513 |
+ stop("The parameter profile1 is NULL, \n", |
|
527 | 514 |
"so the parameter profile2 can't have a value! \n", |
528 |
- "Please insert file for parameter profile1 \n", |
|
515 |
+ "Please insert file for parameter profile1 \n", |
|
529 | 516 |
"or change the parameters profile1 and profile2!") |
530 | 517 |
} |
531 | 518 |
checkInFile("profile2", params) |
... | ... |
@@ -570,7 +557,7 @@ msaClustalOmega <- function(inputSeqs, |
570 | 557 |
|
571 | 558 |
##useKimura==TRUE AND percentId==TRUE is not allowed!!! |
572 | 559 |
if(params[["useKimura"]] & params[["percentId"]]){ |
573 |
- stop("Percentage Identity cannot be calcuted \n", |
|
560 |
+ stop("Percentage Identity cannot be calcuted \n", |
|
574 | 561 |
"if Kimura Distances are used!", |
575 | 562 |
"You have to set either the parameter percentID or \n", |
576 | 563 |
"the parameter useKimura to FALSE!") |
... | ... |
@@ -622,10 +609,10 @@ msaClustalOmega <- function(inputSeqs, |
622 | 609 |
|
623 | 610 |
inputSeqNames <- names(inputSeqs) |
624 | 611 |
|
625 |
- names(inputSeqs) <- paste0("seq", 1:length(inputSeqs)) |
|
612 |
+ names(inputSeqs) <- paste0("Seq", 1:length(inputSeqs)) |
|
626 | 613 |
|
627 |
- result <- .Call("RClustalOmega", inputSeqs, cluster, gapOpening, |
|
628 |
- gapExtension, maxiters, substitutionMatrix, type, |
|
614 |
+ result <- .Call("RClustalOmega", inputSeqs, cluster, 6, |
|
615 |
+ 1, maxiters, substitutionMatrix, type, |
|
629 | 616 |
verbose, params, PACKAGE="msa"); |
630 | 617 |
|
631 | 618 |
out <- convertAlnRows(result$msa, type) |
... | ... |
@@ -638,7 +625,6 @@ msaClustalOmega <- function(inputSeqs, |
638 | 625 |
else |
639 | 626 |
names(out@unmasked) <- NULL |
640 | 627 |
|
641 |
- |
|
642 | 628 |
standardParams <- list(gapOpening=gapOpening, |
643 | 629 |
gapExtension=gapExtension, |
644 | 630 |
maxiters=maxiters, |
... | ... |
@@ -5,6 +5,7 @@ msaClustalW <- function(inputSeqs, |
5 | 5 |
maxiters="default", |
6 | 6 |
substitutionMatrix="default", |
7 | 7 |
type="default", |
8 |
+ order=c("aligned", "input"), |
|
8 | 9 |
verbose=FALSE, |
9 | 10 |
help=FALSE, |
10 | 11 |
...) |
... | ... |
@@ -53,7 +54,14 @@ msaClustalW <- function(inputSeqs, |
53 | 54 |
############# |
54 | 55 |
# inputSeqs # |
55 | 56 |
############# |
56 |
- inputSeqs <- transformInputSeq(inputSeqs, params[["inputSeqIsFileFlag"]]) |
|
57 |
+ inputSeqs <- transformInputSeq(inputSeqs) |
|
58 |
+ |
|
59 |
+ ############# |
|
60 |
+ # order # |
|
61 |
+ ############# |
|
62 |
+ order <- match.arg(order) |
|
63 |
+ |
|
64 |
+ params[["outorder"]] <- order |
|
57 | 65 |
|
58 | 66 |
########### |
59 | 67 |
# cluster # |
... | ... |
@@ -122,7 +130,7 @@ msaClustalW <- function(inputSeqs, |
122 | 130 |
!is.matrix(substitutionMatrix)) { |
123 | 131 |
##check whether value is BLOSUM, PAM, GONNET, or ID; |
124 | 132 |
possibleValues <- c("blosum", "pam", "gonnet", "id") |
125 |
- if (!(tolower(substitutionMatrix) %in% possibleValues)){ |
|
133 |
+ if (!(substitutionMatrix %in% possibleValues)){ |
|
126 | 134 |
##create a string with all possible Values named text |
127 | 135 |
text <- "" |
128 | 136 |
text <- paste(possibleValues, collapse=", ") |
... | ... |
@@ -135,15 +143,13 @@ msaClustalW <- function(inputSeqs, |
135 | 143 |
if (isSymmetric(substitutionMatrix)) { |
136 | 144 |
if (nrow(substitutionMatrix) <=20 || |
137 | 145 |
nrow(substitutionMatrix) >26 ) { |
138 |
- stop("The parameter substitutionMatrix ", |
|
139 |
- "has wrong dimensions!") |
|
146 |
+ stop("substitutionMatrix has wrong dimensions!") |
|
140 | 147 |
} |
141 | 148 |
} else { |
142 |
- stop("The parameter substitutionMatrix should be symmetric!") |
|
149 |
+ stop("substitutionMatrix should be a symmetric matrix!") |
|
143 | 150 |
} |
144 | 151 |
} |
145 | 152 |
|
146 |
- |
|
147 | 153 |
############## |
148 | 154 |
# gapOpening # |
149 | 155 |
############## |
... | ... |
@@ -351,17 +357,6 @@ msaClustalW <- function(inputSeqs, |
351 | 357 |
##delete param in copy |
352 | 358 |
paramsCopy[["output"]] <- NULL |
353 | 359 |
|
354 |
- ############ |
|
355 |
- # outorder # |
|
356 |
- ############ |
|
357 |
- |
|
358 |
- ##possible Values |
|
359 |
- posVal <- c("input", "aligned") |
|
360 |
- params[["outorder"]] <- checkSingleValParamsNew("outorder", params, posVal) |
|
361 |
- |
|
362 |
- ##delete param in copy |
|
363 |
- paramsCopy[["outorder"]] <- NULL |
|
364 |
- |
|
365 | 360 |
######## |
366 | 361 |
# case # |
367 | 362 |
######## |
... | ... |
@@ -617,6 +612,9 @@ msaClustalW <- function(inputSeqs, |
617 | 612 |
|
618 | 613 |
params[["pwgapopen"]] <- checkNumericParamsNew("pwgapopen", params) |
619 | 614 |
|
615 |
+ if (is.numeric(params[["pwgapopen"]])) |
|
616 |
+ params[["pwgapopen"]] <- abs(params[["pwgapopen"]]) |
|
617 |
+ |
|
620 | 618 |
##delete param in copy |
621 | 619 |
paramsCopy[["pwgapopen"]] <- NULL |
622 | 620 |
|
... | ... |
@@ -627,6 +625,8 @@ msaClustalW <- function(inputSeqs, |
627 | 625 |
|
628 | 626 |
params[["pwgapext"]] <- checkNumericParamsNew("pwgapext", params) |
629 | 627 |
|
628 |
+ if (is.numeric(params[["pwgapext"]])) |
|
629 |
+ params[["pwgapext"]] <- abs(params[["pwgapext"]]) |
|
630 | 630 |
|
631 | 631 |
##delete param in copy |
632 | 632 |
paramsCopy[["pwgapext"]] <- NULL |
... | ... |
@@ -1116,11 +1116,11 @@ msaClustalW <- function(inputSeqs, |
1116 | 1116 |
|
1117 | 1117 |
inputSeqNames <- names(inputSeqs) |
1118 | 1118 |
|
1119 |
- names(inputSeqs) <- paste0("seq", 1:length(inputSeqs)) |
|
1119 |
+ names(inputSeqs) <- paste0("Seq", 1:length(inputSeqs)) |
|
1120 | 1120 |
|
1121 |
- result <- .Call("RClustalW", inputSeqs, cluster, gapOpening, gapExtension, |
|
1122 |
- maxiters, substitutionMatrix, type, verbose, |
|
1123 |
- params, PACKAGE="msa") |
|
1121 |
+ result <- .Call("RClustalW", inputSeqs, cluster, abs(gapOpening), |
|
1122 |
+ abs(gapExtension), maxiters, substitutionMatrix, |
|
1123 |
+ type, verbose, params, PACKAGE="msa") |
|
1124 | 1124 |
|
1125 | 1125 |
out <- convertAlnRows(result$msa, type) |
1126 | 1126 |
|
... | ... |
@@ -5,6 +5,7 @@ msaMuscle <- function(inputSeqs, |
5 | 5 |
maxiters="default", |
6 | 6 |
substitutionMatrix="default", |
7 | 7 |
type="default", |
8 |
+ order=c("aligned", "input"), |
|
8 | 9 |
verbose=FALSE, |
9 | 10 |
help=FALSE, |
10 | 11 |
...) |
... | ... |
@@ -70,7 +71,27 @@ msaMuscle <- function(inputSeqs, |
70 | 71 |
# inputSeqs # |
71 | 72 |
############# |
72 | 73 |
##transform the input Sequences to a string vector |
73 |
- inputSeqs <- transformInputSeq(inputSeqs, params[["inputSeqIsFileFlag"]]) |
|
74 |
+ inputSeqs <- transformInputSeq(inputSeqs) |
|
75 |
+ |
|
76 |
+ ############# |
|
77 |
+ # order # |
|
78 |
+ ############# |
|
79 |
+ order <- match.arg(order) |
|
80 |
+ |
|
81 |
+ if (order == "input") |
|
82 |
+ { |
|
83 |
+ if (params[["inputSeqIsFileFlag"]]) |
|
84 |
+ stop("msaMuscle does not support order=\"input\" for reading\n", |
|
85 |
+ "sequences directly from a FASTA file.") |
|
86 |
+ else if (is.null(names(inputSeqs)) || |
|
87 |
+ length(unique(names(inputSeqs))) != length(inputSeqs)) |
|
88 |
+ { |
|
89 |
+ warning("order=\"input\" requires input sequences to be named\n", |
|
90 |
+ "uniquely! Assigning default names 'Seq1'..'Seqn'\n", |
|
91 |
+ "to sequences.") |
|
92 |
+ names(inputSeqs) <- paste0("Seq", 1:length(inputSeqs)) |
|
93 |
+ } |
|
94 |
+ } |
|
74 | 95 |
|
75 | 96 |
########### |
76 | 97 |
# cluster # |
... | ... |
@@ -1126,24 +1147,31 @@ msaMuscle <- function(inputSeqs, |
1126 | 1147 |
|
1127 | 1148 |
inputSeqNames <- names(inputSeqs) |
1128 | 1149 |
|
1129 |
- names(inputSeqs) <- paste0("seq", 1:length(inputSeqs)) |
|
1130 |
- |
|
1150 |
+ names(inputSeqs) <- paste0("Seq", 1:length(inputSeqs)) |
|
1131 | 1151 |
|
1132 |
- result <- .Call("RMuscle", inputSeqs, cluster, gapOpening, |
|
1133 |
- gapExtension, maxiters, substitutionMatrix, type, |
|
1152 |
+ result <- .Call("RMuscle", inputSeqs, cluster, -abs(gapOpening), |
|
1153 |
+ -abs(gapExtension), maxiters, substitutionMatrix, type, |
|
1134 | 1154 |
verbose, params, PACKAGE="msa") |
1135 | 1155 |
|
1136 | 1156 |
out <- convertAlnRows(result$msa, type) |
1137 | 1157 |
|
1138 | 1158 |
if (length(inputSeqNames) > 0) |
1139 | 1159 |
{ |
1140 |
- perm <- match(names(out@unmasked), names(inputSeqs)) |
|
1141 |
- names(out@unmasked) <- inputSeqNames[perm] |
|
1160 |
+ if (order == "aligned") |
|
1161 |
+ { |
|
1162 |
+ perm <- match(names(out@unmasked), names(inputSeqs)) |
|
1163 |
+ names(out@unmasked) <- inputSeqNames[perm] |
|
1164 |
+ } |
|
1165 |
+ else |
|
1166 |
+ { |
|
1167 |
+ perm <- match(names(inputSeqs), names(out@unmasked)) |
|
1168 |
+ out@unmasked <- out@unmasked[perm] |
|
1169 |
+ names(out@unmasked) <- inputSeqNames |
|
1170 |
+ } |
|
1142 | 1171 |
} |
1143 | 1172 |
else |
1144 | 1173 |
names(out@unmasked) <- NULL |
1145 | 1174 |
|
1146 |
- |
|
1147 | 1175 |
standardParams <- list(gapOpening=gapOpening, |
1148 | 1176 |
gapExtension=gapExtension, |
1149 | 1177 |
maxiters=maxiters, |
... | ... |
@@ -1,68 +1,68 @@ |
1 |
->HBA1 Homo sapiens |
|
1 |
+>HBA1_Homo_sapiens |
|
2 | 2 |
VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK |
3 | 3 |
KVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPA |
4 | 4 |
VHASLDKFLASVSTVLTSKYR |
5 |
->HBA1 Macaca mulatta |
|
5 |
+>HBA1_Macaca_mulatta |
|
6 | 6 |
VLSPADKSNVKAAWGKVGGHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK |
7 | 7 |
KVADALTLAVGHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPA |
8 | 8 |
VHASLDKFLASVSTVLTSKYR |
9 |
->HBA1 Ornithorhynchus anatinus |
|
9 |
+>HBA1_Ornithorhynchus_anatinus |
|
10 | 10 |
MLTDAEKKEVTALWGKAAGHGEEYGAEALERLFQAFPTTKTYFSHFDLSHGSAQIKAHGK |
11 | 11 |
KVADALSTAAGHFDDMDSALSALSDLHAHKLRVDPVNFKLLAHCILVVLARHCPGEFTPS |
12 | 12 |
AHAAMDKFLSKVATVLTSKYR |
13 |
->HBA1 Bos taurus |
|
13 |
+>HBA1_Bos_taurus |
|
14 | 14 |
VLSAADKGNVKAAWGKVGGHAAEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGA |
15 | 15 |
KVAAALTKAVEHLDDLPGALSELSDLHAHKLRVDPVNFKLLSHSLLVTLASHLPSDFTPA |
16 | 16 |
VHASLDKFLANVSTVLTSKYR |
17 |
->HBA1 Monodelphis domestica |
|
17 |
+>HBA1_Monodelphis_domestica |
|
18 | 18 |
VLSAADKTNVKAAWSKVGGNSGAYMGEALYRTFLSFPPTKTYFPHFEFSAGSAQIKGQGQ |
19 | 19 |
KIADAVSLAVAHMDDLATALSALSDLHAHNLKVDPVNFKFLCHNVLVTLASHLGKDFTPE |
20 | 20 |
IHASLDKFLALLSTVLTSKYR |
21 |
->HBA1 Erinaceus europaeus |
|
21 |
+>HBA1_Erinaceus_europaeus |
|
22 | 22 |
VLSATDKANVKTFWGKLGGHGGEYGGEALDRMFQAHPTTKTYFPHFDLNPGSAQVKGHGK |
23 | 23 |
KVADALTTAVNNLDDVPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLALHHPADFTPA |
24 | 24 |
VHASLDKFLATVATVLTSKYR |
25 |
->HBA1 Loxodonta africana |
|
25 |
+>HBA1_Loxodonta_africana |
|
26 | 26 |
VLSDNDKTNVKATWSKVGDHASDYVAEALERMFFSFPTTKTYFPHFDLGHGSGQVKAHGK |
27 | 27 |
KVGEALTQAVGHLDDLPSALSALSDLHAHKLRVDPVNFKLLSHCLLVTLSSHQPTEFTPE |
28 | 28 |
VHASLDKFLSNVSTVLTSKYR |
29 |
->HBA1 Mus musculus |
|
29 |
+>HBA1_Mus_musculus |
|
30 | 30 |
VLSGEDKSNIKAAWGKIGGHGAEYGAEALERMFASFPTTKTYFPHFDVSHGSAQVKGHGK |
31 | 31 |
KVADALASAAGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPADFTPA |
32 | 32 |
VHASLDKFLASVSTVLTSKYR |
33 |
->HBA1 Felis silvestris catus |
|
33 |
+>HBA1_Felis_silvestris_catus |
|
34 | 34 |
VLSAADKSNVKACWGKIGSHAGEYGAEALERTFCSFPTTKTYFPHFDLSHGSAQVKAHGQ |
35 | 35 |
KVADALTQAVAHMDDLPTAMSALSDLHAYKLRVDPVNFKFLSHCLLVTLACHHPAEFTPA |
36 | 36 |
VHASLDKFFSAVSTVLTSKYR |
37 |
->HBA1 Chrysocyon brachyurus |
|
37 |
+>HBA1_Chrysocyon_brachyurus |
|
38 | 38 |
VLSPADKTNIKSTWDKIGGHAGDYGGEALDRTFQSFPTTKTYFPHFDLSPGSAQVKAHGK |
39 | 39 |
KVADALTTAVAHLDDLPGALSALSDLHAYKLRVDPVNFKLLSHCLLVTLACHHPTEFTPA |
40 | 40 |
VHASLDKFFTAVSTVLTSKYR |
41 |
->HBA1 Gallus gallus |
|
41 |
+>HBA1_Gallus_gallus |
|
42 | 42 |
VLSAADKNNVKGIFTKIAGHAEEYGAETLERMFTTYPPTKTYFPHFDLSHGSAQIKGHGK |
43 | 43 |
KVVAALIEAANHIDDIAGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPAALTPE |
44 | 44 |
VHASLDKFLCAVGTVLTAKYR |
45 |
->Zebra_fish (Brachydanio rerio)x |
|
45 |
+>HBA1_Danio_rerio |
|
46 | 46 |
SLSDTDKAVVKAIWAKISPKADEIGAEALARMLTVYPQTKTYFSHWADLSPGSGPVKKHG |
47 | 47 |
KTIMGAVGEAISKIDDLVGGLAALSELHAFKLRVDPANFKILSHNVIVVIAMLFPADFTP |
48 | 48 |
EVHVSVDKFFNNLALALSEKYR |
49 |
->HBA1 Tursiops truncatus |
|
49 |
+>HBA1_Tursiops_truncatus |
|
50 | 50 |
VLSPADKTNVKGTWSKIGNHSAEYGAEALERMFINFPSTKTYFSHFDLGHGSAQIKGHGK |
51 | 51 |
KVADALTKAVGHIDNLPDALSELSDLHAHKLRVDPVNFKLLSHCLLVTLALHLPADFTPS |
52 | 52 |
VHASLDKFLASVSTVLTSKYR |
53 |
->HBA1 Xenopus tropicalis |
|
53 |
+>HBA1_Xenopus_tropicalis |
|
54 | 54 |
HLTADDKKHIKAIWPSVAAHGDKYGGEALHRMFMCAPKTKTYFPDFDFSEHSKHILAHGK |
55 | 55 |
KVSDALNEACNHLDNIAGCLSKLSDLHAYDLRVDPGNFPLLAHQILVVVAIHFPKQFDPA |
56 | 56 |
THKALDKFLVSVSNVLTSKYR |
57 |
->HBA1 Microcephalophis gracilis |
|
57 |
+>HBA1_Microcephalophis_gracilis |
|
58 | 58 |
VLTEEDKARVRVAWVPVSKNAELYGAETLTRLFAAHPTTKTYFPHFDLSPGSNDLKVHGK |
59 | 59 |
KVIDALTEAVNNLDDVAGALSKLSDLHAQKLRVDPDNFQFLGLCLEVTIAAHSGGPLKPE |
60 | 60 |
VLLSVDKFLGQISKVLASRYR |
61 |
->HBA1 Pan troglodytes |
|
61 |
+>HBA1_Pan_troglodytes |
|
62 | 62 |
MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG |
63 | 63 |
KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP |
64 | 64 |
AVHASLDKFLASVSTVLTSKYR |
65 |
->HBA1 Rattus norvegicus |
|
65 |
+>HBA1_Rattus_norvegicus |
|
66 | 66 |
MVLSADDKTNIKNCWGKIGGHGGEYGEEALQRMFAAFPTTKTYFSHIDVSPGSAQVKAHG |
67 | 67 |
KKVADALAKAADHVEDLPGALSTLSDLHAHKLRVDPVNFKFLSHCLLVTLACHHPGDFTP |
68 | 68 |
AMHASLDKFLASVSTVLTSKYR |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
->PH4H Homo sapiens |
|
1 |
+>PH4H_Homo_sapiens |
|
2 | 2 |
MSTAVLENPGLGRKLSDFGQETSYIEDNCNQNGAISLIFSLKEEVGALAKVLRLFEENDV |
3 | 3 |
NLTHIESRPSRLKKDEYEFFTHLDKRSLPALTNIIKILRHDIGATVHELSRDKKKDTVPW |
4 | 4 |
FPRTIQELDRFANQILSYGAELDADHPGFKDPVYRARRKQFADIAYNYRHGQPIPRVEYM |
... | ... |
@@ -7,7 +7,7 @@ RLRPVAGLLSSRDFLGGLAFRVFHCTQYIRHGSKPMYTPEPDICHELLGHVPLFSDRSFA |
7 | 7 |
QFSQEIGLASLGAPDEYIEKLATIYWFTVEFGLCKQGDSIKAYGAGLLSSFGELQYCLSE |
8 | 8 |
KPKLLPLELEKTAIQNYTVTEFQPLYYVAESFNDAKEKVRNFAATIPRPFSVRYDPYTQR |
9 | 9 |
IEVLDNTQQLKILADSINSEIGILCSALQKIK |
10 |
->PH4H Rattus norvegicus |
|
10 |
+>PH4H_Rattus_norvegicus |
|
11 | 11 |
MAAVVLENGVLSRKLSDFGQETSYIEDNSNQNGAISLIFSLKEEVGALAKVLRLFEENDI |
12 | 12 |
NLTHIESRPSRLNKDEYEFFTYLDKRTKPVLGSIIKSLRNDIGATVHELSRDKEKNTVPW |
13 | 13 |
FPRTIQELDRFANQILSYGAELDADHPGFKDPVYRARRKQFADIAYNYRHGQPIPRVEYT |
... | ... |
@@ -16,7 +16,7 @@ RLRPVAGLLSSRDFLGGLAFRVFHCTQYIRHGSKPMYTPEPDICHELLGHVPLFSDRSFA |
16 | 16 |
QFSQEIGLASLGAPDEYIEKLATIYWFTVEFGLCKEGDSIKAYGAGLLSSFGELQYCLSD |
17 | 17 |
KPKLLPLELEKTACQEYSVTEFQPLYYVAESFSDAKEKVRTFAATIPRPFSVRYDPYTQR |
18 | 18 |
VEVLDNTQQLKILADSINSEVGILCNALQKIKS |
19 |
->PH4H Mus musculus |
|
19 |
+>PH4H_Mus_musculus |
|
20 | 20 |
MAAVVLENGVLSRKLSDFGQETSYIEDNSNQNGAVSLIFSLKEEVGALAKVLRLFEENEI |
21 | 21 |
NLTHIESRPSRLNKDEYEFFTYLDKRSKPVLGSIIKSLRNDIGATVHELSRDKEKNTVPW |
22 | 22 |
FPRTIQELDRFANQILSYGAELDADHPGFKDPVYRARRKQFADIAYNYRHGQPIPRVEYT |
... | ... |
@@ -25,19 +25,19 @@ RLRPVAGLLSSRDFLGGLAFRVFHCTQYIRHGSKPMYTPEPDICHELLGHVPLFSDRSFA |
25 | 25 |
QFSQEIGLASLGAPDEYIEKLATIYWFTVEFGLCKEGDSIKAYGAGLLSSFGELQYCLSD |
26 | 26 |
KPKLLPLELEKTACQEYTVTEFQPLYYVAESFNDAKEKVRTFAATIPRPFSVRYDPYTQR |
27 | 27 |
VEVLDNTQQLKILADSINSEVGILCHALQKIKS |
28 |
->PH4H Chromobacterium violaceum |
|
28 |
+>PH4H_Chromobacterium_violaceum |
|
29 | 29 |
MNDRADFVVPDITTRKNVGLSHDANDFTLPQPLDRYSAEDHATWATLYQRQCKLLPGRAC |
30 | 30 |
DEFMEGLERLEVDADRVPDFNKLNQKLMAATGWKIVAVPGLIPDDVFFEHLANRRFPVTW |
31 | 31 |
WLREPHQLDYLQEPDVFHDLFGHVPLLINPVFADYLEAYGKGGVKAKALGALPMLARLYW |
32 | 32 |
YTVEFGLINTPAGMRIYGAGILSSKSESIYCLDSASPNRVGFDLMRIMNTRYRIDTFQKT |
33 | 33 |
YFVIDSFKQLFDATAPDFAPLYLQLADAQPWGAGDVAPDDLVLNAGDRQGWADTEDV |
34 |
->PH4H Pseudomonas aeruginosa |
|
34 |
+>PH4H_Pseudomonas_aeruginosa |
|
35 | 35 |
MKTTQYVARQPDDNGFIHYPETEHQVWNTLITRQLKVIEGRACQEYLDGIEQLGLPHERI |
36 | 36 |
PQLDEINRVLQATTGWRVARVPALIPFQTFFELLASQQFPVATFIRTPEELDYLQEPDIF |
37 | 37 |
HEIFGHCPLLTNPWFAEFTHTYGKLGLKASKEERVFLARLYWMTIEFGLVETDQGKRIYG |
38 | 38 |
GGILSSPKETVYSLSDEPLHQAFNPLEAMRTPYRIDILQPLYFVLPDLKRLFQLAQEDIM |
39 | 39 |
ALVHEAMRLGLHAPLFPPKQAA |
40 |
->PH4H Bos taurus |
|
40 |
+>PH4H_Bos_taurus |
|
41 | 41 |
MSALVLESRALGRKLSDFGQETSYIEGNSDQNAVSLIFSLKEEVGALARVLRLFEENDIN |
42 | 42 |
LTHIESRPSRLRKDEYEFFTNLDQRSVPALANIIKILRHDIGATVHELSRDKKKDTVPWF |
43 | 43 |
PRTIQELDNFANQVLSYGAELDADHPGFKDPVYRARRKQFADIAYNYRHGQPIPRVEYTE |
... | ... |
@@ -46,20 +46,20 @@ LRPVAGLLSSRDFLGGLAFRVFHCTQYIRHGSKPMYTPEPDICHELLGHVPLFSDRSFAQ |
46 | 46 |
FSQEIGLASLGAPDEYIEKLATIYWFTVEFGLCKQGDSIKAYGAGLLSSFGELQYCLSDK |
47 | 47 |
PKLLPLELEKTAVQEYTITEFQPLYYVAESFNDAKEKVRNFAATIPRPFSVHYDPYTQRI |
48 | 48 |
EVLDNTQQLKILADSISSEVEILCSALQKLK |
49 |
->PH4H Ralstonia solanacearum |
|
49 |
+>PH4H_Ralstonia_solanacearum |
|
50 | 50 |
MAIATPTSAAPTPAPAGFTGTLTDKLREQFAEGLDGQTLRPDFTMEQPVHRYTAADHATW |
51 | 51 |
RTLYDRQEALLPGRACDEFLQGLSTLGMSREGVPSFDRLNETLMRATGWQIVAVPGLVPD |
52 | 52 |
EVFFEHLANRRFPASWWMRRPDQLDYLQEPDGFHDIFGHVPLLINPVFADYMQAYGQGGL |
53 | 53 |
KAARLGALDMLARLYWYTVEFGLIRTPAGLRIYGAGIVSSKSESVYALDSASPNRIGFDV |
54 | 54 |
HRIMRTRYRIDTFQKTYFVIDSFEQLFDATRPDFTPLYEALGTLPTFGAGDVVDGDAVLN |
55 | 55 |
AGTREGWADTADI |
56 |
->PH4H Caulobacter crescentus |
|
56 |
+>PH4H_Caulobacter_crescentus |
|
57 | 57 |
MSGDGLSNGPPPGARPDWTIDQGWETYTQAEHDVWITLYERQTDMLHGRACDEFMRGLDA |
58 | 58 |
LDLHRSGIPDFARINEELKRLTGWTVVAVPGLVPDDVFFDHLANRRFPAGQFIRKPHELD |
59 | 59 |
YLQEPDIFHDVFGHVPMLTDPVFADYMQAYGEGGRRALGLGRLANLARLYWYTVEFGLMN |
60 | 60 |
TPAGLRIYGAGIVSSRTESIFALDDPSPNRIGFDLERVMRTLYRIDDFQQVYFVIDSIQT |
61 | 61 |
LQEVTLRDFGAIYERLASVSDIGVAEIVPGDAVLTRGTQAYATAGGRLAGAAAG |
62 |
->PH4H Rhizobium loti |
|
62 |
+>PH4H_Rhizobium_loti |
|
63 | 63 |
MSVAEYARDCAAQGLRGDYSVCRADFTVAQDYDYSDEEQAVWRTLCDRQTKLTRKLAHHS |
64 | 64 |
YLDGVEKLGLLDRIPDFEDVSTKLRKLTGWEIIAVPGLIPAAPFFDHLANRRFPVTNWLR |
65 | 65 |
TRQELDYIVEPDMFHDFFGHVPVLSQPVFADFMQMYGKKAGDIIALGGDEMITRLYWYTA |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
->PAH Homo sapiens |
|
1 |
+>PAH_Homo_sapiens |
|
2 | 2 |
CAGCTGGGGGTAAGGGGGGCGGATTATTCATATAATTGTTATACCAGACGGTCGCAGGCTTAGTCCAATT |
3 | 3 |
GCAGAGAACTCGCTTCCCAGGCTTCTGAGAGTCCCGGAAGTGCCTAAACCTGTCTAATCGACGGGGCTTG |
4 | 4 |
GGTGGCCCGTCGCTCCCTGGCTTCTTCCCTTTACCCAGGGCGGGCAGCGAAGTGGTGCCTCCTGCGTCCC |
... | ... |
@@ -38,7 +38,7 @@ ACTCTGCACCTAATCCCCATAACTTCCAGTATCATTTTCCAATTAATTATCAAGTCTGTTTTGGGAAACA |
38 | 38 |
CTTTGAGGACATTTATGATGCAGCAGATGTTGACTAAAGGCTTGGTTGGTAGATATTCAGGAAATGTTCA |
39 | 39 |
CTGAATAAATAAGTAAATACATTATTGAAAAGCAAATCTGTATAAATGTGAAATTTTTATTTGTATTAGT |
40 | 40 |
AATAAAACATTAGTAGTTTA |
41 |
->PAH Mus musculus |
|
41 |
+>PAH_Mus_musculus |
|
42 | 42 |
AGTCCAGGACTGGAGTTGGGTATGGACTGTTCATGTCTATCCACTGCTACGTCAGGGCAACACCCACTGA |
43 | 43 |
GAGTGACCTTGTAGACTGCAGTGGGAGACACCCTTCAAAACCTCTCCTCTCCTGTCCTGAGAGCCAGGTT |
44 | 44 |
AAAACCATCAGCCCCGCATCCTGAGTGCAAACTTTTCCTAACCCTGCTGCTAAGCTAGACACCTCACTTA |
... | ... |
@@ -70,7 +70,7 @@ CATTTGGGCTCTGTCTGTTCATTTTTAACCTCTCAGGTAAGCTCTGATGGAATACATATTGCTTAGTGTA |
70 | 70 |
AAATGTGAGACTGTCATCGGGAACAAATTATTCCATTCAGTACAGATTGTAATCTACAAAGCTTAGTTTC |
71 | 71 |
TACATTCATTCATTATGGCTCTGAGAACTACTTTGTTAGCCTGCTTACATAAATGTCCTCTGATCTAAGA |
72 | 72 |
CTCTATTAAGATAGTTACAAGCAATAAAGCATTATATAAAACAAAAAAAAAA |
73 |
->PAH Rattus norvegicus |
|
73 |
+>PAH_Rattus_norvegicus |
|
74 | 74 |
CAAGTTAAAACCATCAGCCCTCCCATCCTGAGTGCCTACCTTTCCCAACCCCGCTGCTAAGCTAGACACC |
75 | 75 |
TCACTCACTGAGAGCCAGCATGGCAGCTGTTGTCCTGGAGAATGGAGTCCTGAGCAGAAAACTCTCAGAC |
76 | 76 |
TTTGGGCAGGAAACAAGCTATATTGAAGACAACTCCAATCAAAATGGTGCCATATCTCTGATATTCTCAC |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
->PAH Homo sapiens |
|
1 |
+>PAH_Homo_sapiens |
|
2 | 2 |
CAGCUGGGGGUAAGGGGGGCGGAUUAUUCAUAUAAUUGUUAUACCAGACGGUCGCAGGCUUAGUCCAAUU |
3 | 3 |
GCAGAGAACUCGCUUCCCAGGCUUCUGAGAGUCCCGGAAGUGCCUAAACCUGUCUAAUCGACGGGGCUUG |
4 | 4 |
GGUGGCCCGUCGCUCCCUGGCUUCUUCCCUUUACCCAGGGCGGGCAGCGAAGUGGUGCCUCCUGCGUCCC |
... | ... |
@@ -38,7 +38,7 @@ ACUCUGCACCUAAUCCCCAUAACUUCCAGUAUCAUUUUCCAAUUAAUUAUCAAGUCUGUUUUGGGAAACA |
38 | 38 |
CUUUGAGGACAUUUAUGAUGCAGCAGAUGUUGACUAAAGGCUUGGUUGGUAGAUAUUCAGGAAAUGUUCA |
39 | 39 |
CUGAAUAAAUAAGUAAAUACAUUAUUGAAAAGCAAAUCUGUAUAAAUGUGAAAUUUUUAUUUGUAUUAGU |
40 | 40 |
AAUAAAACAUUAGUAGUUUA |
41 |
->PAH Mus musculus |
|
41 |
+>PAH_Mus_musculus |
|
42 | 42 |
AGUCCAGGACUGGAGUUGGGUAUGGACUGUUCAUGUCUAUCCACUGCUACGUCAGGGCAACACCCACUGA |
43 | 43 |
GAGUGACCUUGUAGACUGCAGUGGGAGACACCCUUCAAAACCUCUCCUCUCCUGUCCUGAGAGCCAGGUU |
44 | 44 |
AAAACCAUCAGCCCCGCAUCCUGAGUGCAAACUUUUCCUAACCCUGCUGCUAAGCUAGACACCUCACUUA |
... | ... |
@@ -70,7 +70,7 @@ CAUUUGGGCUCUGUCUGUUCAUUUUUAACCUCUCAGGUAAGCUCUGAUGGAAUACAUAUUGCUUAGUGUA |
70 | 70 |
AAAUGUGAGACUGUCAUCGGGAACAAAUUAUUCCAUUCAGUACAGAUUGUAAUCUACAAAGCUUAGUUUC |
71 | 71 |
UACAUUCAUUCAUUAUGGCUCUGAGAACUACUUUGUUAGCCUGCUUACAUAAAUGUCCUCUGAUCUAAGA |
72 | 72 |
CUCUAUUAAGAUAGUUACAAGCAAUAAAGCAUUAUAUAAAACAAAAAAAAAA |
73 |
->PAH Rattus norvegicus |
|
73 |
+>PAH_Rattus_norvegicus |
|
74 | 74 |
CAAGUUAAAACCAUCAGCCCUCCCAUCCUGAGUGCCUACCUUUCCCAACCCCGCUGCUAAGCUAGACACC |
75 | 75 |
UCACUCACUGAGAGCCAGCAUGGCAGCUGUUGUCCUGGAGAAUGGAGUCCUGAGCAGAAAACUCUCAGAC |
76 | 76 |
UUUGGGCAGGAAACAAGCUAUAUUGAAGACAACUCCAAUCAAAAUGGUGCCAUAUCUCUGAUAUUCUCAC |
... | ... |
@@ -12,8 +12,8 @@ distMatOut= [Filename] |
12 | 12 |
force= [Logical Value] |
13 | 13 |
full= [Logical Value] |
14 | 14 |
fullIter= [Logical Value] |
15 |
-gapOpening= [Negative Numeric] |
|
16 |
-gapExtension= [Negative Numeric] |
|
15 |
+gapOpening= [Numeric] (currently not supported) |
|
16 |
+gapExtension= [Numeric] (currently not supported) |
|
17 | 17 |
guideTreeIn= [Filename] |
18 | 18 |
guideTreeOut= [Filename] |
19 | 19 |
help= [Logical Value] |
... | ... |
@@ -28,9 +28,9 @@ maxHmmIterations= [Int] |
28 | 28 |
maxiters= [Positive Int] |
29 | 29 |
maxNumSeq= [Int] |
30 | 30 |
maxSeqLen= [Int] |
31 |
+order= [String] |
|
31 | 32 |
outfile= [Filename] |
32 | 33 |
outFmt= [String] |
33 |
-outputOrder= [String] |
|
34 | 34 |
percentId= [Logical Value] |
35 | 35 |
profile1= [Filename] |
36 | 36 |
profile2= [Filename] |
... | ... |
@@ -13,8 +13,8 @@ dnamatrix= [Filename] or [String] |
13 | 13 |
endgaps= [Logical Value] |
14 | 14 |
fullhelp= [Logical Value] |
15 | 15 |
gapdist= [Int] |
16 |
-gapOpening= [Negative Numeric] |
|
17 |
-gapExtension= [Negative Numeric] |
|
16 |
+gapOpening= [Numeric] |
|
17 |
+gapExtension= [Numeric] |
|
18 | 18 |
helixendin= [Int] |
19 | 19 |
helixendout= [Int] |
20 | 20 |
helixgap= [Int] |
... | ... |
@@ -34,7 +34,7 @@ nosecstr2= [Logical Value] |
34 | 34 |
novgap= [Logical Value] |
35 | 35 |
noweights= [Logical Value] |
36 | 36 |
options= [Logical Value] |
37 |
-outorder= [String] |
|
37 |
+order= [String] |
|
38 | 38 |
output= [String] |
39 | 39 |
outputtree= [String] |
40 | 40 |
pairgap= [Int] |
... | ... |
@@ -20,8 +20,8 @@ diags2= [Logical Value] |
20 | 20 |
dimer= [Logical Value] |
21 | 21 |
distance1= [String] |
22 | 22 |
distance2= [String] |
23 |
-gapOpening= [Negative Numeric] |
|
24 |
-gapExtension= [Negative Numeric] |
|
23 |
+gapOpening= [Numeric] |
|
24 |
+gapExtension= [Numeric] |
|
25 | 25 |
hydro= [Positive Int] |
26 | 26 |
hydrofactor= [Positive Numeric] |
27 | 27 |
in1= [Filename] |
... | ... |
@@ -36,6 +36,7 @@ minsmoothscore= [Positive Numeric] |
36 | 36 |
noanchors= [Logical Value] |
37 | 37 |
nocore= [Logical Value] |
38 | 38 |
objscore= [String] |
39 |
+order= [String] |
|
39 | 40 |
profile= [Logical Value] |
40 | 41 |
refine= [Logical Value] |
41 | 42 |
refinew= [Logical Value] |
... | ... |
@@ -11,7 +11,8 @@ |
11 | 11 |
cluster="default", gapOpening="default", |
12 | 12 |
gapExtension="default", maxiters="default", |
13 | 13 |
substitutionMatrix="default", type="default", |
14 |
- verbose=FALSE, help=FALSE, ...) |
|
14 |
+ order=c("aligned", "input"), verbose=FALSE, help=FALSE, |
|
15 |
+ ...) |
|
15 | 16 |
} |
16 | 17 |
\arguments{ |
17 | 18 |
\item{inputSeqs}{input sequences; this argument can be a character vector, |
... | ... |
@@ -31,16 +32,16 @@ |
31 | 32 |
\code{\link{msaMuscle}} for algorithm-specific information.} |
32 | 33 |
\item{gapOpening}{gap opening penalty; the defaults are |
33 | 34 |
specific to the algorithm (see \code{\link{msaClustalW}}, |
34 |
- \code{\link{msaClustalOmega}}, or \code{\link{msaMuscle}}); |
|
35 |
- In order to standardize interfaces, |
|
36 |
- all algorithms consistently use negative values for the gap opening |
|
37 |
- penalty.} |
|
35 |
+ and \code{\link{msaMuscle}}). Note that the sign of |
|
36 |
+ this parameter is ignored. The sign is automatically |
|
37 |
+ adjusted such that the called algorithm penalizes gaps |
|
38 |
+ instead of rewarding them.} |
|
38 | 39 |
\item{gapExtension}{gap extension penalty; the defaults are |
39 | 40 |
specific to the algorithm (see \code{\link{msaClustalW}}, |
40 |
- \code{\link{msaClustalOmega}}, or \code{\link{msaMuscle}}); |
|
41 |
- In order to standardize interfaces, |
|
42 |
- all algorithms consistently use negative values for the gap extension |
|
43 |
- penalty.} |
|
41 |
+ and \code{\link{msaMuscle}}). Note that the sign of |
|
42 |
+ this parameter is ignored. The sign is automatically |
|
43 |
+ adjusted such that the called algorithm penalizes gaps |
|
44 |
+ instead of rewarding them.} |
|
44 | 45 |
\item{maxiters}{maximum number of iterations; its |
45 | 46 |
interpretation and default value depends on the method; |
46 | 47 |
see \code{\link{msaClustalW}}, \code{\link{msaClustalOmega}}, or |
... | ... |
@@ -63,6 +64,18 @@ |
63 | 64 |
parameter is not necessary. If it is nevertheless specified and the |
64 | 65 |
type does not match the class of \code{inputSeqs}, the function |
65 | 66 |
stops with an error.} |
67 |
+ \item{order}{how the sequences should be ordered in the output object; |
|
68 |
+ if \code{"aligned"} is chosen, the sequences are ordered in the way |
|
69 |
+ the multiple sequence alignment algorithm orders them. If |
|
70 |
+ \code{"input"} is chosen, the sequences in the output object are |
|
71 |
+ ordered in the same way as the input sequences. For MUSCLE, the |
|
72 |
+ choice \code{"input"} is not available for sequence data that is |
|
73 |
+ read directly from a FASTA file. Even if sequences are supplied |
|
74 |
+ directly via R, the sequences must have unique names, otherwise |
|
75 |
+ the input order cannot be recovered. If the sequences do not have |
|
76 |
+ names or if the names are not unique, the \code{\link{msaMuscle}} |
|
77 |
+ function assignes generic unique names \code{"Seq1"}-\code{Seqn} |
|
78 |
+ to the sequences and issues a warning.} |
|
66 | 79 |
\item{verbose}{if \code{TRUE}, the algorithm displays detailed |
67 | 80 |
information and progress messages.} |
68 | 81 |
\item{help}{if \code{TRUE}, information about algorithm-specific |
... | ... |
@@ -86,22 +99,24 @@ |
86 | 99 |
|
87 | 100 |
Note that the input sequences may be reordered by the multiple |
88 | 101 |
sequence alignment algorithms in order to group together similar |
89 |
- sequences. Therefore, it is necessary to assign names to input |
|
90 |
- sequences in case the user wants to preserve a one-to-one assignment |
|
91 |
- between input sequences and the sequences in the multiple sequence |
|
92 |
- alignment. That is why all algorithms check input sequences for |
|
93 |
- unique names and issue warnings if the input sequences are unnamed or |
|
94 |
- if the names are not unique. |
|
95 |
- As noted in the description of the \code{inputSeqs} argument above, |
|
96 |
- all functions, \code{msa()}, \code{\link{msaClustalW}}, |
|
97 |
- \code{\link{msaClustalOmega}}, and \code{\link{msaMuscle}}, also allow |
|
102 |
+ sequences (see also description of argument \code{order} above). |
|
103 |
+ So, if the input order should be preserved or if the input order |
|
104 |
+ should be recovered later, we strongly recommend to always assign |
|
105 |
+ unique names to the input sequences. As noted in the description |
|
106 |
+ of the \code{inputSeqs} argument above, all functions, \code{msa()}, |
|
107 |
+ \code{\link{msaClustalW}}, \code{\link{msaClustalOmega}}, and |
|
108 |
+ \code{\link{msaMuscle}}, also allow |
|
98 | 109 |
for direct reading from FASTA files. This is mainly for the reason of |
99 | 110 |
memory efficiency if the sequence data set is very large. Otherwise, |
100 | 111 |
we want to encourage users to first read the sequences into the R |
101 |
- workspace. In any case, if sequences are read from a FASTA file |
|
102 |
- directly, this is completely under the control of the respective |
|
112 |
+ workspace. If sequences are read from a FASTA file |
|
113 |
+ directly, the order of output sequences is completely under |
|
114 |
+ the control of the respective |
|
103 | 115 |
algorithm and does not allow for checking whether the sequences are |
104 |
- named uniquely in the FASTA file. |
|
116 |
+ named uniquely in the FASTA file. The preservation of the input order |
|
117 |
+ works also for sequence data read from a FASTA file, but only for |
|
118 |
+ ClustalW and ClustalOmega; MUSCLE does not support this (see also |
|
119 |
+ argument \code{order} above and \code{\link{msaMuscle}}). |
|
105 | 120 |
} |
106 | 121 |
\value{ |
107 | 122 |
Depending on the type of sequences for which it was called, |
... | ... |
@@ -164,10 +179,12 @@ mySeqs <- readAAStringSet(filepath) |
164 | 179 |
msa(mySeqs) |
165 | 180 |
|
166 | 181 |
## call ClustalOmega through unified interface |
167 |
-msa(mySeqs, method="ClustalOmega", cluster=120, gapOpening=-3, |
|
168 |
- gapExtension=-1, maxGuidetreeIterations=20, outFmt="clustal", |
|
169 |
- outputOrder="input-order", substitutionMatrix="blosum30", |
|
170 |
- type="protein", verbose=FALSE) |
|
182 |
+msa(mySeqs, method="ClustalOmega") |
|
183 |
+ |
|
184 |
+## call MUSCLE through unified interface with some custom parameters |
|
185 |
+msa(mySeqs, method="Muscle", gapOpening=12, gapExtension=3, maxiters=16, |
|
186 |
+ cluster="upgmamax", SUEFF=0.4, brenner=FALSE, |
|
187 |
+ order="input", verbose=FALSE) |
|
171 | 188 |
} |
172 | 189 |
\keyword{manip} |
173 | 190 |
|
... | ... |
@@ -9,38 +9,36 @@ |
9 | 9 |
msaClustalOmega(inputSeqs, cluster="default", |
10 | 10 |
gapOpening="default", gapExtension="default", |
11 | 11 |
maxiters="default", substitutionMatrix="default", |
12 |
- type="default", verbose=FALSE, help=FALSE, |
|
13 |
- ...) |
|
12 |
+ type="default", order=c("aligned", "input"), |
|
13 |
+ verbose=FALSE, help=FALSE, ...) |
|
14 | 14 |
} |
15 | 15 |
\arguments{ |
16 | 16 |
\item{inputSeqs}{input sequences; see \code{\link{msa}}. |
17 | 17 |
In the original ClustalOmega implementation, this |
18 |
- parameter is called \code{-infile}.} |
|
18 |
+ parameter is called \code{infile}.} |
|
19 | 19 |
\item{cluster}{The cluster size which should be used. The default is 100. |
20 | 20 |
In the original ClustalOmega implementation, this parameter is called |
21 |
- \code{--cluster-size}.} |
|
22 |
- \item{gapOpening}{gap opening penalty; the default value is -6.0. |
|
23 |
- In order to standardize interfaces, |
|
24 |
- all algorithms consistently use negative values for the gap open |
|
25 |
- penalty. This parameter is a new feature - the original ClustalOmega |
|
26 |
- implementation does not allow for customizing gap penalties.} |
|
27 |
- \item{gapExtension}{gap extension penalty; the default value is -1.0. |
|
28 |
- In order to standardize interfaces, |
|
29 |
- all algorithms consistently use negative values for the gap open |
|
30 |
- penalty. This parameter is a new feature - the original ClustalOmega |
|
31 |
- implementation does not allow for customizing gap penalties.} |
|
21 |
+ \code{cluster-size}.} |
|
22 |
+ \item{gapOpening,gapExtension}{ClustalOmega currently does |
|
23 |
+ not allow to adjust gap penalties; these arguments are only for |
|
24 |
+ future extensions and consistency with the other algorithms |
|
25 |
+ and \code{\link{msa}}. However, setting these parameters to values |
|
26 |
+ other than \code{"default"} will result in a warning.} |
|
32 | 27 |
\item{maxiters}{maximum number of iterations; the default value is 0 |
33 | 28 |
(no limitation). In the original ClustalOmega implementation, this |
34 |
- parameter is called \code{--iterations}.} |
|
29 |
+ parameter is called \code{iterations}.} |
|
35 | 30 |
\item{substitutionMatrix}{substitution matrix for scoring matches and |
36 | 31 |
mismatches; can be a real matrix, a file name, or the name of a |
37 | 32 |
built-in substitution matrix. In the latter case, the choices |
38 |
- \code{"blosum30"}, \code{"blosum40"}, \code{"blosum50"}, |
|
39 |
- \code{"blosum65"}, \code{"blosum80"}, and \code{"gonnet"} are |
|
33 |
+ \code{"BLOSUM30"}, \code{"BLOSUM40"}, \code{"BLOSUM50"}, |
|
34 |
+ \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"} are |
|
40 | 35 |
supported. This parameter is a new feature - the original ClustalOmega |
41 | 36 |
implementation does not allow for using a custom substitution matrix.} |
42 | 37 |
\item{type}{type of the input sequences \code{inputSeqs}; |
43 | 38 |
see \code{\link{msa}}.} |
39 |
+ \item{order}{how the sequences should be ordered in the output object |
|
40 |
+ (see \code{\link{msa}}); in the original ClustalW implementation, this |
|
41 |
+ parameter is called \code{output-order}.} |
|
44 | 42 |
\item{verbose}{if \code{TRUE}, the algorithm displays detailed |
45 | 43 |
information and progress messages.} |
46 | 44 |
\item{help}{if \code{TRUE}, information about algorithm-specific |
... | ... |
@@ -102,8 +100,7 @@ mySeqs <- readAAStringSet(filepath) |
102 | 100 |
msaClustalOmega(mySeqs) |
103 | 101 |
|
104 | 102 |
## call msaClustalOmega with custom parameters |
105 |
-msaClustalOmega(mySeqs, gapOpening=-6, gapExtension=-1, auto=FALSE, |
|
106 |
- cluster=120, dealign=FALSE, useKimura=FALSE, |
|
107 |
- verbose=FALSE) |
|
103 |
+msaClustalOmega(mySeqs, auto=FALSE, cluster=120, dealign=FALSE, |
|
104 |
+ useKimura=FALSE, order="input", verbose=FALSE) |
|
108 | 105 |
} |
109 | 106 |
\keyword{manip} |
... | ... |
@@ -9,31 +9,26 @@ |
9 | 9 |
msaClustalW(inputSeqs, cluster="default", gapOpening="default", |
10 | 10 |
gapExtension="default", maxiters="default", |
11 | 11 |
substitutionMatrix="default", type="default", |
12 |
- verbose=FALSE, help=FALSE, ...) |
|
12 |
+ order=c("aligned", "input"), verbose=FALSE, |
|
13 |
+ help=FALSE, ...) |
|
13 | 14 |
} |
14 | 15 |
\arguments{ |
15 | 16 |
\item{inputSeqs}{input sequences; see \code{\link{msa}}. |
16 | 17 |
In the original ClustalW implementation, this |
17 |
- parameter is called \code{-infile}.} |
|
18 |
+ parameter is called \code{infile}.} |
|
18 | 19 |
\item{cluster}{The clustering method which should be used. |
19 | 20 |
Possible values are \code{"nj"} (default) and \code{"upgma"}. |
20 | 21 |
In the original ClustalW implementation, this parameter is called |
21 |
- \code{-clustering}.} |
|
22 |
+ \code{clustering}.} |
|
22 | 23 |
\item{gapOpening}{gap opening penalty; the default value for |
23 |
- nucleotide sequences is -15.0, the default value for |
|
24 |
- amino acid sequences is -10.0. In order to standardize interfaces, |
|
25 |
- all algorithms consistently use negative values for the gap open |
|
26 |
- penalty. In the original ClustalW implementation, this parameter is |
|
27 |
- called \code{-gapOpen}.} |
|
24 |
+ nucleotide sequences is 15.0, the default value for |
|
25 |
+ amino acid sequences is 10.0.} |
|
28 | 26 |
\item{gapExtension}{gap extension penalty; the default value for |
29 |
- nucleotide sequences is -6.66, the default value for |
|
30 |
- amino acid sequences is -0.2. In order to standardize interfaces, |
|
31 |
- all algorithms consistently use negative values for the gap extension |
|
32 |
- penalty. In the original ClustalW implementation, this parameter is |
|
33 |
- called \code{-gapExt}.} |
|
27 |
+ nucleotide sequences is 6.66, the default value for |
|
28 |
+ amino acid sequences is 0.2.} |
|
34 | 29 |
\item{maxiters}{maximum number of iterations; the default value is 16. |
35 | 30 |
In the original ClustalW implementation, this parameter is called |
36 |
- \code{-numiters}.} |
|
31 |
+ \code{numiters}.} |
|
37 | 32 |
\item{substitutionMatrix}{substitution matrix for scoring matches and |
38 | 33 |
mismatches; can be a real matrix, a file name, or the name of a |
39 | 34 |
built-in substitution matrix. In the latter case, the choices |
... | ... |
@@ -42,9 +37,12 @@ |
42 | 37 |
sequences, the parameter \code{dnamatrix} must be used instead. |
43 | 38 |
The valid choices for this parameter are \code{"iub"} and |
44 | 39 |
\code{"clustalw"}. In the original ClustalW implementation, this |
45 |
- parameter is called \code{-matrix}.} |
|
40 |
+ parameter is called \code{matrix}.} |
|
46 | 41 |
\item{type}{type of the input sequences \code{inputSeqs}; |
47 | 42 |
see \code{\link{msa}}.} |
43 |
+ \item{order}{how the sequences should be ordered in the output object |
|
44 |
+ (see \code{\link{msa}}); in the original ClustalW implementation, this |
|
45 |
+ parameter is called \code{outorder}.} |
|
48 | 46 |
\item{verbose}{if \code{TRUE}, the algorithm displays detailed |
49 | 47 |
information and progress messages.} |
50 | 48 |
\item{help}{if \code{TRUE}, information about algorithm-specific |
... | ... |
@@ -105,7 +103,7 @@ mySeqs <- readAAStringSet(filepath) |
105 | 103 |
msaClustalW(mySeqs) |
106 | 104 |
|
107 | 105 |
## call msaClustalW with custom parameters |
108 |
-msaClustalW(mySeqs, gapOpening=-1, gapExtension=-1, maxiters=16, |
|
109 |
- type="protein", cluster="upgma", kimura=FALSE, maxdiv=23) |
|
106 |
+msaClustalW(mySeqs, gapOpening=1, gapExtension=1, maxiters=16, |
|
107 |
+ cluster="upgma", kimura=FALSE, order="input", maxdiv=23) |
|
110 | 108 |
} |
111 | 109 |
\keyword{manip} |
... | ... |
@@ -8,7 +8,8 @@ |
8 | 8 |
\usage{ |
9 | 9 |
msaMuscle(inputSeqs, cluster="default", gapOpening="default", |
10 | 10 |
gapExtension="default", maxiters="default", |
11 |
- substitutionMatrix="default", type="default", |
|
11 |
+ substitutionMatrix="default", |
|
12 |
+ type="default", order=c("aligned", "input"), |
|
12 | 13 |
verbose=FALSE, help=FALSE, ...) |
13 | 14 |
} |
14 | 15 |
\arguments{ |
... | ... |
@@ -19,16 +20,12 @@ |
19 | 20 |
Possible values are \code{"upgma"}, \code{"upgmamax"}, |
20 | 21 |
\code{"upgmamin"}, \code{"upgmb"}, and \code{"neighborjoining"}.} |
21 | 22 |
\item{gapOpening}{gap opening penalty; the default |
22 |
- is -400 for DNA sequences and -420 for RNA sequences. The default |
|
23 |
+ is 400 for DNA sequences and 420 for RNA sequences. The default |
|
23 | 24 |
for amino acid sequences depends on the profile score settings: |
24 |
- for the setting \code{le=TRUE}, the default is -2.9, for |
|
25 |
- \code{sp=TRUE}, the default is -1,439, and for \code{sv=TRUE}, |
|
26 |
- the default is -300. In order to standardize interfaces, |
|
27 |
- all algorithms consistently use negative values for the gap opening |
|
28 |
- penalty.} |
|
29 |
- \item{gapExtension}{gap extension penalty; the default is 0. |
|
30 |
- In order to standardize interfaces, all algorithms consistently |
|
31 |
- use negative values for the gap extension penalty.} |
|
25 |
+ for the setting \code{le=TRUE}, the default is 2.9, for |
|
26 |
+ \code{sp=TRUE}, the default is 1,439, and for \code{sv=TRUE}, |
|
27 |
+ the default is 300.} |
|
28 |
+ \item{gapExtension}{gap extension penalty; the default is 0.} |
|
32 | 29 |
\item{maxiters}{maximum number of iterations; the default is 16. |
33 | 30 |
In the original MUSCLE implementation, it is also possible |
34 | 31 |
to set \code{maxiters} to 0 which leads to an (out of memory) error. |
... | ... |
@@ -41,6 +38,17 @@ |
41 | 38 |
this format is not supported by \code{msaMuscle}.} |
42 | 39 |
\item{type}{type of the input sequences \code{inputSeqs}; |
43 | 40 |
see \code{\link{msa}}.} |
41 |
+ \item{order}{how the sequences should be ordered in the output object |
|
42 |
+ (see \code{\link{msa}} for more details); the original MUSCLE |
|
43 |
+ implementation does not allow for preserving the order of input |
|
44 |
+ sequences. The \code{msaMuscle} function realizes this functionality |
|
45 |
+ by reverse matching of sequence names. Therefore, the sequences |
|
46 |
+ need to have unique names. If the sequences do not have |
|
47 |
+ names or if the names are not unique, the \code{\link{msaMuscle}} |
|
48 |
+ function assignes generic unique names \code{"Seq1"}-\code{Seqn} |
|
49 |
+ to the sequences and issues a warning. The choice \code{"input"} |
|
50 |
+ is not available at all for sequence data that is |
|
51 |
+ read directly from a FASTA file.} |
|
44 | 52 |
\item{verbose}{if \code{TRUE}, the algorithm displays detailed |
45 | 53 |
information and progress messages.} |
46 | 54 |
\item{help}{if \code{TRUE}, information about algorithm-specific |
... | ... |
@@ -104,8 +112,8 @@ mySeqs <- readAAStringSet(filepath) |
104 | 112 |
msaMuscle(mySeqs) |
105 | 113 |
|
106 | 114 |
## call msaMuscle with custom parameters |
107 |
-msaMuscle(mySeqs, gapOpening=-12, gapExtension=-3, maxiters=16, |
|
108 |
- type="protein", cluster="upgmamax", SUEFF=0.4, |
|
109 |
- brenner=FALSE, verbose=FALSE) |
|
115 |
+msaMuscle(mySeqs, gapOpening=12, gapExtension=3, maxiters=16, |
|
116 |
+ cluster="upgmamax", SUEFF=0.4, brenner=FALSE, |
|
117 |
+ order="input", verbose=FALSE) |
|
110 | 118 |
} |
111 | 119 |
\keyword{manip} |
... | ... |
@@ -1,9 +1,9 @@ |
1 |
-OS := $(shell uname) |
|
2 |
-ifeq ($(OS), Darwin) |
|
3 |
- CONFIGURE_FLAGS='--without-openmp' |
|
4 |
-else |
|
5 |
- CONFIGURE_FLAGS='' |
|
6 |
-endif |
|
1 |
+## Variant 1: deactivate OpenMP on Mac Platforms |
|
2 |
+CONFIGURE_FLAGS=`Rscript -e "if (Sys.info()['sysname'] == 'Darwin') cat('--without-openmp')"` |
|
3 |
+## Variant 2: deactivate OpenMP generally |
|
4 |
+#CONFIGURE_FLAGS=--without-openmp |
|
5 |
+## Variant 3: let the configure script determine whether OpenMP is available |
|
6 |
+#CONFIGURE_FLAGS= |
|
7 | 7 |
|
8 | 8 |
CPPNames=\ |
9 | 9 |
exceptions4c/e4c_lite.c \ |
... | ... |
@@ -43,7 +43,7 @@ all: clustalomega |
43 | 43 |
|
44 | 44 |
clustalomega: |
45 | 45 |
./configure $(CONFIGURE_FLAGS); \ |
46 |
- export PKG_LIBS="$(PKG_LIBS) $(SHLIB_OPENMP_CFLAGS) `Rscript -e "Rcpp:::LdFlags()"`"; \ |
|
46 |
+ export PKG_LIBS="$(PKG_LIBS) $(SHLIB_OPENMP_CFLAGS)"; \ |
|
47 | 47 |
export PKG_CXXFLAGS="$(PKG_CXXFLAGS) $(SHLIB_OPENMP_CXXFLAGS) -fPIC -DHAVE_CONFIG_H -I. -DCLUSTALO -DCLUSTALO_NOFILE -DDEFAULT_FILTER=90 -I"../../gc-7.2/include" `Rscript -e "Rcpp:::CxxFlags()"`"; \ |
48 | 48 |
export PKG_CFLAGS="$(PKG_CFLAGS) $(SHLIB_OPENMP_CFLAGS) -fPIC -DHAVE_CONFIG_H -I. -DCLUSTALO -DCLUSTALO_NOFILE -DDEFAULT_FILTER=90 -I"../../gc-7.2/include" `Rscript -e "Rcpp:::CxxFlags()"`"; \ |
49 | 49 |
cd src; \ |
... | ... |
@@ -37,10 +37,10 @@ all: clustalomega |
37 | 37 |
clustalomega: |
38 | 38 |
cp windows/src/config.h src/; \ |
39 | 39 |
cp windows/src/clustal-omega-config.h src/; \ |
40 |
- export PKG_LIBS="$(PKG_LIBS) -L"../../gc-7.2" -lgccpp72 -lgc72 `${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "Rcpp:::LdFlags()"`"; \ |
|
40 |
+ export PKG_LIBS="$(PKG_LIBS) -L"../../gc-7.2" -lgccpp72 -lgc72"; \ |
|
41 | 41 |
export PKG_CXXFLAGS="$(PKG_CXXFLAGS) -DHAVE_CONFIG_H -I. -DCLUSTALO -DCLUSTALO_NOFILE -DDEFAULT_FILTER=90 -I"../../gc-7.2/include" `${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "Rcpp:::CxxFlags()"`"; \ |
42 | 42 |
export PKG_CFLAGS="$(PKG_CFLAGS) -DHAVE_CONFIG_H -I. -DCLUSTALO -DCLUSTALO_NOFILE -DDEFAULT_FILTER=90 -I"../../gc-7.2/include" -lgccpp -lgc `${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "Rcpp:::CxxFlags()"`"; \ |
43 | 43 |
cd src; \ |
44 |
- ${R_HOME}/bin${R_ARCH_BIN}/R.exe CMD SHLIB -o ClustalOmega.dll $(CPPNames); \ |
|
45 |
- $(AR) rcs libClustalOmega.a $(OBJNames); \ |
|
44 |
+ ${R_HOME}/bin${R_ARCH_BIN}/R.exe CMD SHLIB -o ClustalOmega.dll $(CPPNames) && \ |
|
45 |
+ $(AR) rcs libClustalOmega.a $(OBJNames) && \ |
|
46 | 46 |
cp libClustalOmega.a ../../ |
... | ... |
@@ -182,7 +182,6 @@ SEXP RClustalOmega(SEXP rInputSeqs, |
182 | 182 |
msaInput.seqLength = seqLength; |
183 | 183 |
msaInput.seqNames = seqNames; |
184 | 184 |
//hardcoded value, if sequence is an R sequence |
185 |
- //appendString(&argv, argc, "R"); |
|
186 | 185 |
appendString(&argv, argc, "--R"); |
187 | 186 |
} |
188 | 187 |
appendString(&argv, argc, "-o"); |
... | ... |
@@ -243,6 +242,10 @@ SEXP RClustalOmega(SEXP rInputSeqs, |
243 | 242 |
appendStringValue(&argv, argc, "--distmat-out", |
244 | 243 |
getListElement(rParams, "distMatOut")); |
245 | 244 |
} |
245 |
+ if (hasClustalOmegaEntry(rParams, "outputOrder")) { |
|
246 |
+ appendStringValue(&argv, argc, "--output-order", |
|
247 |
+ getListElement(rParams, "outputOrder")); |
|
248 |
+ } |
|
246 | 249 |
if (hasClustalOmegaEntry(rParams, "guideTreeIn")) { |
247 | 250 |
appendStringValue(&argv, argc, "--guidetree-in", |
248 | 251 |
getListElement(rParams, "guideTreeIn")); |
... | ... |
@@ -244,7 +244,7 @@ |
244 | 244 |
|
245 | 245 |
/* The size of `fpos_t', as computed by sizeof. */ |
246 | 246 |
#ifndef CLUSTAL_OMEGA_SIZEOF_FPOS_T |
247 |
-#define CLUSTAL_OMEGA_SIZEOF_FPOS_T 16 |
|
247 |
+#define CLUSTAL_OMEGA_SIZEOF_FPOS_T 12 |
|
248 | 248 |
#endif |
249 | 249 |
|
250 | 250 |
/* The size of `unsigned int', as computed by sizeof. */ |
... | ... |
@@ -254,7 +254,7 @@ |
254 | 254 |
|
255 | 255 |
/* The size of `unsigned long', as computed by sizeof. */ |
256 | 256 |
#ifndef CLUSTAL_OMEGA_SIZEOF_UNSIGNED_LONG |
257 |
-#define CLUSTAL_OMEGA_SIZEOF_UNSIGNED_LONG 8 |
|
257 |
+#define CLUSTAL_OMEGA_SIZEOF_UNSIGNED_LONG 4 |
|
258 | 258 |
#endif |
259 | 259 |
|
260 | 260 |
/* The size of `unsigned long long', as computed by sizeof. */ |
... | ... |
@@ -1094,8 +1094,10 @@ HHalignWrapper(mseq_t *prMSeq, int *piOrderLR, |
1094 | 1094 |
CKFREE(pcReprsnt2); |
1095 | 1095 |
CKFREE(pcConsens2); |
1096 | 1096 |
} |
1097 |
- ppiLeafList[iL] = CKFREE(ppiLeafList[iL]); |
|
1098 |
- ppiLeafList[iR] = CKFREE(ppiLeafList[iR]); |
|
1097 |
+ CKFREE(ppiLeafList[iL]); |
|
1098 |
+ ppiLeafList[iL] = NULL; |
|
1099 |
+ CKFREE(ppiLeafList[iR]); |
|
1100 |
+ ppiLeafList[iR] = NULL; |
|
1099 | 1101 |
piLeafCount[iL] = piLeafCount[iR] = 0; |
1100 | 1102 |
|
1101 | 1103 |
} /* was a merge node */ |
... | ... |
@@ -940,7 +940,9 @@ FreeRSeq(mseq_t **mseq, bool bRSequence) |
940 | 940 |
} |
941 | 941 |
|
942 | 942 |
if ((*mseq)->filename && !bRSequence) { |
943 |
- (*mseq)->filename = CKFREE((*mseq)->filename); |
|
943 |
+ //(*mseq)->filename = CKFREE((*mseq)->filename); |
|
944 |
+ CKFREE((*mseq)->filename); |
|
945 |
+ (*mseq)->filename = NULL; |
|
944 | 946 |
} |
945 | 947 |
|
946 | 948 |
for (i=0; i<(*mseq)->nseqs; i++) { |
... | ... |
@@ -134,7 +134,7 @@ ReadSequences(mseq_t *prMSeq_p, char *pcSeqFile, |
134 | 134 |
int iMaxNumSeq, int iMaxSeqLen); |
135 | 135 |
|
136 | 136 |
extern int |
137 |
-ReadSequencesFrom(mseq_t *prMSeq_p, int seqLength, char **seq, char **seqNames, |
|
137 |
+ReadSequencesFromR(mseq_t *prMSeq_p, int seqLength, char **seq, char **seqNames, |
|
138 | 138 |
int iSeqType, int iSeqFmt, bool bIsProfile, |
139 | 139 |
int iMaxNumSeq, int iMaxSeqLen); |
140 | 140 |
|
... | ... |
@@ -918,11 +918,10 @@ hhalign(char **ppcFirstProf, int iFirstCnt, double *pdWeightsL, |
918 | 918 |
par.gapg = par.gapgV; |
919 | 919 |
par.gaph = par.gaphV; |
920 | 920 |
par.gapi = par.gapiV; |
921 |
- |
|
922 |
- par.gapExtension = rHhalignPara.gapExtension; |
|
923 |
- par.gapOpening = rHhalignPara.gapOpening; |
|
924 |
- |
|
925 | 921 |
} |
922 |
+ |
|
923 |
+ par.gapExtension = rHhalignPara.gapExtension; |
|
924 |
+ par.gapOpening = rHhalignPara.gapOpening; |
|
926 | 925 |
|
927 | 926 |
// Read input file (HMM, HHM, or alignment format), and add pseudocounts etc. |
928 | 927 |
q.cQT = 'q'; |
... | ... |
@@ -77,15 +77,15 @@ Alignment::Alignment(int maxseq, int maxres) |
77 | 77 |
|
78 | 78 |
//printf(">>>>>>>>%s:%s:%d: maxseq=%d, maxres=%d\n", __FUNCTION__, __FILE__, __LINE__, maxseq, maxres); /* (FS) */ |
79 | 79 |
longname = new(char[DESCLEN]); |
80 |
- sname = new(char*[maxseq+2]); /* MR1 */ |
|
81 |
- seq = new(char*[maxseq+2]); /* MR1 */ |
|
82 |
- l = new(int[maxres]); |
|
83 |
- X = new(char*[maxseq+2]); /* MR1 */ |
|
84 |
- I = new(short unsigned int*[maxseq+2]); /* MR1 */ |
|
85 |
- keep = new(char[maxseq+2]); /* MR1 */ |
|
86 |
- display = new(char[maxseq+2]); /* MR1 */ |
|
87 |
- wg = new(float[maxseq+2]); /* MR1 */ |
|
88 |
- nseqs = new(int[maxres+2]); /* MR1 */ |
|
80 |
+ sname = new char*[maxseq+2]; /* MR1 */ |
|
81 |
+ seq = new char*[maxseq+2]; /* MR1 */ |
|
82 |
+ l = new int[maxres]; |
|
83 |
+ X = new char*[maxseq+2]; /* MR1 */ |
|
84 |
+ I = new short unsigned int*[maxseq+2]; /* MR1 */ |
|
85 |
+ keep = new char[maxseq+2]; /* MR1 */ |
|
86 |
+ display = new char[maxseq+2]; /* MR1 */ |
|
87 |
+ wg = new float[maxseq+2]; /* MR1 */ |
|
88 |
+ nseqs = new int[maxres+2]; /* MR1 */ |
|
89 | 89 |
N_in=L=0; |
90 | 90 |
nres=NULL; // number of residues per sequence k |
91 | 91 |
first=NULL; // first residue in sequence k |
... | ... |
@@ -138,7 +138,7 @@ Alignment::Read(FILE* inf, char infile[], char* firstline) |
138 | 138 |
int k; // Index of sequence being read currently (first=0) |
139 | 139 |
char line[LINELEN]=""; // input line |
140 | 140 |
//char cur_seq[MAXCOL]; // Sequence currently read in |
141 |
- char *cur_seq=new(char[par.maxColCnt]); |
|
141 |
+ char *cur_seq=new char[par.maxColCnt]; |
|
142 | 142 |
char* cur_name; // Sequence currently read in |
143 | 143 |
int linenr=0; // current line number in input file |
144 | 144 |
char skip_sequence=0; |
... | ... |
@@ -180,11 +180,11 @@ Alignment::Read(FILE* inf, char infile[], char* firstline) |
180 | 180 |
} |
181 | 181 |
|
182 | 182 |
// Create space for residues and paste new sequence in |
183 |
- seq[k]=new(char[strlen(cur_seq)+2]); |
|
183 |
+ seq[k]=new char[strlen(cur_seq)+2]; |
|
184 | 184 |
if (!seq[k]) MemoryError("array for input sequences"); |
185 |
- X[k]=new(char[strlen(cur_seq)+2]); |
|
185 |
+ X[k]=new char[strlen(cur_seq)+2]; |
|
186 | 186 |
if (!X[k]) MemoryError("array for input sequences"); |
187 |
- I[k]=new(short unsigned int[strlen(cur_seq)+2]); |
|
187 |
+ I[k]=new short unsigned int[strlen(cur_seq)+2]; |
|
188 | 188 |
if (!I[k]) MemoryError("array for input sequences"); |
189 | 189 |
strcpy(seq[k],cur_seq); |
190 | 190 |
} |
... | ... |
@@ -233,7 +233,7 @@ Alignment::Read(FILE* inf, char infile[], char* firstline) |
233 | 233 |
|
234 | 234 |
// store sequence name |
235 | 235 |
if (v>=4) printf("Reading seq %-16.16s k=%3i n_displ=%3i display[k]=%i keep[k]=%i\n",cur_name,k,n_display,display[k],keep[k]); |
236 |
- sname[k] = new(char[strlen(cur_name)+1]); |
|
236 |
+ sname[k] = new char[strlen(cur_name)+1]; |
|
237 | 237 |
if (!sname[k]) {MemoryError("array for sequence names");} |
238 | 238 |
strcpy(sname[k],cur_name); |
239 | 239 |
} // end if(line contains sequence name) |
... | ... |
@@ -348,11 +348,11 @@ Alignment::Read(FILE* inf, char infile[], char* firstline) |
348 | 348 |
|
349 | 349 |
if (k>=0) //if at least one sequence was read in |
350 | 350 |
{ |
351 |
- seq[k]=new(char[strlen(cur_seq)+2]); |
|
351 |
+ seq[k]=new char[strlen(cur_seq)+2]; |
|
352 | 352 |
if (!seq[k]) MemoryError("array for input sequences"); |
353 |
- X[k]=new(char[strlen(cur_seq)+2]); |
|
353 |
+ X[k]=new char[strlen(cur_seq)+2]; |
|
354 | 354 |
if (!X[k]) MemoryError("array for input sequences"); |
355 |
- I[k]=new(short unsigned int[strlen(cur_seq)+2]); |
|
355 |
+ I[k]=new short unsigned int[strlen(cur_seq)+2]; |
|
356 | 356 |
if (!I[k]) MemoryError("array for input sequences"); |
357 | 357 |
strcpy(seq[k],cur_seq); |
358 | 358 |
} |
... | ... |
@@ -420,7 +420,7 @@ Alignment::Compress(const char infile[]) |
420 | 420 |
/*static short unsigned int h[MAXSEQ];*/ |
421 | 421 |
/*short*/ unsigned int *h = NULL; /* short may lead to overflow for long alignments, FS, r235 -> r236 */ |
422 | 422 |
|
423 |
- h = new(/*short*/ unsigned int[N_in+2]); /* short -> overflow, FS, r235 -> r236 */ |
|
423 |
+ h = new /*short*/ unsigned int[N_in+2]; /* short -> overflow, FS, r235 -> r236 */ |
|
424 | 424 |
float *percent_gaps = NULL; /* FS, 2010-Nov */ |
425 | 425 |
char *match_state = NULL; /* FS, 2010-Nov */ |
426 | 426 |
|
... | ... |
@@ -552,13 +552,13 @@ Alignment::Compress(const char infile[]) |
552 | 552 |
had to move declaration of float *percent_gaps out of switch() |
553 | 553 |
*/ |
554 | 554 |
//float percent_gaps[MAXCOL]; //percentage of gaps in column k (with weighted sequences) |
555 |
- percent_gaps = new(float[par.maxColCnt]); |
|
555 |
+ percent_gaps = new float[par.maxColCnt]; |
|
556 | 556 |
|
557 | 557 |
//determine number of columns L in alignment |
558 | 558 |
L=strlen(seq[kfirst])-1; |
559 | 559 |
|
560 | 560 |
// Conversion to integer representation, checking for unequal lengths and initialization |
561 |
- if (nres==NULL) nres=new(int[N_in]); |
|
561 |
+ if (nres==NULL) nres=new int[N_in]; |
|
562 | 562 |
for (k=0; k<N_in; k++) |
563 | 563 |
{ |
564 | 564 |
if (!keep[k]) continue; |
... | ... |
@@ -780,7 +780,7 @@ Alignment::Compress(const char infile[]) |
780 | 780 |
had to move declaration of float *percent_gaps out of switch() |
781 | 781 |
*/ |
782 | 782 |
//char match_state[MAXCOL]; //1: column assigned to match state 0: insert state |
783 |
- match_state = new(char[par.maxColCnt]); |
|
783 |
+ match_state = new char[par.maxColCnt]; |
|
784 | 784 |
|
785 | 785 |
// Determine number of columns L in alignment |
786 | 786 |
L=strlen(seq[0]+1); |
... | ... |
@@ -942,7 +942,7 @@ Alignment::FilterForDisplay(int max_seqid, int coverage, int qid, float qsc, int |
942 | 942 |
|
943 | 943 |
|
944 | 944 |
if (par.mark) return n_display; |
945 |
- char *dummy = new(char[N_in+1]); |
|
945 |
+ char *dummy = new char[N_in+1]; |
|
946 | 946 |
int vtmp=v, seqid; |
947 | 947 |
v=0; |
948 | 948 |
n_display=0; |
... | ... |
@@ -1005,12 +1005,12 @@ Alignment::Filter2(char keep[], int coverage, int qid, float qsc, int seqid1, in |
1005 | 1005 |
{ |
1006 | 1006 |
// In the beginnning, keep[k] is 1 for all regular amino acid sequences and 0 for all others (ss_conf, ss_pred,...) |
1007 | 1007 |
// In the end, keep[k] will be 1 for all regular representative sequences kept in the alignment, 0 for all others |
1008 |
- char* in=new(char[N_in+1]); // in[k]=1: seq k has been accepted; in[k]=0: seq k has not yet been accepted at current seqid |
|
1009 |
- char* inkk=new(char[N_in+1]); // inkk[k]=1 iff in[ksort[k]]=1 else 0; |
|
1010 |
- int* Nmax=new(int[L+2]); // position-dependent maximum-sequence-identity threshold for filtering /* MR1, used to be called idmax*/ |
|
1011 |
- int* idmaxwin=new(int[L+2]); // minimum value of idmax[i-WFIL,i+WFIL] |
|
1012 |
- int* seqid_prev=new(int[N_in+1]); // maximum-sequence-identity threshold used in previous round of filtering (with lower seqid) |
|
1013 |
- int* N=new(int[L+2]); // N[i] number of already accepted sequences at position i |
|
1008 |
+ char* in=new char[N_in+1]; // in[k]=1: seq k has been accepted; in[k]=0: seq k has not yet been accepted at current seqid |
|
1009 |
+ char* inkk=new char[N_in+1]; // inkk[k]=1 iff in[ksort[k]]=1 else 0; |
|
1010 |
+ int* Nmax=new int[L+2]; // position-dependent maximum-sequence-identity threshold for filtering /* MR1, used to be called idmax*/ |
|
1011 |
+ int* idmaxwin=new int[L+2]; // minimum value of idmax[i-WFIL,i+WFIL] |
|
1012 |
+ int* seqid_prev=new int[N_in+1]; // maximum-sequence-identity threshold used in previous round of filtering (with lower seqid) |
|
1013 |
+ int* N=new int[L+2]; // N[i] number of already accepted sequences at position i |
|