... | ... |
@@ -83,7 +83,7 @@ |
83 | 83 |
U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
84 | 84 |
(2015). msa: an R package for multiple sequence alignment. |
85 | 85 |
\emph{Bioinformatics} \bold{31}(24):3997-3999. DOI: |
86 |
- \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
86 |
+ \doi{10.1093/bioinformatics/btv494}. |
|
87 | 87 |
|
88 | 88 |
\url{http://www.clustal.org/omega/README} |
89 | 89 |
|
... | ... |
@@ -92,7 +92,7 @@ |
92 | 92 |
and Higgins, D. G. (2011) Fast, scalable generation of high-quality |
93 | 93 |
protein multiple sequence alignments using Clustal Omega. |
94 | 94 |
\emph{Mol. Syst. Biol.} \bold{7}:539. DOI: |
95 |
- \href{http://dx.doi.org/10.1038/msb.2011.75}{10.1038/msb.2011.75}. |
|
95 |
+ \doi{10.1038/msb.2011.75}. |
|
96 | 96 |
} |
97 | 97 |
\seealso{\code{\link{msa}}, \code{\linkS4class{MsaAAMultipleAlignment}}, |
98 | 98 |
\code{\linkS4class{MsaDNAMultipleAlignment}}, |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@111693 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -82,7 +82,7 @@ |
82 | 82 |
|
83 | 83 |
U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
84 | 84 |
(2015). msa: an R package for multiple sequence alignment. |
85 |
- \emph{Bioinformatics} (accepted). DOI: |
|
85 |
+ \emph{Bioinformatics} \bold{31}(24):3997-3999. DOI: |
|
86 | 86 |
\href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
87 | 87 |
|
88 | 88 |
\url{http://www.clustal.org/omega/README} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@109473 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -27,12 +27,11 @@ |
27 | 27 |
\item{maxiters}{maximum number of iterations; the default value is 0 |
28 | 28 |
(no limitation). In the original ClustalOmega implementation, this |
29 | 29 |
parameter is called \code{iterations}.} |
30 |
- \item{substitutionMatrix}{substitution matrix for scoring matches and |
|
31 |
- mismatches; can be a real matrix, a file name, or the name of a |
|
32 |
- built-in substitution matrix. In the latter case, the choices |
|
30 |
+ \item{substitutionMatrix}{name of substitution matrix for scoring matches and |
|
31 |
+ mismatches; can be one of the choices |
|
33 | 32 |
\code{"BLOSUM30"}, \code{"BLOSUM40"}, \code{"BLOSUM50"}, |
34 |
- \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"} are |
|
35 |
- supported. This parameter is a new feature - the original ClustalOmega |
|
33 |
+ \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"}. |
|
34 |
+ This parameter is a new feature - the original ClustalOmega |
|
36 | 35 |
implementation does not allow for using a custom substitution matrix.} |
37 | 36 |
\item{type}{type of the input sequences \code{inputSeqs}; |
38 | 37 |
see \code{\link{msa}}.} |
... | ... |
@@ -59,6 +58,10 @@ |
59 | 58 |
specific to ClustalOmega can be passed to ClustalOmega via additional |
60 | 59 |
arguments (see argument \code{help} above). |
61 | 60 |
|
61 |
+ Since ClustalOmega only allows for using built-in amino acid |
|
62 |
+ substitution matrices, it is hardly useful for multiple alignments |
|
63 |
+ of nucleotide sequences. |
|
64 |
+ |
|
62 | 65 |
For a note on the order of output sequences and direct reading from |
63 | 66 |
FASTA files, see \code{\link{msa}}. |
64 | 67 |
} |
... | ... |
@@ -76,6 +79,11 @@ |
76 | 79 |
} |
77 | 80 |
\references{ |
78 | 81 |
\url{http://www.bioinf.jku.at/software/msa} |
82 |
+ |
|
83 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
84 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
85 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
86 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
79 | 87 |
|
80 | 88 |
\url{http://www.clustal.org/omega/README} |
81 | 89 |
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102514 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -9,38 +9,36 @@ |
9 | 9 |
msaClustalOmega(inputSeqs, cluster="default", |
10 | 10 |
gapOpening="default", gapExtension="default", |
11 | 11 |
maxiters="default", substitutionMatrix="default", |
12 |
- type="default", verbose=FALSE, help=FALSE, |
|
13 |
- ...) |
|
12 |
+ type="default", order=c("aligned", "input"), |
|
13 |
+ verbose=FALSE, help=FALSE, ...) |
|
14 | 14 |
} |
15 | 15 |
\arguments{ |
16 | 16 |
\item{inputSeqs}{input sequences; see \code{\link{msa}}. |
17 | 17 |
In the original ClustalOmega implementation, this |
18 |
- parameter is called \code{-infile}.} |
|
18 |
+ parameter is called \code{infile}.} |
|
19 | 19 |
\item{cluster}{The cluster size which should be used. The default is 100. |
20 | 20 |
In the original ClustalOmega implementation, this parameter is called |
21 |
- \code{--cluster-size}.} |
|
22 |
- \item{gapOpening}{gap opening penalty; the default value is -6.0. |
|
23 |
- In order to standardize interfaces, |
|
24 |
- all algorithms consistently use negative values for the gap open |
|
25 |
- penalty. This parameter is a new feature - the original ClustalOmega |
|
26 |
- implementation does not allow for customizing gap penalties.} |
|
27 |
- \item{gapExtension}{gap extension penalty; the default value is -1.0. |
|
28 |
- In order to standardize interfaces, |
|
29 |
- all algorithms consistently use negative values for the gap open |
|
30 |
- penalty. This parameter is a new feature - the original ClustalOmega |
|
31 |
- implementation does not allow for customizing gap penalties.} |
|
21 |
+ \code{cluster-size}.} |
|
22 |
+ \item{gapOpening,gapExtension}{ClustalOmega currently does |
|
23 |
+ not allow adjusting gap penalties; these arguments are only for |
|
24 |
+ future extensions and consistency with the other algorithms |
|
25 |
+ and \code{\link{msa}}. However, setting these parameters to values |
|
26 |
+ other than \code{"default"} will result in a warning.} |
|
32 | 27 |
\item{maxiters}{maximum number of iterations; the default value is 0 |
33 | 28 |
(no limitation). In the original ClustalOmega implementation, this |
34 |
- parameter is called \code{--iterations}.} |
|
29 |
+ parameter is called \code{iterations}.} |
|
35 | 30 |
\item{substitutionMatrix}{substitution matrix for scoring matches and |
36 | 31 |
mismatches; can be a real matrix, a file name, or the name of a |
37 | 32 |
built-in substitution matrix. In the latter case, the choices |
38 |
- \code{"blosum30"}, \code{"blosum40"}, \code{"blosum50"}, |
|
39 |
- \code{"blosum65"}, \code{"blosum80"}, and \code{"gonnet"} are |
|
33 |
+ \code{"BLOSUM30"}, \code{"BLOSUM40"}, \code{"BLOSUM50"}, |
|
34 |
+ \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"} are |
|
40 | 35 |
supported. This parameter is a new feature - the original ClustalOmega |
41 | 36 |
implementation does not allow for using a custom substitution matrix.} |
42 | 37 |
\item{type}{type of the input sequences \code{inputSeqs}; |
43 | 38 |
see \code{\link{msa}}.} |
39 |
+ \item{order}{how the sequences should be ordered in the output object |
|
40 |
+ (see \code{\link{msa}}); in the original ClustalOmega implementation, this |
|
41 |
+ parameter is called \code{output-order}.} |
|
44 | 42 |
\item{verbose}{if \code{TRUE}, the algorithm displays detailed |
45 | 43 |
information and progress messages.} |
46 | 44 |
\item{help}{if \code{TRUE}, information about algorithm-specific |
... | ... |
@@ -102,8 +100,7 @@ mySeqs <- readAAStringSet(filepath) |
102 | 100 |
msaClustalOmega(mySeqs) |
103 | 101 |
|
104 | 102 |
## call msaClustalOmega with custom parameters |
105 |
-msaClustalOmega(mySeqs, gapOpening=-6, gapExtension=-1, auto=FALSE, |
|
106 |
- cluster=120, dealign=FALSE, useKimura=FALSE, |
|
107 |
- verbose=FALSE) |
|
103 |
+msaClustalOmega(mySeqs, auto=FALSE, cluster=120, dealign=FALSE, |
|
104 |
+ useKimura=FALSE, order="input", verbose=FALSE) |
|
108 | 105 |
} |
109 | 106 |
\keyword{manip} |
msa
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,109 @@ |
1 |
+\name{msaClustalOmega} |
|
2 |
+\alias{msaClustalOmega} |
|
3 |
+\title{Multiple Sequence Alignment with ClustalOmega} |
|
4 |
+\description{ |
|
5 |
+ This function calls the multiple sequence alignment |
|
6 |
+ algorithm ClustalOmega. |
|
7 |
+} |
|
8 |
+\usage{ |
|
9 |
+ msaClustalOmega(inputSeqs, cluster="default", |
|
10 |
+ gapOpening="default", gapExtension="default", |
|
11 |
+ maxiters="default", substitutionMatrix="default", |
|
12 |
+ type="default", verbose=FALSE, help=FALSE, |
|
13 |
+ ...) |
|
14 |
+} |
|
15 |
+\arguments{ |
|
16 |
+ \item{inputSeqs}{input sequences; see \code{\link{msa}}. |
|
17 |
+ In the original ClustalOmega implementation, this |
|
18 |
+ parameter is called \code{-infile}.} |
|
19 |
+ \item{cluster}{The cluster size which should be used. The default is 100. |
|
20 |
+ In the original ClustalOmega implementation, this parameter is called |
|
21 |
+ \code{--cluster-size}.} |
|
22 |
+ \item{gapOpening}{gap opening penalty; the default value is -6.0. |
|
23 |
+ In order to standardize interfaces, |
|
24 |
+ all algorithms consistently use negative values for the gap open |
|
25 |
+ penalty. This parameter is a new feature - the original ClustalOmega |
|
26 |
+ implementation does not allow for customizing gap penalties.} |
|
27 |
+ \item{gapExtension}{gap extension penalty; the default value is -1.0. |
|
28 |
+ In order to standardize interfaces, |
|
29 |
+ all algorithms consistently use negative values for the gap open |
|
30 |
+ penalty. This parameter is a new feature - the original ClustalOmega |
|
31 |
+ implementation does not allow for customizing gap penalties.} |
|
32 |
+ \item{maxiters}{maximum number of iterations; the default value is 0 |
|
33 |
+ (no limitation). In the original ClustalOmega implementation, this |
|
34 |
+ parameter is called \code{--iterations}.} |
|
35 |
+ \item{substitutionMatrix}{substitution matrix for scoring matches and |
|
36 |
+ mismatches; can be a real matrix, a file name, or the name of a |
|
37 |
+ built-in substitution matrix. In the latter case, the choices |
|
38 |
+ \code{"blosum30"}, \code{"blosum40"}, \code{"blosum50"}, |
|
39 |
+ \code{"blosum65"}, \code{"blosum80"}, and \code{"gonnet"} are |
|
40 |
+ supported. This parameter is a new feature - the original ClustalOmega |
|
41 |
+ implementation does not allow for using a custom substitution matrix.} |
|
42 |
+ \item{type}{type of the input sequences \code{inputSeqs}; |
|
43 |
+ see \code{\link{msa}}.} |
|
44 |
+ \item{verbose}{if \code{TRUE}, the algorithm displays detailed |
|
45 |
+ information and progress messages.} |
|
46 |
+ \item{help}{if \code{TRUE}, information about algorithm-specific |
|
47 |
+ parameters is displayed. In this case, no multiple sequence |
|
48 |
+ alignment is performed and the function quits after displaying |
|
49 |
+ the additional help information.} |
|
50 |
+ \item{...}{further parameters specific to ClustalOmega; |
|
51 |
+ an overview of parameters that are available in this interface |
|
52 |
+ is shown when calling \code{msaClustalOmega} with \code{help=TRUE}. |
|
53 |
+ For more details, see also the documentation of ClustalOmega.} |
|
54 |
+} |
|
55 |
+\details{This is a function providing the ClustalOmega multiple alignment |
|
56 |
+ algorithm as an R function. It can be used for various types of |
|
57 |
+ sequence data (see \code{inputSeqs} argument above). Parameters that |
|
58 |
+ are common to all multiple sequence alignment algorithms provided by the |
|
59 |
+ \pkg{msa} package are explicitly provided by the function and named |
|
60 |
+ in the same way for all algorithms. Most other parameters that are |
|
61 |
+ specific to ClustalOmega can be passed to ClustalOmega via additional |
|
62 |
+ arguments (see argument \code{help} above). |
|
63 |
+ |
|
64 |
+ For a note on the order of output sequences and direct reading from |
|
65 |
+ FASTA files, see \code{\link{msa}}. |
|
66 |
+} |
|
67 |
+\value{ |
|
68 |
+ Depending on the type of sequences for which it was called, |
|
69 |
+ \code{msaClustalOmega} returns a |
|
70 |
+ \code{\linkS4class{MsaAAMultipleAlignment}}, |
|
71 |
+ \code{\linkS4class{MsaDNAMultipleAlignment}}, or |
|
72 |
+ \code{\linkS4class{MsaRNAMultipleAlignment}} object. |
|
73 |
+ If called with \code{help=TRUE}, \code{msaClustalOmega} returns |
|
74 |
+ an invisible \code{NULL}. |
|
75 |
+} |
|
76 |
+\author{Enrico Bonatesta and Christoph Horejs-Kainrath |
|
77 |
+ <msa@bioinf.jku.at> |
|
78 |
+} |
|
79 |
+\references{ |
|
80 |
+ \url{http://www.bioinf.jku.at/software/msa} |
|
81 |
+ |
|
82 |
+ \url{http://www.clustal.org/omega/README} |
|
83 |
+ |
|
84 |
+ Sievers, F., Wilm, A., Dineen, D., Gibson, T. J., Karplus, K., Li, W., |
|
85 |
+ Lopez, R., McWilliam, H., Remmert, M., Soeding, J., Thompson, J. D., |
|
86 |
+ and Higgins, D. G. (2011) Fast, scalable generation of high-quality |
|
87 |
+ protein multiple sequence alignments using Clustal Omega. |
|
88 |
+ \emph{Mol. Syst. Biol.} \bold{7}:539. DOI: |
|
89 |
+ \href{http://dx.doi.org/10.1038/msb.2011.75}{10.1038/msb.2011.75}. |
|
90 |
+} |
|
91 |
+\seealso{\code{\link{msa}}, \code{\linkS4class{MsaAAMultipleAlignment}}, |
|
92 |
+ \code{\linkS4class{MsaDNAMultipleAlignment}}, |
|
93 |
+ \code{\linkS4class{MsaRNAMultipleAlignment}}, |
|
94 |
+ \code{\linkS4class{MsaMetaData}} |
|
95 |
+} |
|
96 |
+\examples{ |
|
97 |
+## read sequences |
|
98 |
+filepath <- system.file("examples", "exampleAA.fasta", package="msa") |
|
99 |
+mySeqs <- readAAStringSet(filepath) |
|
100 |
+ |
|
101 |
+## call msaClustalOmega with default values |
|
102 |
+msaClustalOmega(mySeqs) |
|
103 |
+ |
|
104 |
+## call msaClustalOmega with custom parameters |
|
105 |
+msaClustalOmega(mySeqs, gapOpening=-6, gapExtension=-1, auto=FALSE, |
|
106 |
+ cluster=120, dealign=FALSE, useKimura=FALSE, |
|
107 |
+ verbose=FALSE) |
|
108 |
+} |
|
109 |
+\keyword{manip} |