git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/branches/RELEASE_3_1/madman/Rpacks/msa@108923 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: msa |
2 | 2 |
Type: Package |
3 | 3 |
Title: Multiple Sequence Alignment |
4 |
-Version: 1.0.1 |
|
5 |
-Date: 2015-06-12 |
|
4 |
+Version: 1.0.2 |
|
5 |
+Date: 2015-09-29 |
|
6 | 6 |
Author: Enrico Bonatesta, Christoph Horejs-Kainrath, Ulrich Bodenhofer |
7 | 7 |
Maintainer: Ulrich Bodenhofer <bodenhofer@bioinf.jku.at> |
8 | 8 |
Description: This package provides a unified R/Bioconductor interface to the |
... | ... |
@@ -57,24 +57,24 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
57 | 57 |
stop("The parameter alFile has an invalid argument!") |
58 | 58 |
|
59 | 59 |
if (!is(x, "MultipleAlignment")) |
60 |
- stop("The parameter x has an invalid argument! \n", |
|
60 |
+ stop("The parameter x has an invalid argument! \n", |
|
61 | 61 |
"x must be a multiple alignment object!") |
62 |
- |
|
62 |
+ |
|
63 | 63 |
|
64 | 64 |
if (output != "asis") |
65 | 65 |
{ |
66 | 66 |
if (!is.numeric(paperWidth) || length(paperWidth) != 1 || |
67 | 67 |
paperWidth <= 0) |
68 |
- stop("The parameter paperWidth must be ", |
|
68 |
+ stop("The parameter paperWidth must be ", |
|
69 | 69 |
"single positive number (unit: inches)!") |
70 | 70 |
|
71 | 71 |
if (!is.numeric(paperHeight) || length(paperHeight) != 1 || |
72 | 72 |
paperHeight <= 0) |
73 |
- stop("The parameter paperHeight must be ", |
|
73 |
+ stop("The parameter paperHeight must be ", |
|
74 | 74 |
"single positive number (unit: inches)!") |
75 | 75 |
|
76 | 76 |
if (!is.numeric(margins) || length(margins) != 2) |
77 |
- stop("The parameter margins must be ", |
|
77 |
+ stop("The parameter margins must be ", |
|
78 | 78 |
"two positive numbers (unit: inches)!") |
79 | 79 |
} |
80 | 80 |
|
... | ... |
@@ -85,14 +85,14 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
85 | 85 |
if (max(subset) < .Machine$integer.max) |
86 | 86 |
subset <- as.integer(subset) |
87 | 87 |
else |
88 |
- stop("One or more values for parameter subset ", |
|
88 |
+ stop("One or more values for parameter subset ", |
|
89 | 89 |
"are larger than integer!") |
90 | 90 |
} |
91 | 91 |
else if (!is.integer(subset)) |
92 | 92 |
stop("The parameter subset has an invalid argument!") |
93 | 93 |
|
94 | 94 |
if (length(subset) < 2) |
95 |
- stop("The parameter subset is expected to be \n", |
|
95 |
+ stop("The parameter subset is expected to be \n", |
|
96 | 96 |
" a vector with at least 2 entries!") |
97 | 97 |
|
98 | 98 |
if (!all(subset %in% 1:nrow(x))) |
... | ... |
@@ -122,7 +122,7 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
122 | 122 |
|
123 | 123 |
if (!is.numeric(consensusThreshold) || length(consensusThreshold) != 1 || |
124 | 124 |
consensusThreshold < 0 || consensusThreshold > 100) |
125 |
- stop("The parameter consensusThreshold must be \n", |
|
125 |
+ stop("The parameter consensusThreshold must be \n", |
|
126 | 126 |
"a single numeric between 0 and 100 !") |
127 | 127 |
|
128 | 128 |
if (shadingMode %in% c("identical", "similar")) |
... | ... |
@@ -151,9 +151,9 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
151 | 151 |
"accessible area")) |
152 | 152 |
else |
153 | 153 |
stop("Missing shadingModeArg for functional shading mode. \n", |
154 |
- "Valid values are: \n", |
|
155 |
- "\"charge\", \n", |
|
156 |
- "\"hydropathy\", \n", |
|
154 |
+ "Valid values are: \n", |
|
155 |
+ "\"charge\", \n", |
|
156 |
+ "\"hydropathy\", \n", |
|
157 | 157 |
"\"structure\", \n", |
158 | 158 |
"\"chemical\",\n", |
159 | 159 |
" \"rasmol\",\n", |
... | ... |
@@ -203,7 +203,7 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
203 | 203 |
if (output != "asis") |
204 | 204 |
{ |
205 | 205 |
if (!is.character(file) || length(file) > 1) |
206 |
- stop("The argument for parameter file must be \n", |
|
206 |
+ stop("The argument for parameter file must be \n", |
|
207 | 207 |
"a single character string!") |
208 | 208 |
|
209 | 209 |
if (substr(file, nchar(file) - 2, nchar(file)) != output) |
... | ... |
@@ -241,6 +241,11 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
241 | 241 |
|
242 | 242 |
texOutput <- paste0("\\begin{texshade}{", stratifyFilenames(alFile), "}") |
243 | 243 |
|
244 |
+ if (is(x, "AAMultipleAlignment")) |
|
245 |
+ texOutput <- c(texOutput, "\\seqtype{P}") |
|
246 |
+ else |
|
247 |
+ texOutput <- c(texOutput, "\\seqtype{N}") |
|
248 |
+ |
|
244 | 249 |
if (length(toShow) == 1) |
245 | 250 |
{ |
246 | 251 |
if (sum(width(toShow)) < ncol(x)) |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
##345678901234567890123456789012345678901234567890123456789012345678901234567890 |
2 | 2 |
citHeader("To cite package 'msa' in publications use:") |
3 | 3 |
|
4 |
-#year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date, perl = TRUE) |
|
4 |
+#year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date, perl = TRUE) |
|
5 | 5 |
#vers <- paste("R package version", meta$Version) |
6 | 6 |
desc <- packageDescription("msa") |
7 | 7 |
year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", desc$Date) |
... | ... |
@@ -9,19 +9,21 @@ vers <- paste("R package version", desc$Version) |
9 | 9 |
url <- desc$URL |
10 | 10 |
|
11 | 11 |
|
12 |
-citEntry(entry="Manual", |
|
13 |
- title = "msa -- An R Package for Multiple Sequence Alignment.", |
|
14 |
- author = personList(as.person("Enrico Bonatesta"), as.person("Christoph Horejs-Kainrath"), as.person("Ulrich Bodenhofer")), |
|
15 |
- year = year, |
|
16 |
- note = vers, |
|
17 |
- organization=paste("Institute of Bioinformatics", |
|
18 |
- "Johannes Kepler University", sep=", "), |
|
19 |
- address="Linz, Austria", |
|
20 |
- url=url, |
|
21 |
- textVersion = |
|
22 |
- paste("Enrico Bonatesta, Christoph Horejs-Kainrath and Ulrich Bodenhofer (", year, "). ", |
|
23 |
- "msa -- An R Package for Multiple Sequence Alignment. ", |
|
24 |
- vers, ".", sep="") |
|
12 |
+citEntry(entry="Article", |
|
13 |
+ title = "msa: an R package for multiple sequence alignment", |
|
14 |
+ author = personList(as.person("Ulrich Bodenhofer"), |
|
15 |
+ as.person("Enrico Bonatesta"), |
|
16 |
+ as.person("Christoph Horejs-Kainrath"), |
|
17 |
+ as.person("Sepp Hochreiter")), |
|
18 |
+ journal="Bioinformatics", |
|
19 |
+ note="(accepted)", |
|
20 |
+ year="2015", |
|
21 |
+ doi="10.1093/bioinformatics/btv494", |
|
22 |
+ textVersion = |
|
23 |
+ paste("U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter (2015)", |
|
24 |
+ "msa: an R package for multiple sequence alignment.", |
|
25 |
+ "Bioinformatics (accepted).", |
|
26 |
+ "DOI: 10.1093/bioinformatics/btv176.") |
|
25 | 27 |
) |
26 | 28 |
|
27 | 29 |
citFooter( |
... | ... |
@@ -37,6 +37,14 @@ The following slots are defined for \code{MsaMetaData} objects: |
37 | 37 |
\author{Enrico Bonatesta and Christoph Horejs-Kainrath |
38 | 38 |
<msa@bioinf.jku.at> |
39 | 39 |
} |
40 |
+\references{ |
|
41 |
+ \url{http://www.bioinf.jku.at/software/msa} |
|
42 |
+ |
|
43 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
44 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
45 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
46 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
47 |
+} |
|
40 | 48 |
\seealso{\code{\link{msa}}, \code{\link{msaClustalW}}, |
41 | 49 |
\code{\link{msaClustalOmega}}, \code{\link{msaMuscle}}, |
42 | 50 |
\code{\linkS4class{MsaAAMultipleAlignment}}, |
... | ... |
@@ -81,6 +81,14 @@ |
81 | 81 |
\author{Enrico Bonatesta and Christoph Horejs-Kainrath |
82 | 82 |
<msa@bioinf.jku.at> |
83 | 83 |
} |
84 |
+\references{ |
|
85 |
+ \url{http://www.bioinf.jku.at/software/msa} |
|
86 |
+ |
|
87 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
88 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
89 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
90 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
91 |
+} |
|
84 | 92 |
\seealso{\code{\link{msa}}, \code{\link{msaClustalW}}, |
85 | 93 |
\code{\link{msaClustalOmega}}, \code{\link{msaMuscle}}, |
86 | 94 |
\code{\linkS4class{MsaMetaData}} |
... | ... |
@@ -27,6 +27,11 @@ |
27 | 27 |
} |
28 | 28 |
\references{ |
29 | 29 |
\url{http://www.bioinf.jku.at/software/msa} |
30 |
+ |
|
31 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
32 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
33 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
34 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
30 | 35 |
|
31 | 36 |
Thompson, J. D., Higgins, D. G., and Gibson, T. J. (1994) |
32 | 37 |
CLUSTAL W: improving the sensitivity of progressive multiple sequence |
... | ... |
@@ -132,6 +132,11 @@ |
132 | 132 |
\references{ |
133 | 133 |
\url{http://www.bioinf.jku.at/software/msa} |
134 | 134 |
|
135 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
136 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
137 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
138 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
139 |
+ |
|
135 | 140 |
\url{http://www.clustal.org/download/clustalw_help.txt} |
136 | 141 |
|
137 | 142 |
\url{http://www.clustal.org/omega/README} |
... | ... |
@@ -76,6 +76,11 @@ |
76 | 76 |
} |
77 | 77 |
\references{ |
78 | 78 |
\url{http://www.bioinf.jku.at/software/msa} |
79 |
+ |
|
80 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
81 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
82 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
83 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
79 | 84 |
|
80 | 85 |
\url{http://www.clustal.org/omega/README} |
81 | 86 |
|
... | ... |
@@ -79,7 +79,12 @@ |
79 | 79 |
} |
80 | 80 |
\references{ |
81 | 81 |
\url{http://www.bioinf.jku.at/software/msa} |
82 |
- |
|
82 |
+ |
|
83 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
84 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
85 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
86 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
87 |
+ |
|
83 | 88 |
\url{http://www.clustal.org/download/clustalw_help.txt} |
84 | 89 |
|
85 | 90 |
Thompson, J. D., Higgins, D. G., and Gibson, T. J. (1994) |
... | ... |
@@ -85,6 +85,11 @@ |
85 | 85 |
} |
86 | 86 |
\references{ |
87 | 87 |
\url{http://www.bioinf.jku.at/software/msa} |
88 |
+ |
|
89 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
90 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
91 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
92 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
88 | 93 |
|
89 | 94 |
\url{http://www.drive5.com/muscle/muscle.html} |
90 | 95 |
|
... | ... |
@@ -188,9 +188,14 @@ |
188 | 188 |
<msa@bioinf.jku.at> |
189 | 189 |
} |
190 | 190 |
\references{ |
191 |
-\url{http://www.bioinf.jku.at/software/msa} |
|
191 |
+ \url{http://www.bioinf.jku.at/software/msa} |
|
192 |
+ |
|
193 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
194 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
195 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
196 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
192 | 197 |
|
193 |
-\url{https://www.ctan.org/pkg/texshade} |
|
198 |
+ \url{https://www.ctan.org/pkg/texshade} |
|
194 | 199 |
|
195 | 200 |
Beitz, E. (2000) TeXshade: shading and labeling of multiple |
196 | 201 |
sequence alignments using LaTeX2e |
... | ... |
@@ -9,6 +9,38 @@ |
9 | 9 |
pages = {135--139} |
10 | 10 |
} |
11 | 11 |
|
12 |
+@article{Bodenhofer2015, |
|
13 |
+ author = {U. Bodenhofer and E. Bonatesta and |
|
14 |
+ C. Horej\v{s}-Kainrath and S. Hochreiter}, |
|
15 |
+ title = {{msa}: an {R} package for multiple sequence |
|
16 |
+ alignment}, |
|
17 |
+ journal = {Bioinformatics}, |
|
18 |
+ year = 2015, |
|
19 |
+ note = {(accepted)} |
|
20 |
+} |
|
21 |
+ |
|
22 |
+@article{Brudno2003, |
|
23 |
+ author = {M. Brudno and M. Chapman and B. G\"ottgens and |
|
24 |
+ S. Batzoglou and B. Morgenstern}, |
|
25 |
+ title = {Fast and sensitive multiple alignment of large |
|
26 |
+ genomic sequences}, |
|
27 |
+ journal = {BMC Bioinformatics}, |
|
28 |
+ year = 2003, |
|
29 |
+ volume = 4, |
|
30 |
+ pages = 66 |
|
31 |
+} |
|
32 |
+ |
|
33 |
+@article{Dagum1998, |
|
34 |
+ author = {L. Dagum and R. Menon}, |
|
35 |
+ title = {{OpenMP}: an industry standard {API} for |
|
36 |
+ shared-memory programming}, |
|
37 |
+ journal = {Computational Science \&\ Engineering, IEEE}, |
|
38 |
+ volume = 5, |
|
39 |
+ number = 1, |
|
40 |
+ pages = {46--55}, |
|
41 |
+ year = 1998 |
|
42 |
+} |
|
43 |
+ |
|
12 | 44 |
@article{Eddelbuettel2011, |
13 | 45 |
title = {{Rcpp}: Seamless {R} and {C++} Integration}, |
14 | 46 |
author = {D. Eddelbuettel and R. Fran\c{c}ois}, |
... | ... |
@@ -49,6 +81,16 @@ |
49 | 81 |
year = 2004 |
50 | 82 |
} |
51 | 83 |
|
84 |
+@article{Edgar2006, |
|
85 |
+ author = {R. C. Edgar and S. Batzoglou}, |
|
86 |
+ title = {Multiple sequence alignment}, |
|
87 |
+ journal = {Curr. Opin. Struct. Biol.}, |
|
88 |
+ volume = 16, |
|
89 |
+ number = 3, |
|
90 |
+ pages = {368--373}, |
|
91 |
+ year = 2006 |
|
92 |
+} |
|
93 |
+ |
|
52 | 94 |
@book{Lamport1999, |
53 | 95 |
title = {{\LaTeX} --- A Document Preparation System. User's |
54 | 96 |
Guide and Reference Manual}, |
... | ... |
@@ -58,6 +100,20 @@ |
58 | 100 |
year = 1999 |
59 | 101 |
} |
60 | 102 |
|
103 |
+@article{Larkin2007, |
|
104 |
+ author = {M. A. Larkin and G. Blackshields and N. P. Brown and |
|
105 |
+ R. Chenna and P. A. McGettigan and H. McWilliam and |
|
106 |
+ F. Valentin and I. M. Wallace and A. Wilm and |
|
107 |
+ R. Lopez and J. D. Thompson and T. J. Gibson and |
|
108 |
+ D. G. Higgins}, |
|
109 |
+ title = {{Clustal W} and {Clustal X} version 2.0}, |
|
110 |
+ journal = {Bioinformatics}, |
|
111 |
+ year = 2007, |
|
112 |
+ volume = 23, |
|
113 |
+ number = 21, |
|
114 |
+ pages = {2947--2948} |
|
115 |
+} |
|
116 |
+ |
|
61 | 117 |
@inproceedings{Leisch2002, |
62 | 118 |
author = {F. Leisch}, |
63 | 119 |
title = {Sweave: dynamic generation of statistical reports |
... | ... |
@@ -71,6 +127,17 @@ |
71 | 127 |
year = 2002 |
72 | 128 |
} |
73 | 129 |
|
130 |
+@article{Loytynoja2012, |
|
131 |
+ author = {A. L\"oytynoja and A. J. Vilella and N. Goldman}, |
|
132 |
+ title = {Accurate extension of multiple sequence alignments |
|
133 |
+ using a phylogeny-aware graph algorithm}, |
|
134 |
+ journal = {Bioinformatics}, |
|
135 |
+ year = 2012, |
|
136 |
+ volume = 28, |
|
137 |
+ number = 13, |
|
138 |
+ pages = {1684--1691} |
|
139 |
+} |
|
140 |
+ |
|
74 | 141 |
@article{Morgenstern1999, |
75 | 142 |
author = {B. Morgenstern}, |
76 | 143 |
title = {{DIALIGN 2}: improvement of the segment-to-segment |
... | ... |
@@ -103,6 +170,26 @@ |
103 | 170 |
pages = {205--217} |
104 | 171 |
} |
105 | 172 |
|
173 |
+@article{Notredame2007, |
|
174 |
+ author = {C. Notredame}, |
|
175 |
+ title = {Recent Evolutions of Multiple Sequence Alignment |
|
176 |
+ Algorithms}, |
|
177 |
+ journal = {PLoS Comput. Biol.}, |
|
178 |
+ year = 2007, |
|
179 |
+ volume = 3, |
|
180 |
+ number = 8, |
|
181 |
+ pages = {e123} |
|
182 |
+} |
|
183 |
+ |
|
184 |
+@manual{Pages2015, |
|
185 |
+ title = {Biostrings: String objects representing biological |
|
186 |
+ sequences, and matching algorithms}, |
|
187 |
+ author = {H. Pag\`es and P. Aboyoun and R. Gentleman and |
|
188 |
+ S. DebRoy}, |
|
189 |
+ note = {R package version 2.36.1}, |
|
190 |
+ year = 2015 |
|
191 |
+} |
|
192 |
+ |
|
106 | 193 |
@article{Sievers2011, |
107 | 194 |
author = {F. Sievers and A. Wilm and D. Dineen and |
108 | 195 |
T. J. Gibson and K. Karplus and W. Li and R. Lopez |
... | ... |
@@ -116,6 +203,16 @@ |
116 | 203 |
year = 2011 |
117 | 204 |
} |
118 | 205 |
|
206 |
+@article{Szalkowski2012, |
|
207 |
+ author = {A. M. Szalkowski}, |
|
208 |
+ title = {Fast and robust multiple sequence alignment with |
|
209 |
+ phylogeny-aware gap placement}, |
|
210 |
+ journal = {BMC Bioinformatics}, |
|
211 |
+ year = 2012, |
|
212 |
+ volume = 13, |
|
213 |
+ pages = 129 |
|
214 |
+} |
|
215 |
+ |
|
119 | 216 |
@article{Thompson1994, |
120 | 217 |
author = {J. D. Thompson and D. G. Higgins and T. J. Gibson}, |
121 | 218 |
title = {{CLUSTAL W}: improving the sensitivity of |
... | ... |
@@ -129,6 +226,16 @@ |
129 | 226 |
pages = {4673--4680} |
130 | 227 |
} |
131 | 228 |
|
229 |
+@article{Wallace2005, |
|
230 |
+ author = {I. M. Wallace and G. Blackshields and D. G. Higgins}, |
|
231 |
+ title = {Multiple sequence alignments}, |
|
232 |
+ journal = {Curr. Opin. Struct. Biol.}, |
|
233 |
+ volume = 15, |
|
234 |
+ number = 3, |
|
235 |
+ pages = {261--266}, |
|
236 |
+ year = 2005 |
|
237 |
+} |
|
238 |
+ |
|
132 | 239 |
@book{Xie2014, |
133 | 240 |
title = {Dynamic Documents with R and knitr}, |
134 | 241 |
author = {Y. Xie}, |
... | ... |
@@ -83,15 +83,16 @@ first have to read introductory literature on the subjects mentioned above. |
83 | 83 |
|
84 | 84 |
\section{Introduction} |
85 | 85 |
Multiple sequence alignment is one of the most fundamental tasks in |
86 |
-bioinformatics. Algorithms like ClustalW~\cite{Thompson1994}, |
|
86 |
+bioinformatics. Algorithms like ClustalW~\cite{Thompson1994,Larkin2007}, |
|
87 | 87 |
ClustalOmega~\cite{Sievers2011}, and MUSCLE~\cite{Edgar2004b,Edgar2004a} |
88 |
-are well known and |
|
89 |
-widely used. However, all these algorithms are implemented as stand-alone |
|
88 |
+are well known and widely used (for more comprehensive overviews of methods, |
|
89 |
+see \cite{Edgar2006,Notredame2007,Wallace2005}). |
|
90 |
+However, all these algorithms are implemented as stand-alone |
|
90 | 91 |
commmand line programs without any integration into the R/Bioconductor |
91 | 92 |
ecosystem. Before the \MSA\ package, only the \verb+muscle+ package has |
92 | 93 |
been available in \R, but no other multiple sequence alignment algorithm, |
93 | 94 |
although the \verb+Biostrings+ package has provided data types for |
94 |
-representing multiple sequence alignments for quite some time. |
|
95 |
+representing multiple sequence alignments for quite some time \cite{Pages2015}. |
|
95 | 96 |
The \MSA\ package aims to close that gap by |
96 | 97 |
providing a unified R interface to the multiple sequence alignment algorithms |
97 | 98 |
ClustalW, ClustalOmega, and MUSCLE. The package requires no additional |
... | ... |
@@ -541,12 +542,6 @@ has a minor memory leak, but the loss of data is so small that no major |
541 | 542 |
problems are to be expected except for thousands of executions of |
542 | 543 |
ClustalOmega. |
543 | 544 |
|
544 |
-\subsubsection*{\shade: Alignment of Sequence Logos} |
|
545 |
- |
|
546 |
-\shade\ has some issues with aligning the sequence logo to the multiple |
|
547 |
-sequence alignment. Under which conditions this happens, would require a |
|
548 |
-more detailed investigation. |
|
549 |
- |
|
550 | 545 |
\subsubsection*{ClustalOmega vs.\ Older GCC Versions on Linux/Unix} |
551 | 546 |
|
552 | 547 |
We have encountered peculiar behavior of ClustalOmega if the package was |
... | ... |
@@ -562,7 +557,7 @@ to update to a newer GCC version and re-install the package. |
562 | 557 |
\subsubsection*{ClustalOmega: OpenMP Support on Mac OS} |
563 | 558 |
|
564 | 559 |
ClustalOmega is implemented to make use of OpenMP (if available on the |
565 |
-target platform). Due to issues on one of the Bioconductor build servers |
|
560 |
+target platform) \cite{Dagum1998}. Due to issues on one of the Bioconductor build servers |
|
566 | 561 |
running Mac OS, we had to deactivate OpenMP generally for Mac OS platforms. |
567 | 562 |
If a Mac OS user wants to re-activate OpenMP, he/she should download the |
568 | 563 |
source package tarball, untar it, comment/uncomment the corresponding line in |
... | ... |
@@ -580,7 +575,8 @@ function. This interface will be improved in future versions. |
580 | 575 |
We envision the following changes/extensions in future versions of the package: |
581 | 576 |
\begin{itemize} |
582 | 577 |
\item Integration of more multiple sequence alignment algorithms, such as, |
583 |
- T-Coffee \cite{Notredame2000} or DIALIGN \cite{Morgenstern1999} |
|
578 |
+ T-Coffee \cite{Notredame2000} or others |
|
579 |
+ \cite{Brudno2003,Loytynoja2012,Morgenstern1999,Szalkowski2012} |
|
584 | 580 |
\item Support for retrieving guide trees from the multiple sequence |
585 | 581 |
alignment algorithms |
586 | 582 |
\item Interface to methods computing phylogenetic trees (e.g.\ as |
... | ... |
@@ -595,10 +591,12 @@ We envision the following changes/extensions in future versions of the package: |
595 | 591 |
\section{How to Cite This Package} |
596 | 592 |
|
597 | 593 |
If you use this package for research that is published later, you are kindly |
598 |
-asked to cite it as follows: |
|
594 |
+asked to cite it as follows \cite{Bodenhofer2015}: |
|
599 | 595 |
\begin{quotation} |
600 |
-\noindent E.~Bonatesta, C.~Horejs-Kainrath, and U.~Bodenhofer, (2015). |
|
601 |
-msa: An R Package for Multiple Sequence Alignment. |
|
596 |
+\noindent U.~Bodenhofer, E.~Bonatesta, C.~Horej\v{s}-Kainrath, and Sepp Hochreiter(2015). |
|
597 |
+msa: an R Package for multiple sequence alignment. |
|
598 |
+{\em Bioinformatics} (accepted). DOI: |
|
599 |
+\href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
602 | 600 |
\end{quotation} |
603 | 601 |
To obtain a Bib\TeX\ entries of the reference, enter the |
604 | 602 |
following into your R session: |
... | ... |
@@ -613,6 +611,11 @@ bibliography below). |
613 | 611 |
\section{Change Log} |
614 | 612 |
|
615 | 613 |
\begin{description} |
614 |
+\item[Version 1.0.2:] \mbox{ } \newline |
|
615 |
+ \begin{itemize} |
|
616 |
+ \item fix of improperly aligned sequence logos produced by \verb+msaPrettyPrint()+ |
|
617 |
+ \item updated citation information |
|
618 |
+ \end{itemize} |
|
616 | 619 |
\item[Version 1.0.1:] fix of \verb+msa()+ function |
617 | 620 |
\item[Version 1.0.0:] first official release as part of Bioconductor 3.1 |
618 | 621 |
\end{description} |
... | ... |
@@ -628,6 +631,22 @@ E.~Beitz. |
628 | 631 |
using {\LaTeX2e}. |
629 | 632 |
\newblock {\em Bioinformatics}, 16(2):135--139, 2000. |
630 | 633 |
|
634 |
+\bibitem{Bodenhofer2015} |
|
635 |
+U.~Bodenhofer, E.~Bonatesta, C.~Horej\v{s}-Kainrath, and S.~Hochreiter. |
|
636 |
+\newblock {msa}: an {R} package for multiple sequence alignment. |
|
637 |
+\newblock {\em Bioinformatics}, 2015. |
|
638 |
+\newblock (accepted). |
|
639 |
+ |
|
640 |
+\bibitem{Brudno2003} |
|
641 |
+M.~Brudno, M.~Chapman, B.~G\"ottgens, S.~Batzoglou, and B.~Morgenstern. |
|
642 |
+\newblock Fast and sensitive multiple alignment of large genomic sequences. |
|
643 |
+\newblock {\em BMC Bioinformatics}, 4:66, 2003. |
|
644 |
+ |
|
645 |
+\bibitem{Dagum1998} |
|
646 |
+L.~Dagum and R.~Menon. |
|
647 |
+\newblock {OpenMP}: an industry standard {API} for shared-memory programming. |
|
648 |
+\newblock {\em Computational Science \&\ Engineering, IEEE}, 5(1):46--55, 1998. |
|
649 |
+ |
|
631 | 650 |
\bibitem{Edgar2004b} |
632 | 651 |
R.~C. Edgar. |
633 | 652 |
\newblock {MUSCLE}: a multiple sequence alignment method with reduced time and |
... | ... |
@@ -640,12 +659,24 @@ R.~C. Edgar. |
640 | 659 |
throughput. |
641 | 660 |
\newblock {\em Nucleic Acids Res.}, 32(5):1792--1797, 2004. |
642 | 661 |
|
662 |
+\bibitem{Edgar2006} |
|
663 |
+R.~C. Edgar and S.~Batzoglou. |
|
664 |
+\newblock Multiple sequence alignment. |
|
665 |
+\newblock {\em Curr. Opin. Struct. Biol.}, 16(3):368--373, 2006. |
|
666 |
+ |
|
643 | 667 |
\bibitem{Lamport1999} |
644 | 668 |
L.~Lamport. |
645 | 669 |
\newblock {\em {\LaTeX} --- A Document Preparation System. User's Guide and |
646 | 670 |
Reference Manual}. |
647 | 671 |
\newblock Addison-Wesley Longman, Amsterdam, 1999. |
648 | 672 |
|
673 |
+\bibitem{Larkin2007} |
|
674 |
+M.~A. Larkin, G.~Blackshields, N.~P. Brown, R.~Chenna, P.~A. McGettigan, |
|
675 |
+ H.~McWilliam, F.~Valentin, I.~M. Wallace, A.~Wilm, R.~Lopez, J.~D. Thompson, |
|
676 |
+ T.~J. Gibson, and D.~G. Higgins. |
|
677 |
+\newblock {Clustal W} and {Clustal X} version 2.0. |
|
678 |
+\newblock {\em Bioinformatics}, 23(21):2947--2948, 2007. |
|
679 |
+ |
|
649 | 680 |
\bibitem{Leisch2002} |
650 | 681 |
F.~Leisch. |
651 | 682 |
\newblock Sweave: dynamic generation of statistical reports using literate data |
... | ... |
@@ -654,6 +685,12 @@ F.~Leisch. |
654 | 685 |
Proceedings in Computational Statistics}, pages 575--580, Heidelberg, 2002. |
655 | 686 |
Physica-Verlag. |
656 | 687 |
|
688 |
+\bibitem{Loytynoja2012} |
|
689 |
+A.~L\"oytynoja, A.~J. Vilella, and N.~Goldman. |
|
690 |
+\newblock Accurate extension of multiple sequence alignments using a |
|
691 |
+ phylogeny-aware graph algorithm. |
|
692 |
+\newblock {\em Bioinformatics}, 28(13):1684--1691, 2012. |
|
693 |
+ |
|
657 | 694 |
\bibitem{Morgenstern1999} |
658 | 695 |
B.~Morgenstern. |
659 | 696 |
\newblock {DIALIGN 2}: improvement of the segment-to-segment approach to |
... | ... |
@@ -666,12 +703,23 @@ N.~Nethercote and J.~Seward. |
666 | 703 |
\newblock In {\em Proc. of the ACM SIGPLAN 2007 Conf. on Programming Language |
667 | 704 |
Design and Implementation}, San Diego, CA, 2007. |
668 | 705 |
|
706 |
+\bibitem{Notredame2007} |
|
707 |
+C.~Notredame. |
|
708 |
+\newblock Recent evolutions of multiple sequence alignment algorithms. |
|
709 |
+\newblock {\em PLoS Comput. Biol.}, 3(8):e123, 2007. |
|
710 |
+ |
|
669 | 711 |
\bibitem{Notredame2000} |
670 | 712 |
C.~Notredame, D.~G. Higgins, and J.~Heringa. |
671 | 713 |
\newblock {T-Coffee}: A novel method for fast and accurate multiple sequence |
672 | 714 |
alignment. |
673 | 715 |
\newblock {\em J. Mol. Biol.}, 302(1):205--217, 2000. |
674 | 716 |
|
717 |
+\bibitem{Pages2015} |
|
718 |
+H.~Pag\`es, P.~Aboyoun, R.~Gentleman, and S.~DebRoy. |
|
719 |
+\newblock {\em Biostrings: String objects representing biological sequences, |
|
720 |
+ and matching algorithms}, 2015. |
|
721 |
+\newblock R package version 2.36.1. |
|
722 |
+ |
|
675 | 723 |
\bibitem{Sievers2011} |
676 | 724 |
F.~Sievers, A.~Wilm, D.~Dineen, T.~J. Gibson, K.~Karplus, W.~Li, R.~Lopez, |
677 | 725 |
H.~McWilliam, M.~Remmert, J.~S\"oding, J.~D. Thompson, and D.~G. Higgins. |
... | ... |
@@ -679,6 +727,12 @@ F.~Sievers, A.~Wilm, D.~Dineen, T.~J. Gibson, K.~Karplus, W.~Li, R.~Lopez, |
679 | 727 |
alignments using {Clustal Omega}. |
680 | 728 |
\newblock {\em Mol. Syst. Biol.}, 7:539, 2011. |
681 | 729 |
|
730 |
+\bibitem{Szalkowski2012} |
|
731 |
+A.~M. Szalkowski. |
|
732 |
+\newblock Fast and robust multiple sequence alignment with phylogeny-aware gap |
|
733 |
+ placement. |
|
734 |
+\newblock {\em BMC Bioinformatics}, 13:129, 2012. |
|
735 |
+ |
|
682 | 736 |
\bibitem{Thompson1994} |
683 | 737 |
J.~D. Thompson, D.~G. Higgins, and T.~J. Gibson. |
684 | 738 |
\newblock {CLUSTAL W}: improving the sensitivity of progressive multiple |
... | ... |
@@ -686,6 +740,11 @@ J.~D. Thompson, D.~G. Higgins, and T.~J. Gibson. |
686 | 740 |
penalties and weight matrix choice. |
687 | 741 |
\newblock {\em Nucleic Acids Res.}, 22(22):4673--4680, 2004. |
688 | 742 |
|
743 |
+\bibitem{Wallace2005} |
|
744 |
+I.~M. Wallace, G.~Blackshields, and D.~G. Higgins. |
|
745 |
+\newblock Multiple sequence alignments. |
|
746 |
+\newblock {\em Curr. Opin. Struct. Biol.}, 15(3):261--266, 2005. |
|
747 |
+ |
|
689 | 748 |
\bibitem{Xie2014} |
690 | 749 |
Y.~Xie. |
691 | 750 |
\newblock {\em Dynamic Documents with R and knitr}. |