git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@109473 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: msa |
2 | 2 |
Type: Package |
3 | 3 |
Title: Multiple Sequence Alignment |
4 |
-Version: 1.1.1 |
|
5 |
-Date: 2015-06-12 |
|
4 |
+Version: 1.1.2 |
|
5 |
+Date: 2015-10-10 |
|
6 | 6 |
Author: Enrico Bonatesta, Christoph Horejs-Kainrath, Ulrich Bodenhofer |
7 | 7 |
Maintainer: Ulrich Bodenhofer <bodenhofer@bioinf.jku.at> |
8 | 8 |
Description: This package provides a unified R/Bioconductor interface to the |
... | ... |
@@ -17,15 +17,18 @@ URL: http://www.bioinf.jku.at/software/msa/ |
17 | 17 |
License: GPL (>= 2) |
18 | 18 |
Copyright: See file inst/COPYRIGHT |
19 | 19 |
Depends: R (>= 3.1.0), methods, Biostrings (>= 2.30.0) |
20 |
-Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0), S4Vectors, |
|
21 |
- tools |
|
20 |
+Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0), |
|
21 |
+ S4Vectors, tools |
|
22 | 22 |
Suggests: Biobase, knitr |
23 | 23 |
LinkingTo: Rcpp |
24 | 24 |
SystemRequirements: |
25 | 25 |
VignetteBuilder: knitr |
26 | 26 |
LazyLoad: yes |
27 |
-Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R |
|
28 |
- helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R |
|
29 |
- print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R |
|
30 |
- msaClustalOmega.R |
|
31 |
-biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison, Sequencing |
|
27 |
+Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R |
|
28 |
+ helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R |
|
29 |
+ print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R |
|
30 |
+ msaClustalOmega.R |
|
31 |
+biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison, |
|
32 |
+ Sequencing |
|
33 |
+NeedsCompilation: yes |
|
34 |
+Packaged: 2015-10-07 16:20:45 UTC; bodenhof |
... | ... |
@@ -110,11 +110,11 @@ msaClustalOmega <- function(inputSeqs, |
110 | 110 |
if (is.null(substitutionMatrix) || |
111 | 111 |
identical(substitutionMatrix, "default")) { |
112 | 112 |
substitutionMatrix <- NULL |
113 |
- } else if (is.character(substitutionMatrix) && |
|
114 |
- !is.matrix(substitutionMatrix)) { |
|
113 |
+ } else { |
|
115 | 114 |
possibleValues <- c("BLOSUM30", "BLOSUM40", "BLOSUM50", |
116 | 115 |
"BLOSUM65", "BLOSUM80", "Gonnet") |
117 |
- if (!(substitutionMatrix %in% possibleValues)){ |
|
116 |
+ if (!is.character(substitutionMatrix) || |
|
117 |
+ !(substitutionMatrix %in% possibleValues)){ |
|
118 | 118 |
##create a string with all possible Values named text |
119 | 119 |
text <- "" |
120 | 120 |
text <- paste(possibleValues, collapse=", ") |
... | ... |
@@ -129,6 +129,8 @@ msaClustalW <- function(inputSeqs, |
129 | 129 |
##name of a matrix that should be used |
130 | 130 |
!is.matrix(substitutionMatrix)) { |
131 | 131 |
##check whether value is BLOSUM, PAM, GONNET, or ID; |
132 |
+ if (type == "protein") |
|
133 |
+ { |
|
132 | 134 |
possibleValues <- c("blosum", "pam", "gonnet", "id") |
133 | 135 |
if (!(substitutionMatrix %in% possibleValues)){ |
134 | 136 |
##create a string with all possible Values named text |
... | ... |
@@ -138,15 +140,69 @@ msaClustalW <- function(inputSeqs, |
138 | 140 |
"only can have the values: \n", text) |
139 | 141 |
} |
140 | 142 |
params[["substitutionMatrixIsStringFlag"]] <- TRUE |
143 |
+ } |
|
144 |
+ else |
|
145 |
+ { |
|
146 |
+ possibleValues <- c("iub", "clustalw") |
|
147 |
+ if (!(substitutionMatrix %in% possibleValues)){ |
|
148 |
+ ##create a string with all possible Values named text |
|
149 |
+ text <- "" |
|
150 |
+ text <- paste(possibleValues, collapse=", ") |
|
151 |
+ stop("The parameter substitutionMatrix ", |
|
152 |
+ "only can have the values: \n", text) |
|
153 |
+ } |
|
154 |
+ |
|
155 |
+ params[["substitutionMatrixIsStringFlag"]] <- FALSE |
|
156 |
+ params[["substitutionMatrixIsDefaultFlag"]] <- TRUE |
|
157 |
+ params[["dnamatrix"]] <- substitutionMatrix |
|
158 |
+ substitutionMatrix <- "default" |
|
159 |
+ } |
|
141 | 160 |
} else { |
142 | 161 |
##real matrix |
143 |
- if (isSymmetric(substitutionMatrix)) { |
|
144 |
- if (nrow(substitutionMatrix) <=20 || |
|
145 |
- nrow(substitutionMatrix) >26 ) { |
|
146 |
- stop("substitutionMatrix has wrong dimensions!") |
|
147 |
- } |
|
148 |
- } else { |
|
149 |
- stop("substitutionMatrix should be a symmetric matrix!") |
|
162 |
+ reqNames <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", |
|
163 |
+ "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V", |
|
164 |
+ "B", "Z", "X", "*") |
|
165 |
+ |
|
166 |
+ if (type == "protein") |
|
167 |
+ { |
|
168 |
+ rowPerm <- match(reqNames, rownames(substitutionMatrix)) |
|
169 |
+ if (any(is.na(rowPerm))) |
|
170 |
+ stop("substitutionMatrix does not contain all necessary rows") |
|
171 |
+ |
|
172 |
+ colPerm <- match(reqNames, colnames(substitutionMatrix)) |
|
173 |
+ if (any(is.na(colPerm))) |
|
174 |
+ stop("substitutionMatrix does not contain all necessary columns") |
|
175 |
+ |
|
176 |
+ substitutionMatrix <- substitutionMatrix[rowPerm, colPerm] |
|
177 |
+ |
|
178 |
+ if (!isSymmetric(substitutionMatrix)) |
|
179 |
+ stop("substitutionMatrix should be a symmetric matrix!") |
|
180 |
+ } |
|
181 |
+ else |
|
182 |
+ { |
|
183 |
+ reqNuc <- if (type == "dna") c("A", "G", "C", "T") |
|
184 |
+ else c("A", "G", "C", "U") |
|
185 |
+ |
|
186 |
+ if (any(is.na(match(reqNuc, rownames(substitutionMatrix))))) |
|
187 |
+ stop("substitutionMatrix does not contain all necessary rows") |
|
188 |
+ |
|
189 |
+ if (any(is.na(match(reqNuc, colnames(substitutionMatrix))))) |
|
190 |
+ stop("substitutionMatrix does not contain all necessary columns") |
|
191 |
+ |
|
192 |
+ rowSel <- which(rownames(substitutionMatrix) %in% reqNames) |
|
193 |
+ colSel <- which(colnames(substitutionMatrix) %in% reqNames) |
|
194 |
+ |
|
195 |
+ substitutionMatrix <- substitutionMatrix[rowSel, colSel] |
|
196 |
+ |
|
197 |
+ fakeAAmat <- matrix(0, length(reqNames), length(reqNames)) |
|
198 |
+ rownames(fakeAAmat) <- reqNames |
|
199 |
+ colnames(fakeAAmat) <- reqNames |
|
200 |
+ fakeAAmat[rownames(substitutionMatrix), colnames(substitutionMatrix)] <- |
|
201 |
+ substitutionMatrix |
|
202 |
+ |
|
203 |
+ substitutionMatrix <- fakeAAmat |
|
204 |
+ |
|
205 |
+ params[["dnamatrix"]] <- NULL |
|
150 | 206 |
} |
151 | 207 |
} |
152 | 208 |
|
... | ... |
@@ -148,111 +148,48 @@ msaMuscle <- function(inputSeqs, |
148 | 148 |
substitutionMatrix <- NULL; |
149 | 149 |
} |
150 | 150 |
|
151 |
- |
|
152 | 151 |
##check if substitutionMatrix is a matrix |
153 |
- if (!is.null(substitutionMatrix) & !is.matrix(substitutionMatrix)) { |
|
152 |
+ if ((!is.null(substitutionMatrix) && !is.matrix(substitutionMatrix)) || |
|
153 |
+ identical(mode(substitutionMatrix), "list")) |
|
154 | 154 |
stop("The parameter substitutionMatrix should be a matrix!") |
155 |
- } |
|
156 |
- |
|
157 |
- ## Attention, the upper check does NOT detect the |
|
158 |
- ## difference between matrices build as |
|
159 |
- ## a) y <- list() |
|
160 |
- ## length(y) <- 3^2 |
|
161 |
- ## dim(y) <- c(3,3) |
|
162 |
- ## b) x <- matrix(...) |
|
163 |
- ## |
|
164 |
- ## is.matrix(x)==is.matrix(y)==TRUE !!!! |
|
165 |
- ## class(x)==class(y)=="matrix" |
|
166 |
- ## so another is necessary: |
|
167 |
- ## mode(x)==numeric |
|
168 |
- ## mode(y)==list |
|
169 |
- |
|
170 |
- ##case a) |
|
171 |
- if (identical(mode(substitutionMatrix), "list")) { |
|
172 |
- ##check if there are any NA-Values |
|
173 |
- if (length(which(is.na(substitutionMatrix)))!=0) { |
|
174 |
- stop("The parameter substitutionMatrix is not valid. \n", |
|
175 |
- "There are NA-values in the substitutionMatrix! ") |
|
176 |
- } |
|
177 |
- |
|
178 |
- ##check if there are any NaN-Values |
|
179 |
- if (length(which(is.element(substitutionMatrix, NaN)))!=0) { |
|
180 |
- stop("The parameter substitutionMatrix is not valid. \n", |
|
181 |
- "There are NaN-values in the substitutionMatrix! ") |
|
182 |
- } |
|
183 |
- |
|
184 |
- ##check if there are any Inf-Values |
|
185 |
- if (length(which(is.element(substitutionMatrix, Inf)))!=0) { |
|
186 |
- stop("The parameter substitutionMatrix is not valid.\n", |
|
187 |
- "There are Inf-values in the substitutionMatrix! ") |
|
188 |
- } |
|
189 |
- |
|
190 |
- ##check if there are any NULL-Values |
|
191 |
- if (length(which(unlist(lapply(substitutionMatrix,is.null))))!=0) { |
|
192 |
- stop("The parameter substitutionMatrix is not valid. \n", |
|
193 |
- "There are NULL-values in the substitutionMatrix! ") |
|
194 |
- } |
|
195 |
- |
|
196 |
- |
|
197 |
- ##after having checked all, |
|
198 |
- ##create a real matrix of type b) |
|
199 | 155 |
|
200 |
- ##nasty, but should work |
|
156 |
+ if (!is.null(substitutionMatrix)) |
|
157 |
+ { |
|
158 |
+ headerNames <- c("A", "C", "D", "E", "F", |
|
159 |
+ "G", "H", "I", "K", "L", |
|
160 |
+ "M", "N", "P", "Q", "R", |
|
161 |
+ "S", "T", "V","W", "Y") |
|
162 |
+ |
|
163 |
+ if (type == "protein") |
|
164 |
+ reqNames <- headerNames |
|
165 |
+ else if (type == "dna") |
|
166 |
+ reqNames <- c("A", "C", "G", "T") |
|
167 |
+ else |
|
168 |
+ reqNames <- c("A", "C", "G", "U") |
|
201 | 169 |
|
202 |
- ##mode(substitutionMatrix)=list() |
|
203 |
- helpMatrix = matrix(nrow=nrow(substitutionMatrix), |
|
204 |
- ncol=ncol(substitutionMatrix)) |
|
205 |
- colnames(helpMatrix)=colnames(substitutionMatrix) |
|
206 |
- rownames(helpMatrix)=rownames(substitutionMatrix) |
|
207 |
- for (i in 1:length(t(sapply(substitutionMatrix, unlist)))) { |
|
208 |
- helpMatrix[i]= unlist(t(sapply(substitutionMatrix, unlist))[i]) |
|
209 |
- } |
|
210 |
- substitutionMatrix = helpMatrix |
|
211 |
- ##mode(substitutionMatrix)=numeric =>now: case b) |
|
212 |
- } |
|
170 |
+ rowPerm <- match(reqNames, rownames(substitutionMatrix)) |
|
171 |
+ if (any(is.na(rowPerm))) |
|
172 |
+ stop("substitutionMatrix does not contain all necessary rows") |
|
213 | 173 |
|
174 |
+ colPerm <- match(reqNames, colnames(substitutionMatrix)) |
|
175 |
+ if (any(is.na(colPerm))) |
|
176 |
+ stop("substitutionMatrix does not contain all necessary columns") |
|
214 | 177 |
|
215 |
- ## case b) |
|
216 |
- if(!is.null(substitutionMatrix)){ |
|
217 |
- ##check, if matrix is symmetric |
|
218 |
- if (isSymmetric(substitutionMatrix)) { |
|
219 |
- ##check dimensions according to NCBI-BLAST |
|
220 |
- if (nrow(substitutionMatrix) !=24) { |
|
221 |
- stop("The parameter substitutionMatrix has wrong dimensions! \n", |
|
222 |
- "Should be a 24x24 matrix!" ) |
|
223 |
- } |
|
224 |
- ##check order according to NCBI-BLAST |
|
225 |
- if (!(identical(toString(tolower(rownames(substitutionMatrix))), |
|
226 |
- "a, r, n, d, c, q, e, g, h, i, l, k, m, f, p, s, t, w, y, v, b, z, x, *"))){ |
|
227 |
- stop("The parameter substitutionMatrix has wrong ", |
|
228 |
- "order or no names! \n", |
|
229 |
- "Should be in order <abcdefghiklmnpqrstvwxyz>!") |
|
230 |
- } |
|
231 |
- ##check if there are any NA-Values |
|
232 |
- if (length(which(is.na(substitutionMatrix)))!=0) { |
|
233 |
- stop("The parameter substitutionMatrix is not valid. \n", |
|
234 |
- "There are NA-values in the substitutionMatrix!") |
|
235 |
- } |
|
178 |
+ substitutionMatrix <- substitutionMatrix[rowPerm, colPerm] |
|
236 | 179 |
|
237 |
- ##check if there are any NaN-Values |
|
238 |
- if (length(which(is.nan(substitutionMatrix)))!=0) { |
|
239 |
- stop("The parameter substitutionMatrix is not valid. \n", |
|
240 |
- "There are NaN-values in the substitutionMatrix! ") |
|
241 |
- } |
|
180 |
+ auxMat <- matrix(0, length(headerNames), length(headerNames)) |
|
181 |
+ rownames(auxMat) <- headerNames |
|
182 |
+ colnames(auxMat) <- headerNames |
|
183 |
+ auxMat[reqNames, reqNames] <- substitutionMatrix |
|
184 |
+ substitutionMatrix <- auxMat |
|
242 | 185 |
|
243 |
- ##check if there are any Inf-Values |
|
244 |
- if (length(which(is.element(substitutionMatrix, Inf)))!=0) { |
|
245 |
- stop("The parameter substitutionMatrix is not valid. \n", |
|
246 |
- "There are Inf-values in the substitutionMatrix! ") |
|
247 |
- } |
|
186 |
+ if (!isSymmetric(substitutionMatrix)) |
|
187 |
+ stop("substitutionMatrix should be a symmetric matrix!") |
|
248 | 188 |
|
249 |
- ##check if there are any NULL-Values: |
|
250 |
- ##by definition not allowed and not possible! |
|
251 |
- ##You can't assign NULL-values to Matrices as well as to Vectors! |
|
252 |
- } else { |
|
253 |
- stop("The parameter substitutionMatrix should be symmetric!") |
|
254 |
- } |
|
255 |
- } |
|
189 |
+ if (any(is.na(substitutionMatrix)) || any(is.na(substitutionMatrix)) || |
|
190 |
+ any(is.infinite(substitutionMatrix))) |
|
191 |
+ stop("substitutionMatrix contains invalid values!") |
|
192 |
+ } |
|
256 | 193 |
|
257 | 194 |
############## |
258 | 195 |
# gapOpening # |
... | ... |
@@ -57,24 +57,24 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
57 | 57 |
stop("The parameter alFile has an invalid argument!") |
58 | 58 |
|
59 | 59 |
if (!is(x, "MultipleAlignment")) |
60 |
- stop("The parameter x has an invalid argument! \n", |
|
60 |
+ stop("The parameter x has an invalid argument! \n", |
|
61 | 61 |
"x must be a multiple alignment object!") |
62 |
- |
|
62 |
+ |
|
63 | 63 |
|
64 | 64 |
if (output != "asis") |
65 | 65 |
{ |
66 | 66 |
if (!is.numeric(paperWidth) || length(paperWidth) != 1 || |
67 | 67 |
paperWidth <= 0) |
68 |
- stop("The parameter paperWidth must be ", |
|
68 |
+ stop("The parameter paperWidth must be ", |
|
69 | 69 |
"single positive number (unit: inches)!") |
70 | 70 |
|
71 | 71 |
if (!is.numeric(paperHeight) || length(paperHeight) != 1 || |
72 | 72 |
paperHeight <= 0) |
73 |
- stop("The parameter paperHeight must be ", |
|
73 |
+ stop("The parameter paperHeight must be ", |
|
74 | 74 |
"single positive number (unit: inches)!") |
75 | 75 |
|
76 | 76 |
if (!is.numeric(margins) || length(margins) != 2) |
77 |
- stop("The parameter margins must be ", |
|
77 |
+ stop("The parameter margins must be ", |
|
78 | 78 |
"two positive numbers (unit: inches)!") |
79 | 79 |
} |
80 | 80 |
|
... | ... |
@@ -85,14 +85,14 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
85 | 85 |
if (max(subset) < .Machine$integer.max) |
86 | 86 |
subset <- as.integer(subset) |
87 | 87 |
else |
88 |
- stop("One or more values for parameter subset ", |
|
88 |
+ stop("One or more values for parameter subset ", |
|
89 | 89 |
"are larger than integer!") |
90 | 90 |
} |
91 | 91 |
else if (!is.integer(subset)) |
92 | 92 |
stop("The parameter subset has an invalid argument!") |
93 | 93 |
|
94 | 94 |
if (length(subset) < 2) |
95 |
- stop("The parameter subset is expected to be \n", |
|
95 |
+ stop("The parameter subset is expected to be \n", |
|
96 | 96 |
" a vector with at least 2 entries!") |
97 | 97 |
|
98 | 98 |
if (!all(subset %in% 1:nrow(x))) |
... | ... |
@@ -122,7 +122,7 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
122 | 122 |
|
123 | 123 |
if (!is.numeric(consensusThreshold) || length(consensusThreshold) != 1 || |
124 | 124 |
consensusThreshold < 0 || consensusThreshold > 100) |
125 |
- stop("The parameter consensusThreshold must be \n", |
|
125 |
+ stop("The parameter consensusThreshold must be \n", |
|
126 | 126 |
"a single numeric between 0 and 100 !") |
127 | 127 |
|
128 | 128 |
if (shadingMode %in% c("identical", "similar")) |
... | ... |
@@ -151,9 +151,9 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
151 | 151 |
"accessible area")) |
152 | 152 |
else |
153 | 153 |
stop("Missing shadingModeArg for functional shading mode. \n", |
154 |
- "Valid values are: \n", |
|
155 |
- "\"charge\", \n", |
|
156 |
- "\"hydropathy\", \n", |
|
154 |
+ "Valid values are: \n", |
|
155 |
+ "\"charge\", \n", |
|
156 |
+ "\"hydropathy\", \n", |
|
157 | 157 |
"\"structure\", \n", |
158 | 158 |
"\"chemical\",\n", |
159 | 159 |
" \"rasmol\",\n", |
... | ... |
@@ -203,7 +203,7 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
203 | 203 |
if (output != "asis") |
204 | 204 |
{ |
205 | 205 |
if (!is.character(file) || length(file) > 1) |
206 |
- stop("The argument for parameter file must be \n", |
|
206 |
+ stop("The argument for parameter file must be \n", |
|
207 | 207 |
"a single character string!") |
208 | 208 |
|
209 | 209 |
if (substr(file, nchar(file) - 2, nchar(file)) != output) |
... | ... |
@@ -241,6 +241,11 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"), |
241 | 241 |
|
242 | 242 |
texOutput <- paste0("\\begin{texshade}{", stratifyFilenames(alFile), "}") |
243 | 243 |
|
244 |
+ if (is(x, "AAMultipleAlignment")) |
|
245 |
+ texOutput <- c(texOutput, "\\seqtype{P}") |
|
246 |
+ else |
|
247 |
+ texOutput <- c(texOutput, "\\seqtype{N}") |
|
248 |
+ |
|
244 | 249 |
if (length(toShow) == 1) |
245 | 250 |
{ |
246 | 251 |
if (sum(width(toShow)) < ncol(x)) |
... | ... |
@@ -45,21 +45,191 @@ print.MsaMetaData <- function(x, show) |
45 | 45 |
} |
46 | 46 |
} |
47 | 47 |
|
48 |
+print.MsaMultipleAlignmentChunk <- function(str, names=NULL, halfNrow=9, pos="", |
|
49 |
+ addOne=FALSE) |
|
50 |
+{ |
|
51 |
+ lx <- length(str) |
|
52 |
+ iW <- nchar(as.character(lx - if (addOne) 1 else 0)) + 2 |
|
53 |
+ |
|
54 |
+ cat(format("", width=iW), sep = "") |
|
55 |
+ |
|
56 |
+ if (length(names) > 0) |
|
57 |
+ cat(format(paste(" aln", pos), width=nchar(str)[1]), " names\n") |
|
58 |
+ else |
|
59 |
+ cat(paste(" aln", pos), "\n", sep="") |
|
48 | 60 |
|
49 |
-print.MsaMultipleAlignment <- function(x, show=c("alignment", "version", |
|
50 |
- "call")) |
|
61 |
+ if (addOne) |
|
62 |
+ str <- paste0(format(c(paste("[", 1:(lx - 1), "]", sep=""), "Con"), |
|
63 |
+ width=iW, justify="right"), " ", |
|
64 |
+ str, if (is.null(names)) "" else paste0(" ", names)) |
|
65 |
+ else |
|
66 |
+ str <- paste0(format(paste("[", 1:lx, "]", sep=""), width=iW, |
|
67 |
+ justify="right"), " ", |
|
68 |
+ str, if (is.null(names)) "" else paste0(" ", names)) |
|
69 |
+ |
|
70 |
+ if (lx <= 2 * halfNrow + 1) |
|
71 |
+ cat(paste(str, collapse="\n"), "\n") |
|
72 |
+ else |
|
73 |
+ { |
|
74 |
+ cat(paste(str[1:halfNrow], collapse="\n"), "\n") |
|
75 |
+ cat(format("...", width = iW, justify = "right"), "...\n") |
|
76 |
+ cat(paste(str[(lx - halfNrow + 1):lx], collapse="\n"), "\n") |
|
77 |
+ } |
|
78 |
+} |
|
79 |
+ |
|
80 |
+print.MsaMultipleAlignment <- function(x, show=c("alignment", "version", "call"), |
|
81 |
+ showNames=TRUE, showConsensus=TRUE, |
|
82 |
+ halfNrow=9, nameWidth=20) |
|
51 | 83 |
{ |
52 | 84 |
show <- match.arg(show, |
53 |
- choices=c("alignment", "version", "call", |
|
54 |
- "standardParams", "algParams"), |
|
85 |
+ choices=c("alignment", "complete", "version", "call", |
|
86 |
+ "standardParams", "algParams", "all"), |
|
55 | 87 |
several.ok=TRUE) |
56 | 88 |
|
89 |
+ if ("all" %in% show) |
|
90 |
+ show <- c("complete", "version", "call", "standardParams", "algParams") |
|
91 |
+ |
|
57 | 92 |
print.MsaMetaData(x, show=show) |
58 | 93 |
|
59 |
- if ("alignment" %in% show) |
|
94 |
+ if (any(c("alignment", "complete") %in% show)) |
|
60 | 95 |
{ |
61 |
- cat("\n") |
|
62 |
- print(as(x, substr(class(x), 4, nchar(class(x))))) |
|
96 |
+ nr <- nrow(x) |
|
97 |
+ nc <- ncol(x) |
|
98 |
+ |
|
99 |
+ if (identical(halfNrow, NA) || identical(halfNrow, -1)) |
|
100 |
+ halfNrow <- nr |
|
101 |
+ |
|
102 |
+ if (!is.numeric(halfNrow) || length(halfNrow) != 1 || |
|
103 |
+ round(halfNrow) != halfNrow || halfNrow < 1) |
|
104 |
+ stop("halfNrow must be a single whole number or NA") |
|
105 |
+ |
|
106 |
+ if (!is.numeric(nameWidth) || length(nameWidth) != 1 || |
|
107 |
+ round(nameWidth) != nameWidth || nameWidth < 5) |
|
108 |
+ stop("nameWidth must be a single whole number at least as large as 5") |
|
109 |
+ |
|
110 |
+ if (nameWidth > getOption("width") - 20) |
|
111 |
+ { |
|
112 |
+ nameWidth <- getOption("width") - 20 |
|
113 |
+ warning("nameWidth must be at least width - 20") |
|
114 |
+ } |
|
115 |
+ |
|
116 |
+ cat("\n", class(x), " with ", nr, |
|
117 |
+ ifelse(nr == 1, " row and ", " rows and "), |
|
118 |
+ nc, ifelse(nc == 1, " column\n", " columns\n"), sep = "") |
|
119 |
+ |
|
120 |
+ if (nr > 0) |
|
121 |
+ { |
|
122 |
+ strings <- unmasked(x) |
|
123 |
+ mdim <- maskeddim(x) |
|
124 |
+ |
|
125 |
+ if (sum(mdim) > 0) |
|
126 |
+ { |
|
127 |
+ if (mdim[1] > 0) |
|
128 |
+ { |
|
129 |
+ strings <- BStringSet(strings) |
|
130 |
+ |
|
131 |
+ maskStrings <- rep(BStringSet(paste(rep.int("#", nc), |
|
132 |
+ collapse = "")), |
|
133 |
+ mdim[1]) |
|
134 |
+ |
|
135 |
+ i <- as.integer(rowmask(x)) |
|
136 |
+ |
|
137 |
+ if (!is.null(rownames(x))) |
|
138 |
+ names(maskStrings) <- rownames(x)[i] |
|
139 |
+ strings[i] <- maskStrings |
|
140 |
+ } |
|
141 |
+ |
|
142 |
+ if (mdim[2] > 0) |
|
143 |
+ { |
|
144 |
+ strings <- as.matrix(strings) |
|
145 |
+ strings[, as.integer(colmask(x))] <- "#" |
|
146 |
+ strings <- BStringSet(apply(strings, 1, paste, |
|
147 |
+ collapse = "")) |
|
148 |
+ } |
|
149 |
+ } |
|
150 |
+ |
|
151 |
+ strings <- as.character(strings) |
|
152 |
+ |
|
153 |
+ iw <- nchar(as.character(length(strings))) + 2 |
|
154 |
+ |
|
155 |
+ if (showConsensus) |
|
156 |
+ { |
|
157 |
+ cons <- consensusString(consensusMatrix(unmasked(x))) |
|
158 |
+ strings <- c(strings, cons) |
|
159 |
+ |
|
160 |
+ if (length(names(strings)) > 0) |
|
161 |
+ names(strings)[length(strings)] <- "Consensus" |
|
162 |
+ |
|
163 |
+ addOne <- TRUE |
|
164 |
+ } |
|
165 |
+ else |
|
166 |
+ addOne <- FALSE |
|
167 |
+ |
|
168 |
+ names <- names(strings) |
|
169 |
+ |
|
170 |
+ if (showNames && length(names) > 0) |
|
171 |
+ { |
|
172 |
+ names <- names(strings) |
|
173 |
+ |
|
174 |
+ names <- ifelse(nchar(names) <= nameWidth, |
|
175 |
+ names, |
|
176 |
+ paste0(substr(names, 1, nameWidth - 3), "...")) |
|
177 |
+ |
|
178 |
+ chunkSize <- getOption("width") - iw - nameWidth - 3 |
|
179 |
+ } |
|
180 |
+ else |
|
181 |
+ { |
|
182 |
+ names <- NULL |
|
183 |
+ chunkSize <- getOption("width") - iw - 2 |
|
184 |
+ } |
|
185 |
+ |
|
186 |
+ seqLen <- nchar(strings)[1] |
|
187 |
+ |
|
188 |
+ if (seqLen < 7) |
|
189 |
+ strings <- format(strings, width=7, justify="left") |
|
190 |
+ |
|
191 |
+ if (nchar(strings)[1] <= chunkSize) |
|
192 |
+ print.MsaMultipleAlignmentChunk(strings, names, halfNrow=halfNrow, |
|
193 |
+ addOne=addOne) |
|
194 |
+ else if ("complete" %in% show) |
|
195 |
+ { |
|
196 |
+ starts <- seq(from=1, to=seqLen, by=chunkSize) |
|
197 |
+ stops <- pmin(starts + chunkSize - 1, seqLen) |
|
198 |
+ |
|
199 |
+ n <- length(starts) |
|
200 |
+ |
|
201 |
+ for (i in 1:n) |
|
202 |
+ { |
|
203 |
+ aln <- substr(strings, starts[i], stops[i]) |
|
204 |
+ |
|
205 |
+ pos <- paste0("(", starts[i], "..", stops[i], ")") |
|
206 |
+ |
|
207 |
+ if (nchar(pos) + 4 > chunkSize) |
|
208 |
+ pos <- paste0(substr(pos, 1, chunkSize - 7), "...") |
|
209 |
+ |
|
210 |
+ if (nchar(aln)[1] < nchar(pos) + 4) |
|
211 |
+ aln <- format(aln, width=nchar(pos) + 4, justify="left") |
|
212 |
+ |
|
213 |
+ print.MsaMultipleAlignmentChunk(aln, names, halfNrow=halfNrow, |
|
214 |
+ pos=pos, addOne=addOne) |
|
215 |
+ |
|
216 |
+ if (i < n) |
|
217 |
+ cat("\n") |
|
218 |
+ } |
|
219 |
+ } |
|
220 |
+ else |
|
221 |
+ { |
|
222 |
+ w1 <- (chunkSize - 2) %/% 2 |
|
223 |
+ w2 <- (chunkSize - 3) %/% 2 |
|
224 |
+ |
|
225 |
+ strings <- paste0(substr(strings, start=1, stop=w1), |
|
226 |
+ "...", |
|
227 |
+ substr(strings, start=seqLen - w2 + 1, stop=seqLen)) |
|
228 |
+ |
|
229 |
+ print.MsaMultipleAlignmentChunk(strings, names, halfNrow=halfNrow, |
|
230 |
+ addOne=addOne) |
|
231 |
+ } |
|
232 |
+ } |
|
63 | 233 |
} |
64 | 234 |
} |
65 | 235 |
|
... | ... |
@@ -1,27 +1,21 @@ |
1 | 1 |
##345678901234567890123456789012345678901234567890123456789012345678901234567890 |
2 | 2 |
citHeader("To cite package 'msa' in publications use:") |
3 | 3 |
|
4 |
-#year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date, perl = TRUE) |
|
5 |
-#vers <- paste("R package version", meta$Version) |
|
6 |
-desc <- packageDescription("msa") |
|
7 |
-year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", desc$Date) |
|
8 |
-vers <- paste("R package version", desc$Version) |
|
9 |
-url <- desc$URL |
|
10 |
- |
|
11 |
- |
|
12 |
-citEntry(entry="Manual", |
|
13 |
- title = "msa -- An R Package for Multiple Sequence Alignment.", |
|
14 |
- author = personList(as.person("Enrico Bonatesta"), as.person("Christoph Horejs-Kainrath"), as.person("Ulrich Bodenhofer")), |
|
15 |
- year = year, |
|
16 |
- note = vers, |
|
17 |
- organization=paste("Institute of Bioinformatics", |
|
18 |
- "Johannes Kepler University", sep=", "), |
|
19 |
- address="Linz, Austria", |
|
20 |
- url=url, |
|
21 |
- textVersion = |
|
22 |
- paste("Enrico Bonatesta, Christoph Horejs-Kainrath and Ulrich Bodenhofer (", year, "). ", |
|
23 |
- "msa -- An R Package for Multiple Sequence Alignment. ", |
|
24 |
- vers, ".", sep="") |
|
4 |
+citEntry(entry="Article", |
|
5 |
+ title = "msa: an R package for multiple sequence alignment", |
|
6 |
+ author = personList(as.person("Ulrich Bodenhofer"), |
|
7 |
+ as.person("Enrico Bonatesta"), |
|
8 |
+ as.person("Christoph Horejs-Kainrath"), |
|
9 |
+ as.person("Sepp Hochreiter")), |
|
10 |
+ journal="Bioinformatics", |
|
11 |
+ note="(accepted)", |
|
12 |
+ year="2015", |
|
13 |
+ doi="10.1093/bioinformatics/btv494", |
|
14 |
+ textVersion = |
|
15 |
+ paste("U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter (2015)", |
|
16 |
+ "msa: an R package for multiple sequence alignment.", |
|
17 |
+ "Bioinformatics (accepted).", |
|
18 |
+ "DOI: 10.1093/bioinformatics/btv176.") |
|
25 | 19 |
) |
26 | 20 |
|
27 | 21 |
citFooter( |
... | ... |
@@ -1,6 +1,23 @@ |
1 | 1 |
Change history of package msa: |
2 | 2 |
============================== |
3 | 3 |
|
4 |
+Version 1.2.0: |
|
5 |
+- new branch for Bioconductor 3.2 release |
|
6 |
+ |
|
7 |
+Version 1.1.2: |
|
8 |
+- new print() function for multiple alignments that also |
|
9 |
+ allows for displaying alignments in their entirety (plus additional |
|
10 |
+ customizations) |
|
11 |
+- strongly improved handling of custom substitution matrices by |
|
12 |
+ msaClustalW(): now custom matrices can also be supplied for nucleotide |
|
13 |
+ sequences which can also be passed via the 'substitutionMatrix' argument. |
|
14 |
+ The 'dnamatrix' argument is still available for the sake of backwards |
|
15 |
+ compatibility. |
|
16 |
+- strongly improved handling of custom substitution matrices by |
|
17 |
+ msaMuscle() |
|
18 |
+- fix of improperly aligned sequence logos produced by msaPrettyPrint() |
|
19 |
+- updated citation information |
|
20 |
+ |
|
4 | 21 |
Version 1.1.1: |
5 | 22 |
- fix of msa() function |
6 | 23 |
|
... | ... |
@@ -37,6 +37,14 @@ The following slots are defined for \code{MsaMetaData} objects: |
37 | 37 |
\author{Enrico Bonatesta and Christoph Horejs-Kainrath |
38 | 38 |
<msa@bioinf.jku.at> |
39 | 39 |
} |
40 |
+\references{ |
|
41 |
+ \url{http://www.bioinf.jku.at/software/msa} |
|
42 |
+ |
|
43 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
44 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
45 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
46 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
47 |
+} |
|
40 | 48 |
\seealso{\code{\link{msa}}, \code{\link{msaClustalW}}, |
41 | 49 |
\code{\link{msaClustalOmega}}, \code{\link{msaMuscle}}, |
42 | 50 |
\code{\linkS4class{MsaAAMultipleAlignment}}, |
... | ... |
@@ -45,14 +45,20 @@ |
45 | 45 |
} |
46 | 46 |
\section{Methods}{ |
47 | 47 |
\describe{ |
48 |
- \item{\code{print(x, show=c("alignment", "version", "call"))}:}{ |
|
48 |
+ \item{\code{print(x, show=c("alignment", "version", "call"), |
|
49 |
+ showNames=TRUE, showConsensus=TRUE, halfNrow=9, nameWidth=20)}:}{ |
|
49 | 50 |
prints information about the object \code{x}; the \code{show} |
50 | 51 |
argument allows for determining what should be printed. |
51 | 52 |
The \code{show} must be a character vector and may contain any |
52 |
- combination of the following five strings: |
|
53 |
+ combination of the following strings: |
|
53 | 54 |
if \code{show} contains \code{"alignment"}, the multiple |
54 |
- sequence alignment is printed using the corresponding method |
|
55 |
- from the \pkg{Biostrings} package. |
|
55 |
+ sequence alignment is printed in a way similar to the |
|
56 |
+ corresponding method from the \pkg{Biostrings} package |
|
57 |
+ (except for the consensus sequence, see below). |
|
58 |
+ If \code{show} contains \code{"complete"}, the entire width of |
|
59 |
+ the alignment is printed by splitting it over multiple blocks of |
|
60 |
+ lines if necessary. This overrules \code{"alignment"} if both |
|
61 |
+ are contained in the \code{show} argument. |
|
56 | 62 |
If \code{show} contains \code{"version"}, |
57 | 63 |
the \code{version} slot is shown. If \code{show} contains |
58 | 64 |
\code{"call"}, the \code{call} slot is shown. |
... | ... |
@@ -66,11 +72,33 @@ |
66 | 72 |
data are printed. The default is |
67 | 73 |
\code{show=c("alignment", "version", "call")}, i.e. by default, |
68 | 74 |
the multiple sequence alignment is shown along with version and |
69 |
- call information. |
|
75 |
+ call information. If \code{show} contains \code{"all"}, the |
|
76 |
+ complete alignment is shown along with version information, |
|
77 |
+ call, and the complete set of parameters. |
|
78 |
+ As said above, by default, printing alignments is similar to |
|
79 |
+ the standard \code{print} method provided by the \pkg{Biostrings} |
|
80 |
+ package, whereas including \code{"complete"} in the argument |
|
81 |
+ \code{show} prints the entire width of the alignment. |
|
82 |
+ Unlike the method from the \pkg{Biostrings} |
|
83 |
+ package, the appearance can be customized: by default, |
|
84 |
+ the consensus sequence is appended below the alignment. To switch |
|
85 |
+ this off, use \code{showConsensus=FALSE}. Whether or not sequence |
|
86 |
+ names should be printed can be controlled via the |
|
87 |
+ \code{showNames} argument. The width reserved for the sequence |
|
88 |
+ names can be adjusted using the \code{nameWidth} argument; |
|
89 |
+ the default is 20 like in the \pkg{Biostrings} method. |
|
90 |
+ If the number of sequences in the alignment is large, output |
|
91 |
+ can become quite lengthy. That is why only the first |
|
92 |
+ \code{halfNrow} and the last \code{halfNrow} sequences are |
|
93 |
+ shown. To show all sequences, set \code{halfNrow} to \code{NA} |
|
94 |
+ or -1. Note that \code{print} can also handle masked objects, |
|
95 |
+ where the masked sequences/positions are shown as hash marks. |
|
96 |
+ However, the consensus sequences are computed from the |
|
97 |
+ complete, unmasked alignment and displayed as such. |
|
70 | 98 |
} |
71 | 99 |
\item{\code{show(object)}:}{displays the alignment along with |
72 | 100 |
metadata; synonymous to calling \code{print} with default |
73 |
- \code{show} argument \code{c("alignment", "version", "call")}} |
|
101 |
+ arguments.} |
|
74 | 102 |
\item{\code{version(object)}:}{displays the algorithm with which |
75 | 103 |
the multiple alignment has been computed along with its |
76 | 104 |
version number (see also \code{\linkS4class{MsaMetaData}}).} |
... | ... |
@@ -78,8 +106,16 @@ |
78 | 106 |
also \code{\linkS4class{MsaMetaData}})} |
79 | 107 |
} |
80 | 108 |
} |
81 |
-\author{Enrico Bonatesta and Christoph Horejs-Kainrath |
|
82 |
- <msa@bioinf.jku.at> |
|
109 |
+\author{Enrico Bonatesta, Christoph Horejs-Kainrath, and |
|
110 |
+ Ulrich Bodenhofer <msa@bioinf.jku.at> |
|
111 |
+} |
|
112 |
+\references{ |
|
113 |
+ \url{http://www.bioinf.jku.at/software/msa} |
|
114 |
+ |
|
115 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
116 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
117 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
118 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
83 | 119 |
} |
84 | 120 |
\seealso{\code{\link{msa}}, \code{\link{msaClustalW}}, |
85 | 121 |
\code{\link{msaClustalOmega}}, \code{\link{msaMuscle}}, |
... | ... |
@@ -101,6 +137,8 @@ show(myAlignment) |
101 | 137 |
|
102 | 138 |
## print the results |
103 | 139 |
print(myAlignment, show="alignment") |
140 |
+print(myAlignment, show="alignment", showConsensus=FALSE) |
|
141 |
+print(myAlignment, show="complete") |
|
104 | 142 |
print(myAlignment, show=c("alignment", "version")) |
105 | 143 |
print(myAlignment, show="standardParams") |
106 | 144 |
print(myAlignment, show="algParams") |
... | ... |
@@ -17,8 +17,8 @@ |
17 | 17 |
\tabular{ll}{ |
18 | 18 |
Package: \tab msa\cr |
19 | 19 |
Type: \tab Package\cr |
20 |
- Version: \tab 0.99.1\cr |
|
21 |
- Date: \tab 2015-03-29\cr |
|
20 |
+ Version: \tab 1.1.2\cr |
|
21 |
+ Date: \tab 2015-09-29\cr |
|
22 | 22 |
License: \tab GPL-2\cr |
23 | 23 |
} |
24 | 24 |
} |
... | ... |
@@ -27,6 +27,11 @@ |
27 | 27 |
} |
28 | 28 |
\references{ |
29 | 29 |
\url{http://www.bioinf.jku.at/software/msa} |
30 |
+ |
|
31 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
32 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
33 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
34 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
30 | 35 |
|
31 | 36 |
Thompson, J. D., Higgins, D. G., and Gibson, T. J. (1994) |
32 | 37 |
CLUSTAL W: improving the sensitivity of progressive multiple sequence |
... | ... |
@@ -132,6 +132,11 @@ |
132 | 132 |
\references{ |
133 | 133 |
\url{http://www.bioinf.jku.at/software/msa} |
134 | 134 |
|
135 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
136 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
137 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
138 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
139 |
+ |
|
135 | 140 |
\url{http://www.clustal.org/download/clustalw_help.txt} |
136 | 141 |
|
137 | 142 |
\url{http://www.clustal.org/omega/README} |
... | ... |
@@ -27,12 +27,11 @@ |
27 | 27 |
\item{maxiters}{maximum number of iterations; the default value is 0 |
28 | 28 |
(no limitation). In the original ClustalOmega implementation, this |
29 | 29 |
parameter is called \code{iterations}.} |
30 |
- \item{substitutionMatrix}{substitution matrix for scoring matches and |
|
31 |
- mismatches; can be a real matrix, a file name, or the name of a |
|
32 |
- built-in substitution matrix. In the latter case, the choices |
|
30 |
+ \item{substitutionMatrix}{name of substitution matrix for scoring matches and |
|
31 |
+ mismatches; can be one of the choices |
|
33 | 32 |
\code{"BLOSUM30"}, \code{"BLOSUM40"}, \code{"BLOSUM50"}, |
34 |
- \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"} are |
|
35 |
- supported. This parameter is a new feature - the original ClustalOmega |
|
33 |
+ \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"}. |
|
34 |
+ This parameter is a new feature - the original ClustalOmega |
|
36 | 35 |
implementation does not allow for using a custom substitution matrix.} |
37 | 36 |
\item{type}{type of the input sequences \code{inputSeqs}; |
38 | 37 |
see \code{\link{msa}}.} |
... | ... |
@@ -59,6 +58,10 @@ |
59 | 58 |
specific to ClustalOmega can be passed to ClustalOmega via additional |
60 | 59 |
arguments (see argument \code{help} above). |
61 | 60 |
|
61 |
+ Since ClustalOmega only allows for using built-in amino acid |
|
62 |
+ substitution matrices, it is hardly useful for multiple alignments |
|
63 |
+ of nucleotide sequences. |
|
64 |
+ |
|
62 | 65 |
For a note on the order of output sequences and direct reading from |
63 | 66 |
FASTA files, see \code{\link{msa}}. |
64 | 67 |
} |
... | ... |
@@ -76,6 +79,11 @@ |
76 | 79 |
} |
77 | 80 |
\references{ |
78 | 81 |
\url{http://www.bioinf.jku.at/software/msa} |
82 |
+ |
|
83 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
84 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
85 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
86 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
79 | 87 |
|
80 | 88 |
\url{http://www.clustal.org/omega/README} |
81 | 89 |
|
... | ... |
@@ -34,7 +34,9 @@ |
34 | 34 |
built-in substitution matrix. In the latter case, the choices |
35 | 35 |
\code{"blosum"}, \code{"pam"}, \code{"gonnet"}, and \code{"id"} are |
36 | 36 |
supported for amino acid sequences. For aligning nucleotide |
37 |
- sequences, the parameter \code{dnamatrix} must be used instead. |
|
37 |
+ sequences, the choices \code{"iub"} and \code{"clustalw"} are |
|
38 |
+ possible. The parameter \code{dnamatrix} can also be used instead |
|
39 |
+ for the sake of backwards compatibility. |
|
38 | 40 |
The valid choices for this parameter are \code{"iub"} and |
39 | 41 |
\code{"clustalw"}. In the original ClustalW implementation, this |
40 | 42 |
parameter is called \code{matrix}.} |
... | ... |
@@ -79,7 +81,12 @@ |
79 | 81 |
} |
80 | 82 |
\references{ |
81 | 83 |
\url{http://www.bioinf.jku.at/software/msa} |
82 |
- |
|
84 |
+ |
|
85 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
86 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
87 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
88 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
89 |
+ |
|
83 | 90 |
\url{http://www.clustal.org/download/clustalw_help.txt} |
84 | 91 |
|
85 | 92 |
Thompson, J. D., Higgins, D. G., and Gibson, T. J. (1994) |
... | ... |
@@ -85,6 +85,11 @@ |
85 | 85 |
} |
86 | 86 |
\references{ |
87 | 87 |
\url{http://www.bioinf.jku.at/software/msa} |
88 |
+ |
|
89 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
90 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
91 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
92 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
88 | 93 |
|
89 | 94 |
\url{http://www.drive5.com/muscle/muscle.html} |
90 | 95 |
|
... | ... |
@@ -188,9 +188,14 @@ |
188 | 188 |
<msa@bioinf.jku.at> |
189 | 189 |
} |
190 | 190 |
\references{ |
191 |
-\url{http://www.bioinf.jku.at/software/msa} |
|
191 |
+ \url{http://www.bioinf.jku.at/software/msa} |
|
192 |
+ |
|
193 |
+ U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter |
|
194 |
+ (2015). msa: an R package for multiple sequence alignment. |
|
195 |
+ \emph{Bioinformatics} (accepted). DOI: |
|
196 |
+ \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}. |
|
192 | 197 |
|
193 |
-\url{https://www.ctan.org/pkg/texshade} |
|
198 |
+ \url{https://www.ctan.org/pkg/texshade} |
|
194 | 199 |
|
195 | 200 |
Beitz, E. (2000) TeXshade: shading and labeling of multiple |
196 | 201 |
sequence alignments using LaTeX2e |
... | ... |
@@ -574,13 +574,13 @@ void CommandLineParser::run(StringArray* args, bool xmenus, ClustalWInput *input |
574 | 574 |
{ |
575 | 575 |
userParameters->setDNAFlag(false); |
576 | 576 |
userParameters->setExplicitDNAFlag(true); |
577 |
- msg = "Sequence type explicitly set to Protein"; |
|
578 |
- cout << msg << std::endl; |
|
577 |
+ /*msg = "Sequence type explicitly set to Protein"; |
|
578 |
+ cout << msg << std::endl;*/ |
|
579 | 579 |
} |
580 | 580 |
else if(temp == 1) |
581 | 581 |
{ |
582 |
- msg = "Sequence type explicitly set to DNA"; |
|
583 |
- cout << msg << std::endl; |
|
582 |
+ /*msg = "Sequence type explicitly set to DNA"; |
|
583 |
+ cout << msg << std::endl;*/ |
|
584 | 584 |
userParameters->setDNAFlag(true); |
585 | 585 |
userParameters->setExplicitDNAFlag(true); |
586 | 586 |
} |
... | ... |
@@ -4,10 +4,10 @@ |
4 | 4 |
#define TRACE 0 |
5 | 5 |
|
6 | 6 |
const int MAX_LINE = 4096; |
7 |
-const int MAX_HEADINGS = 32; |
|
7 |
+const int MAX_HEADINGS = 20; |
|
8 | 8 |
static char Heading[MAX_HEADINGS]; |
9 | 9 |
static unsigned HeadingCount = 0; |
10 |
-static float Mx[32][32]; |
|
10 |
+static float Mx[20][20]; |
|
11 | 11 |
|
12 | 12 |
static void LogMx() |
13 | 13 |
{ |
... | ... |
@@ -111,7 +111,7 @@ PTR_SCOREMATRIX ReadMx(TextFile &File) |
111 | 111 |
|
112 | 112 |
char *p = Line + 1; |
113 | 113 |
char *maxp = p + strlen(Line); |
114 |
- for (unsigned Col = 0; Col < HeadingCount - 1; ++Col) |
|
114 |
+ for (unsigned Col = 0; Col < HeadingCount; ++Col) |
|
115 | 115 |
{ |
116 | 116 |
if (p >= maxp) |
117 | 117 |
Quit("Too few fields in line of matrix file: '%s'", Line); |
... | ... |
@@ -148,7 +148,7 @@ PTR_SCOREMATRIX ReadMx(TextFile &File) |
148 | 148 |
Mx[j][i]); |
149 | 149 |
goto ExitLoop; |
150 | 150 |
} |
151 |
- } |
|
151 |
+ } |
|
152 | 152 |
ExitLoop:; |
153 | 153 |
|
154 | 154 |
if (g_bVerbose) |
... | ... |
@@ -156,10 +156,17 @@ function which, by default, runs ClustalW with default parameters: |
156 | 156 |
myFirstAlignment <- msa(mySequences) |
157 | 157 |
myFirstAlignment |
158 | 158 |
@ |
159 |
+\noindent Obviously, the default printing function shortens the alignment |
|
160 |
+for the sake of compact output. The \verb+print()+ function provided by the |
|
161 |
+\MSA\ package provides some ways for customizing the output, such as, |
|
162 |
+showing the entire alignment split over multiple blocks of sub-sequences: |
|
163 |
+<<showWholeWidth>>= |
|
164 |
+print(myFirstAlignment, show="complete") |
|
165 |
+@ |
|
159 | 166 |
|
160 |
-The \MSA\ package offers the function \verb+msaPrettyPrint()+ which allows |
|
161 |
-for pretty-printing multiple alignments using the \LaTeX\ package \shade. |
|
162 |
-As an example, the following \R\ code creates a PDF file |
|
167 |
+The \MSA\ package additionally offers the function \verb+msaPrettyPrint()+ |
|
168 |
+which allows for pretty-printing multiple alignments using the \LaTeX\ package |
|
169 |
+\shade. As an example, the following \R\ code creates a PDF file |
|
163 | 170 |
\verb+myfirstAlignment.pdf+ which is shown in |
164 | 171 |
Figure~\ref{fig:myFirstAlignment}: |
165 | 172 |
<<IntegratePDF2>>= |
... | ... |
@@ -276,12 +283,26 @@ make use of most these parameters (see the documentation of ClustalOmega% |
276 | 283 |
for a comprehensive overview). Currently, the following restrictions and |
277 | 284 |
caveats apply: |
278 | 285 |
\begin{itemize} |
279 |
- \item The parameters \verb+infile+, \verb+clustersize+, \verb+gapOpen+, |
|
280 |
- \verb+gapExt+, \verb+iterations+, and \verb+out-order+ have been renamed to |
|
286 |
+ \item The parameters \verb+infile+, \verb+cluster-size+, |
|
287 |
+ \verb+iterations+, and \verb+output-order+ have been renamed to |
|
281 | 288 |
the argument names \verb+inputSeqs+, \verb+cluster+, |
282 |
- \verb+gapOpening+, \verb+gapExtension+, \verb+maxiters+, and \verb+order+ |
|
289 |
+ \verb+maxiters+, and \verb+order+ |
|
283 | 290 |
in order to provide a consistent interface for all three multiple |
284 | 291 |
sequence alignment algorithms. |
292 |
+ \item ClustalOmega does not allow for setting custom gap penalties. |
|
293 |
+ Therefore, setting the parameters \verb+gapOpening+ and |
|
294 |
+ \verb+gapExtension+ currently has no effect and will lead to a |
|
295 |
+ warning. These arguments are only defined for |
|
296 |
+ future extensions and consistency with the other algorithms |
|
297 |
+ available in \MSA. |
|
298 |
+ \item ClustalOmega only allows for choosing substitution matrices |
|
299 |
+ from a pre-defined set of names, namely \verb+"BLOSUM30"+, |
|
300 |
+ \verb+"BLOSUM40"+, \verb+"BLOSUM50"+, \verb+"BLOSUM65"+, |
|
301 |
+ \verb+"BLOSUM80"+, and \verb+"Gonnet"+. This is a new feature |
|
302 |
+ --- the original ClustalOmega implementation does not allow for |
|
303 |
+ using any custom substitution matrix. However, since these are |
|
304 |
+ all amino acid substitution matrices, ClustalOmega is still hardly |
|
305 |
+ useful for multiple alignments of nucleotide sequences. |
|
285 | 306 |
\item Boolean flags must be passed as logical values, e.g.\ |
286 | 307 |
\verb+verbose=TRUE+. |
287 | 308 |
\item The following parameters are (currently) not supported: |
... | ... |
@@ -321,6 +342,94 @@ caveats apply: |
321 | 342 |
\verb+weight1+, and \verb+weight2+. |
322 | 343 |
\end{itemize} |
323 | 344 |
|
345 |
+\section{Printing Multiple Sequence Alignments}\label{sec:msaPrint} |
|
346 |
+ |
|
347 |
+As already shown above, multiple sequence alignments can be shown in |
|
348 |
+plain text format on the \R\ console using the \verb+print()+ function |
|
349 |
+(which is implicitly called if just the object name is entered on the |
|
350 |
+\R\ console). This function allows for multiple customizations, such as, |
|
351 |
+enabling/disabling to display a consensus sequence, printing the entire |
|
352 |
+alignment or only a subset, enabling/disabling to display sequence names, |
|
353 |
+and adjusting the width allocated for sequence names. For more information, |
|
354 |
+the reader is referred to the |
|
355 |
+help page of the print function: |
|
356 |
+<<helpPrint,eval=FALSE>>= |
|
357 |
+help("print,MsaDNAMultipleAlignment-method") |
|
358 |
+@ |
|
359 |
+We only provide some examples here: |
|
360 |
+<<printExamples>>= |
|
361 |
+print(myFirstAlignment) |
|
362 |
+print(myFirstAlignment, show="complete") |
|
363 |
+print(myFirstAlignment, showConsensus=FALSE, halfNrow=3) |
|
364 |
+print(myFirstAlignment, showNames=FALSE, show="complete") |
|
365 |
+@ |
|
366 |
+ |
|
367 |
+\section{Processing Multiple Alignments}\label{sec:msaProc} |
|
368 |
+ |
|
369 |
+The classes defined by the \MSA\ package for storing multiple alignment results |
|
370 |
+have been derived from the corresponding classes defined by the |
|
371 |
+\verb+Biostrings+ package. Therefore, all methods for processing |
|
372 |
+multiple alignments are available and work without any practical limitation. In |
|
373 |
+this section, we highlight some of these. |
|
374 |
+ |
|
375 |
+The classes used for storing multiple alignments allow for defining masks |
|
376 |
+on sequences and sequence positions via their row and column mask slots. |
|
377 |
+They can be set by \verb+rowmask()+ and \verb+colmask()+ functions which serve |
|
378 |
+both as setter and getter functions. To set row or column masks, an |
|
379 |
+\verb+IRanges+ object must be supplied: |
|
380 |
+<<maskExample>>= |
|
381 |
+myMaskedAlignment <- myFirstAlignment |
|
382 |
+rowM <- IRanges(start=1, end=2) |
|
383 |
+rowmask(myMaskedAlignment) <- rowM |
|
384 |
+myMaskedAlignment |
|
385 |
+@ |
|
386 |
+ |
|
387 |
+The \verb+unmasked()+ allows for removing these masks, thereby casting |
|
388 |
+the multiple alignment to a set of aligned \verb+Biostrings+ sequences |
|
389 |
+(class \verb+AAStringSet+, \verb+DNAStringSet+, or \verb+RNAStringSet+): |
|
390 |
+<<unmaskedExample>>= |
|
391 |
+unmasked(myMaskedAlignment) |
|
392 |
+@ |
|
393 |
+ |
|
394 |
+Consensus matrices can be computed conveniently as follows: |
|
395 |
+<<consensusExample1>>= |
|
396 |
+conMat <- consensusMatrix(myFirstAlignment) |
|
397 |
+dim(conMat) |
|
398 |
+conMat[, 101:110] |
|
399 |
+@ |
|
400 |
+Note that \verb+consensusMatrix()+ cannot handle |
|
401 |
+alignments with active masks. So, the masks in multiple alignment objects must |
|
402 |
+must be removed prior to the computation of the consensus matrix: |
|
403 |
+<<consensusExample2>>= |
|
404 |
+conMat <- consensusMatrix(unmasked(myMaskedAlignment)) |
|
405 |
+@ |
|
406 |
+ |
|
407 |
+Consensus strings can be computed from consensus matrices: |
|
408 |
+<<consensusExample3>>= |
|
409 |
+## auxiliary function for splitting a string into displayable portions |
|
410 |
+printSplitString <- function(x, width=getOption("width") - 1) |
|
411 |
+{ |
|
412 |
+ starts <- seq(from=1, to=nchar(x), by=width) |
|
413 |
+ |
|
414 |
+ for (i in 1:length(starts)) |
|
415 |
+ cat(substr(x, starts[i], starts[i] + width - 1), "\n") |
|
416 |
+} |
|
417 |
+ |
|
418 |
+printSplitString(consensusString(conMat)) |
|
419 |
+@ |
|
420 |
+\noindent Consensus sequences can also be computed directly without computing |
|
421 |
+intermediate consensus matrices. However, the \verb+consensusString()+ |
|
422 |
+function cannot handle the |
|
423 |
+masks contained in the multiple alignment objects (no matter whether |
|
424 |
+there are active masks or not). Therefore, it is necessary to remove |
|
425 |
+the masks beforehand: |
|
426 |
+<<consensusExample4>>= |
|
427 |
+printSplitString(consensusString(unmasked(myFirstAlignment))) |
|
428 |
+printSplitString(consensusString(unmasked(myMaskedAlignment))) |
|
429 |
+@ |
|
430 |
+\noindent Actually, the \verb+print()+ method (see Section~\ref{sec:msaPrint} above) |
|
431 |
+uses this function to compute the consensus sequence. |
|
432 |
+ |
|
324 | 433 |
\section{Pretty-Printing Multiple Sequence Alignments}\label{sec:msaPrettyPrint} |
325 | 434 |
|
326 | 435 |
As already mentioned above, the \MSA\ package offers the function |
... | ... |
@@ -398,9 +507,14 @@ The color scheme of the sequence logo can be configured with the |
398 | 507 |
\verb+"standard area"+, and \verb+"accessible area"+. The above example |
399 | 508 |
uses the color scheme \verb+"rasmol"+. |
400 | 509 |
|
401 |
-Finally note that a consensus sequence and a sequence logo can be displayed |
|
510 |
+Note that a consensus sequence and a sequence logo can be displayed |
|
402 | 511 |
together, but only on opposite sides. |
403 | 512 |
|
513 |
+Finally, a caveat: for computing consensus sequences, |
|
514 |
+\verb+msaPrettyPrint()+ uses the functionality provided by \shade, therefore, |
|
515 |
+the results need not match to the results of the methods described in |
|
516 |
+Section~\ref{sec:msaProc} above. |
|
517 |
+ |
|
404 | 518 |
\subsection{Color Shading Modes} |
405 | 519 |
|
406 | 520 |
\shade\ offers different shading schemes for displaying the multiple sequence |
... | ... |
@@ -541,12 +655,6 @@ has a minor memory leak, but the loss of data is so small that no major |
541 | 655 |
problems are to be expected except for thousands of executions of |
542 | 656 |
ClustalOmega. |
543 | 657 |
|
544 |
-\subsubsection*{\shade: Alignment of Sequence Logos} |
|
545 |
- |
|
546 |
-\shade\ has some issues with aligning the sequence logo to the multiple |
|
547 |
-sequence alignment. Under which conditions this happens, would require a |
|
548 |
-more detailed investigation. |
|
549 |
- |
|
550 | 658 |
\subsubsection*{ClustalOmega vs.\ Older GCC Versions on Linux/Unix} |
551 | 659 |
|
552 | 660 |
We have encountered peculiar behavior of ClustalOmega if the package was |
... | ... |
@@ -613,6 +721,14 @@ bibliography below). |
613 | 721 |
\section{Change Log} |
614 | 722 |
|
615 | 723 |
\begin{description} |
724 |
+\item[Version 1.0.2:] \mbox{ } \begin{itemize} |
|
725 |
+ \item new \verb+print()+ function for multiple alignments that also |
|
726 |
+ allows for displaying alignments in their entirety (plus additional |
|
727 |
+ customizations) |
|
728 |
+ \item fix of improperly aligned sequence logos produced by |
|
729 |
+ \verb+msaPrettyPrint()+ |
|
730 |
+ \item updated citation information |
|
731 |
+ \end{itemize} |
|
616 | 732 |
\item[Version 1.1.1:] fix of \verb+msa()+ function |
617 | 733 |
\item[Version 1.1.0:] new branch for Bioconductor 3.2 devel |
618 | 734 |
\item[Version 1.0.0:] first official release as part of Bioconductor 3.1 |