Browse code

Multiple new features and fixes; version number bumped to 1.1.2

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@109473 bc3139a8-67e5-0310-9ffc-ced21a209358

Ulrich Bodenhofer authored on 10/10/2015 13:46:31
Showing 20 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: msa
2 2
 Type: Package
3 3
 Title: Multiple Sequence Alignment
4
-Version: 1.1.1
5
-Date: 2015-06-12
4
+Version: 1.1.2
5
+Date: 2015-10-10
6 6
 Author: Enrico Bonatesta, Christoph Horejs-Kainrath, Ulrich Bodenhofer
7 7
 Maintainer: Ulrich Bodenhofer <bodenhofer@bioinf.jku.at>
8 8
 Description: This package provides a unified R/Bioconductor interface to the
... ...
@@ -17,15 +17,18 @@ URL: http://www.bioinf.jku.at/software/msa/
17 17
 License: GPL (>= 2)
18 18
 Copyright: See file inst/COPYRIGHT
19 19
 Depends: R (>= 3.1.0), methods, Biostrings (>= 2.30.0)
20
-Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0), S4Vectors,
21
-	 tools
20
+Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0),
21
+        S4Vectors, tools
22 22
 Suggests: Biobase, knitr
23 23
 LinkingTo: Rcpp
24 24
 SystemRequirements:
25 25
 VignetteBuilder: knitr
26 26
 LazyLoad: yes
27
-Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R 
28
-         helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R
29
-	 print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R 
30
-         msaClustalOmega.R
31
-biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison, Sequencing
27
+Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R
28
+        helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R
29
+        print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R
30
+        msaClustalOmega.R
31
+biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison,
32
+        Sequencing
33
+NeedsCompilation: yes
34
+Packaged: 2015-10-07 16:20:45 UTC; bodenhof
... ...
@@ -110,11 +110,11 @@ msaClustalOmega <- function(inputSeqs,
110 110
     if (is.null(substitutionMatrix) ||
111 111
             identical(substitutionMatrix, "default")) {
112 112
             substitutionMatrix <- NULL
113
-    } else if (is.character(substitutionMatrix) &&
114
-                   !is.matrix(substitutionMatrix)) {
113
+    } else {
115 114
             possibleValues <- c("BLOSUM30", "BLOSUM40", "BLOSUM50",
116 115
                                 "BLOSUM65", "BLOSUM80", "Gonnet")
117
-            if (!(substitutionMatrix %in% possibleValues)){
116
+            if (!is.character(substitutionMatrix) ||
117
+                 !(substitutionMatrix %in% possibleValues)){
118 118
                 ##create a string with all possible Values named text
119 119
                 text <- ""
120 120
                 text <- paste(possibleValues, collapse=", ")
... ...
@@ -129,6 +129,8 @@ msaClustalW <- function(inputSeqs,
129 129
             ##name of a matrix that should be used
130 130
         !is.matrix(substitutionMatrix)) {
131 131
         ##check whether value is BLOSUM, PAM, GONNET, or ID;
132
+        if (type == "protein")
133
+        {
132 134
             possibleValues <- c("blosum", "pam", "gonnet", "id")
133 135
             if (!(substitutionMatrix %in% possibleValues)){
134 136
                 ##create a string with all possible Values named text
... ...
@@ -138,15 +140,69 @@ msaClustalW <- function(inputSeqs,
138 140
                      "only can have the values: \n", text)
139 141
             }
140 142
             params[["substitutionMatrixIsStringFlag"]] <- TRUE
143
+        }
144
+        else
145
+        {
146
+            possibleValues <- c("iub", "clustalw")
147
+            if (!(substitutionMatrix %in% possibleValues)){
148
+                ##create a string with all possible Values named text
149
+                text <- ""
150
+                text <- paste(possibleValues, collapse=", ")
151
+                stop("The parameter substitutionMatrix ",
152
+                     "only can have the values: \n", text)
153
+            }
154
+
155
+            params[["substitutionMatrixIsStringFlag"]] <- FALSE
156
+            params[["substitutionMatrixIsDefaultFlag"]] <- TRUE
157
+            params[["dnamatrix"]] <- substitutionMatrix
158
+            substitutionMatrix <- "default"
159
+        }
141 160
     } else {
142 161
         ##real matrix
143
-        if (isSymmetric(substitutionMatrix)) {
144
-            if (nrow(substitutionMatrix) <=20 ||
145
-                nrow(substitutionMatrix) >26 ) {
146
-                    stop("substitutionMatrix has wrong dimensions!")
147
-            }
148
-        } else {
149
-            stop("substitutionMatrix should be a symmetric matrix!")
162
+        reqNames <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I",
163
+                      "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
164
+                      "B", "Z", "X", "*")
165
+
166
+        if (type == "protein")
167
+        {
168
+            rowPerm <- match(reqNames, rownames(substitutionMatrix))
169
+            if (any(is.na(rowPerm)))
170
+                stop("substitutionMatrix does not contain all necessary rows")
171
+
172
+            colPerm <- match(reqNames, colnames(substitutionMatrix))
173
+            if (any(is.na(colPerm)))
174
+                stop("substitutionMatrix does not contain all necessary columns")
175
+
176
+            substitutionMatrix <- substitutionMatrix[rowPerm, colPerm]
177
+
178
+            if (!isSymmetric(substitutionMatrix))
179
+                stop("substitutionMatrix should be a symmetric matrix!")
180
+        }
181
+        else
182
+        {
183
+            reqNuc <- if (type == "dna") c("A", "G", "C", "T")
184
+                      else c("A", "G", "C", "U")
185
+
186
+            if (any(is.na(match(reqNuc, rownames(substitutionMatrix)))))
187
+                    stop("substitutionMatrix does not contain all necessary rows")
188
+
189
+            if (any(is.na(match(reqNuc, colnames(substitutionMatrix)))))
190
+                stop("substitutionMatrix does not contain all necessary columns")
191
+
192
+            rowSel <- which(rownames(substitutionMatrix) %in% reqNames)
193
+            colSel <- which(colnames(substitutionMatrix) %in% reqNames)
194
+
195
+            substitutionMatrix <- substitutionMatrix[rowSel, colSel]
196
+
197
+            fakeAAmat <- matrix(0, length(reqNames), length(reqNames))
198
+            rownames(fakeAAmat) <- reqNames
199
+            colnames(fakeAAmat) <- reqNames
200
+            fakeAAmat[rownames(substitutionMatrix), colnames(substitutionMatrix)] <-
201
+                substitutionMatrix
202
+
203
+            substitutionMatrix <- fakeAAmat
204
+
205
+            params[["dnamatrix"]] <- NULL
150 206
         }
151 207
     }
152 208
 
... ...
@@ -148,111 +148,48 @@ msaMuscle <- function(inputSeqs,
148 148
         substitutionMatrix <- NULL;
149 149
     }
150 150
 
151
-
152 151
     ##check if substitutionMatrix is a matrix
153
-    if (!is.null(substitutionMatrix) & !is.matrix(substitutionMatrix)) {
152
+    if ((!is.null(substitutionMatrix) && !is.matrix(substitutionMatrix)) ||
153
+        identical(mode(substitutionMatrix), "list"))
154 154
         stop("The parameter substitutionMatrix should be a matrix!")
155
-    }
156
-
157
-    ## Attention, the upper check does NOT detect the
158
-    ## difference between matrices build as
159
-    ## a) y <- list()
160
-    ##    length(y) <- 3^2
161
-    ##    dim(y) <- c(3,3)
162
-    ## b) x <- matrix(...)
163
-    ##
164
-    ## is.matrix(x)==is.matrix(y)==TRUE !!!!
165
-    ## class(x)==class(y)=="matrix"
166
-    ## so another is necessary:
167
-    ## mode(x)==numeric
168
-    ## mode(y)==list
169
-
170
-    ##case a)
171
-    if (identical(mode(substitutionMatrix), "list")) {
172
-        ##check if there are any NA-Values
173
-        if (length(which(is.na(substitutionMatrix)))!=0) {
174
-            stop("The parameter substitutionMatrix is not valid. \n",
175
-                 "There are NA-values in the substitutionMatrix! ")
176
-        }
177
-
178
-        ##check if there are any NaN-Values
179
-        if (length(which(is.element(substitutionMatrix, NaN)))!=0) {
180
-            stop("The parameter substitutionMatrix is not valid. \n",
181
-                 "There are NaN-values in the substitutionMatrix! ")
182
-        }
183
-
184
-        ##check if there are any Inf-Values
185
-        if (length(which(is.element(substitutionMatrix, Inf)))!=0) {
186
-            stop("The parameter substitutionMatrix is not valid.\n",
187
-                 "There are Inf-values in the substitutionMatrix! ")
188
-        }
189
-
190
-        ##check if there are any NULL-Values
191
-        if (length(which(unlist(lapply(substitutionMatrix,is.null))))!=0) {
192
-            stop("The parameter substitutionMatrix is not valid. \n",
193
-                 "There are NULL-values in the substitutionMatrix! ")
194
-        }
195
-
196
-
197
-        ##after having checked all,
198
-        ##create a real matrix of type b)
199 155
 
200
-        ##nasty, but should work
156
+    if (!is.null(substitutionMatrix))
157
+    {
158
+        headerNames <- c("A", "C", "D", "E", "F",
159
+                         "G", "H", "I", "K", "L",
160
+                         "M", "N", "P", "Q", "R",
161
+                         "S", "T",  "V","W", "Y")
162
+
163
+        if (type == "protein")
164
+            reqNames <- headerNames
165
+        else if (type == "dna")
166
+            reqNames <- c("A", "C", "G", "T")
167
+        else
168
+            reqNames <- c("A", "C", "G", "U")
201 169
 
202
-        ##mode(substitutionMatrix)=list()
203
-        helpMatrix = matrix(nrow=nrow(substitutionMatrix),
204
-                            ncol=ncol(substitutionMatrix))
205
-        colnames(helpMatrix)=colnames(substitutionMatrix)
206
-        rownames(helpMatrix)=rownames(substitutionMatrix)
207
-        for (i in 1:length(t(sapply(substitutionMatrix, unlist)))) {
208
-            helpMatrix[i]=  unlist(t(sapply(substitutionMatrix, unlist))[i])
209
-        }
210
-        substitutionMatrix = helpMatrix
211
-        ##mode(substitutionMatrix)=numeric =>now: case b)
212
-    }
170
+        rowPerm <- match(reqNames, rownames(substitutionMatrix))
171
+        if (any(is.na(rowPerm)))
172
+            stop("substitutionMatrix does not contain all necessary rows")
213 173
 
174
+        colPerm <- match(reqNames, colnames(substitutionMatrix))
175
+        if (any(is.na(colPerm)))
176
+            stop("substitutionMatrix does not contain all necessary columns")
214 177
 
215
-    ## case b)
216
-    if(!is.null(substitutionMatrix)){
217
-    ##check, if matrix is symmetric
218
-    if (isSymmetric(substitutionMatrix)) {
219
-        ##check dimensions according to NCBI-BLAST
220
-        if (nrow(substitutionMatrix) !=24) {
221
-            stop("The parameter substitutionMatrix has wrong dimensions! \n",
222
-                 "Should be a 24x24 matrix!" )
223
-        }
224
-        ##check order according to NCBI-BLAST
225
-        if (!(identical(toString(tolower(rownames(substitutionMatrix))),
226
-    "a, r, n, d, c, q, e, g, h, i, l, k, m, f, p, s, t, w, y, v, b, z, x, *"))){
227
-                stop("The parameter substitutionMatrix has wrong ",
228
-                     "order or no names! \n",
229
-                     "Should be in order <abcdefghiklmnpqrstvwxyz>!")
230
-        }
231
-        ##check if there are any NA-Values
232
-        if (length(which(is.na(substitutionMatrix)))!=0) {
233
-            stop("The parameter substitutionMatrix is not valid. \n",
234
-                 "There are NA-values in the substitutionMatrix!")
235
-        }
178
+        substitutionMatrix <- substitutionMatrix[rowPerm, colPerm]
236 179
 
237
-        ##check if there are any NaN-Values
238
-        if (length(which(is.nan(substitutionMatrix)))!=0) {
239
-            stop("The parameter substitutionMatrix is not valid. \n",
240
-                 "There are NaN-values in the substitutionMatrix! ")
241
-        }
180
+        auxMat <- matrix(0, length(headerNames), length(headerNames))
181
+        rownames(auxMat) <- headerNames
182
+        colnames(auxMat) <- headerNames
183
+        auxMat[reqNames, reqNames] <- substitutionMatrix
184
+        substitutionMatrix <- auxMat
242 185
 
243
-        ##check if there are any Inf-Values
244
-        if (length(which(is.element(substitutionMatrix, Inf)))!=0) {
245
-            stop("The parameter substitutionMatrix is not valid. \n",
246
-                 "There are Inf-values in the substitutionMatrix! ")
247
-        }
186
+        if (!isSymmetric(substitutionMatrix))
187
+            stop("substitutionMatrix should be a symmetric matrix!")
248 188
 
249
-        ##check if there are any NULL-Values:
250
-        ##by definition not allowed and not possible!
251
-        ##You can't assign NULL-values to Matrices as well as to Vectors!
252
-    } else {
253
-        stop("The parameter substitutionMatrix should be symmetric!")
254
-    }
255
-    }
189
+        if (any(is.na(substitutionMatrix)) || any(is.na(substitutionMatrix)) ||
190
+            any(is.infinite(substitutionMatrix)))
191
+            stop("substitutionMatrix contains invalid values!")
192
+     }
256 193
 
257 194
     ##############
258 195
     # gapOpening #
... ...
@@ -57,24 +57,24 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"),
57 57
         stop("The parameter alFile has an invalid argument!")
58 58
 
59 59
     if (!is(x, "MultipleAlignment"))
60
-        stop("The parameter x has an invalid argument! \n", 
60
+        stop("The parameter x has an invalid argument! \n",
61 61
              "x must be a multiple alignment object!")
62
-    
62
+
63 63
 
64 64
     if (output != "asis")
65 65
     {
66 66
         if (!is.numeric(paperWidth) || length(paperWidth) != 1 ||
67 67
             paperWidth <= 0)
68
-            stop("The parameter paperWidth must be ", 
68
+            stop("The parameter paperWidth must be ",
69 69
                  "single positive number (unit: inches)!")
70 70
 
71 71
         if (!is.numeric(paperHeight) || length(paperHeight) != 1 ||
72 72
             paperHeight <= 0)
73
-            stop("The parameter paperHeight must be ", 
73
+            stop("The parameter paperHeight must be ",
74 74
                  "single positive number (unit: inches)!")
75 75
 
76 76
         if (!is.numeric(margins) || length(margins) != 2)
77
-            stop("The parameter margins must be ", 
77
+            stop("The parameter margins must be ",
78 78
                  "two positive numbers (unit: inches)!")
79 79
     }
80 80
 
... ...
@@ -85,14 +85,14 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"),
85 85
             if (max(subset) < .Machine$integer.max)
86 86
                 subset <- as.integer(subset)
87 87
             else
88
-               stop("One or more values for parameter subset ", 
88
+               stop("One or more values for parameter subset ",
89 89
                     "are larger than integer!")
90 90
         }
91 91
         else if (!is.integer(subset))
92 92
             stop("The parameter subset has an invalid argument!")
93 93
 
94 94
         if (length(subset) < 2)
95
-            stop("The parameter subset is expected to be \n", 
95
+            stop("The parameter subset is expected to be \n",
96 96
                  " a vector with at least 2 entries!")
97 97
 
98 98
         if (!all(subset %in% 1:nrow(x)))
... ...
@@ -122,7 +122,7 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"),
122 122
 
123 123
     if (!is.numeric(consensusThreshold) || length(consensusThreshold) != 1 ||
124 124
         consensusThreshold < 0 || consensusThreshold > 100)
125
-        stop("The parameter consensusThreshold must be \n", 
125
+        stop("The parameter consensusThreshold must be \n",
126 126
              "a single numeric between 0 and 100 !")
127 127
 
128 128
     if (shadingMode %in% c("identical", "similar"))
... ...
@@ -151,9 +151,9 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"),
151 151
                                          "accessible area"))
152 152
         else
153 153
             stop("Missing shadingModeArg for functional shading mode. \n",
154
-                 "Valid values are: \n", 
155
-                 "\"charge\", \n", 
156
-                 "\"hydropathy\", \n", 
154
+                 "Valid values are: \n",
155
+                 "\"charge\", \n",
156
+                 "\"hydropathy\", \n",
157 157
                  "\"structure\", \n",
158 158
                  "\"chemical\",\n",
159 159
                  " \"rasmol\",\n",
... ...
@@ -203,7 +203,7 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"),
203 203
     if (output != "asis")
204 204
     {
205 205
         if (!is.character(file) || length(file) > 1)
206
-            stop("The argument for parameter file must be \n", 
206
+            stop("The argument for parameter file must be \n",
207 207
                  "a single character string!")
208 208
 
209 209
         if (substr(file, nchar(file) - 2, nchar(file)) != output)
... ...
@@ -241,6 +241,11 @@ msaPrettyPrint <- function(x, y, output=c("pdf", "tex", "dvi", "asis"),
241 241
 
242 242
     texOutput <- paste0("\\begin{texshade}{", stratifyFilenames(alFile), "}")
243 243
 
244
+    if (is(x, "AAMultipleAlignment"))
245
+        texOutput <- c(texOutput, "\\seqtype{P}")
246
+    else
247
+        texOutput <- c(texOutput, "\\seqtype{N}")
248
+
244 249
     if (length(toShow) == 1)
245 250
     {
246 251
         if (sum(width(toShow)) < ncol(x))
... ...
@@ -45,21 +45,191 @@ print.MsaMetaData <- function(x, show)
45 45
     }
46 46
 }
47 47
 
48
+print.MsaMultipleAlignmentChunk <- function(str, names=NULL, halfNrow=9, pos="",
49
+                                            addOne=FALSE)
50
+{
51
+    lx <- length(str)
52
+    iW <- nchar(as.character(lx - if (addOne) 1 else 0)) + 2
53
+
54
+    cat(format("", width=iW), sep = "")
55
+
56
+    if (length(names) > 0)
57
+        cat(format(paste(" aln", pos), width=nchar(str)[1]), " names\n")
58
+    else
59
+        cat(paste(" aln", pos), "\n", sep="")
48 60
 
49
-print.MsaMultipleAlignment <- function(x, show=c("alignment", "version",
50
-                                                 "call"))
61
+    if (addOne)
62
+        str <- paste0(format(c(paste("[", 1:(lx - 1), "]", sep=""), "Con"),
63
+                             width=iW, justify="right"), " ",
64
+                      str, if (is.null(names)) "" else paste0(" ", names))
65
+    else
66
+        str <- paste0(format(paste("[", 1:lx, "]", sep=""), width=iW,
67
+                             justify="right"), " ",
68
+                      str, if (is.null(names)) "" else paste0(" ", names))
69
+
70
+    if (lx <= 2 * halfNrow + 1)
71
+        cat(paste(str, collapse="\n"), "\n")
72
+    else
73
+    {
74
+        cat(paste(str[1:halfNrow], collapse="\n"), "\n")
75
+        cat(format("...", width = iW, justify = "right"), "...\n")
76
+        cat(paste(str[(lx - halfNrow + 1):lx], collapse="\n"), "\n")
77
+    }
78
+}
79
+
80
+print.MsaMultipleAlignment <- function(x, show=c("alignment", "version", "call"),
81
+                                       showNames=TRUE, showConsensus=TRUE,
82
+                                       halfNrow=9, nameWidth=20)
51 83
 {
52 84
     show <- match.arg(show,
53
-                      choices=c("alignment", "version", "call",
54
-                                "standardParams", "algParams"),
85
+                      choices=c("alignment", "complete", "version", "call",
86
+                                "standardParams", "algParams", "all"),
55 87
                       several.ok=TRUE)
56 88
 
89
+    if ("all" %in% show)
90
+        show <- c("complete", "version", "call", "standardParams", "algParams")
91
+
57 92
     print.MsaMetaData(x, show=show)
58 93
 
59
-    if ("alignment" %in% show)
94
+    if (any(c("alignment", "complete") %in% show))
60 95
     {
61
-        cat("\n")
62
-        print(as(x, substr(class(x), 4, nchar(class(x)))))
96
+        nr <- nrow(x)
97
+        nc <- ncol(x)
98
+
99
+        if (identical(halfNrow, NA) || identical(halfNrow, -1))
100
+            halfNrow <- nr
101
+
102
+        if (!is.numeric(halfNrow) || length(halfNrow) != 1 ||
103
+             round(halfNrow) != halfNrow || halfNrow < 1)
104
+            stop("halfNrow must be a single whole number or NA")
105
+
106
+        if (!is.numeric(nameWidth) || length(nameWidth) != 1 ||
107
+             round(nameWidth) != nameWidth || nameWidth < 5)
108
+            stop("nameWidth must be a single whole number at least as large as 5")
109
+
110
+        if (nameWidth > getOption("width") - 20)
111
+        {
112
+            nameWidth <- getOption("width") - 20
113
+            warning("nameWidth must be at least width - 20")
114
+        }
115
+
116
+        cat("\n", class(x), " with ", nr,
117
+            ifelse(nr == 1, " row and ", " rows and "),
118
+            nc, ifelse(nc == 1, " column\n", " columns\n"), sep = "")
119
+
120
+        if (nr > 0)
121
+        {
122
+            strings <- unmasked(x)
123
+            mdim <- maskeddim(x)
124
+
125
+            if (sum(mdim) > 0)
126
+            {
127
+                if (mdim[1] > 0)
128
+                {
129
+                    strings <- BStringSet(strings)
130
+
131
+                    maskStrings <- rep(BStringSet(paste(rep.int("#", nc),
132
+                                                        collapse = "")),
133
+                                       mdim[1])
134
+
135
+                    i <- as.integer(rowmask(x))
136
+
137
+                    if (!is.null(rownames(x)))
138
+                        names(maskStrings) <- rownames(x)[i]
139
+                    strings[i] <- maskStrings
140
+                }
141
+
142
+                if (mdim[2] > 0)
143
+                {
144
+                    strings <- as.matrix(strings)
145
+                    strings[, as.integer(colmask(x))] <- "#"
146
+                    strings <- BStringSet(apply(strings, 1, paste,
147
+                                                collapse = ""))
148
+                }
149
+            }
150
+
151
+            strings <- as.character(strings)
152
+
153
+            iw <- nchar(as.character(length(strings))) + 2
154
+
155
+            if (showConsensus)
156
+            {
157
+                cons <- consensusString(consensusMatrix(unmasked(x)))
158
+                strings <- c(strings, cons)
159
+
160
+                if (length(names(strings)) > 0)
161
+                    names(strings)[length(strings)] <- "Consensus"
162
+
163
+                addOne <- TRUE
164
+            }
165
+            else
166
+                addOne <- FALSE
167
+
168
+            names <- names(strings)
169
+
170
+            if (showNames && length(names) > 0)
171
+            {
172
+                names <- names(strings)
173
+
174
+                names <- ifelse(nchar(names) <= nameWidth,
175
+                                names,
176
+                                paste0(substr(names, 1, nameWidth - 3), "..."))
177
+
178
+                chunkSize <- getOption("width") - iw - nameWidth - 3
179
+            }
180
+            else
181
+            {
182
+                names <- NULL
183
+                chunkSize <- getOption("width") - iw - 2
184
+            }
185
+
186
+            seqLen <- nchar(strings)[1]
187
+
188
+            if (seqLen < 7)
189
+                strings <- format(strings, width=7, justify="left")
190
+
191
+            if (nchar(strings)[1] <= chunkSize)
192
+                print.MsaMultipleAlignmentChunk(strings, names, halfNrow=halfNrow,
193
+                                                addOne=addOne)
194
+            else if ("complete" %in% show)
195
+            {
196
+                starts <- seq(from=1, to=seqLen, by=chunkSize)
197
+                stops <- pmin(starts + chunkSize - 1, seqLen)
198
+
199
+                n <- length(starts)
200
+
201
+                for (i in 1:n)
202
+                {
203
+                    aln <- substr(strings, starts[i], stops[i])
204
+
205
+                    pos <- paste0("(", starts[i], "..", stops[i], ")")
206
+
207
+                    if (nchar(pos) + 4 > chunkSize)
208
+                        pos <- paste0(substr(pos, 1, chunkSize - 7), "...")
209
+
210
+                    if (nchar(aln)[1] < nchar(pos) + 4)
211
+                        aln <- format(aln, width=nchar(pos) + 4, justify="left")
212
+
213
+                    print.MsaMultipleAlignmentChunk(aln, names, halfNrow=halfNrow,
214
+                                                    pos=pos, addOne=addOne)
215
+
216
+                    if (i < n)
217
+                        cat("\n")
218
+                }
219
+            }
220
+           else
221
+            {
222
+                w1 <- (chunkSize - 2) %/% 2
223
+                w2 <- (chunkSize - 3) %/% 2
224
+
225
+                strings <- paste0(substr(strings, start=1, stop=w1),
226
+                                  "...",
227
+                                  substr(strings, start=seqLen - w2 + 1, stop=seqLen))
228
+
229
+                print.MsaMultipleAlignmentChunk(strings, names, halfNrow=halfNrow,
230
+                                                addOne=addOne)
231
+            }
232
+        }
63 233
     }
64 234
 }
65 235
 
... ...
@@ -1,27 +1,21 @@
1 1
 ##345678901234567890123456789012345678901234567890123456789012345678901234567890
2 2
 citHeader("To cite package 'msa' in publications use:")
3 3
 
4
-#year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date, perl = TRUE) 
5
-#vers <- paste("R package version", meta$Version)
6
-desc <- packageDescription("msa")
7
-year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", desc$Date)
8
-vers <- paste("R package version", desc$Version)
9
-url  <- desc$URL
10
-
11
-
12
-citEntry(entry="Manual", 
13
-         title = "msa -- An R Package for Multiple Sequence Alignment.", 
14
-         author = personList(as.person("Enrico Bonatesta"), as.person("Christoph Horejs-Kainrath"), as.person("Ulrich Bodenhofer")),
15
-         year = year, 
16
-         note = vers,
17
-         organization=paste("Institute of Bioinformatics",
18
-                            "Johannes Kepler University", sep=", "),
19
-         address="Linz, Austria",
20
-         url=url,
21
-         textVersion = 
22
-             paste("Enrico Bonatesta, Christoph Horejs-Kainrath and Ulrich Bodenhofer (", year, "). ",
23
-                   "msa -- An R Package for Multiple Sequence Alignment. ",
24
-                   vers, ".", sep="")
4
+citEntry(entry="Article",
5
+         title = "msa: an R package for multiple sequence alignment",
6
+         author = personList(as.person("Ulrich Bodenhofer"),
7
+                             as.person("Enrico Bonatesta"),
8
+                             as.person("Christoph Horejs-Kainrath"),
9
+                             as.person("Sepp Hochreiter")),
10
+         journal="Bioinformatics",
11
+         note="(accepted)",
12
+         year="2015",
13
+         doi="10.1093/bioinformatics/btv494",
14
+         textVersion =
15
+             paste("U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter (2015)",
16
+                   "msa: an R package for multiple sequence alignment.",
17
+                   "Bioinformatics (accepted).",
18
+                   "DOI: 10.1093/bioinformatics/btv176.")
25 19
 )
26 20
 
27 21
 citFooter(
... ...
@@ -1,6 +1,23 @@
1 1
 Change history of package msa:
2 2
 ==============================
3 3
 
4
+Version 1.2.0:
5
+- new branch for Bioconductor 3.2 release
6
+
7
+Version 1.1.2:
8
+- new print() function for multiple alignments that also
9
+  allows for displaying alignments in their entirety (plus additional
10
+  customizations)
11
+- strongly improved handling of custom substitution matrices by
12
+  msaClustalW(): now custom matrices can also be supplied for nucleotide
13
+  sequences which can also be passed via the 'substitutionMatrix' argument.
14
+  The 'dnamatrix' argument is still available for the sake of backwards
15
+  compatibility.
16
+- strongly improved handling of custom substitution matrices by
17
+  msaMuscle()
18
+- fix of improperly aligned sequence logos produced by msaPrettyPrint()
19
+- updated citation information
20
+
4 21
 Version 1.1.1:
5 22
 - fix of msa() function
6 23
 
... ...
@@ -37,6 +37,14 @@ The following slots are defined for \code{MsaMetaData} objects:
37 37
 \author{Enrico Bonatesta and Christoph Horejs-Kainrath
38 38
   <msa@bioinf.jku.at>
39 39
 }
40
+\references{
41
+  \url{http://www.bioinf.jku.at/software/msa}
42
+  
43
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
44
+  (2015). msa: an R package for multiple sequence alignment. 
45
+  \emph{Bioinformatics} (accepted). DOI:
46
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
47
+}
40 48
 \seealso{\code{\link{msa}}, \code{\link{msaClustalW}},
41 49
   \code{\link{msaClustalOmega}}, \code{\link{msaMuscle}},
42 50
   \code{\linkS4class{MsaAAMultipleAlignment}},
... ...
@@ -45,14 +45,20 @@
45 45
 }
46 46
 \section{Methods}{
47 47
   \describe{
48
-    \item{\code{print(x, show=c("alignment", "version", "call"))}:}{
48
+    \item{\code{print(x, show=c("alignment", "version", "call"),
49
+      showNames=TRUE, showConsensus=TRUE, halfNrow=9, nameWidth=20)}:}{
49 50
       prints information about the object \code{x}; the \code{show}
50 51
       argument allows for determining what should be printed.
51 52
       The \code{show} must be a character vector and may contain any
52
-      combination of the following five strings:
53
+      combination of the following strings:
53 54
       if \code{show} contains \code{"alignment"}, the multiple
54
-      sequence alignment is printed using the corresponding method
55
-      from the \pkg{Biostrings} package.
55
+      sequence alignment is printed in a way similar to the
56
+      corresponding method from the \pkg{Biostrings} package
57
+      (except for the consensus sequence, see below).
58
+      If \code{show} contains \code{"complete"}, the entire width of
59
+      the alignment is printed by splitting it over multiple blocks of
60
+      lines if necessary. This overrules \code{"alignment"} if both
61
+      are contained in the \code{show} argument.
56 62
       If \code{show} contains \code{"version"},
57 63
       the \code{version} slot is shown. If \code{show} contains
58 64
       \code{"call"}, the \code{call} slot is shown.
... ...
@@ -66,11 +72,33 @@
66 72
       data are printed. The default is
67 73
       \code{show=c("alignment", "version", "call")}, i.e. by default,
68 74
       the multiple sequence alignment is shown along with version and
69
-      call information.
75
+      call information. If \code{show} contains \code{"all"}, the
76
+      complete alignment is shown along with version information,
77
+      call, and the complete set of parameters.
78
+      As said above, by default, printing alignments is similar to
79
+      the standard \code{print} method provided by the \pkg{Biostrings}
80
+      package, whereas including \code{"complete"} in the argument
81
+      \code{show} prints the entire width of the alignment.
82
+      Unlike the method from the \pkg{Biostrings}
83
+      package, the appearance can be customized: by default,
84
+      the consensus sequence is appended below the alignment. To switch
85
+      this off, use \code{showConsensus=FALSE}. Whether or not sequence
86
+      names should be printed can be controlled via the
87
+      \code{showNames} argument. The width reserved for the sequence
88
+      names can be adjusted using the \code{nameWidth} argument;
89
+      the default is 20 like in the \pkg{Biostrings} method.
90
+      If the number of sequences in the alignment is large, output
91
+      can become quite lengthy. That is why only the first
92
+      \code{halfNrow} and the last \code{halfNrow} sequences are
93
+      shown. To show all sequences, set \code{halfNrow} to \code{NA}
94
+      or -1. Note that \code{print} can also handle masked objects,
95
+      where the masked sequences/positions are shown as hash marks.
96
+      However, the consensus sequences are computed from the
97
+      complete, unmasked alignment and displayed as such.
70 98
     }
71 99
     \item{\code{show(object)}:}{displays the alignment along with
72 100
       metadata; synonymous to calling \code{print} with default
73
-      \code{show} argument \code{c("alignment", "version", "call")}}
101
+      arguments.}
74 102
     \item{\code{version(object)}:}{displays the algorithm with which
75 103
       the multiple alignment has been computed along with its
76 104
       version number (see also \code{\linkS4class{MsaMetaData}}).}
... ...
@@ -78,8 +106,16 @@
78 106
       also \code{\linkS4class{MsaMetaData}})}
79 107
   }
80 108
 }
81
-\author{Enrico Bonatesta and Christoph Horejs-Kainrath
82
-  <msa@bioinf.jku.at>
109
+\author{Enrico Bonatesta, Christoph Horejs-Kainrath, and
110
+  Ulrich Bodenhofer <msa@bioinf.jku.at>
111
+}
112
+\references{
113
+  \url{http://www.bioinf.jku.at/software/msa}
114
+  
115
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
116
+  (2015). msa: an R package for multiple sequence alignment. 
117
+  \emph{Bioinformatics} (accepted). DOI:
118
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
83 119
 }
84 120
 \seealso{\code{\link{msa}}, \code{\link{msaClustalW}},
85 121
   \code{\link{msaClustalOmega}}, \code{\link{msaMuscle}},
... ...
@@ -101,6 +137,8 @@ show(myAlignment)
101 137
 
102 138
 ## print the results
103 139
 print(myAlignment, show="alignment")
140
+print(myAlignment, show="alignment", showConsensus=FALSE)
141
+print(myAlignment, show="complete")
104 142
 print(myAlignment, show=c("alignment", "version"))
105 143
 print(myAlignment, show="standardParams")
106 144
 print(myAlignment, show="algParams")
... ...
@@ -17,8 +17,8 @@
17 17
   \tabular{ll}{
18 18
     Package: \tab msa\cr
19 19
     Type: \tab Package\cr
20
-    Version: \tab 0.99.1\cr
21
-    Date: \tab 2015-03-29\cr
20
+    Version: \tab 1.1.2\cr
21
+    Date: \tab 2015-09-29\cr
22 22
     License: \tab GPL-2\cr
23 23
   }
24 24
 }
... ...
@@ -27,6 +27,11 @@
27 27
 }
28 28
 \references{
29 29
   \url{http://www.bioinf.jku.at/software/msa}
30
+  
31
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
32
+  (2015). msa: an R package for multiple sequence alignment. 
33
+  \emph{Bioinformatics} (accepted). DOI:
34
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
30 35
 
31 36
   Thompson, J. D., Higgins, D. G., and Gibson, T. J. (1994)
32 37
   CLUSTAL W: improving the sensitivity of progressive multiple sequence
... ...
@@ -132,6 +132,11 @@
132 132
 \references{
133 133
   \url{http://www.bioinf.jku.at/software/msa}
134 134
   
135
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
136
+  (2015). msa: an R package for multiple sequence alignment. 
137
+  \emph{Bioinformatics} (accepted). DOI:
138
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
139
+ 
135 140
   \url{http://www.clustal.org/download/clustalw_help.txt}
136 141
 
137 142
   \url{http://www.clustal.org/omega/README}
... ...
@@ -27,12 +27,11 @@
27 27
   \item{maxiters}{maximum number of iterations; the default value is 0
28 28
     (no limitation). In the original ClustalOmega implementation, this
29 29
     parameter is called \code{iterations}.}
30
-  \item{substitutionMatrix}{substitution matrix for scoring matches and
31
-    mismatches; can be a real matrix, a file name, or the name of a
32
-    built-in substitution matrix. In the latter case, the choices
30
+  \item{substitutionMatrix}{name of substitution matrix for scoring matches and
31
+    mismatches; can be one of the choices
33 32
     \code{"BLOSUM30"}, \code{"BLOSUM40"}, \code{"BLOSUM50"},
34
-    \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"} are
35
-    supported.  This parameter is a new feature - the original ClustalOmega
33
+    \code{"BLOSUM65"}, \code{"BLOSUM80"}, and \code{"Gonnet"}.
34
+    This parameter is a new feature - the original ClustalOmega
36 35
     implementation does not allow for using a custom substitution matrix.}
37 36
   \item{type}{type of the input sequences \code{inputSeqs};
38 37
     see \code{\link{msa}}.}
... ...
@@ -59,6 +58,10 @@
59 58
   specific to ClustalOmega can be passed to ClustalOmega via additional
60 59
   arguments (see argument \code{help} above).
61 60
 
61
+  Since ClustalOmega only allows for using built-in amino acid
62
+  substitution matrices, it is hardly useful for multiple alignments
63
+  of nucleotide sequences.
64
+
62 65
   For a note on the order of output sequences and direct reading from
63 66
   FASTA files, see \code{\link{msa}}.
64 67
 }
... ...
@@ -76,6 +79,11 @@
76 79
 }
77 80
 \references{
78 81
   \url{http://www.bioinf.jku.at/software/msa}
82
+  
83
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
84
+  (2015). msa: an R package for multiple sequence alignment. 
85
+  \emph{Bioinformatics} (accepted). DOI:
86
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
79 87
 
80 88
   \url{http://www.clustal.org/omega/README}
81 89
   
... ...
@@ -34,7 +34,9 @@
34 34
     built-in substitution matrix. In the latter case, the choices
35 35
     \code{"blosum"}, \code{"pam"}, \code{"gonnet"}, and \code{"id"} are
36 36
     supported for amino acid sequences. For aligning nucleotide
37
-    sequences, the parameter \code{dnamatrix} must be used instead.
37
+    sequences, the choices \code{"iub"} and \code{"clustalw"} are
38
+    possible. The parameter \code{dnamatrix} can also be used instead
39
+    for the sake of backwards compatibility.
38 40
     The valid choices for this parameter are \code{"iub"} and
39 41
     \code{"clustalw"}. In the original ClustalW implementation, this
40 42
     parameter is called \code{matrix}.}
... ...
@@ -79,7 +81,12 @@
79 81
 }
80 82
 \references{
81 83
   \url{http://www.bioinf.jku.at/software/msa}
82
-  
84
+   
85
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
86
+  (2015). msa: an R package for multiple sequence alignment. 
87
+  \emph{Bioinformatics} (accepted). DOI:
88
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
89
+ 
83 90
   \url{http://www.clustal.org/download/clustalw_help.txt}
84 91
 
85 92
   Thompson, J. D., Higgins, D. G., and Gibson, T. J. (1994)
... ...
@@ -85,6 +85,11 @@
85 85
 }
86 86
 \references{
87 87
   \url{http://www.bioinf.jku.at/software/msa}
88
+  
89
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
90
+  (2015). msa: an R package for multiple sequence alignment. 
91
+  \emph{Bioinformatics} (accepted). DOI:
92
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
88 93
 
89 94
   \url{http://www.drive5.com/muscle/muscle.html}
90 95
   
... ...
@@ -188,9 +188,14 @@
188 188
   <msa@bioinf.jku.at>
189 189
 }
190 190
 \references{
191
-\url{http://www.bioinf.jku.at/software/msa}
191
+  \url{http://www.bioinf.jku.at/software/msa}
192
+  
193
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
194
+  (2015). msa: an R package for multiple sequence alignment. 
195
+  \emph{Bioinformatics} (accepted). DOI:
196
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
192 197
 
193
-\url{https://www.ctan.org/pkg/texshade}
198
+  \url{https://www.ctan.org/pkg/texshade}
194 199
 
195 200
   Beitz, E. (2000) TeXshade: shading and labeling of multiple
196 201
   sequence alignments using LaTeX2e
... ...
@@ -574,13 +574,13 @@ void CommandLineParser::run(StringArray* args, bool xmenus, ClustalWInput *input
574 574
             { 
575 575
                 userParameters->setDNAFlag(false);
576 576
                 userParameters->setExplicitDNAFlag(true);
577
-                msg = "Sequence type explicitly set to Protein";
578
-                cout << msg << std::endl;
577
+                /*msg = "Sequence type explicitly set to Protein";
578
+		  cout << msg << std::endl;*/
579 579
             }
580 580
             else if(temp == 1) 
581 581
             {
582
-                msg = "Sequence type explicitly set to DNA";
583
-                cout << msg << std::endl;
582
+                /*msg = "Sequence type explicitly set to DNA";
583
+		  cout << msg << std::endl;*/
584 584
                 userParameters->setDNAFlag(true);
585 585
                 userParameters->setExplicitDNAFlag(true);
586 586
             }
... ...
@@ -4,10 +4,10 @@
4 4
 #define TRACE	0
5 5
 
6 6
 const int MAX_LINE = 4096;
7
-const int MAX_HEADINGS = 32;
7
+const int MAX_HEADINGS = 20;
8 8
 static char Heading[MAX_HEADINGS];
9 9
 static unsigned HeadingCount = 0;
10
-static float Mx[32][32];
10
+static float Mx[20][20];
11 11
 
12 12
 static void LogMx()
13 13
 	{
... ...
@@ -111,7 +111,7 @@ PTR_SCOREMATRIX ReadMx(TextFile &File)
111 111
 
112 112
 		char *p = Line + 1;
113 113
 		char *maxp = p + strlen(Line);
114
-		for (unsigned Col = 0; Col < HeadingCount - 1; ++Col)
114
+		for (unsigned Col = 0; Col < HeadingCount; ++Col)
115 115
 			{
116 116
 			if (p >= maxp)
117 117
 				Quit("Too few fields in line of matrix file: '%s'", Line);
... ...
@@ -148,7 +148,7 @@ PTR_SCOREMATRIX ReadMx(TextFile &File)
148 148
 				  Mx[j][i]);
149 149
 				goto ExitLoop;
150 150
 				}
151
-			}
151
+		}
152 152
 ExitLoop:;
153 153
 
154 154
 	if (g_bVerbose)
... ...
@@ -4,7 +4,7 @@
4 4
 typedef unsigned char byte;
5 5
 typedef unsigned short ushort;
6 6
 
7
-typedef float SCOREMATRIX[32][32];
7
+typedef float SCOREMATRIX[20][20];
8 8
 typedef SCOREMATRIX *PTR_SCOREMATRIX;
9 9
 
10 10
 class MSA;
... ...
@@ -156,10 +156,17 @@ function which, by default, runs ClustalW with default parameters:
156 156
 myFirstAlignment <- msa(mySequences)
157 157
 myFirstAlignment
158 158
 @
159
+\noindent Obviously, the default printing function shortens the alignment
160
+for the sake of compact output. The \verb+print()+ function provided by the
161
+\MSA\ package provides some ways for customizing the output, such as,
162
+showing the entire alignment split over multiple blocks of sub-sequences:
163
+<<showWholeWidth>>=
164
+print(myFirstAlignment, show="complete")
165
+@
159 166
 
160
-The \MSA\ package offers the function \verb+msaPrettyPrint()+ which allows
161
-for pretty-printing multiple alignments using the \LaTeX\ package \shade.
162
-As an example, the following \R\ code creates a PDF file
167
+The \MSA\ package additionally offers the function \verb+msaPrettyPrint()+
168
+which allows for pretty-printing multiple alignments using the \LaTeX\ package
169
+\shade. As an example, the following \R\ code creates a PDF file
163 170
 \verb+myfirstAlignment.pdf+ which is shown in
164 171
 Figure~\ref{fig:myFirstAlignment}:
165 172
 <<IntegratePDF2>>=
... ...
@@ -276,12 +283,26 @@ make use of most these parameters (see the documentation of ClustalOmega%
276 283
 for a comprehensive overview). Currently, the following restrictions and
277 284
 caveats apply:
278 285
 \begin{itemize}
279
-  \item The parameters \verb+infile+, \verb+clustersize+, \verb+gapOpen+,
280
-    \verb+gapExt+, \verb+iterations+, and \verb+out-order+ have been renamed to
286
+  \item The parameters \verb+infile+, \verb+cluster-size+,
287
+    \verb+iterations+, and \verb+output-order+ have been renamed to
281 288
     the argument names \verb+inputSeqs+, \verb+cluster+,
282
-    \verb+gapOpening+, \verb+gapExtension+, \verb+maxiters+, and \verb+order+
289
+    \verb+maxiters+, and \verb+order+
283 290
     in order to provide a consistent interface for all three multiple
284 291
     sequence alignment algorithms.
292
+  \item ClustalOmega does not allow for setting custom gap penalties.
293
+    Therefore, setting the parameters \verb+gapOpening+ and
294
+    \verb+gapExtension+ currently has no effect and will lead to a
295
+    warning. These arguments are only defined for
296
+    future extensions and consistency with the other algorithms
297
+    available in \MSA.
298
+  \item ClustalOmega only allows for choosing substitution matrices
299
+    from a pre-defined set of names, namely \verb+"BLOSUM30"+,
300
+    \verb+"BLOSUM40"+, \verb+"BLOSUM50"+, \verb+"BLOSUM65"+,
301
+    \verb+"BLOSUM80"+, and \verb+"Gonnet"+. This is a new feature
302
+    --- the original ClustalOmega implementation does not allow for
303
+    using any custom substitution matrix. However, since these are
304
+    all amino acid substitution matrices, ClustalOmega is still hardly
305
+    useful for multiple alignments of nucleotide sequences.
285 306
   \item Boolean flags must be passed as logical values, e.g.\
286 307
     \verb+verbose=TRUE+.
287 308
   \item The following parameters are (currently) not supported:
... ...
@@ -321,6 +342,94 @@ caveats apply:
321 342
     \verb+weight1+, and \verb+weight2+.
322 343
 \end{itemize}
323 344
 
345
+\section{Printing Multiple Sequence Alignments}\label{sec:msaPrint}
346
+
347
+As already shown above, multiple sequence alignments can be shown in
348
+plain text format on the \R\ console using the \verb+print()+ function
349
+(which is implicitly called if just the object name is entered on the
350
+\R\ console). This function allows for multiple customizations, such as,
351
+enabling/disabling to display a consensus sequence, printing the entire
352
+alignment or only a subset, enabling/disabling to display sequence names,
353
+and adjusting the width allocated for sequence names. For more information,
354
+the reader is referred to the
355
+help page of the print function:
356
+<<helpPrint,eval=FALSE>>=
357
+help("print,MsaDNAMultipleAlignment-method")
358
+@
359
+We only provide some examples here:
360
+<<printExamples>>=
361
+print(myFirstAlignment)
362
+print(myFirstAlignment, show="complete")
363
+print(myFirstAlignment, showConsensus=FALSE, halfNrow=3)
364
+print(myFirstAlignment, showNames=FALSE, show="complete")
365
+@
366
+
367
+\section{Processing Multiple Alignments}\label{sec:msaProc}
368
+
369
+The classes defined by the \MSA\ package for storing multiple alignment results
370
+have been derived from the corresponding classes defined by the
371
+\verb+Biostrings+ package. Therefore, all methods for processing
372
+multiple alignments are available and work without any practical limitation. In
373
+this section, we highlight some of these.
374
+
375
+The classes used for storing multiple alignments allow for defining masks
376
+on sequences and sequence positions via their row and column mask slots.
377
+They can be set by \verb+rowmask()+ and \verb+colmask()+ functions which serve
378
+both as setter and getter functions. To set row or column masks, an
379
+\verb+IRanges+ object must be supplied:
380
+<<maskExample>>=
381
+myMaskedAlignment <- myFirstAlignment
382
+rowM <- IRanges(start=1, end=2)
383
+rowmask(myMaskedAlignment) <- rowM
384
+myMaskedAlignment
385
+@
386
+
387
+The \verb+unmasked()+ allows for removing these masks, thereby casting
388
+the multiple alignment to a set of aligned \verb+Biostrings+ sequences
389
+(class \verb+AAStringSet+, \verb+DNAStringSet+, or \verb+RNAStringSet+):
390
+<<unmaskedExample>>=
391
+unmasked(myMaskedAlignment)
392
+@
393
+
394
+Consensus matrices can be computed conveniently as follows:
395
+<<consensusExample1>>=
396
+conMat <- consensusMatrix(myFirstAlignment)
397
+dim(conMat)
398
+conMat[, 101:110]
399
+@
400
+Note that \verb+consensusMatrix()+ cannot handle
401
+alignments with active masks. So, the masks in multiple alignment objects must
402
+must be removed prior to the computation of the consensus matrix:
403
+<<consensusExample2>>=
404
+conMat <- consensusMatrix(unmasked(myMaskedAlignment))
405
+@
406
+
407
+Consensus strings can be computed from consensus matrices:
408
+<<consensusExample3>>=
409
+## auxiliary function for splitting a string into displayable portions
410
+printSplitString <- function(x, width=getOption("width") - 1)
411
+{
412
+    starts <- seq(from=1, to=nchar(x), by=width)
413
+
414
+    for (i in 1:length(starts))
415
+        cat(substr(x, starts[i], starts[i] + width - 1), "\n")
416
+}
417
+
418
+printSplitString(consensusString(conMat))
419
+@
420
+\noindent Consensus sequences can also be computed directly without computing
421
+intermediate consensus matrices. However, the \verb+consensusString()+
422
+function cannot handle the
423
+masks contained in the multiple alignment objects (no matter whether
424
+there are active masks or not). Therefore, it is necessary to remove
425
+the masks beforehand:
426
+<<consensusExample4>>=
427
+printSplitString(consensusString(unmasked(myFirstAlignment)))
428
+printSplitString(consensusString(unmasked(myMaskedAlignment)))
429
+@
430
+\noindent Actually, the \verb+print()+ method (see Section~\ref{sec:msaPrint} above)
431
+uses this function to compute the consensus sequence.
432
+
324 433
 \section{Pretty-Printing Multiple Sequence Alignments}\label{sec:msaPrettyPrint}
325 434
 
326 435
 As already mentioned above, the \MSA\ package offers the function
... ...
@@ -398,9 +507,14 @@ The color scheme of the sequence logo can be configured with the
398 507
 \verb+"standard area"+, and \verb+"accessible area"+. The above example
399 508
 uses the color scheme \verb+"rasmol"+.
400 509
 
401
-Finally note that a consensus sequence and a sequence logo can be displayed
510
+Note that a consensus sequence and a sequence logo can be displayed
402 511
 together, but only on opposite sides.
403 512
 
513
+Finally, a caveat: for computing consensus sequences,
514
+\verb+msaPrettyPrint()+ uses the functionality provided by \shade, therefore,
515
+the results need not match to the results of the methods described in
516
+Section~\ref{sec:msaProc} above.
517
+
404 518
 \subsection{Color Shading Modes}
405 519
 
406 520
 \shade\ offers different shading schemes for displaying the multiple sequence
... ...
@@ -541,12 +655,6 @@ has a minor memory leak, but the loss of data is so small that no major
541 655
 problems are to be expected except for thousands of executions of
542 656
 ClustalOmega.
543 657
 
544
-\subsubsection*{\shade: Alignment of Sequence Logos}
545
-
546
-\shade\ has some issues with aligning the sequence logo to the multiple
547
-sequence alignment. Under which conditions this happens, would require a
548
-more detailed investigation.
549
-
550 658
 \subsubsection*{ClustalOmega vs.\ Older GCC Versions on Linux/Unix}
551 659
 
552 660
 We have encountered peculiar behavior of ClustalOmega if the package was
... ...
@@ -613,6 +721,14 @@ bibliography below).
613 721
 \section{Change Log}
614 722
 
615 723
 \begin{description}
724
+\item[Version 1.0.2:] \mbox{ }  \begin{itemize}
725
+    \item new \verb+print()+ function for multiple alignments that also
726
+      allows for displaying alignments in their entirety (plus additional
727
+      customizations)
728
+    \item fix of improperly aligned sequence logos produced by
729
+      \verb+msaPrettyPrint()+
730
+    \item updated citation information
731
+  \end{itemize}
616 732
 \item[Version 1.1.1:] fix of \verb+msa()+ function
617 733
 \item[Version 1.1.0:] new branch for Bioconductor 3.2 devel
618 734
 \item[Version 1.0.0:] first official release as part of Bioconductor 3.1