Browse code

Last-minute fixes; version number bumped to 1.1.3

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@109581 bc3139a8-67e5-0310-9ffc-ced21a209358

Ulrich Bodenhofer authored on 13/10/2015 17:35:42
Showing 9 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: msa
2 2
 Type: Package
3 3
 Title: Multiple Sequence Alignment
4
-Version: 1.1.2
5
-Date: 2015-10-10
4
+Version: 1.1.3
5
+Date: 2015-10-13
6 6
 Author: Enrico Bonatesta, Christoph Horejs-Kainrath, Ulrich Bodenhofer
7 7
 Maintainer: Ulrich Bodenhofer <bodenhofer@bioinf.jku.at>
8 8
 Description: This package provides a unified R/Bioconductor interface to the
... ...
@@ -695,7 +695,7 @@ checkProfileScore <- function(type, params){
695 695
         }
696 696
         ##consistency check
697 697
         ##type==RNA|DNA =>only spn==TRUE, all others FALSE possible
698
-        if (identical(type, "rna") | identical(type, "dna")) {
698
+        if (identical(type, "rna") || identical(type, "dna")) {
699 699
             if (!params[["spn"]] | params[["le"]] | params[["sp"]] |
700 700
                  params[["sv"]]){
701 701
                 stop("The used profile score is inconsistent. \n",
... ...
@@ -158,7 +158,7 @@ msaMuscle <- function(inputSeqs,
158 158
         headerNames <- c("A", "C", "D", "E", "F",
159 159
                          "G", "H", "I", "K", "L",
160 160
                          "M", "N", "P", "Q", "R",
161
-                         "S", "T",  "V","W", "Y")
161
+                         "S", "T", "V", "W", "Y")
162 162
 
163 163
         if (type == "protein")
164 164
             reqNames <- headerNames
... ...
@@ -177,6 +177,9 @@ msaMuscle <- function(inputSeqs,
177 177
 
178 178
         substitutionMatrix <- substitutionMatrix[rowPerm, colPerm]
179 179
 
180
+        if (type == "rna")
181
+            reqNames <- c("A", "C", "G", "T")
182
+
180 183
         auxMat <- matrix(0, length(headerNames), length(headerNames))
181 184
         rownames(auxMat) <- headerNames
182 185
         colnames(auxMat) <- headerNames
... ...
@@ -189,6 +192,25 @@ msaMuscle <- function(inputSeqs,
189 192
         if (any(is.na(substitutionMatrix)) || any(is.na(substitutionMatrix)) ||
190 193
             any(is.infinite(substitutionMatrix)))
191 194
             stop("substitutionMatrix contains invalid values!")
195
+
196
+        params[["le"]] <- FALSE
197
+        params[["sv"]] <- FALSE
198
+
199
+        if (type == "protein")
200
+        {
201
+            params[["sp"]] <- TRUE
202
+            params[["spn"]] <- FALSE
203
+        }
204
+        else
205
+        {
206
+            params[["sp"]] <- FALSE
207
+            params[["spn"]] <- TRUE
208
+        }
209
+
210
+        paramsCopy[["le"]] <- NULL
211
+        paramsCopy[["sv"]] <- NULL
212
+        paramsCopy[["sp"]] <- NULL
213
+        paramsCopy[["spn"]] <- NULL
192 214
      }
193 215
 
194 216
     ##############
... ...
@@ -205,13 +227,13 @@ msaMuscle <- function(inputSeqs,
205 227
     if (params$le) {
206 228
         gapOpening <- checkGapOpening2(gapOpening, substitutionMatrix, 2.9)
207 229
     }
208
-    if (params$sp) {
230
+    else if (params$sp) {
209 231
         gapOpening <- checkGapOpening2(gapOpening, substitutionMatrix, 1439)
210 232
     }
211
-    if (params$sv) {
233
+    else if (params$sv) {
212 234
         gapOpening <- checkGapOpening2(gapOpening, substitutionMatrix, 300)
213 235
     }
214
-    if (params$spn) {
236
+    else if (params$spn) {
215 237
         if (identical(type,"dna")) {
216 238
         gapOpening <- checkGapOpening2(gapOpening, substitutionMatrix, 400)
217 239
         }
... ...
@@ -222,7 +244,6 @@ msaMuscle <- function(inputSeqs,
222 244
            stop("If you use sequences of type \"protein\", \n",
223 245
                 "you can't use the parameter \"spn\"!")
224 246
         }
225
-
226 247
     }
227 248
 
228 249
     ##FIXME TODO: check default-Value for type=protein
... ...
@@ -4,6 +4,10 @@ Change history of package msa:
4 4
 Version 1.2.0:
5 5
 - new branch for Bioconductor 3.2 release
6 6
 
7
+Version 1.1.3: 
8
+- bug fix related to custom substitution matrices in the MUSCLE interface
9
- corrections and updates of documentation
10
+
7 11
 Version 1.1.2:
8 12
 - new print() function for multiple alignments that also
9 13
   allows for displaying alignments in their entirety (plus additional
... ...
@@ -24,7 +24,10 @@
24 24
     for amino acid sequences depends on the profile score settings:
25 25
     for the setting \code{le=TRUE}, the default is 2.9, for
26 26
     \code{sp=TRUE}, the default is 1,439, and for \code{sv=TRUE},
27
-    the default is 300.}
27
+    the default is 300. Note that these defaults may not be suitable
28
+    if custom substitution matrices are being used. In such a case,
29
+    a sensible choice of gap penalties that is well suited to the
30
+    substitution matrix must be made.}
28 31
   \item{gapExtension}{gap extension penalty; the default is 0.}
29 32
   \item{maxiters}{maximum number of iterations; the default is 16.
30 33
     In the original MUSCLE implementation, it is also possible
... ...
@@ -120,5 +123,9 @@ msaMuscle(mySeqs)
120 123
 msaMuscle(mySeqs, gapOpening=12, gapExtension=3, maxiters=16,
121 124
           cluster="upgmamax", SUEFF=0.4, brenner=FALSE,
122 125
           order="input", verbose=FALSE)
126
+
127
+## call msaMuscle with a custom substitution matrix
128
+data(PAM120)
129
+msaMuscle(mySeqs, substitutionMatrix=PAM120)
123 130
 }
124 131
 \keyword{manip}
... ...
@@ -50,11 +50,11 @@
50 50
     is passed as argument \code{x}.}
51 51
   \item{alFile}{name of alignment file to be created;
52 52
     \code{msaPrettyPrint} first writes the multiple alignment \code{x}
53
-    to an alignment (\code{.aln}) file. The name of this file can be
53
+    to a \code{.fasta} file. The name of this file can be
54 54
     determined with the \code{alFile} argument. If no name 
55 55
     is given, the name of the output file defaults to the name of the
56 56
     object provided as argument \code{x} along with the suffix
57
-    \code{.aln}. Note that this might lead to
57
+    \code{.fasta}. Note that this might lead to
58 58
     invalid file names if not the name of an object, but an R expression
59 59
     is passed as argument \code{x}.}
60 60
   \item{askForOverwrite}{if \code{TRUE} (default), 
... ...
@@ -147,7 +147,7 @@
147 147
 	applicable) is printed to the R session.}
148 148
 }
149 149
 \details{The \code{msaPrettyPrint} function writes a multiple alignment
150
-  to an alignment (\code{.aln}) file and creates LaTeX code for
150
+  to a \code{.fasta} file and creates LaTeX code for
151 151
   pretty-printing the multiple alignment on the basis of the
152 152
   LaTeX package \pkg{texshade.sty}. If \code{output="asis"},
153 153
   \code{msaPrettyPrint} prints a LaTeX fragment consisting of the
... ...
@@ -7,7 +7,7 @@ const int MAX_LINE = 4096;
7 7
 const int MAX_HEADINGS = 20;
8 8
 static char Heading[MAX_HEADINGS];
9 9
 static unsigned HeadingCount = 0;
10
-static float Mx[20][20];
10
+static float Mx[32][32];
11 11
 
12 12
 static void LogMx()
13 13
 	{
... ...
@@ -148,7 +148,7 @@ PTR_SCOREMATRIX ReadMx(TextFile &File)
148 148
 				  Mx[j][i]);
149 149
 				goto ExitLoop;
150 150
 				}
151
-		}
151
+			}
152 152
 ExitLoop:;
153 153
 
154 154
 	if (g_bVerbose)
... ...
@@ -172,7 +172,7 @@ PTR_SCOREMATRIX ReadMxFromR(std::vector<std::string> colnames, float matrix[32][
172 172
 
173 173
 #if TRACE
174 174
 	{
175
-	Log("ReadMx\n");
175
+	Log("ReadMxFromR\n");
176 176
 	Log("%d headings: ", HeadingCount);
177 177
 	for (unsigned i = 0; i < HeadingCount; ++i)
178 178
 		Log("%c", Heading[i]);
... ...
@@ -210,7 +210,7 @@ PTR_SCOREMATRIX ReadMxFromR(std::vector<std::string> colnames, float matrix[32][
210 210
 				Log("Row letter = %u\n", RowLetter);
211 211
 		#endif
212 212
 
213
-		for (unsigned Col = 0; Col < HeadingCount - 1; ++Col) {
213
+		for (unsigned Col = 0; Col < HeadingCount; ++Col) {
214 214
 			char HeaderChar = Heading[Col];
215 215
 			//printf("Header char: %c\n", HeaderChar);
216 216
 			if (IsResidueChar(HeaderChar)) {
... ...
@@ -4,7 +4,7 @@
4 4
 typedef unsigned char byte;
5 5
 typedef unsigned short ushort;
6 6
 
7
-typedef float SCOREMATRIX[20][20];
7
+typedef float SCOREMATRIX[32][32];
8 8
 typedef SCOREMATRIX *PTR_SCOREMATRIX;
9 9
 
10 10
 class MSA;
... ...
@@ -209,7 +209,7 @@ The example in Section~\ref{sec:impatient} above simply called
209 209
 the function \verb+msa()+ without any additional arguments.
210 210
 We mentioned already that, in this case, ClustalW is called with default
211 211
 parameters. We can also explicitly request ClustalW or one of the two
212
-other algorithms ClustalOmega or Muscle:
212
+other algorithms ClustalOmega or MUSCLE:
213 213
 <<OtherAlgorithms,>>=
214 214
 myClustalWAlignment <- msa(mySequences, "ClustalW")
215 215
 myClustalWAlignment
... ...
@@ -677,12 +677,6 @@ source package tarball, untar it, comment/uncomment the corresponding line in
677 677
 \verb+msa/src/ClustalOmega/msaMakefile+ (see first six lines), and
678 678
 build/install the package from source.
679 679
 
680
-\subsubsection*{MUSCLE with Custom Substitution Matrices}
681
-
682
-We are aware the that our MUSCLE interface is rather picky in terms of the
683
-format in which substitution matrices are passed to the \verb+msaMuscle()+
684
-function. This interface will be improved in future versions.
685
-
686 680
 \section{Future Extensions}\label{sec:future}
687 681
 
688 682
 We envision the following changes/extensions in future versions of the package:
... ...
@@ -721,10 +715,25 @@ bibliography below).
721 715
 \section{Change Log}
722 716
 
723 717
 \begin{description}
724
-\item[Version 1.0.2:] \mbox{ }  \begin{itemize}
718
+\item[Version 1.2.0:] \mbox{ }  \begin{itemize}
719
+    \item new branch for Bioconductor 3.2 release
720
+  \end{itemize}
721
+\item[Version 1.1.3:] \mbox{ }  \begin{itemize}
722
+    \item bug fix related to custom substitution matrices
723
+      in the MUSCLE interface
724
+    \item corrections and updates of documentation
725
+  \end{itemize}
726
+\item[Version 1.1.2:] \mbox{ }  \begin{itemize}
725 727
     \item new \verb+print()+ function for multiple alignments that also
726 728
       allows for displaying alignments in their entirety (plus additional
727 729
       customizations)
730
+    \item strongly improved handling of custom substitution matrices by
731
+      \verb+msaClustalW()+: now custom matrices can also be supplied for nucleotide
732
+      sequences which can also be passed via the \verb+substitutionMatrix+ argument.
733
+      The \verb+dnamatrix+ argument is still available for the sake of backwards
734
+      compatibility.
735
+    \item strongly improved handling of custom substitution matrices by
736
+      \verb+msaMuscle()+
728 737
     \item fix of improperly aligned sequence logos produced by
729 738
       \verb+msaPrettyPrint()+
730 739
     \item updated citation information