Browse code

conversions extended + corresponding changes in documentation; version number bumped to 1.5.3

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@118902 bc3139a8-67e5-0310-9ffc-ced21a209358

Ulrich Bodenhofer authored on 23/06/2016 13:33:29
Showing 7 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: msa
2 2
 Type: Package
3 3
 Title: Multiple Sequence Alignment
4
-Version: 1.5.2
5
-Date: 2016-05-02
4
+Version: 1.5.3
5
+Date: 2016-06-23
6 6
 Author: Enrico Bonatesta, Christoph Horejs-Kainrath, Ulrich Bodenhofer
7 7
 Maintainer: Ulrich Bodenhofer <bodenhofer@bioinf.jku.at>
8 8
 Description: The 'msa' package provides a unified R/Bioconductor interface to
... ...
@@ -19,7 +19,7 @@ Copyright: See file inst/COPYRIGHT
19 19
 Depends: R (>= 3.1.0), methods, Biostrings (>= 2.30.0)
20 20
 Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0),
21 21
         S4Vectors, tools
22
-Suggests: Biobase, knitr, seqinr
22
+Suggests: Biobase, knitr, seqinr, ape, phangorn
23 23
 LinkingTo: Rcpp
24 24
 SystemRequirements: GNU make
25 25
 VignetteBuilder: knitr
... ...
@@ -1,28 +1,62 @@
# Convert a multiple sequence alignment into a class used by another package.
#
# Arguments:
#   x     an object inheriting from 'MultipleAlignment'
#   type  target class in the form "pkg::class"; the first choice is the
#         default (resolved via match.arg())
#
# Value:
#   An object carrying the class named by 'type'.
#
# The seqinr, bios2mds and ape conversions only set the class attribute on
# plain R structures, so those packages need not be installed. Only the
# phyDat conversion delegates to phangorn and therefore requires it.
msaConvert <- function(x, type=c("seqinr::alignment",
                                 "bios2mds::align",
                                 "ape::AAbin",
                                 "ape::DNAbin",
                                 "phangorn::phyDat"))
{
    type <- match.arg(type)

    if (!is(x, "MultipleAlignment"))
        stop("x must be a 'MultipleAlignment' object")

    # named character vector of the aligned sequences with masks removed;
    # 'x' itself is kept untouched because later branches test its class
    xn <- as.character(unmasked(x))

    if (type == "seqinr::alignment")
    {
        out <- list(nb=length(xn),
                    nam=names(xn),
                    seq=unname(xn),
                    com=NA)

        class(out) <- "alignment"
    }
    else if (type == "bios2mds::align")
    {
        out <- .Call("SplitCharVector2List", xn)
        names(out) <- names(xn)
        class(out) <- "align"
    }
    else if (type == "ape::AAbin")
    {
        if (!is(x, "AAMultipleAlignment"))
            stop("conversion to 'ape::AAbin' only supported for ",
                 "amino acid sequences")

        # second argument: per the package documentation, gaps/dashes are
        # replaced by this character
        out <- .Call("SplitCharVector2Matrix", xn, "X")
        rownames(out) <- names(xn)
        class(out) <- "AAbin"
    }
    else if (type == "ape::DNAbin")
    {
        if (!is(x, "DNAMultipleAlignment"))
            stop("conversion to 'ape::DNAbin' only supported for ",
                 "DNA sequences")

        # lowercase the sequences first; gaps/dashes are replaced by "n"
        # (per the package documentation)
        out <- .Call("SplitCharVector2Matrix", tolower(xn), "n")
        rownames(out) <- names(xn)
        class(out) <- "DNAbin"
    }
    else if (type == "phangorn::phyDat")
    {
        if (!is(x, "DNAMultipleAlignment"))
            stop("conversion to 'phangorn::phyDat' only supported for ",
                 "DNA sequences")

        if (!requireNamespace("phangorn", quietly=TRUE))
            stop("conversion to 'phyDat' requires package 'phangorn'")

        # requireNamespace() loads but does not attach phangorn, so the
        # call must be namespace-qualified; an unqualified as.phyDat()
        # is only found if the user happens to have attached phangorn
        out <- phangorn::as.phyDat(x)
    }

    out
}
... ...
@@ -1,6 +1,17 @@
1 1
 Change history of package msa:
2 2
 ==============================
3 3
 
4
+Version 1.5.3:
5
+- additional conversions implemented for msaConvert() function
6
+- corresponding changes in documentation
7
+
8
+Versions 1.5.1 / 1.5.2:
9
+- version number bumps for technical reasons related to Bioconductor
10
+  build servers
11
+
12
+Version 1.5.0:
13
+- new branch for Bioconductor 3.4 devel
14
+
4 15
 Version 1.4.0:
5 16
 - release as part of Bioconductor 3.3
6 17
 
... ...
@@ -7,7 +7,9 @@
7 7
 }
8 8
 \usage{
9 9
     msaConvert(x,
10
-               type=c("seqinr::alignment", "bios2mds::align"))
10
+               type=c("seqinr::alignment", "bios2mds::align",
11
+                      "ape::AAbin", "ape::DNAbin",
12
+                      "phangorn::phyDat"))
11 13
 }
12 14
 \arguments{
13 15
   \item{x}{an object of class \code{\linkS4class{MultipleAlignment}}
... ...
@@ -16,23 +18,42 @@
16 18
     \code{\linkS4class{MsaDNAMultipleAlignment}}, and
17 19
     \code{\linkS4class{MsaRNAMultipleAlignment}})}
18 20
   \item{type}{a character string specifying to which type of object
19
-    \code{x} should be converted; currently, the two
20
-    values \code{"seqinr::alignment"} and \code{"bios2mds::align"}
21
-    are supported, i.e. an object of class \code{"alignment"} as
22
-    defined by the \pkg{seqinr} package or an object of class
23
-    \code{"align"} as defined in the \pkg{bios2mds} package.}
21
+    \code{x} should be converted; currently, the
22
+    values \code{"seqinr::alignment"}, \code{"bios2mds::align"},
23
+    \code{"ape::AAbin"}, \code{"ape::DNAbin"}, and
24
+    \code{"phangorn::phyDat"} are supported.}
24 25
   }
25 26
 \details{
26
-  The function converts \code{x} to the class of object
27
-  as specified by the \code{type} argument. Note that this conversion
28
-  happens independently of the packages \pkg{seqinr} and
29
-  \pkg{bios2mds}. More specifically, lists with the respective
30
-  class attributes are returned without actually loading any of
31
-  the two packages. They need not even be installed. This approach
32
-  has been chosen to avoid abundant dependencies and possible
33
-  incompatibilities. That is also why the standard S3/S4
34
-  mechanism of \code{as}/\code{as.class} functions is not
35
-  used.
27
+  The function converts \code{x} to the class of object as
28
+  specified by the \code{type} argument. The values possible
29
+  for the \code{type} argument follow the same principle
30
+  \emph{pkg}::\emph{cl}, i.e. \code{x} is converted to class
31
+  \emph{cl} as defined in the \emph{pkg} package.
32
+
33
+  The conversions for usage by the packages \pkg{seqinr},
34
+  \pkg{bios2mds}, and \pkg{ape} work independently of these
35
+  packages and do not strictly require these packages. They
36
+  need not even be installed. This approach has been chosen
37
+  to avoid abundant dependencies and possible incompatibilities.
38
+  That is also why the standard S3/S4 mechanism of
39
+  \code{as}/\code{as.class} functions is not used.
40
+
41
+  The conversion to the \code{phyDat} class can be done
42
+  easily using the \code{as.phyDat} function from the
43
+  \pkg{phangorn} package. The \code{msaConvert} function
44
+  still provides this conversion for the sake of consistency.
45
+  However, this conversion is just a wrapper function around
46
+  the \code{as.phyDat} function from the \pkg{phangorn}
47
+  package. Thus, the \pkg{phangorn} package needs to be installed.
48
+
49
+  The conversion \code{"ape::AAbin"}
50
+  only works for multiple alignments of amino acid sequences,
51
+  while the conversions \code{"ape::DNAbin"} and
52
+  \code{"phangorn::phyDat"} only work for multiple alignments
53
+  of DNA sequences. When converting to \code{"ape::AAbin"},
54
+  gaps/dashes are replaced by \sQuote{X}. Moreover, conversions
55
+  to \code{"ape::DNAbin"} also convert all characters to
56
+  lowercase and replace gaps/dashes by \sQuote{n}.
36 57
 }
37 58
 \value{
38 59
   The function returns an object of the class as specified by
... ...
@@ -2,6 +2,7 @@
2 2
 #include "RClustalW.h"
3 3
 #include "RMuscle.h"
4 4
 #include "SplitCharVector2List.h"
5
+#include "SplitCharVector2Matrix.h"
5 6
 
6 7
 #include <R.h>
7 8
 #include <Rinternals.h>
... ...
@@ -14,8 +15,10 @@ static const R_CallMethodDef callMethods[] = {
14 15
     {"RClustalW", (DL_FUNC) &RClustalW, 9},
15 16
     /* RClustalOmega.cpp */
16 17
     {"RClustalOmega", (DL_FUNC) &RClustalOmega, 9},
17
-    /* convertMultipleAlignment.cpp */
18
+    /* SplitCharVector2List.cpp */
18 19
     {"SplitCharVector2List", (DL_FUNC) &SplitCharVector2List, 1},
20
+    /* SplitCharVector2Matrix.cpp */
21
+    {"SplitCharVector2Matrix", (DL_FUNC) &SplitCharVector2Matrix, 2},
19 22
     {NULL, NULL, 0}
20 23
 };
21 24
 
... ...
@@ -9,6 +9,23 @@
9 9
   pages =	 {135--139}
10 10
 }
11 11
 
12
+@incollection{CharifLobry2007,
13
+  author =	 {D. Charif and J. R. Lobry},
14
+  title =	 {Seqin{R} 1.0-2: a contributed package to the {R}
15
+                  project for statistical computing devoted to
16
+                  biological sequences retrieval and analysis},
17
+  booktitle =	 {Structural approaches to sequence evolution:
18
+                  Molecules, networks, populations},
19
+  year =	 2007,
20
+  editor =	 {U. Bastolla and M. Porto and H. E. Roman and
21
+                  M. Vendruscolo},
22
+  series =	 {Biological and Medical Physics, Biomedical
23
+                  Engineering},
24
+  pages =	 {207--232},
25
+  address =	 {New York},
26
+  publisher =	 {Springer}
27
+}
28
+
12 29
 @article{Eddelbuettel2011,
13 30
   title =	 {{Rcpp}: Seamless {R} and {C++} Integration},
14 31
   author =	 {D. Eddelbuettel and R. Fran\c{c}ois},
... ...
@@ -103,6 +120,35 @@
103 120
   pages =	 {205--217}
104 121
 }
105 122
 
123
+@article{ParadisClaudeStrimmer2004,
124
+  title =	 {A{PE}: analyses of phylogenetics and evolution in
125
+                  {R} language},
126
+  author =	 {E. Paradis and J. Claude and K. Strimmer},
127
+  journal =	 {Bioinformatics},
128
+  year =	 2004,
129
+  volume =	 20,
130
+  pages =	 {289--290},
131
+}
132
+
133
+@manual{PeleBecuAbdiChabbert2012,
134
+  title =	 {{bios2mds: From BIOlogical Sequences to
135
+                  MultiDimensional Scaling}},
136
+  author =	 {J. Pele with J.-M. Becu and H. Abdi and
137
+                  M. Chabbert},
138
+  year =	 2012,
139
+  note =	 {R package version 1.2.2}
140
+}
141
+
142
+@article{Schliep2011,
143
+  title =	 {phangorn: phylogenetic analysis in {R}},
144
+  author =	 {K. P. Schliep},
145
+  journal =	 {Bioinformatics},
146
+  year =	 2011,
147
+  volume =	 27,
148
+  number =	 4,
149
+  pages =	 {592--593},
150
+}
151
+
106 152
 @article{Sievers2011,
107 153
   author =	 {F. Sievers and A. Wilm and D. Dineen and
108 154
                   T. J. Gibson and K. Karplus and W. Li and R. Lopez
... ...
@@ -135,3 +181,12 @@
135 181
   publisher =	 {Chapman \&\ Hall/CRC},
136 182
   year =	 2014
137 183
 }
184
+
185
+@article{YuSmithZhuGuanLam2016,
186
+  title =	 {ggtree: an {R} package for visualization and
187
+                  annotation of phylogenetic tree with different types
188
+                  of meta-data},
189
+  author =	 {G. Yu and D. Smith and H. Zhu and Y. Guan and
190
+                  T. T. Y. Lam},
191
+  year =	 {submitted}
192
+}
... ...
@@ -436,14 +436,16 @@ uses this function to compute the consensus sequence.
436 436
 \subsection{Interfacing to Other Packages}
437 437
 
438 438
 There are also other sequence analysis packages that use or make use of multiple
439
-sequence alignments. The \msa\ package does not directly interface to any of these packages
439
+sequence alignments. The \msa\ package does not directly interface to these packages
440 440
 in order to avoid dependencies and possible incompatibilities. However, \msa\ provides
441 441
 a function \verb+msaConvert()+ that allows for converting multiple sequence alignment
442
-objects to other types/classes. Currently, two such conversions are available, namely to
443
-objects of class \verb+alignment+ (as defined and used by the \verb+seqinr+ package) and
444
-to objects of class \verb+align+ (as defined and used by the \verb+bios2mds+ package).
445
-Note that the conversion is performed without loading or depending on the respective
446
-packages.
442
+objects to other types/classes. Currently, five such conversions are available, namely to
443
+the classes \verb+alignment+ (\verb+seqinr+ package \cite{CharifLobry2007}),
444
+\verb+align+ (\verb+bios2mds+ package \cite{PeleBecuAbdiChabbert2012}),
445
+\verb+AAbin+/\verb+DNAbin+ (\verb+ape+ package \cite{ParadisClaudeStrimmer2004}),
446
+and \verb+phyDat+ (\verb+phangorn+ package \cite{Schliep2011}). Except for the
447
+conversion to the class \verb+phyDat+, these conversions are performed without loading
448
+or depending on the respective packages.
447 449
 
448 450
 In the following example, we perform a multiple alignment of Hemoglobin alpha
449 451
 example sequences and convert the result for later processing with the \verb+seqinr+
... ...
@@ -461,14 +463,15 @@ the \verb+seqinr+ package:
461 463
 library(seqinr)
462 464
 
463 465
 d <- dist.alignment(hemoAln2, "identity")
464
-as.matrix(d)[3:4, 3:4]
466
+as.matrix(d)[2:5, "HBA1_Homo_sapiens", drop=FALSE]
465 467
 @
466
-Now we can construct a draft phylogenetic tree using the \verb+hclust()+ function from
467
-the \verb+stats+ package:
468
-<<HemoglobinTree,output.width='0.8\\textwidth',output.height='0.5\\textwidth'>>=
469
-hemoTree <- hclust(d)
470
-plot(hemoTree, main="Phylogenetic Tree of Hemoglobin Alpha Sequences",
471
-     xlab="", sub="")
468
+Now we can construct a phylogenetic tree with the neighbor joining algorithm using the
469
+\verb+nj()+ function from the \verb+ape+ package:
470
+<<HemoglobinTree,output.width='0.8\\textwidth',output.height='0.5\\textwidth',message=FALSE,results='hide'>>=
471
+library(ape)
472
+
473
+hemoTree <- nj(d)
474
+plot(hemoTree, main="Phylogenetic Tree of Hemoglobin Alpha Sequences")
472 475
 @
473 476
 
474 477
 The following example shows how to convert a multiple alignment object into an object of
... ...
@@ -478,6 +481,20 @@ hemoAln3 <- msaConvert(hemoAln, type="bios2mds::align")
478 481
 str(hemoAln3)
479 482
 @
480 483
 
484
+The conversions to the standard \verb+Biostrings+ classes are straightforward using
485
+standard \verb+as()+ methods and not provided by the \verb+msaConvert()+ function.
486
+The following example converts a multiple alignment object to class \verb+BStringSet+
487
+(e.g.\ the \verb+msaplot()+ function from the \verb+ggtree+ package
488
+\cite{YuSmithZhuGuanLam2016} accepts \verb+BStringSet+ objects):
489
+<<Hemoglobin4>>=
490
+hemoAln4 <- as(hemoAln, "BStringSet")
491
+hemoAln4
492
+@
493
+
494
+\notebox{The \texttt{msaConvert()} function has been introduced in version 1.3.3 of the
495
+  \MSA\ package. So, to have this function available, at least Bioconductor 3.3
496
+  is required, which requires at least R 3.3.0.}
497
+
481 498
 \section{Pretty-Printing Multiple Sequence Alignments}\label{sec:msaPrettyPrint}
482 499
 
483 500
 As already mentioned above, the \MSA\ package offers the function
... ...
@@ -817,6 +834,13 @@ bibliography below).
817 834
 \section{Change Log}
818 835
 
819 836
 \begin{description}
837
+\item[Version 1.5.3:] \mbox{ }  \begin{itemize}
838
+   \item additional conversions implemented for \verb+msaConvert()+ function
839
+   \item corresponding changes in documentation
840
+  \end{itemize}
841
+\item[Versions 1.5.1 and 1.5.2:] version number bumps for technical reasons
842
+  related to Bioconductor build servers
843
+\item[Version 1.5.0:] new branch for Bioconductor 3.4 devel
820 844
 \item[Version 1.4.0:] release as part of Bioconductor 3.3
821 845
 \item[Version 1.3.7:] \mbox{ }  \begin{itemize}
822 846
    \item fixes in \verb+msaPrettyPrint()+ function
... ...
@@ -847,7 +871,7 @@ bibliography below).
847 871
 \item[Version 1.3.1:] \mbox{ }  \begin{itemize}
848 872
    \item fixes in Makefiles and Makevars files to account for changes in build system
849 873
   \end{itemize}
850
-\item[Version 1.3.0:] devel branch created from version 1.2.0
874
+\item[Version 1.3.0:] new branch for Bioconductor 3.3 devel
851 875
 \item[Version 1.2.0:] release as part of Bioconductor 3.2
852 876
 \item[Version 1.1.3:] \mbox{ }  \begin{itemize}
853 877
     \item bug fix related to custom substitution matrices
... ...
@@ -872,84 +896,11 @@ bibliography below).
872 896
 \item[Version 1.1.1:]  \mbox{ }  \begin{itemize}
873 897
     \item fix of \verb+msa()+ function
874 898
   \end{itemize}
875
-\item[Version 1.1.0:] devel branch created from version 1.0.0
899
+\item[Version 1.1.0:] new branch for Bioconductor 3.2 devel
876 900
 \item[Version 1.0.0:] first official release as part of Bioconductor 3.1
877 901
 \end{description}
878 902
 
879
-%\bibliographystyle{plain}
880
-%\bibliography{lit}
881
-
882
-\begin{thebibliography}{10}
883
-
884
-\bibitem{Beitz2000}
885
-E.~Beitz.
886
-\newblock {\TeX shade}: shading and labeling of multiple sequence alignments
887
-  using {\LaTeX2e}.
888
-\newblock {\em Bioinformatics}, 16(2):135--139, 2000.
889
-
890
-\bibitem{Edgar2004b}
891
-R.~C. Edgar.
892
-\newblock {MUSCLE}: a multiple sequence alignment method with reduced time and
893
-  space complexity.
894
-\newblock {\em BMC Bioinformatics}, 5(5):113, 2004.
895
-
896
-\bibitem{Edgar2004a}
897
-R.~C. Edgar.
898
-\newblock {MUSCLE:} multiple sequence alignment with high accuracy and high
899
-  throughput.
900
-\newblock {\em Nucleic Acids Res.}, 32(5):1792--1797, 2004.
901
-
902
-\bibitem{Lamport1999}
903
-L.~Lamport.
904
-\newblock {\em {\LaTeX} --- A Document Preparation System. User's Guide and
905
-  Reference Manual}.
906
-\newblock Addison-Wesley Longman, Amsterdam, 1999.
907
-
908
-\bibitem{Leisch2002}
909
-F.~Leisch.
910
-\newblock Sweave: dynamic generation of statistical reports using literate data
911
-  analysis.
912
-\newblock In W.~H\"ardle and B.~R\"onz, editors, {\em Compstat 2002 ---
913
-  Proceedings in Computational Statistics}, pages 575--580, Heidelberg, 2002.
914
-  Physica-Verlag.
915
-
916
-\bibitem{Morgenstern1999}
917
-B.~Morgenstern.
918
-\newblock {DIALIGN 2}: improvement of the segment-to-segment approach to
919
-  multiple sequence alignment.
920
-\newblock {\em Bioinformatics}, 15(3):211--218, 1999.
921
-
922
-\bibitem{Nethercote2007}
923
-N.~Nethercote and J.~Seward.
924
-\newblock Valgrind: A framework for heavyweight dynamic binary instrumentation.
925
-\newblock In {\em Proc. of the ACM SIGPLAN 2007 Conf. on Programming Language
926
-  Design and Implementation}, San Diego, CA, 2007.
927
-
928
-\bibitem{Notredame2000}
929
-C.~Notredame, D.~G. Higgins, and J.~Heringa.
930
-\newblock {T-Coffee}: A novel method for fast and accurate multiple sequence
931
-  alignment.
932
-\newblock {\em J. Mol. Biol.}, 302(1):205--217, 2000.
933
-
934
-\bibitem{Sievers2011}
935
-F.~Sievers, A.~Wilm, D.~Dineen, T.~J. Gibson, K.~Karplus, W.~Li, R.~Lopez,
936
-  H.~McWilliam, M.~Remmert, J.~S\"oding, J.~D. Thompson, and D.~G. Higgins.
937
-\newblock Fast, scalable generation of high-quality protein multiple sequence
938
-  alignments using {Clustal Omega}.
939
-\newblock {\em Mol. Syst. Biol.}, 7:539, 2011.
940
-
941
-\bibitem{Thompson1994}
942
-J.~D. Thompson, D.~G. Higgins, and T.~J. Gibson.
943
-\newblock {CLUSTAL W}: improving the sensitivity of progressive multiple
944
-  sequence alignment through sequence weighting, position-specific gap
945
-  penalties and weight matrix choice.
946
-\newblock {\em Nucleic Acids Res.}, 22(22):4673--4680, 2004.
947
-
948
-\bibitem{Xie2014}
949
-Y.~Xie.
950
-\newblock {\em Dynamic Documents with R and knitr}.
951
-\newblock Chapman \&\ Hall/CRC, 2014.
952
-
953
-\end{thebibliography}
903
+\bibliographystyle{plain}
904
+\bibliography{lit}
954 905
 
955 906
 \end{document}