Browse code

added new conversion tools; version number bumped to 1.3.3

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@114012 bc3139a8-67e5-0310-9ffc-ced21a209358

Ulrich Bodenhofer authored on 25/02/2016 15:23:41
Showing 9 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: msa
2 2
 Type: Package
3 3
 Title: Multiple Sequence Alignment
4
-Version: 1.3.2
5
-Date: 2015-12-21
4
+Version: 1.3.3
5
+Date: 2016-02-25
6 6
 Author: Enrico Bonatesta, Christoph Horejs-Kainrath, Ulrich Bodenhofer
7 7
 Maintainer: Ulrich Bodenhofer <bodenhofer@bioinf.jku.at>
8 8
 Description: This package provides a unified R/Bioconductor interface to the
... ...
@@ -19,7 +19,7 @@ Copyright: See file inst/COPYRIGHT
19 19
 Depends: R (>= 3.1.0), methods, Biostrings (>= 2.30.0)
20 20
 Imports: Rcpp (>= 0.11.1), BiocGenerics, IRanges (>= 1.20.0),
21 21
         S4Vectors, tools
22
-Suggests: Biobase, knitr
22
+Suggests: Biobase, knitr, seqinr
23 23
 LinkingTo: Rcpp
24 24
 SystemRequirements: GNU make
25 25
 VignetteBuilder: knitr
... ...
@@ -27,7 +27,7 @@ LazyLoad: yes
27 27
 Collate: AllClasses.R AllGenerics.R params-methods.R version-methods.R
28 28
         helperFunctions.R inputChecks.R convertRows.R msaPrettyPrint.R
29 29
         print-methods.R show-methods.R msa.R msaMuscle.R msaClustalW.R
30
-        msaClustalOmega.R
30
+        msaClustalOmega.R msaConvert.R
31 31
 biocViews: MultipleSequenceAlignment, Alignment, MultipleComparison,
32 32
         Sequencing
33 33
 NeedsCompilation: yes
... ...
@@ -8,8 +8,10 @@ import(S4Vectors)
8 8
 import(IRanges)
9 9
 importFrom(tools, texi2dvi)
10 10
 
11
-export(msa, msaMuscle, msaClustalW, msaClustalOmega, msaPrettyPrint)
11
+export(msa, msaMuscle, msaClustalW, msaClustalOmega, msaPrettyPrint,
12
+       msaConvert)
12 13
 
13 14
 exportClasses(MsaDNAMultipleAlignment, MsaRNAMultipleAlignment,
14 15
               MsaAAMultipleAlignment, MsaMetaData)
15 16
 exportMethods(params, version, show, print)
17
+
... ...
@@ -1 +1 @@
1
-setGeneric("version", function(object) standardGeneric("version"))
2 1
\ No newline at end of file
2
+setGeneric("version", function(object) standardGeneric("version"))
... ...
@@ -1,6 +1,11 @@
1 1
 Change history of package msa:
2 2
 ==============================
3 3
 
4
+Version 1.3.3:
5
+- added function for converting multiple sequence alignments for
6
+  use with other sequence alignment packages
7
+- corresponding changes in documentation
8
+
4 9
 Version 1.3.2:
5 10
 - further fixes in Makefiles and Makevars files to account for changes
6 11
   in build system
7 12
new file mode 100644
... ...
@@ -0,0 +1,70 @@
1
+\name{msaConvert}
2
+\alias{msaConvert}
3
+\title{Convert Multiple Sequence Alignment for Other Packages}
4
+\description{
5
+  This function converts a multiple sequence alignment object
6
+  to formats used in other sequence analysis packages.
7
+}
8
+\usage{
9
+    msaConvert(x,
10
+               type=c("seqinr::alignment", "bios2mds::align"))
11
+}
12
+\arguments{
13
+  \item{x}{an object of class \code{\linkS4class{MultipleAlignment}}
14
+    (which includes objects of classes
15
+    \code{\linkS4class{MsaAAMultipleAlignment}},
16
+    \code{\linkS4class{MsaDNAMultipleAlignment}}, and
17
+    \code{\linkS4class{MsaRNAMultipleAlignment}})}
18
+  \item{type}{a character string specifying to which type of object
19
+    \code{x} should be converted; currently, the two
20
+    values \code{"seqinr::alignment"} and \code{"bios2mds::align"}
21
+    are supported, i.e. an object of class \code{"alignment"} as
22
+    defined by the \pkg{seqinr} package or an object of class
23
+    \code{"align"} as defined in the \pkg{bios2mds} package.}
24
+  }
25
+\details{
26
+  The function converts \code{x} to the class of object
27
+  as specified by the \code{type} argument. Note that this conversion
28
+  happens independently of the packages \pkg{seqinr} and
29
+  \pkg{bios2mds}. More specifically, lists with the respective
30
+  class attributes are returned without actually loading any of
31
+  the two packages. They need not even be installed. This approach
32
+  has been chosen to avoid abundant dependencies and possible
33
+  incompatibilities. That is also why the standard S3/S4
34
+  mechanism of \code{as}/\code{as.class} functions is not
35
+  used.
36
+}
37
+\value{
38
+  The function returns an object of the class as specified by
39
+  the \code{type} argument.
40
+}
41
+\author{Ulrich Bodenhofer <msa@bioinf.jku.at>
42
+}
43
+\references{
44
+  \url{http://www.bioinf.jku.at/software/msa}
45
+  
46
+  U. Bodenhofer, E. Bonatesta, C. Horejs-Kainrath, and S. Hochreiter
47
+  (2015). msa: an R package for multiple sequence alignment. 
48
+  \emph{Bioinformatics} \bold{31}(24):3997-3999. DOI:
49
+  \href{http://dx.doi.org/10.1093/bioinformatics/btv494}{10.1093/bioinformatics/btv494}.
50
+}
51
+\seealso{\code{\link{msa}}, \code{\linkS4class{MsaAAMultipleAlignment}},
52
+  \code{\linkS4class{MsaDNAMultipleAlignment}},
53
+  \code{\linkS4class{MsaRNAMultipleAlignment}},
54
+  \code{\linkS4class{MsaMetaData}}
55
+}
56
+\examples{
57
+## read sequences
58
+filepath <- system.file("examples", "exampleAA.fasta", package="msa")
59
+mySeqs <- readAAStringSet(filepath)
60
+
61
+## perform multiple alignment
62
+myAlignment <- msa(mySeqs)
63
+
64
+## convert to an object of class 'alignment' (package 'seqinr')
65
+msaConvert(myAlignment, "seqinr::alignment")
66
+
67
+## convert to an object of class 'align' (package 'bios2mds')
68
+msaConvert(myAlignment, "bios2mds::align")
69
+}
70
+\keyword{manip}
... ...
@@ -1,6 +1,7 @@
1 1
 #include "RClustalOmega.h"
2 2
 #include "RClustalW.h"
3 3
 #include "RMuscle.h"
4
+#include "SplitCharVector2List.h"
4 5
 
5 6
 #include <R.h>
6 7
 #include <Rinternals.h>
... ...
@@ -13,6 +14,8 @@ static const R_CallMethodDef callMethods[] = {
13 14
     {"RClustalW", (DL_FUNC) &RClustalW, 9},
14 15
     /* RClustalOmega.cpp */
15 16
     {"RClustalOmega", (DL_FUNC) &RClustalOmega, 9},
17
+    /* SplitCharVector2List.cpp */
18
+    {"SplitCharVector2List", (DL_FUNC) &SplitCharVector2List, 1},
16 19
     {NULL, NULL, 0}
17 20
 };
18 21
 
19 22
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+#include <Rcpp.h>
2
+
3
+using namespace Rcpp;
4
+using namespace std;
5
+
6
+#include "SplitCharVector2List.h"
7
+
8
+RcppExport SEXP SplitCharVector2List(SEXP xR)
9
+{
10
+    vector<string> x = as< vector<string> >(xR);
11
+    int i, j, n = x.size();
12
+    List out;
13
+
14
+    for (i = 0; i < n; i++)
15
+    {
16
+	int len = x[i].length();
17
+	vector<string> tmp;
18
+	
19
+	for (j = 0; j < len; j++)
20
+	    tmp.push_back(x[i].substr(j, 1));
21
+
22
+	out.push_back(tmp);
23
+    }
24
+
25
+    return(out);
26
+}
0 27
new file mode 100644
... ...
@@ -0,0 +1,11 @@
1
+//
2
+// File SplitCharVector2List.h defining prototypes for SplitCharVector2List.cpp
3
+//
4
+
5
+#ifndef _SplitCharVector2List_H_
6
+
7
+#define _SplitCharVector2List_H_
8
+
9
+RcppExport SEXP SplitCharVector2List(SEXP xR);
10
+
11
+#endif
... ...
@@ -39,6 +39,7 @@ Linz\\Altenberger Str. 69, 4040 Linz, Austria\\
39 39
 options(width=65)
40 40
 set.seed(0)
41 41
 library(msa)
42
+library(seqinr)
42 43
 msaVersion <- packageDescription("msa")$Version
43 44
 msaDateRaw <- packageDescription("msa")$Date
44 45
 msaDateYear <- as.numeric(substr(msaDateRaw, 1, 4))
... ...
@@ -366,6 +367,8 @@ print(myFirstAlignment, showNames=FALSE, show="complete")
366 367
 
367 368
 \section{Processing Multiple Alignments}\label{sec:msaProc}
368 369
 
370
+\subsection{Methods Inherited From {\tt Biostrings}}
371
+
369 372
 The classes defined by the \MSA\ package for storing multiple alignment results
370 373
 have been derived from the corresponding classes defined by the
371 374
 \verb+Biostrings+ package. Therefore, all methods for processing
... ...
@@ -430,6 +433,51 @@ printSplitString(consensusString(unmasked(myMaskedAlignment)))
430 433
 \noindent Actually, the \verb+print()+ method (see Section~\ref{sec:msaPrint} above)
431 434
 uses this function to compute the consensus sequence.
432 435
 
436
+\subsection{Interfacing to Other Packages}
437
+
438
+There are also other sequence analysis packages that make use of multiple
439
+sequence alignments. The \msa\ package does not directly interface to any of these packages
440
+in order to avoid dependencies and possible incompatibilities. However, \msa\ provides
441
+a function \verb+msaConvert()+ that allows for converting multiple sequence alignment
442
+objects to other types/classes. Currently, two such conversions are available, namely to
443
+objects of class \verb+alignment+ (as defined and used by the \verb+seqinr+ package) and
444
+to objects of class \verb+align+ (as defined and used by the \verb+bios2mds+ package).
445
+Note that the conversion is performed without loading or depending on the respective
446
+packages.
447
+
448
+In the following example, we perform a multiple alignment of Hemoglobin alpha
449
+example sequences and convert the result for later processing with the \verb+seqinr+
450
+package:
451
+<<Hemoglobin1>>=
452
+hemoSeq <- readAAStringSet(system.file("examples/HemoglobinAA.fasta",
453
+                                       package="msa"))
454
+hemoAln <- msa(hemoSeq)
455
+hemoAln
456
+hemoAln2 <- msaConvert(hemoAln, type="seqinr::alignment")
457
+@
458
+Now we compute a distance matrix using the \verb+dist.alignment()+ function from
459
+the \verb+seqinr+ package:
460
+<<Hemoglobin2>>=
461
+library(seqinr)
462
+
463
+d <- dist.alignment(hemoAln2, "identity")
464
+as.matrix(d)[3:4, 3:4]
465
+@
466
+Now we can construct a draft phylogenetic tree using the \verb+hclust()+ function from
467
+the \verb+stats+ package:
468
+<<HemoglobinTree,output.width='0.8\\textwidth',output.height='0.5\\textwidth'>>=
469
+hemoTree <- hclust(d)
470
+plot(hemoTree, main="Phylogenetic Tree of Hemoglobin Alpha Sequences",
471
+     xlab="", sub="")
472
+@
473
+
474
+The following example shows how to convert a multiple alignment object into an object of
475
+class \verb+align+ as defined by the \verb+bios2mds+ package:
476
+<<Hemoglobin3>>=
477
+hemoAln3 <- msaConvert(hemoAln, type="bios2mds::align")
478
+str(hemoAln3)
479
+@
480
+
433 481
 \section{Pretty-Printing Multiple Sequence Alignments}\label{sec:msaPrettyPrint}
434 482
 
435 483
 As already mentioned above, the \MSA\ package offers the function
... ...
@@ -716,6 +764,11 @@ bibliography below).
716 764
 \section{Change Log}
717 765
 
718 766
 \begin{description}
767
+\item[Version 1.3.3:] \mbox{ }  \begin{itemize}
768
+   \item added function for converting multiple sequence alignments for
769
+     use with other sequence alignment packages
770
+   \item corresponding changes in documentation
771
+  \end{itemize}
719 772
 \item[Version 1.3.2:] \mbox{ }  \begin{itemize}
720 773
    \item further fixes in Makefiles and Makevars files to account for changes in build system
721 774
    \item update of citation information