@article{schliep_phangorn_2011,
	title = {phangorn: phylogenetic analysis in {R}},
	volume = {27},
	url = {http://bioinformatics.oxfordjournals.org/content/27/4/592.abstract},
	doi = {10.1093/bioinformatics/btq706},
	shorttitle = {phangorn},
	abstract = {Summary: phangorn is a package for phylogenetic reconstruction and analysis in the R language. Previously it was only possible to estimate phylogenetic trees with distance methods in R. phangorn, now offers the possibility of reconstructing phylogenies with distance based methods, maximum parsimony or maximum likelihood ({ML}) and performing Hadamard conjugation. Extending the general {ML} framework, this package provides the possibility of estimating mixture and partition models. Furthermore, phangorn offers several functions for comparing trees, phylogenetic models or splits, simulating character data and performing congruence analyses. Availability: phangorn can be obtained through the {CRAN} homepage http://cran.r-project.org/web/packages/phangorn/index.html. phangorn is licensed under {GPL} 2. Contact: klaus.kschliep@snv.jussieu.fr Supplementary information: Supplementary data are available at Bioinformatics online.},
	pages = {592--593},
	number = {4},
	journaltitle = {Bioinformatics},
	author = {Schliep, Klaus Peter},
	urldate = {2011-03-05},
	date = {2011-02-15}
}

@article{hohna_probabilistic_2014,
	author = {Höhna, Sebastian and Heath, Tracy A. and Boussau, Bastien and Landis, Michael J. and Ronquist, Fredrik and Huelsenbeck, John P.},
	title = {Probabilistic Graphical Model Representation in Phylogenetics},
	journaltitle = {Systematic Biology},
	shortjournal = {Syst Biol},
	date = {2014-09-01},
	volume = {63},
	number = {5},
	pages = {753--771},
	issn = {1063-5157, 1076-836X},
	doi = {10.1093/sysbio/syu039},
	pmid = {24951559},
	url = {http://sysbio.oxfordjournals.org/content/63/5/753},
	urldate = {2015-11-17},
	langid = {english},
	abstract = {Recent years have seen a rapid expansion of the model space explored in statistical phylogenetics, emphasizing the need for new approaches to statistical model representation and software development. Clear communication and representation of the chosen model is crucial for: (i) reproducibility of an analysis, (ii) model development, and (iii) software design. Moreover, a unified, clear and understandable framework for model representation lowers the barrier for beginners and nonspecialists to grasp complex phylogenetic models, including their assumptions and parameter/variable dependencies. Graphical modeling is a unifying framework that has gained in popularity in the statistical literature in recent years. The core idea is to break complex models into conditionally independent distributions. The strength lies in the comprehensibility, flexibility, and adaptability of this formalism, and the large body of computational work based on it. Graphical models are well-suited to teach statistical models, to facilitate communication among phylogeneticists and in the development of generic software for simulation and statistical inference. Here, we provide an introduction to graphical models for phylogeneticists and extend the standard graphical model representation to the realm of phylogenetics. We introduce a new graphical model component, tree plates, to capture the changing structure of the subgraph corresponding to a phylogenetic tree. We describe a range of phylogenetic models using the graphical model framework and introduce modules to simplify the representation of standard components in large and complex models. Phylogenetic model graphs can be readily used in simulation, maximum likelihood inference, and Bayesian inference using, for example, Metropolis–Hastings or Gibbs sampling of the posterior distribution. [Computation; graphical models; inference; modularization; statistical phylogenetics; tree plate.]}
}

@article{boussau_genome-scale_2013,
	title = {Genome-scale coestimation of species and gene trees},
	volume = {23},
	issn = {1088-9051, 1549-5469},
	url = {http://genome.cshlp.org/content/23/2/323},
	doi = {10.1101/gr.141978.112},
	abstract = {Comparisons of gene trees and species trees are key to understanding major processes of genome evolution such as gene duplication and loss. Because current methods to reconstruct phylogenies fail to model the two-way dependency between gene trees and the species tree, they often misrepresent gene and species histories. We present a new probabilistic model to jointly infer rooted species and gene trees for dozens of genomes and thousands of gene families. We use simulations to show that this method accurately infers the species tree and gene trees, is robust to misspecification of the models of sequence and gene family evolution, and provides a precise historic record of gene duplications and losses throughout genome evolution. We simultaneously reconstruct the history of mammalian species and their genes based on 36 completely sequenced genomes, and use the reconstructed gene trees to infer the gene content and organization of ancestral mammalian genomes. We show that our method yields a more accurate picture of ancestral genomes than the trees available in the authoritative database Ensembl.},
	pages = {323--330},
	number = {2},
	journaltitle = {Genome Research},
	shortjournal = {Genome Res.},
	author = {Boussau, Bastien and Szöllősi, Gergely J. and Duret, Laurent and Gouy, Manolo and Tannier, Eric and Daubin, Vincent},
	urldate = {2015-11-17},
	date = {2013-02-01},
	langid = {english},
	pmid = {23132911}
}

@book{felsenstein_inferring_2003,
	address = {Sunderland, Mass},
	edition = {2},
	title = {Inferring Phylogenies},
	isbn = {9780878931774},
	abstract = {Phylogenies (evolutionary trees) are basic to thinking about and analyzing differences between species. Statistical, computational, and algorithmic work on them has been ongoing for four decades, with great advances in understanding. Yet no book has summarized this work until now. Inferring Phylogenies explains clearly the assumptions and logic of making inferences about phylogenies, and using them to make inferences about evolutionary processes. It is an essential text and reference for anyone who wants to understand how phylogenies are reconstructed and how they are used. As phylogenies are inferred with various kinds of data, this book concentrates on some of the central ones: discretely coded characters, molecular sequences, gene frequencies, and quantitative traits. Also covered are restriction sites, {RAPDs}, and microsatellites. Inferring Phylogenies is intended for graduate-level courses, assuming some knowledge of statistics, mathematics (calculus and fundamental matrix algebra), molecular sequences, and quantitative genetics.},
	language = {English},
	publisher = {Sinauer Associates},
	author = {Felsenstein, Joseph},
	month = sep,
	year = {2003}
}

@book{wickham_ggplot2_2009,
	author = {Wickham, Hadley},
	title = {ggplot2: Elegant Graphics for Data Analysis},
	shorttitle = {ggplot2},
	edition = {1},
	publisher = {Springer},
	year = {2009},
	month = aug,
	isbn = {0387981403}
}

@article{paradis_ape_2004,
	title = {{APE}: Analyses of Phylogenetics and Evolution in {R} language},
	volume = {20},
	shorttitle = {{APE}},
	url = {http://bioinformatics.oxfordjournals.org/content/20/2/289.abstract},
	doi = {10.1093/bioinformatics/btg412},
	abstract = {Summary: Analysis of Phylogenetics and Evolution ({APE}) is a package written in the R language for use in molecular evolution and phylogenetics. {APE} provides both utility functions for reading and writing data and manipulating phylogenetic trees, as well as several advanced methods for phylogenetic and evolutionary analysis (e.g. comparative and population genetic methods). {APE} takes advantage of the many R functions for statistics and graphics, and also provides a flexible framework for developing and implementing further statistical methods for the analysis of evolutionary processes. Availability: The program is free and available from the official R package archive at http://cran.r-project.org/src/contrib/{PACKAGES}.html\#ape. {APE} is licensed under the {GNU} General Public License.},
	number = {2},
	urldate = {2011-03-04},
	journal = {Bioinformatics},
	author = {Paradis, Emmanuel and Claude, Julien and Strimmer, Korbinian},
	month = jan,
	year = {2004},
	pages = {289--290}
}

@article{matsen_pplacer_2010,
	title = {pplacer: linear time maximum-likelihood and {Bayesian} phylogenetic placement of sequences onto a fixed reference tree},
	volume = {11},
	issn = {1471-2105},
	shorttitle = {pplacer},
	url = {http://www.biomedcentral.com/1471-2105/11/538},
	doi = {10.1186/1471-2105-11-538},
	language = {en},
	number = {1},
	urldate = {2015-01-05},
	journal = {{BMC} Bioinformatics},
	author = {Matsen, Frederick A and Kodner, Robin B and Armbrust, E Virginia},
	year = {2010},
	pages = {538}
}

@article{matsen_format_2012,
	author = {Matsen, Frederick A. and Hoffman, Noah G. and Gallagher, Aaron and Stamatakis, Alexandros},
	title = {A Format for Phylogenetic Placements},
	journal = {{PLoS} {ONE}},
	year = {2012},
	month = feb,
	volume = {7},
	number = {2},
	pages = {e31009},
	doi = {10.1371/journal.pone.0031009},
	url = {http://dx.doi.org/10.1371/journal.pone.0031009},
	urldate = {2015-01-05},
	abstract = {We have developed a unified format for phylogenetic placements, that is, mappings of environmental sequence data (e.g., short reads) into a phylogenetic tree. We are motivated to do so by the growing number of tools for computing and post-processing phylogenetic placements, and the lack of an established standard for storing them. The format is lightweight, versatile, extensible, and is based on the {JSON} format, which can be parsed by most modern programming languages. Our format is already implemented in several tools for computing and post-processing parsimony- and likelihood-based phylogenetic placements and has worked well in practice. We believe that establishing a standard format for analyzing read placements at this early stage will lead to a more efficient development of powerful and portable post-analysis tools for the growing applications of phylogenetic placement.}
}

@article{berger_EPA_2011,
	author = {Berger, Simon A. and Krompass, Denis and Stamatakis, Alexandros},
	title = {Performance, accuracy, and Web server for evolutionary placement of short sequence reads under maximum likelihood},
	journal = {Systematic Biology},
	year = {2011},
	month = may,
	volume = {60},
	number = {3},
	pages = {291--302},
	issn = {1076-836X},
	doi = {10.1093/sysbio/syr010},
	pmid = {21436105},
	pmcid = {PMC3078422},
	language = {eng},
	keywords = {Algorithms, Amino Acid Sequence, Base Sequence, Computer Simulation, Evolution, Molecular, Internet, Likelihood Functions, Phylogeny, Sequence Alignment, Sequence Analysis, {DNA}, Sequence Analysis, Protein, Sequence Analysis, {RNA}, Software},
	abstract = {We present an evolutionary placement algorithm ({EPA}) and a Web server for the rapid assignment of sequence fragments (short reads) to edges of a given phylogenetic tree under the maximum-likelihood model. The accuracy of the algorithm is evaluated on several real-world data sets and compared with placement by pair-wise sequence comparison, using edit distances and {BLAST}. We introduce a slow and accurate as well as a fast and less accurate placement algorithm. For the slow algorithm, we develop additional heuristic techniques that yield almost the same run times as the fast version with only a small loss of accuracy. When those additional heuristics are employed, the run time of the more accurate algorithm is comparable with that of a simple {BLAST} search for data sets with a high number of short query sequences. Moreover, the accuracy of the {EPA} is significantly higher, in particular when the sample of taxa in the reference topology is sparse or inadequate. Our algorithm, which has been integrated into {RAxML}, therefore provides an equally fast but more accurate alternative to {BLAST} for tree-based inference of the evolutionary origin and composition of short sequence reads. We are also actively developing a Web server that offers a freely available service for computing read placements on trees using the {EPA}.}
}

@article{stamatakis_raxml_2014,
	title = {{RAxML} Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies},
	volume = {30},
	issn = {1367-4803, 1460-2059},
	url = {http://bioinformatics.oxfordjournals.org/content/early/2014/01/21/bioinformatics.btu033},
	doi = {10.1093/bioinformatics/btu033},
	shorttitle = {{RAxML} Version 8},
	abstract = {Motivation: Phylogenies are increasingly used in all fields of medical and biological research. Moreover, because of the next generation sequencing revolution, datasets used for conducting phylogenetic analyses grow at an unprecedented pace. {RAxML} (Randomized Axelerated Maximum Likelihood) is a popular program for phylogenetic analyses of large datasets under maximum likelihood. Since the last {RAxML} paper in 2006, it has been continuously maintained and extended to accommodate the increasingly growing input datasets and to serve the needs of the user community.
Results: I present some of the most notable new features and extensions of {RAxML}, such as, a substantial extension of substitution models and supported data types, the introduction of {SSE}3, {AVX}, and {AVX}2 vector intrinsics, techniques for reducing the memory requirements of the code and a plethora of operations for conducting post-analyses on sets of trees. In addition, an up-to-date, 50 page user manual covering all new {RAxML} options is available.
Availability: The code is available under {GNU} {GPL} at https://github.com/stamatak/standard-{RAxML}.
Contact: Alexandros.Stamatakis@h-its.org},
	pages = {1312--1313},
	number = {9},
	journaltitle = {Bioinformatics},
	shortjournal = {Bioinformatics},
	author = {Stamatakis, Alexandros},
	urldate = {2015-12-28},
	date = {2014-01-21},
	langid = {english},
	pmid = {24451623}
}

@article{mcmurdie_phyloseq_2013,
	title = {phyloseq: An {R} Package for Reproducible Interactive Analysis and Graphics of Microbiome Census Data},
	volume = {8},
	shorttitle = {phyloseq},
	url = {http://dx.doi.org/10.1371/journal.pone.0061217},
	doi = {10.1371/journal.pone.0061217},
	abstract = {Background: The analysis of microbial communities through {DNA} sequencing brings many challenges: the integration of different types of data with methods from ecology, genetics, phylogenetics, multivariate statistics, visualization and testing. With the increased breadth of experimental designs now being pursued, project-specific statistical analyses are often needed, and these analyses are often difficult (or impossible) for peer researchers to independently reproduce. The vast majority of the requisite tools for performing these analyses reproducibly are already implemented in R and its extensions (packages), but with limited support for high throughput microbiome census data. Results: Here we describe a software project, phyloseq, dedicated to the object-oriented representation and analysis of microbiome census data in R. It supports importing data from a variety of common formats, as well as many analysis techniques. These include calibration, filtering, subsetting, agglomeration, multi-table comparisons, diversity analysis, parallelized Fast {UniFrac}, ordination methods, and production of publication-quality graphics; all in a manner that is easy to document, share, and modify. We show how to apply functions from other R packages to phyloseq-represented data, illustrating the availability of a large number of open source analysis techniques. We discuss the use of phyloseq with tools for reproducible research, a practice common in other fields but still rare in the analysis of highly parallel microbiome census data. We have made available all of the materials necessary to completely reproduce the analysis and figures included in this article, an example of best practices for reproducible research. Conclusions: The phyloseq project for R is a new open-source software package, freely available on the web from both {GitHub} and Bioconductor.},
	number = {4},
	urldate = {2015-01-05},
	journal = {{PLoS} {ONE}},
	author = {McMurdie, Paul J. and Holmes, Susan},
	month = apr,
	year = {2013},
	pages = {e61217}
}

@article{marazzi_locating_2012,
	title = {Locating Evolutionary Precursors on a Phylogenetic Tree},
	volume = {66},
	rights = {© 2012 The Author(s). Evolution © 2012 The Society for the Study of Evolution.},
	issn = {1558-5646},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/j.1558-5646.2012.01720.x/abstract},
	doi = {10.1111/j.1558-5646.2012.01720.x},
	abstract = {Conspicuous innovations in the history of life are often preceded by more cryptic genetic and developmental precursors. In many cases, these appear to be associated with recurring origins of very similar traits in close relatives (parallelisms) or striking convergences separated by deep time (deep homologies). Although the phylogenetic distribution of gain and loss of traits hints strongly at the existence of such precursors, no models of trait evolution currently permit inference about their location on a tree. Here we develop a new stochastic model, which explicitly captures the dependency implied by a precursor and permits estimation of precursor locations. We apply it to the evolution of extrafloral nectaries ({EFNs}), an ecologically significant trait mediating a widespread mutualism between plants and ants. In legumes, a species-rich clade with morphologically diverse {EFNs}, the precursor model fits the data on {EFN} occurrences significantly better than conventional models. The model generates explicit hypotheses about the phylogenetic location of hypothetical precursors, which may help guide future studies of molecular genetic pathways underlying nectary position, development, and function.},
	pages = {3918--3930},
	number = {12},
	journaltitle = {Evolution},
	author = {Marazzi, Brigitte and Ané, Cécile and Simon, Marcelo F. and Delgado-Salinas, Alfonso and Luckow, Melissa and Sanderson, Michael J.},
	urldate = {2015-12-28},
	date = {2012-12-01},
	langid = {english},
	keywords = {Deep homology, extra-floral nectary, homoplasy, trait evolution}
}

@article{yang_paml_2007,
	title = {{PAML} 4: Phylogenetic Analysis by Maximum Likelihood},
	volume = {24},
	issn = {0737-4038, 1537-1719},
	shorttitle = {{PAML} 4},
	url = {http://mbe.oxfordjournals.org/content/24/8/1586},
	doi = {10.1093/molbev/msm088},
	abstract = {{PAML}, currently in version 4, is a package of programs for phylogenetic analyses of {DNA} and protein sequences using maximum likelihood ({ML}). The programs may be used to compare and test phylogenetic trees, but their main strengths lie in the rich repertoire of evolutionary models implemented, which can be used to estimate parameters in models of sequence evolution and to test interesting biological hypotheses. Uses of the programs include estimation of synonymous and nonsynonymous rates ({dN} and {dS}) between two protein-coding {DNA} sequences, inference of positive Darwinian selection through phylogenetic comparison of protein-coding genes, reconstruction of ancestral genes and proteins for molecular restoration studies of extinct life forms, combined analysis of heterogeneous data sets from multiple gene loci, and estimation of species divergence times incorporating uncertainties in fossil calibrations. This note discusses some of the major applications of the package, which includes example data sets to demonstrate their use. The package is written in {ANSI} C, and runs under Windows, Mac {OSX}, and {UNIX} systems. It is available at http://abacus.gene.ucl.ac.uk/software/paml.html.},
	language = {en},
	number = {8},
	urldate = {2015-01-05},
	journal = {Molecular Biology and Evolution},
	author = {Yang, Ziheng},
	month = aug,
	year = {2007},
	pmid = {17483113},
	keywords = {codon models, likelihood, {PAML}, phylogenetic analysis, Software},
	pages = {1586--1591}
}

@article{pond_hyphy_2005,
	title = {{HyPhy}: hypothesis testing using phylogenies},
	volume = {21},
	issn = {1367-4803, 1460-2059},
	shorttitle = {{HyPhy}},
	url = {http://bioinformatics.oxfordjournals.org/content/21/5/676},
	doi = {10.1093/bioinformatics/bti079},
	abstract = {Summary: The {HyPhy} package is designed to provide a flexible and unified platform for carrying out likelihood-based analyses on multiple alignments of molecular sequence data, with the emphasis on studies of rates and patterns of sequence evolution.
Availability: http://www.hyphy.org
Contact: muse@stat.ncsu.edu
Supplementary information: {HyPhy} documentation and tutorials are available at http://www.hyphy.org},
	language = {en},
	number = {5},
	urldate = {2015-01-05},
	journal = {Bioinformatics},
	author = {Kosakovsky Pond, Sergei L. and Frost, Simon D. W. and Muse, Spencer V.},
	month = mar,
	year = {2005},
	pmid = {15509596},
	pages = {676--679}
}

@article{bouckaert_beast_2014,
	title = {{BEAST} 2: A Software Platform for {Bayesian} Evolutionary Analysis},
	volume = {10},
	shorttitle = {{BEAST} 2},
	url = {http://dx.doi.org/10.1371/journal.pcbi.1003537},
	doi = {10.1371/journal.pcbi.1003537},
	abstract = {We present a new open source, extensible and flexible software platform for Bayesian evolutionary analysis called {BEAST} 2. This software platform is a re-design of the popular {BEAST} 1 platform to correct structural deficiencies that became evident as the {BEAST} 1 software evolved. Key among those deficiencies was the lack of post-deployment extensibility. {BEAST} 2 now has a fully developed package management system that allows third party developers to write additional functionality that can be directly installed to the {BEAST} 2 analysis platform via a package manager without requiring a new software release of the platform. This package architecture is showcased with a number of recently published new models encompassing birth-death-sampling tree priors, phylodynamics and model averaging for substitution models and site partitioning. A second major improvement is the ability to read/write the entire state of the {MCMC} chain to/from disk allowing it to be easily shared between multiple instances of the {BEAST} software. This facilitates checkpointing and better support for multi-processor and high-end computing extensions. Finally, the functionality in new packages can be easily added to the user interface ({BEAUti} 2) by a simple {XML} template-based mechanism because {BEAST} 2 has been re-designed to provide greater integration between the analysis engine and the user interface so that, for example {BEAST} and {BEAUti} use exactly the same {XML} file format.},
	number = {4},
	urldate = {2015-01-05},
	journal = {{PLoS} Computational Biology},
	author = {Bouckaert, Remco and Heled, Joseph and Kühnert, Denise and Vaughan, Tim and Wu, Chieh-Hsi and Xie, Dong and Suchard, Marc A. and Rambaut, Andrew and Drummond, Alexei J.},
	month = apr,
	year = {2014},
	pages = {e1003537}
}