Browse code

using treedata object to store jplace tree

guangchuang yu authored on 05/12/2017 14:26:00
Showing4 changed files

... ...
@@ -2,6 +2,7 @@
2 2
 
3 3
 S3method(as.binary,phylo)
4 4
 S3method(as.data.frame,phylo)
5
+S3method(as_data_frame,phylo)
5 6
 S3method(as_data_frame,treedata)
6 7
 S3method(fortify,codeml)
7 8
 S3method(fortify,codeml_mlc)
... ...
@@ -32,8 +32,12 @@ fortify.treedata <- function(model, data, layout="rectangular", yscale="none",
32 32
         res <- scaleX_by_time_from_mrsd(res, mrsd, as.Date)
33 33
     }
34 34
 
35
-    ## ## angle for all layout, if 'rectangular', user use coord_polar, can still use angle
36
-    res <- calculate_angle(res)
35
+    if (layout == "slanted") {
36
+        res <- add_angle_slanted(res)
37
+    } else {
38
+        ## angle for all layout, if 'rectangular', user use coord_polar, can still use angle
39
+        res <- calculate_angle(res)
40
+    }
37 41
     scaleY(as.phylo(model), res, yscale, layout, ...)
38 42
 }
39 43
 
... ...
@@ -42,11 +46,10 @@ fortify.treedata <- function(model, data, layout="rectangular", yscale="none",
42 46
 ##' @export
43 47
 ## @importFrom treeio Nnode
44 48
 ## @importFrom treeio Ntip
45
-as_data_frame.treedata <- function(x, row.names, optional, branch.length = "branch.length", ...) {
49
+as_data_frame.treedata <- function(x, branch.length = "branch.length", ...) {
46 50
     tree <- set_branch_length(x, branch.length)
47 51
 
48
-    ## res <- as.data.frame(tree@phylo)
49
-    res <- as_data_frame_(tree@phylo)
52
+    res <- as_data_frame(tree@phylo)
50 53
     tree_anno <- as_data_frame(get_tree_data(x))
51 54
     if (nrow(tree_anno) > 0) {
52 55
         by <- "node"
... ...
@@ -61,11 +64,11 @@ as_data_frame.treedata <- function(x, row.names, optional, branch.length = "bran
61 64
     return(res)
62 65
 }
63 66
 
64
-##@method as.data.frame phylo
65
-##@export
67
+##' @method as_data_frame phylo
68
+##' @export
66 69
 ##' @importFrom tibble data_frame
67 70
 ##' @importFrom dplyr full_join
68
-as_data_frame_ <- function(x, row.names, optional, branch.length = "branch.length", ...) {
71
+as_data_frame.phylo <- function(x, ...) {
69 72
     phylo <- x
70 73
     ntip <- Ntip(phylo)
71 74
     N <- Nnode(phylo, internal.only=FALSE)
... ...
@@ -90,6 +93,18 @@ as_data_frame_ <- function(x, row.names, optional, branch.length = "branch.lengt
90 93
     idx <- is.na(res$parent)
91 94
     res$parent[idx] <- res$node[idx]
92 95
 
96
+    res <- res[order(res$node),]
97
+    aa <- names(attributes(phylo))
98
+    group <- aa[ ! aa %in% c("names", "class", "order", "reroot", "node_map")]
99
+    if (length(group) > 0) {
100
+        for (group_ in group) {
101
+            ## groupOTU & groupClade
102
+            group_info <- attr(phylo, group_)
103
+            if (length(group_info) == nrow(res)) {
104
+                res[[group_]] <- group_info
105
+            }
106
+        }
107
+    }
93 108
     return(res)
94 109
 }
95 110
 
... ...
@@ -1540,24 +1555,25 @@ getYcoord_scale_category <- function(tr, df, yscale, yscale_mapping=NULL, ...) {
1540 1555
 
1541 1556
 
1542 1557
 add_angle_slanted <- function(res) {
1543
-    dy <- (res[, "y"] - res[res$parent, "y"]) / diff(range(res[, "y"]))
1544
-    dx <- (res[, "x"] - res[res$parent, "x"]) / diff(range(res[, "x"]))
1558
+    x <- res[["x"]]
1559
+    y <- res[["y"]]
1560
+    dy <- (y - y[match(res$parent, res$node)]) / diff(range(y))
1561
+    dx <- (x - x[match(res$parent, res$node)]) / diff(range(x))
1545 1562
     theta <- atan(dy/dx)
1546 1563
     theta[is.na(theta)] <- 0 ## root node
1547 1564
     res$angle <- theta/pi * 180
1548 1565
 
1549
-    branch.y <- (res[res$parent, "y"] + res[, "y"])/2
1566
+    branch.y <- (y[match(res$parent, res$node)] + y)/2
1550 1567
     idx <- is.na(branch.y)
1551
-    branch.y[idx] <- res[idx, "y"]
1568
+    branch.y[idx] <- y[idx]
1552 1569
     res[, "branch.y"] <- branch.y
1553 1570
     return(res)
1554 1571
 }
1555 1572
 
1556 1573
 calculate_branch_mid <- function(res) {
1557 1574
     res$branch <- with(res, (x[match(parent, node)] + x)/2)
1558
-    ## res$branch <- (res[match(res$parent, res$node), "x"] + res[, "x"])/2
1559
-    if (!is.null(res$length)) {
1560
-        res$length[is.na(res$length)] <- 0
1575
+    if (!is.null(res$branch.length)) {
1576
+        res$branch.length[is.na(res$branch.length)] <- 0
1561 1577
     }
1562 1578
     res$branch[is.na(res$branch)] <- 0
1563 1579
     return(res)
... ...
@@ -101,7 +101,7 @@ Visualizing an annotated phylogenetic tree with numerical matrix (e.g. genotype
101 101
 
102 102
 # Vignette Entry
103 103
 
104
-+ [Tree Data Import](treeImport.html)
104
++ [Tree Data Import](https://bioconductor.org/packages/release/bioc/vignettes/treeio/inst/doc/treeio.html)
105 105
 + [Tree Visualization](treeVisualization.html)
106 106
 + [Tree Manipulation](treeManipulation.html)
107 107
 + [Tree Annotation](treeAnnotation.html)
108 108
deleted file mode 100644
... ...
@@ -1,424 +0,0 @@
1
-title: "Tree Data Import"
2
-author: "Guangchuang Yu and Tommy Tsan-Yuk Lam\\
3
-
4
-        School of Public Health, The University of Hong Kong"
5
-date: "`r Sys.Date()`"
6
-bibliography: ggtree.bib
7
-csl: nature.csl
8
-output:
9
-  prettydoc::html_pretty:
10
-    toc: true
11
-    theme: cayman
12
-    highlight: github
13
-  pdf_document:
14
-    toc: true
15
-vignette: >
16
-  %\VignetteIndexEntry{01 Tree Data Import}
17
-  %\VignetteEngine{knitr::rmarkdown}
18
-  %\VignetteDepends{scales}
19
-  %\VignetteDepends{Biostrings)
20
-  %\usepackage[utf8]{inputenc}
21
-
22
-```{r style, echo=FALSE, results="asis", message=FALSE}
23
-knitr::opts_chunk$set(tidy = FALSE,
24
-		   message = FALSE)
25
-```
26
-
27
-
28
-```{r echo=FALSE, results="hide", message=FALSE}
29
-library("ape")
30
-library("Biostrings")
31
-library("ggplot2")
32
-library("ggtree")
33
-```
34
-
35
-
36
-The `ggtree` package should not be viewed solely as a standalone software. While it is useful for viewing, annotating and manipulating phylogenetic trees, it is also an infrastructure that enables evolutionary evidences that inferred by commonly used software packages in the field to be used in `R`. For instance, *dN/dS* values or ancestral sequences inferred by [CODEML](http://abacus.gene.ucl.ac.uk/software/paml.html)[@yang_paml_2007], clade support values (posterior) inferred by [BEAST](http://beast2.org/)[@bouckaert_beast_2014] and short read placement by [EPA](http://sco.h-its.org/exelixis/web/software/epa/index.html)[@berger_EPA_2011] and [pplacer](http://matsen.fhcrc.org/pplacer/)[@matsen_pplacer_2010]. These evolutionary evidences are not only used in annotating phylogenetic tree in  `ggtree` but can also be further analyzed in `R`.
37
-
38
-# Supported File Formats
39
-
40
-Most of the tree viewer software (including `R` packages) focus on `Newick` and `Nexus` file formats, while there are file formats from different evolution analysis software that contain supporting evidences within the file that are ready for annotating a phylogenetic tree.
41
-The `ggtree` package define several parser functions and `S4` classes to store statistical evidences inferred by commonly used software packages. It supports several file formats, including:
42
-
43
-+ Newick (via `ape`)
44
-+ Nexus (via `ape`)
45
-+ New Hampshire eXtended format (NHX)
46
-+ Jplace
47
-+ Phylip
48
-
49
-and software output from:
50
-
51
-+ [BEAST](http://beast2.org/)[@bouckaert_beast_2014]
52
-+ [EPA](http://sco.h-its.org/exelixis/web/software/epa/index.html)[@berger_EPA_2011]
53
-+ [HYPHY](http://hyphy.org/w/index.php/Main_Page)[@pond_hyphy_2005]
54
-+ [PAML](http://abacus.gene.ucl.ac.uk/software/paml.html)[@yang_paml_2007]
55
-+ [PHYLDOG](http://pbil.univ-lyon1.fr/software/phyldog/)[@boussau_genome-scale_2013]
56
-+ [pplacer](http://matsen.fhcrc.org/pplacer/)[@matsen_pplacer_2010]
57
-+ [r8s](http://loco.biosci.arizona.edu/r8s/)[@marazzi_locating_2012]
58
-+ [RAxML](http://sco.h-its.org/exelixis/web/software/raxml/)[@stamatakis_raxml_2014]
59
-+ [RevBayes](http://revbayes.github.io/intro.html)[@hohna_probabilistic_2014]
60
-
61
-# Parser functions
62
-
63
-The `ggtree` package implement several parser functions, including:
64
-
65
-+ `read.beast` for parsing output of [BEASE](http://beast2.org/)
66
-+ `read.codeml` for parsing output of [CODEML](http://abacus.gene.ucl.ac.uk/software/paml.html) (`rst` and `mlc` files)
67
-+ `read.codeml_mlc` for parsing `mlc` file (output of `CODEML`)
68
-+ `read.hyphy` for parsing output of [HYPHY](http://hyphy.org/w/index.php/Main_Page)
69
-+ `read.jplace` for parsing `jplace` file including output from [EPA](http://sco.h-its.org/exelixis/web/software/epa/index.html) and [pplacer](http://matsen.fhcrc.org/pplacer/)
70
-+ `read.nhx` for parsing `NHX` file including output from [PHYLODOG](http://pbil.univ-lyon1.fr/software/phyldog/) and [RevBayes](http://revbayes.github.io/intro.html)
71
-+ `read.paml_rst` for parsing `rst` file (output of `BASEML` and `CODEML`)
72
-+ `read.r8s` for parsing output of [r8s](loco.biosci.arizona.edu/r8s/)
73
-+ `read.raxml` for parsing output of [RAxML](http://sco.h-its.org/exelixis/web/software/raxml/)
74
-
75
-# S4 classes
76
-
77
-Correspondingly, `ggtree` defines several `S4` classes to store evolutionary evidences inferred by these software packages, including:
78
-
79
-+ _`apeBootstrap`_ for bootstrap analysis of `ape::boot.phylo()`[@paradis_ape_2004], output of `apeBoot()` defined in `ggtree`
80
-+ _`beast`_ for storing output of `read.beast()`
81
-+ _`codeml`_ for storing output of `read.codeml()`
82
-+ _`codeml_mlc`_ for storing output of `read.codeml_mlc()`
83
-+ _`hyphy`_ for storing output of `read.hyphy()`
84
-+ _`jplace`_ for storing output of `read.jplace()`
85
-+ _`nhx`_ for storing output of `read.nhx()`
86
-+ _`paml_rst`_ for _`rst`_ file obtained by [PAML](http://abacus.gene.ucl.ac.uk/software/paml.html), including _`BASEML`_ and _`CODEML`_.
87
-+ _`phangorn`_ for storing ancestral sequences inferred by `R` package `phangorn`[@schliep_phangorn_2011], output of `phyPML()` defined in `ggtree`
88
-+ _`r8s`_ for storing output of `read.r8s()`
89
-+ _`raxml`_ for storing output of `read.raxml()`
90
-
91
-
92
-The _`jplace`_ class is also designed to store user specified annotation data.
93
-
94
-
95
-Here is an overview of these `S4` classes:
96
-
97
-![](figures/ggtree_objects_v2.png)
98
-
99
-In addition, `ggtree` also supports _`phylo`_, _`multiPhylo`_ (defined by `ape`[@paradis_ape_2004]), _`phylo4`_, _`phylo4d`_ (defined by `phylobase`) _`obkData`_ (defined in `OutbreakTools`) and _`phyloseq`_ (defined in `phyloseq`).
100
-
101
-
102
-In `ggtree`, tree objects can be merged and evidences inferred from different phylogenetic analyses can be combined or compared and visualized.
103
-
104
-Viewing a phylogenetic tree in `ggtree` is easy by using the command `ggtree(tree_object)` and annotating a phylogenetic tree is simple by adding graphic layers using the grammar of graphics.
105
-
106
-
107
-For each class, we defined _`get.fields`_ method to get the annotation features that available in the object that can be used to annotate a phylogenetic tree directly in `ggtree`. A _`get.tree`_ method can be used to convert tree object to `phylo` (or `multiPhylo` for `r8s`) object that are widely supported by other `R` packages.
108
-
109
-The _`groupOTU`_ method is used for clustering related OTUs (from tips to their most recent common ancestor). Related OTUs are not necessarily within a clade, they can be distantly related. _`groupOTU`_ works fine for monophyletic (clade), polyphyletic and paraphyletic, while _`groupClade`_ only works for clade (monophyletic). These methods are useful for clustering related OTUs or clades.
110
-
111
-The _`fortify`_ method is used to convert tree object to a `data.frame` which is familiar by `R` users and easy to manipulate. The output `data.frame` contains tree information and all evolutionary evidences (if available, e.g. _*dN/dS*_ in `codeml` object).
112
-
113
-Detail descriptions of `slots` defined in each class are documented in class man pages. Users can use `class?className` (e.g. `class?beast`) to access man page of a class.
114
-
115
-
116
-# Getting Tree Data into R
117
-
118
-## Parsing BEAST output
119
-
120
-```{r}
121
-file <- system.file("extdata/BEAST", "beast_mcc.tree", package="treeio")
122
-beast <- read.beast(file)
123
-beast
124
-```
125
-
126
-Since _`%`_ is not a valid character in _`names`_, all the feature names that contain _`x%`_ will convert to _`0.x`_. For example, _`length_95%_HPD`_ will be changed to _`length_0.95_HPD`_.
127
-
128
-The _`get.fields`_ method return all available features that can be used for annotation.
129
-```{r}
130
-get.fields(beast)
131
-```
132
-
133
-Users can use `ggtree(beast)` to visualize the tree and add layer to annotate it.
134
-
135
-```{r warning=FALSE, fig.width=10, fig.height=10}
136
-ggtree(beast, branch.length = 'none') + geom_text(aes(x=branch, label=range_format(length_0.95_HPD)), vjust=-.5, color='firebrick')
137
-```
138
-
139
-`ggtree` provides `geom_range` layer to display uncertainty of branch length.
140
-
141
-```{r warning=FALSE, fig.width=10, fig.height=10}
142
-ggtree(beast) + geom_range(range='length_0.95_HPD', color='red', alpha=.6, size=2)
143
-```
144
-
145
-In `FigTree`, only `heigh_0.95_HPD` is meaningful since the branch is scaled by `height`. In `ggtree` we can display HPD of `rate`, `height` or other variable if available since `ggtree` can rescale a tree using `rescale_tree` function or by specifing `branch.length` in `ggtree` function.
146
-
147
-```{r warning=FALSE, fig.width=10, fig.height=10}
148
-ggtree(beast, branch.length = 'rate') + geom_range(range='rate_0.95_HPD', color='red', alpha=.6, size=2)
149
-```
150
-
151
-With `ggtree`, evolutionary evidences inferred by commonly used software packages (`BEAST` in this example) can be easily transformed to a tidy `data.frame` by `fortify` method.
152
-
153
-```{r}
154
-beast_data <- fortify(beast)
155
-head(beast_data)
156
-```
157
-
158
-## Parsing PAML output
159
-
160
-### _`rst`_ file
161
-
162
-The _`read.paml_rst`_ function can parse `rst` file from `BASEML` and `CODEML`. The only difference is the space in the sequences. For `BASEML`, each ten bases are separated by one space, while for `CODEML`, each three bases (triplet) are separated by one space.
163
-
164
-```{r fig.width=12, fig.height=10, warning=FALSE, fig.align="center"}
165
-brstfile <- system.file("extdata/PAML_Baseml", "rst", package="treeio")
166
-brst <- read.paml_rst(brstfile)
167
-brst
168
-p <- ggtree(brst) +
169
-    geom_text(aes(x=branch, label=marginal_AA_subs), vjust=-.5, color='steelblue') +
170
-    theme_tree2()
171
-print(p)
172
-```
173
-
174
-Similarly, we can parse the `rst` file from `CODEML` and visualize the tree with amino acid substitution.
175
-```{r}
176
-crstfile <- system.file("extdata/PAML_Codeml", "rst", package="treeio")
177
-crst <- read.paml_rst(crstfile)
178
-crst
179
-```
180
-
181
-
182
-`ggtree` defines the `update` operator, `%<%`, that can update a tree view (applying the same pattern of visualization and annotation) with another tree object.
183
-
184
-```{r fig.width=12, fig.height=10, warning=FALSE, fig.align="center"}
185
-p %<% crst
186
-```
187
-
188
-We can find that these two figures have different evolution distances and subsitutions inferred by `BASEML` and `CODEML` are slightly different.
189
-
190
-### CODEML
191
-
192
-#### mlc file
193
-
194
-The `mlc` file output by `CODEML` contains _`dN/dS`_ estimation.
195
-
196
-
197
-```{r}
198
-mlcfile <- system.file("extdata/PAML_Codeml", "mlc", package="treeio")
199
-mlc <- read.codeml_mlc(mlcfile)
200
-mlc
201
-```
202
-
203
-Please aware that _`/`_ and _`*`_ are not valid characters in _`names`_, they were changed to _`_vs_`_ and _`_x_`_ respectively.
204
-
205
-So _`dN_vs_dS`_ is _`dN/dS`_, _`N_x_dN`_ is _`N*dN`_, and _`S_x_dS`_ is _`S*dS`_.
206
-
207
-
208
-```{r fig.width=8, fig.height=8, warning=FALSE, fig.align="center"}
209
-ggtree(mlc) + geom_text(aes(x=branch, label=dN_vs_dS), color='blue', vjust=-.2)
210
-```
211
-
212
-The paramter _`branch.length`_ can be one of available annotations:
213
-```{r}
214
-get.fields(mlc)
215
-```
216
-
217
-For example, if we set _`branch.length`_ to _`dN_vs_dS`_, the tree view will be re-scaled based on $\omega$ (_`dN/dS`_).
218
-
219
-```{r fig.width=8, width=60, warning=FALSE, fig.align="center"}
220
-ggtree(mlc, branch.length = "dN_vs_dS", aes(color=dN_vs_dS)) +
221
-    scale_color_continuous(name='dN/dS', limits=c(0, 1.5),
222
-                           oob=scales::squish, low="darkgreen", high="red")+
223
-    theme_tree2(legend.position=c(.9, .5))
224
-```
225
-
226
-We can also plot the _`dN`_ or _`dS`_ tree and others. More examples (including evidences inferred by BEAST) can be found in the [Tree Annotation](treeAnnotation.html) vignette.
227
-
228
-
229
-#### _`CODEML`_ output: rst and mlc files\
230
-
231
-
232
-We annotate the tree with information presented in _`rst`_ and _`mlc`_ file separately as demonstrated in previous sessions.
233
-
234
-We can also use both of them which is highly recommended.
235
-
236
-
237
-```{r}
238
-ml <- read.codeml(crstfile, mlcfile)
239
-ml
240
-head(fortify(ml))
241
-```
242
-
243
-All the features in both files are available for annotation. For example, we can annotate _`dN/dS`_ with the tree in _`rstfile`_ and amino acid substitutions with the tree in _`mlcfile`_.
244
-
245
-
246
-## Parsing HYPHY output
247
-
248
-```{r warning=FALSE}
249
-nwk <- system.file("extdata/HYPHY", "labelledtree.tree", package="treeio")
250
-ancseq <- system.file("extdata/HYPHY", "ancseq.nex", package="treeio")
251
-tipfas <- system.file("extdata", "pa.fas", package="treeio")
252
-hy <- read.hyphy(nwk, ancseq, tipfas)
253
-hy
254
-```
255
-
256
-```{r fig.width=16, fig.height=10, warning=FALSE, fig.align="center"}
257
-ggtree(hy) + geom_text(aes(x=branch, label=AA_subs), vjust=-.5)
258
-```
259
-
260
-## Parsing r8s output
261
-
262
-[r8s](http://loco.biosci.arizona.edu/r8s/) output contains 3 trees, namely `TREE`, `RATO` and `PHYLO` for time tree, rate tree and absolute substitution tree respectively.
263
-
264
-
265
-```{r fig.width=4, fig.height=6, width=60, warning=FALSE, fig.align="center"}
266
-r8s <- read.r8s(system.file("extdata/r8s", "H3_r8s_output.log", package="treeio"))
267
-ggtree(r8s, branch.length="TREE", mrsd="2014-01-01") + theme_tree2()
268
-```
269
-
270
-
271
-`branch.length` should be one of `TREE`, `RATO` or `PHYLO` for selecting the corresponding tree.
272
-
273
-User can also view 3 trees simultaneously.
274
-
275
-
276
-```{r fig.width=16, fig.height=10, width=60, warning=FALSE, fig.align="center"}
277
-ggtree(get.tree(r8s), aes(color=.id)) + facet_wrap(~.id, scales="free_x")
278
-```
279
-
280
-## Parsing output of RAxML bootstraping analysis
281
-
282
-```{r fig.width=12, fig.height=10, width=60, warning=FALSE, fig.align="center"}
283
-raxml_file <- system.file("extdata/RAxML", "RAxML_bipartitionsBranchLabels.H3", package="treeio")
284
-raxml <- read.raxml(raxml_file)
285
-ggtree(raxml) + geom_label(aes(label=bootstrap, fill=bootstrap)) + geom_tiplab() +
286
-    scale_fill_continuous(low='darkgreen', high='red') + theme_tree2(legend.position='right')
287
-```
288
-
289
-
290
-## Parsing NHX tree
291
-
292
-NHX (New Hampshire eXtended) format is commonly used in phylogenetics software (e.g. [PHYLDOG](http://pbil.univ-lyon1.fr/software/phyldog/)[@boussau_genome-scale_2013], [RevBayes](http://revbayes.github.io/intro.html)[@hohna_probabilistic_2014], *etc*.)
293
-
294
-```{r}
295
-nhxfile <- system.file("extdata/NHX", "ADH.nhx", package="treeio")
296
-nhx <- read.nhx(nhxfile)
297
-ggtree(nhx) + geom_tiplab() + geom_point(aes(color=S), size=5, alpha=.5) +
298
-    theme(legend.position="right") +
299
-    geom_text(aes(label=branch.length, x=branch), vjust=-.5) +
300
-    xlim(NA, 0.3)
301
-```
302
-
303
-## Parsing Phylip tree
304
-
305
-Phylip format contains taxa sequences with Newick tree text.
306
-
307
-```{r}
308
-phyfile <- system.file("extdata", "sample.phy", package="treeio")
309
-phylip <- read.phylip(phyfile)
310
-phylip
311
-ggtree(phylip) + geom_tiplab()
312
-```
313
-
314
-User can view sequence alignment with the tree via `msaplot()` function.
315
-
316
-```{r fig.width=10, fig.height=6}
317
-msaplot(phylip, offset=1)
318
-```
319
-
320
-
321
-## Parsing EPA and pplacer output
322
-
323
-[EPA](http://sco.h-its.org/exelixis/web/software/epa/index.html)[@berger_EPA_2011] and [PPLACER](http://matsen.fhcrc.org/pplacer/)[@matsen_pplacer_2010] have common output file format, `jplace`, which can be parsed by `read.jplace()` function.
324
-
325
-```{r}
326
-jpf <- system.file("extdata/sample.jplace",  package="treeio")
327
-jp <- read.jplace(jpf)
328
-print(jp)
329
-```
330
-
331
-In `ggtree`, we provide _`get.placements`_ method to access the placement.
332
-
333
-```{r}
334
-## get only best hit
335
-get.placements(jp, by="best")
336
-## get all placement
337
-get.placements(jp, by="all")
338
-```
339
-
340
-This is only a tiny sample file. In reality, [EPA](http://sco.h-its.org/exelixis/web/software/epa/index.html) and [PPLACER](http://matsen.fhcrc.org/pplacer/) may place thousands of short reads on a reference tree.
341
-
342
-
343
-We may, for example, count the number of placement and annotate this information in the tree.
344
-
345
-
346
-# merging tree objects
347
-
348
-
349
-In ggtree, tree objects can be merged and evidences inferred from different phylogenetic analyses can be combined or compared and visualized.
350
-
351
-User can use the command like:
352
-```r
353
-tree <- merge_tree(tree_object_1, tree_object_2)
354
-## it's chainable, and serveral tree objects can be merged into one tree object
355
-tree <- merge_tree(tree_object_1, tree_object_2) %>% merge_tree(tree_object_3) %>% merge_tree(tree_object_4)
356
-```
357
-
358
-Here we use a tree analyzed by BEAST and CodeML and merge them into one.
359
-
360
-```{r}
361
-beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
362
-beast_tree <- read.beast(beast_file)
363
-
364
-rst_file <- system.file("examples/rst", package="ggtree")
365
-mlc_file <- system.file("examples/mlc", package="ggtree")
366
-codeml_tree <- read.codeml(rst_file, mlc_file)
367
-
368
-merged_tree <- merge_tree(beast_tree, codeml_tree)
369
-
370
-merged_tree
371
-head(fortify(merged_tree))
372
-```
373
-
374
-After merging, all evidences inferred from different tools can be used to annotate the tree simultaneously. In this example, we used `dN/dS` inferred by CodeML to color the tree and annotate the tree with `posterior` inferred by BEAST.
375
-
376
-```{r fig.width=20, fig.height=26, warning=FALSE}
377
-ggtree(merged_tree, aes(color=dN_vs_dS), mrsd="2013-01-01", ndigits = 3) +
378
-    geom_text(aes(label=posterior), vjust=.1, hjust=-.1, size=5, color="black") +
379
-    scale_color_continuous(name='dN/dS', limits=c(0, 1.5),
380
-                           oob=scales::squish, low="green", high="red")+
381
-    theme_tree2(legend.position="right")
382
-```
383
-
384
-
385
-# `jplace` file format
386
-The _`jplace`_ file format was defined by Masten[@matsen_format_2012] for phylogenetic placements. We employed this file format to store phylogenetic tree and user specified annotation data. Suppose we have a tree, and the associated data as shown below:
387
-
388
-```{r}
389
-tree <- system.file("extdata", "pa.nwk", package="treeio")
390
-data <- read.csv(system.file("extdata", "pa_subs.csv", package="treeio"), stringsAsFactor=FALSE)
391
-print(tree)
392
-head(data)
393
-```
394
-
395
-The _`data`_ contains amino acid substitutions from parent node to child node and GC contents of each node. We can annotate the tree as demonstrated in [User specified annotation](treeAnnotation.html#user-specified-annotation) session of [Tree Annotation](treeAnnotation.html) vignette.
396
-
397
-
398
-`ggtree` provides a function, _`write.jplace`_, to combine a tree and an associated data and store them to a single _`jplace`_ file.
399
-```{r}
400
-outfile <- tempfile()
401
-write.jplace(tree, data, outfile)
402
-```
403
-
404
-The _`read.jplace`_ function was designed to read the _`jplace`_ file and store the information to a _`jplace`_ object.
405
-```{r}
406
-jp <- read.jplace(outfile)
407
-print(jp)
408
-```
409
-
410
-Now we know the _`jp`_ object that stores the tree and the associated amino acid substitution and GC content information, we can view the tree and display the associated annotation data on it directly by _`ggtree`_.
411
-
412
-<!-- ```{r fig.width=12, fig.height=12, warning=FALSE, fig.align="center"} -->
413
-<!-- ggtree(jp) +  -->
414
-<!--     geom_text(aes(x=branch, label=subs), color="purple", vjust=-1, size=3) +  -->
415
-<!--     geom_text(aes(label=gc), color="steelblue", hjust=-.6, size=3) + -->
416
-<!--     geom_tiplab() -->
417
-<!-- ``` -->
418
-
419
-
420
-
421
-# References
422
-