Browse code

ggdensitree

Guangchuang Yu authored on 11/09/2019 01:39:19
Showing6 changed files

... ...
@@ -1,5 +1,8 @@
1 1
 # Bradley Jones
2 2
 
3
+
4
++ `ggdensitree` function
5
+  - <https://github.com/YuLab-SMU/ggtree/pull/253>
3 6
 + made data usable with treedata in 'equal_angle' and 'daylight' layouts 
4 7
   - <https://github.com/GuangchuangYu/ggtree/pull/201>
5 8
 
... ...
@@ -1,7 +1,7 @@
1 1
 Package: ggtree
2 2
 Type: Package
3 3
 Title: an R package for visualization of tree and annotation data
4
-Version: 1.99.0
4
+Version: 1.99.1
5 5
 Authors@R: c(
6 6
 	   person("Guangchuang", "Yu",     email = "guangchuangyu@gmail.com", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0002-6485-8781")),
7 7
 	   person("Tommy Tsan-Yuk", "Lam", email = "tylam.tommy@gmail.com",   role = c("aut", "ths")),
... ...
@@ -1,3 +1,8 @@
1
+# ggtree 1.99.1
2
+
3
++ `ggdensitree` (2019-09-11, Wed)
4
+  - <https://github.com/YuLab-SMU/ggtree/pull/253>
5
+  
1 6
 # ggtree 1.99.0
2 7
 
3 8
 + prepare for ggtree v=2.0.0
... ...
@@ -4,6 +4,7 @@
4 4
 ##' @param data a list of phylo objects or any object with an as.phylo and fortify method
5 5
 ##' @param mapping aesthetic mapping
6 6
 ##' @param layout one of 'slanted', 'rectangluar', 'fan', 'circular' or 'radial' (default: 'slanted')
7
+##' @param branch.length variable to be used to scale branch length. Setting to 'branch.length' will use the branch lengths of the tree objects. Default is 'none' to discard branch length and only plot cladogram (more reasonable for densitree).
7 8
 ##' @param tip.order the order of the tips by a character vector of taxa names; or an integer, N, to order the tips by the order of the tips in the Nth tree; or 'mds' to order the tips based on MDS of the distance between the tips (default: 'mds')
8 9
 ##' @param dangle TRUE to align trees by their tips and FALSE to align treesby their root (default: TRUE)
9 10
 ##' @param jitter deviation to jitter tips
... ...
@@ -17,106 +18,77 @@
17 18
 ##' require(ape)
18 19
 ##' require(dplyr)
19 20
 ##' library(ape)
20
-##' library(tidyverse)
21 21
 ##' 
22 22
 ##' trees <- list(read.tree(text="((a,b),c);"), read.tree(text="((a,c),b);"));
23 23
 ##' ggdensitree(trees) + geom_tiplab()
24 24
 ##' 
25 25
 ##' trees.fort <- list(trees[[1]] %>% fortify %>% mutate(tree="a"), trees[[2]] %>% fortify %>% mutate(tree="b"));
26 26
 ##' ggdensitree(trees.fort, aes(colour=tree)) + geom_tiplab(colour='black')
27
-ggdensitree <- function(data=NULL, mapping=NULL, layout="slanted", tip.order='mds', dangle=TRUE, jitter=0, ...) {
28
-	# factorize to simplify code
29
-	trees <- lapply(data, as.phylo)
30
-	trees.f <- lapply(data, fortify, layout=layout, ...)
31
-	n.tips <- sum(trees.f[[1]]$isTip)
32
-	
33
-	# determine tip order
34
-	if (length(tip.order) == 1) {
35
-		if (tip.order == 'mds') {
36
-			first.label <- subset(trees.f[[1]], isTip)$label
37
-			
38
-			tip.order <- lapply(trees, . %$% tip.label %>% match(first.label))
39
-			
40
-			tip.2.tip <- lapply(trees, cophenetic.phylo.check.length)
41
-			tip.2.tip <- lapply(1:length(trees), function(i) tip.2.tip[[i]][tip.order[[i]], tip.order[[i]]])
42
-			
43
-			all.tab <- do.call(rbind, tip.2.tip)
44
-			rownames(all.tab) <- NULL
45
-			
46
-			distances <- dist(t(all.tab))
47
-			
48
-			res <- cmdscale(distances, k=1)
49
-			
50
-			tip.order <- first.label[order(res[,1])]
51
-		} else if (as.numeric(tip.order) && tip.order <= length(trees)) {
52
-			labels <- subset(trees.f[[tip.order]], isTip)$label
27
+ggdensitree <- function(data=NULL, mapping=NULL, layout="slanted", branch.length = "none",
28
+                        tip.order='mds', dangle=TRUE, jitter=0, ...) {
29
+    ## factorize to simplify code
30
+    trees <- lapply(data, as.phylo)
31
+    trees.f <- lapply(data, fortify, layout=layout, branch.length = branch.length, ...)
32
+    n.tips <- sum(trees.f[[1]]$isTip)
33
+
34
+    ## determine tip order
35
+    if (length(tip.order) == 1) {
36
+        if (tip.order == 'mds') {
37
+            first.label <- trees.f[[1]] %>%
38
+                dplyr::filter(.data$isTip) %>%
39
+                dplyr::pull(.data$label)
40
+            
41
+            tip.order <- lapply(trees, function(x) {
42
+                match(x$tip.label, first.label)
43
+            })
44
+            
45
+            tip.2.tip <- lapply(trees, cophenetic.phylo.check.length)
46
+            tip.2.tip <- lapply(1:length(trees), function(i) {
47
+                tip.2.tip[[i]][tip.order[[i]], tip.order[[i]]]
48
+            })
49
+            
50
+            all.tab <- do.call(rbind, tip.2.tip)
51
+            rownames(all.tab) <- NULL
52
+            
53
+            distances <- stats::dist(t(all.tab))
54
+            
55
+            res <- stats::cmdscale(distances, k=1)
53 56
 			
54
-			tip.order <- labels[as.integer(trees.f[[tip.order]]$y)]
55
-		}
56
-	}
57
+            tip.order <- first.label[order(res[,1])]
58
+        } else if (as.numeric(tip.order) && tip.order <= length(trees)) {
59
+            labels <- trees.f[[tip.order]] %>%
60
+                dplyr::filter(.data$isTip) %>%
61
+                dplyr::pull(.data$label)
62
+            
63
+            tip.order <- labels[as.integer(trees.f[[tip.order]]$y)]
64
+        }
65
+    }
57 66
 	
58
-	# reorder tips (and shift x id dangling)
67
+    ## reorder tips (and shift x id dangling)
68
+    max.x <- vapply(trees.f, function(x) max(x$x, na.rm = TRUE), numeric(1))
69
+    farthest <- max(max.x)
59 70
     trees.f <- lapply(1:length(trees), function(i) {
60
-    	trees.f[[i]]$y <- getYcoord_order(trees[[i]], tip.order)
61
-    	if (i > 1) {
62
-    		trees.f[[i]]$y[1:n.tips] %<>% add(rnorm(n.tips, mean=0, sd=jitter))
63
-    		if (dangle)
64
-   	    		trees.f[[i]]$x <- trees.f[[1]]$x[1] - trees.f[[i]]$x[match(trees.f[[1]]$label, trees.f[[i]]$label)][1] + trees.f[[i]]$x
65
-    	}
66
-    	trees.f[[i]]
71
+        trees.f[[i]]$y <- getYcoord(trees[[i]], tip.order = tip.order)
72
+        if (dangle) {
73
+            trees.f[[i]]$x <- trees.f[[i]]$x + (farthest - max.x[i])
74
+        }
75
+        if (i > 1 && jitter > 0) {
76
+            trees.f[[i]]$y[1:n.tips] %<>% add(stats::rnorm(n.tips, mean=0, sd=jitter))
77
+        }
78
+        trees.f[[i]]
67 79
     })
68
-	
69
-    # plot all trees together
70
-	p <- ggtree(tr=trees.f[[1]], mapping=mapping, layout=layout, ...)
71
-	for (x in trees.f[-1])
72
-		p <- p + geom_tree(mapping=mapping, data=x, layout=layout, ...)
73
-	p
80
+    
81
+    ## plot all trees together
82
+    p <- ggtree(tr=trees.f[[1]], mapping=mapping, layout=layout, ...)
83
+    for (x in trees.f[-1])
84
+        p <- p + geom_tree(mapping=mapping, data=x, layout=layout, ...)
85
+    p
74 86
 }
75 87
 
76 88
 ## wrapper for cohpenetic to ensure that branch lengths exist
77 89
 cophenetic.phylo.check.length <- function(tree) {
78
-	if (is.null(tree$edge.length))
79
-		tree$edge.length <- rep(1, nrow(tree$edge))
80
-	
81
-	cophenetic(tree)
82
-}
83
-
84
-## this is an adaptation of old code in ggtree
85
-## 
86
-##' @importFrom magrittr %>%
87
-##' @importFrom magrittr equals
88
-getYcoord_order <- function(tr, tip.order) {
89
-    Ntip <- length(tr[["tip.label"]])
90
-    N <- getNodeNum(tr)
91
-
92
-    edge <- tr[["edge"]]
93
-    parent <- edge[,1]
94
-    child <- edge[,2]
95
-
96
-    cl <- split(child, parent)
97
-    child_list <- list()
98
-    child_list[as.numeric(names(cl))] <- cl
99
-
100
-    y <- numeric(N)
101
-    y[1:Ntip] <- match(tr$tip.label, tip.order)
102
-    y[-(1:Ntip)] <- NA
103
-
104
-    pvec <- integer(max(tr$edge))
105
-    pvec[child] = parent
106
-
107
-    currentNode <- 1:Ntip
108
-    while(anyNA(y)) {
109
-        pNode <- unique(pvec[currentNode])
110
-
111
-        idx <- sapply(pNode, function(i) all(child_list[[i]] %in% currentNode))
112
-        newNode <- pNode[idx]
113
-
114
-        y[newNode] <- sapply(newNode, function(i) {
115
-            mean(y[child_list[[i]]], na.rm=TRUE)
116
-        })
117
-
118
-        currentNode <- c(currentNode[!currentNode %in% unlist(child_list[newNode])], newNode)
119
-    }
120
-
121
-    return(y)
90
+    if (is.null(tree$edge.length))
91
+        tree$edge.length <- rep(1, nrow(tree$edge))
92
+    
93
+    stats::cophenetic(tree)
122 94
 }
... ...
@@ -876,7 +876,7 @@ getXcoord <- function(tr) {
876 876
 
877 877
 ## @importFrom magrittr %>%
878 878
 ##' @importFrom magrittr equals
879
-getYcoord <- function(tr, step=1) {
879
+getYcoord <- function(tr, step=1, tip.order = NULL) {
880 880
     Ntip <- length(tr[["tip.label"]])
881 881
     N <- getNodeNum(tr)
882 882
 
... ...
@@ -889,9 +889,15 @@ getYcoord <- function(tr, step=1) {
889 889
     child_list[as.numeric(names(cl))] <- cl
890 890
 
891 891
     y <- numeric(N)
892
-    tip.idx <- child[child <= Ntip]
893
-    y[tip.idx] <- 1:Ntip * step
892
+    if (is.null(tip.order)) {
893
+        tip.idx <- child[child <= Ntip]
894
+        y[tip.idx] <- 1:Ntip * step
895
+    } else {
896
+        tip.idx <- 1:Ntip
897
+        y[tip.idx] <- match(tr$tip.label, tip.order) * step
898
+    }
894 899
     y[-tip.idx] <- NA
900
+    
895 901
 
896 902
     ## use lookup table
897 903
     pvec <- integer(max(tr$edge))
... ...
@@ -5,7 +5,8 @@
5 5
 \title{ggdensitree}
6 6
 \usage{
7 7
 ggdensitree(data = NULL, mapping = NULL, layout = "slanted",
8
-  tip.order = "mds", dangle = TRUE, jitter = 0, ...)
8
+  branch.length = "none", tip.order = "mds", dangle = TRUE,
9
+  jitter = 0, ...)
9 10
 }
10 11
 \arguments{
11 12
 \item{data}{a list of phylo objects or any object with an as.phylo and fortify method}
... ...
@@ -14,6 +15,8 @@ ggdensitree(data = NULL, mapping = NULL, layout = "slanted",
14 15
 
15 16
 \item{layout}{one of 'slanted', 'rectangluar', 'fan', 'circular' or 'radial' (default: 'slanted')}
16 17
 
18
+\item{branch.length}{variable to be used to scale branch length. Setting to 'branch.length' will use the branch lengths of the tree objects. Default is 'none' to discard branch length and only plot cladogram (more reasonable for densitree).}
19
+
17 20
 \item{tip.order}{the order of the tips by a character vector of taxa names; or an integer, N, to order the tips by the order of the tips in the Nth tree; or 'mds' to order the tips based on MDS of the distance between the tips (default: 'mds')}
18 21
 
19 22
 \item{dangle}{TRUE to align trees by their tips and FALSE to align treesby their root (default: TRUE)}
... ...
@@ -32,7 +35,6 @@ drawing phylogenetic trees from list of phylo objects
32 35
 require(ape)
33 36
 require(dplyr)
34 37
 library(ape)
35
-library(tidyverse)
36 38
 
37 39
 trees <- list(read.tree(text="((a,b),c);"), read.tree(text="((a,c),b);"));
38 40
 ggdensitree(trees) + geom_tiplab()