0c8aaf33 |
##' read beast output
##'
##'
##' @title read.beast
##' @param file beast file
##' @return \code{beast} object
##' @importFrom ape read.nexus
##' @export
##' @author Guangchuang Yu \url{http://ygc.name}
##' @examples
##' file <- system.file("extdata/BEAST", "beast_mcc.tree", package="ggtree")
##' read.beast(file)
read.beast <- function(file) {
|
3e0a61f1 |
translation <- read.trans_beast(file)
treetext <- read.treetext_beast(file)
|
0c8aaf33 |
stats <- read.stats_beast(file)
|
3e0a61f1 |
phylo <- read.nexus(file)
if (length(treetext) == 1) {
obj <- BEAST(file, treetext, translation, stats, phylo)
} else {
obj <- lapply(seq_along(treetext), function(i) {
BEAST(file, treetext[i], translation, stats[[i]], phylo[[i]])
})
class(obj) <- "beastList"
}
return(obj)
}
BEAST <- function(file, treetext, translation, stats, phylo) {
|
6c9cd355 |
stats$node %<>% gsub("\"*'*", "", .)
|
0c8aaf33 |
fields <- sub("_lower|_upper", "", names(stats)) %>% unique
fields %<>% `[`(.!="node")
|
3e0a61f1 |
phylo <- remove_quote_in_tree_label(phylo)
obj <- new("beast",
fields = fields,
treetext = treetext,
phylo = phylo,
translation = translation,
stats = stats,
file = filename(file)
)
return(obj)
}
|
41edd910 |
|
3e0a61f1 |
remove_quote_in_tree_label <- function(phylo) {
|
41edd910 |
if (!is.null(phylo$node.label)) {
phylo$node.label %<>% gsub("\"*'*", "", .)
}
if ( !is.null(phylo$tip.label)) {
phylo$tip.label %<>% gsub("\"*'*", "", .)
}
|
3e0a61f1 |
return(phylo)
|
0c8aaf33 |
}
##' @rdname get.fields-methods
##' @exportMethod get.fields
setMethod("get.fields", signature(object="beast"),
function(object, ...) {
|
63bdf4e0 |
get.fields.tree(object)
|
0c8aaf33 |
}
)
read.treetext_beast <- function(file) {
beast <- readLines(file)
|
3e0a61f1 |
## ii <- grep("^tree TREE1\\s+=", beast)
ii <- grep("^tree ", beast)
|
03beafbf |
if (length(ii) == 0) {
|
3e0a61f1 |
ii <- grep("[Bb]egin trees;", beast)
|
03beafbf |
}
|
443248f2 |
jj <- grep("[Ee]nd;", beast)
|
3e0a61f1 |
jj <- jj[jj > max(ii)][1]
## tree <- beast[ii:(jj-1)]
## if (length(tree) > 1) {
## tree <- paste0(tree)
## }
## tree %<>% sub("[^=]+=", "", .) %>%
## sub("\\s+\\[&R\\]\\s+", "", .) %>%
## sub("[^(]*", "", .)
jj <- c(ii[-1], jj)
trees <- sapply(seq_along(ii), function(i) {
tree <- beast[ii[i]:(jj[i]-1)]
if (length(tree) > 1) {
tree <- paste0(tree)
}
sub("[^(]*", "", tree)
})
|
03beafbf |
|
3e0a61f1 |
return(trees)
|
0c8aaf33 |
}
read.trans_beast <- function(file) {
beast <- readLines(file)
i <- grep("TRANSLATE", beast, ignore.case = TRUE)
|
443248f2 |
if (length(i) == 0) {
return(matrix())
}
|
0c8aaf33 |
end <- grep(";", beast)
j <- end[end %>% `>`(i) %>% which %>% `[`(1)]
trans <- beast[(i+1):j]
trans %<>% gsub("\\t+", "", .)
trans %<>% gsub(",|;", "", .)
trans %<>% `[`(nzchar(trans))
## remove quote if strings were quoted
trans %<>% gsub("'|\"", "",.)
trans %<>% sapply(., strsplit, split="\\s+")
trans %<>% do.call(rbind, .)
## trans is a matrix
return(trans)
}
|
3e0a61f1 |
|
0c8aaf33 |
read.stats_beast <- function(file) {
beast <- readLines(file)
|
3e0a61f1 |
trees <- read.treetext_beast(file)
if (length(trees) == 1) {
return(read.stats_beast_internal(beast, trees))
}
lapply(trees, read.stats_beast_internal, beast=beast)
}
|
0c8aaf33 |
|
3e0a61f1 |
read.stats_beast_internal <- function(beast, tree) {
|
0c8aaf33 |
tree2 <- gsub("\\[[^\\[]*\\]", "", tree)
phylo <- read.tree(text = tree2)
|
3e0a61f1 |
tree2 <- add_pseudo_nodelabel(phylo, tree2)
|
0c8aaf33 |
## node name corresponding to stats
nn <- strsplit(tree2, split=",") %>% unlist %>%
strsplit(., split="\\)") %>% unlist %>%
|
6c9cd355 |
gsub("\\(*", "", .) %>%
gsub("[:;].*", "", .)
|
0c8aaf33 |
phylo <- read.tree(text = tree2)
root <- getRoot(phylo)
nnode <- phylo$Nnode
## phylo2 <- read.nexus(file)
## treeinfo <- fortify.phylo(phylo)
## treeinfo2 <- fortify.phylo(phylo2)
## treeinfo$label2 <- NA
## treeinfo$label2[treeinfo$isTip] <- treeinfo2$node[as.numeric(treeinfo$label[treeinfo$isTip])]
## treeinfo$visited <- FALSE
## root <- getRoot(phylo2)
## treeinfo[root, "visited"] <- TRUE
## currentNode <- 1:Ntip(phylo2)
## while(any(treeinfo$visited == FALSE)) {
## pNode <- c()
## for (kk in currentNode) {
## i <- which(treeinfo$label2 == kk)
## treeinfo[i, "visited"] <- TRUE
## j <- which(treeinfo2$node == kk)
## ip <- treeinfo$parent[i]
## if (ip != root) {
## ii <- which(treeinfo$node == ip)
## if (treeinfo$visited[ii] == FALSE) {
## jp <- treeinfo2$parent[j]
## jj <- which(treeinfo2$node == jp)
## treeinfo[ii, "label2"] <- treeinfo2[jj, "node"]
## pNode <- c(pNode, jp)
## }
## treeinfo[ii, "visited"] <- TRUE
## }
## }
## currentNode <- unique(pNode)
## }
## treeinfo[root, "label2"] <- root
## ## convert nn to node that encoded in phylo2
## node <- treeinfo$label2[match(nn, treeinfo$label)]
####################################################
## ##
## after doing it in the hard way ##
## I finally figure out the following easy way ##
## ##
####################################################
treeinfo <- fortify.phylo(phylo)
|
6c9cd355 |
if (any(grepl("TRANSLATE", beast, ignore.case = TRUE))) {
label2 <- c(treeinfo[treeinfo$isTip, "label"],
root:(root+nnode-1))
node <- label2[match(nn, treeinfo$label)]
} else {
node <- as.character(treeinfo$node[match(nn, treeinfo$label)])
}
|
0c8aaf33 |
|
03beafbf |
## stats <- unlist(strsplit(tree, "\\["))[-1]
## stats <- sub(":.+$", "", stats
stats <- strsplit(tree, ":") %>% unlist
names(stats) <- node
stats <- stats[grep("\\[", stats)]
stats <- sub("[^\\[]+\\[", "", stats)
|
0c8aaf33 |
stats <- sub("^&", "", stats)
stats <- sub("];*$", "", stats)
stats2 <- lapply(stats, function(x) {
y <- unlist(strsplit(x, ","))
|
a4ffa2e3 |
sidx <- grep("=\\{", y)
eidx <- grep("\\}$", y)
|
5c5cf18e |
flag <- FALSE
|
a4ffa2e3 |
if (length(sidx) > 0) {
|
5c5cf18e |
flag <- TRUE
|
a4ffa2e3 |
SETS <- sapply(seq_along(sidx), function(k) {
p <- y[sidx[k]:eidx[k]]
gsub(".*=\\{", "", p) %>% gsub("\\}$", "", .) %>% list
|
5c5cf18e |
})
|
a4ffa2e3 |
names(SETS) <- gsub("=.*", "", y[sidx])
|
5c5cf18e |
|
a4ffa2e3 |
kk <- sapply(seq_along(sidx), function(k) sidx[k]:eidx[k]) %>% unlist
y <- y[-kk]
|
5c5cf18e |
}
|
0c8aaf33 |
|
a4ffa2e3 |
name <- gsub("=.*", "", y)
|
5c5cf18e |
val <- gsub(".*=", "", y) %>% gsub("^\\{", "", .) %>%
gsub("\\}$", "", .)
|
0c8aaf33 |
|
a4ffa2e3 |
|
5c5cf18e |
if (flag) {
|
a4ffa2e3 |
nn <- c(name, names(SETS))
} else {
nn <- name
|
5c5cf18e |
}
res <- character(length(nn))
names(res) <- nn
|
a4ffa2e3 |
for (i in seq_along(name)) {
res[i] <- val[i]
|
0c8aaf33 |
}
|
5c5cf18e |
if (flag) {
|
a4ffa2e3 |
j <- i
for (i in seq_along(SETS)) {
res[i+j] <- SETS[i]
|
5c5cf18e |
}
}
|
0c8aaf33 |
return(res)
})
nn <- lapply(stats2, names) %>% unlist %>%
unique %>% sort
## stats3 is a matrix
stats3 <- t(sapply(stats2, function(x) {
for (ii in nn[!nn %in% names(x)]) {
x[ii] <- NA
}
x[nn]
}))
stats3 <- as.data.frame(stats3)
|
03beafbf |
if (nrow(stats3) == 1) {
## only has one evidence
## transpose
stats3 <- data.frame(X=unlist(stats3[1,]))
colnames(stats3) <- nn
}
|
a4ffa2e3 |
colnames(stats3) <- gsub("(\\d+)%", "0.\\1", colnames(stats3))
|
5c5cf18e |
|
03beafbf |
## stats3$node <- node
stats3$node <- names(stats)
|
0c8aaf33 |
return(stats3)
}
|
3e0a61f1 |
add_pseudo_nodelabel <- function(phylo, treetext) {
if(is.null(phylo$node.label)) {
nnode <- phylo$Nnode
nlab <- paste("X", 1:nnode, sep="")
for (i in 1:nnode) {
treetext <- sub("\\)([:;])", paste0("\\)", nlab[i], "\\1"), treetext)
}
}
return(treetext)
}
|
03beafbf |
|
6e4f67da |
|