R/beast.R
0c8aaf33
 
 ##' read beast output
 ##'
 ##' 
 ##' @title read.beast
 ##' @param file beast file
 ##' @return \code{beast} object
 ##' @importFrom ape read.nexus
 ##' @export
 ##' @author Guangchuang Yu \url{http://ygc.name}
 ##' @examples
 ##' file <- system.file("extdata/BEAST", "beast_mcc.tree", package="ggtree")
 ##' read.beast(file)
 read.beast <- function(file) {
3e0a61f1
     translation <- read.trans_beast(file)
     treetext <- read.treetext_beast(file)
0c8aaf33
     stats <- read.stats_beast(file)
3e0a61f1
     phylo <- read.nexus(file)
     
     if (length(treetext) == 1) {
         obj <- BEAST(file, treetext, translation, stats, phylo)
     } else {
         obj <- lapply(seq_along(treetext), function(i) {
             BEAST(file, treetext[i], translation, stats[[i]], phylo[[i]])
         })
         class(obj) <- "beastList"
     }
     return(obj)
 }
 
 
 BEAST <- function(file, treetext, translation, stats, phylo) {
6c9cd355
     stats$node %<>% gsub("\"*'*", "", .)
     
0c8aaf33
     fields <- sub("_lower|_upper", "", names(stats)) %>% unique
     fields %<>% `[`(.!="node")
3e0a61f1
         
     phylo <- remove_quote_in_tree_label(phylo)
         
     obj <- new("beast",
                fields      = fields,
                treetext    = treetext,
                phylo       = phylo,
                translation = translation,
                stats       = stats,
                file        = filename(file)
                )
     return(obj)
 }
41edd910
 
3e0a61f1
 remove_quote_in_tree_label <- function(phylo) {
41edd910
     if (!is.null(phylo$node.label)) {
         phylo$node.label %<>% gsub("\"*'*", "", .)
     }
     if ( !is.null(phylo$tip.label)) {
         phylo$tip.label %<>% gsub("\"*'*", "", .) 
     }
3e0a61f1
     return(phylo)
0c8aaf33
 }
 
 
 ##' @rdname get.fields-methods
 ##' @exportMethod get.fields
 setMethod("get.fields", signature(object="beast"),
           function(object, ...) {
63bdf4e0
               get.fields.tree(object)
0c8aaf33
           }
           )
 
 
 read.treetext_beast <- function(file) {
     beast <- readLines(file)
3e0a61f1
     ## ii <- grep("^tree TREE1\\s+=", beast)
     ii <- grep("^tree ", beast)
03beafbf
     if (length(ii) == 0) {
3e0a61f1
         ii <- grep("[Bb]egin trees;", beast)
03beafbf
     }
     
443248f2
     jj <- grep("[Ee]nd;", beast)
3e0a61f1
     jj <- jj[jj > max(ii)][1]
 
     ## tree <- beast[ii:(jj-1)]
     ## if (length(tree) > 1) {
     ##     tree <- paste0(tree)
     ## }
     ## tree %<>% sub("[^=]+=", "", .) %>%
     ##     sub("\\s+\\[&R\\]\\s+", "", .) %>%
     ##         sub("[^(]*", "", .)
     
     jj <- c(ii[-1], jj)
     trees <- sapply(seq_along(ii), function(i) {
         tree <- beast[ii[i]:(jj[i]-1)]
         if (length(tree) > 1) {
             tree <- paste0(tree)
         }
         sub("[^(]*", "", tree)
     })
03beafbf
 
3e0a61f1
     return(trees)
0c8aaf33
 }
 
 read.trans_beast <- function(file) {
     beast <- readLines(file)
     i <- grep("TRANSLATE", beast, ignore.case = TRUE)
443248f2
     if (length(i) == 0) {
         return(matrix())
     }
0c8aaf33
     end <- grep(";", beast)
     j <- end[end %>% `>`(i) %>% which %>% `[`(1)]
     trans <- beast[(i+1):j]
     trans %<>% gsub("\\t+", "", .)
     trans %<>% gsub(",|;", "", .)
     trans %<>% `[`(nzchar(trans))
     ## remove quote if strings were quoted
     trans %<>% gsub("'|\"", "",.)
     trans %<>% sapply(., strsplit, split="\\s+")
     trans %<>% do.call(rbind, .)
     ## trans is a matrix
     return(trans)
 }
 
3e0a61f1
 
0c8aaf33
 read.stats_beast <- function(file) {
     beast <- readLines(file)
3e0a61f1
     trees <- read.treetext_beast(file)
     if (length(trees) == 1) {
         return(read.stats_beast_internal(beast, trees))
     }
     lapply(trees, read.stats_beast_internal, beast=beast)
 }
0c8aaf33
 
3e0a61f1
 read.stats_beast_internal <- function(beast, tree) {
0c8aaf33
     tree2 <- gsub("\\[[^\\[]*\\]", "", tree)
     phylo <- read.tree(text = tree2)
 
3e0a61f1
     tree2 <- add_pseudo_nodelabel(phylo, tree2)
     
0c8aaf33
     ## node name corresponding to stats
     nn <- strsplit(tree2, split=",") %>% unlist %>%
         strsplit(., split="\\)") %>% unlist %>%
6c9cd355
         gsub("\\(*", "", .) %>%
         gsub("[:;].*", "", .) 
0c8aaf33
     
     phylo <- read.tree(text = tree2)
     root <- getRoot(phylo)
     nnode <- phylo$Nnode
     
     ## phylo2 <- read.nexus(file)
     ## treeinfo <- fortify.phylo(phylo)
     ## treeinfo2 <- fortify.phylo(phylo2)
     ## treeinfo$label2 <- NA
     ## treeinfo$label2[treeinfo$isTip] <- treeinfo2$node[as.numeric(treeinfo$label[treeinfo$isTip])]
     ## treeinfo$visited <- FALSE
     ## root <- getRoot(phylo2)
     ## treeinfo[root, "visited"] <- TRUE
     ## currentNode <- 1:Ntip(phylo2)
     ## while(any(treeinfo$visited == FALSE)) {
     ##     pNode <- c()
     ##     for (kk in currentNode) {
     ##         i <- which(treeinfo$label2 == kk)
     ##         treeinfo[i, "visited"] <- TRUE
     ##         j <- which(treeinfo2$node == kk)
     ##         ip <- treeinfo$parent[i]
     ##         if (ip != root) {
     ##             ii <- which(treeinfo$node == ip)
     ##             if (treeinfo$visited[ii] == FALSE) {
     ##                 jp <- treeinfo2$parent[j]
     ##                 jj <- which(treeinfo2$node == jp)
     ##                 treeinfo[ii, "label2"] <- treeinfo2[jj, "node"]
     ##                 pNode <- c(pNode, jp)
     ##             }
     ##             treeinfo[ii, "visited"] <- TRUE
     ##         }
     ##     }
     ##     currentNode <- unique(pNode)
     ## }
     ## treeinfo[root, "label2"] <- root
     ## ## convert nn to node that encoded in phylo2
     ## node <- treeinfo$label2[match(nn, treeinfo$label)]
 
     
     ####################################################
     ##                                                ##
     ##  after doing it in the hard way                ##
     ##  I finally figure out the following easy way   ##
     ##                                                ##
     ####################################################
     treeinfo <- fortify.phylo(phylo)
6c9cd355
 
     if (any(grepl("TRANSLATE", beast, ignore.case = TRUE))) {
         label2 <- c(treeinfo[treeinfo$isTip, "label"],
                     root:(root+nnode-1))
         node <- label2[match(nn, treeinfo$label)]
     } else {
         node <- as.character(treeinfo$node[match(nn, treeinfo$label)])
     }
0c8aaf33
     
03beafbf
     ## stats <- unlist(strsplit(tree, "\\["))[-1]
     ## stats <- sub(":.+$", "", stats
     stats <- strsplit(tree, ":") %>% unlist
     names(stats) <- node
     stats <- stats[grep("\\[", stats)]
     stats <- sub("[^\\[]+\\[", "", stats)
0c8aaf33
 
     stats <- sub("^&", "", stats)
     stats <- sub("];*$", "", stats)
         
     stats2 <- lapply(stats, function(x) {
         y <- unlist(strsplit(x, ","))
a4ffa2e3
         sidx <- grep("=\\{", y)
         eidx <- grep("\\}$", y)
5c5cf18e
 
         flag <- FALSE
a4ffa2e3
         if (length(sidx) > 0) {
5c5cf18e
             flag <- TRUE
a4ffa2e3
             SETS <- sapply(seq_along(sidx), function(k) {
                 p <- y[sidx[k]:eidx[k]]
                 gsub(".*=\\{", "", p) %>% gsub("\\}$", "", .) %>% list                
5c5cf18e
             })
a4ffa2e3
             names(SETS) <- gsub("=.*", "", y[sidx])
5c5cf18e
 
a4ffa2e3
             kk <- sapply(seq_along(sidx), function(k) sidx[k]:eidx[k]) %>% unlist
             y <- y[-kk]
5c5cf18e
         }
0c8aaf33
         
a4ffa2e3
         
         name <- gsub("=.*", "", y)
5c5cf18e
         val <- gsub(".*=", "", y) %>% gsub("^\\{", "", .) %>%
             gsub("\\}$", "", .) 
0c8aaf33
 
a4ffa2e3
 
5c5cf18e
         if (flag) {
a4ffa2e3
             nn <- c(name, names(SETS))
         } else {
             nn <- name
5c5cf18e
         }
         
         res <- character(length(nn))
         names(res) <- nn
a4ffa2e3
 
         for (i in seq_along(name)) {
             res[i] <- val[i]
0c8aaf33
         }
5c5cf18e
         if (flag) {
a4ffa2e3
             j <- i
             for (i in seq_along(SETS)) {
                 res[i+j] <- SETS[i]
5c5cf18e
             }
         }
         
0c8aaf33
         return(res)
     })
 
     nn <- lapply(stats2, names) %>% unlist %>%
         unique %>% sort
 
     ## stats3 is a matrix
     stats3 <- t(sapply(stats2, function(x) {
         for (ii in nn[!nn %in% names(x)]) {
             x[ii] <- NA
         }
         x[nn]
     }))
 
     stats3 <- as.data.frame(stats3)
03beafbf
     if (nrow(stats3) == 1) {
         ## only has one evidence
         ## transpose
         stats3 <- data.frame(X=unlist(stats3[1,]))
         colnames(stats3) <- nn
     }
a4ffa2e3
     colnames(stats3) <- gsub("(\\d+)%", "0.\\1", colnames(stats3))
5c5cf18e
     
03beafbf
     ## stats3$node <- node
     stats3$node <- names(stats)
0c8aaf33
     return(stats3)
 }
 
3e0a61f1
 add_pseudo_nodelabel <- function(phylo, treetext) {
     if(is.null(phylo$node.label)) {
         nnode <- phylo$Nnode
         nlab <- paste("X", 1:nnode, sep="")
         for (i in 1:nnode) {
             treetext <- sub("\\)([:;])", paste0("\\)", nlab[i], "\\1"), treetext)
         }
     }
 
    return(treetext)
 }
 
03beafbf
 
6e4f67da