R/paml.R
d8a0e350
 ## ##' site mask
 ## ##'
 ## ##'
 ## ##' @title mask
 ## ##' @param tree_object tree object
 ## ##' @param field selected field
 ## ##' @param site site
 ## ##' @param mask_site if TRUE, site will be masked.
 ## ##'                  if FALSE, selected site will not be masked, while other sites will be masked.
 ## ##' @return updated tree object
 ## ##' @export
 ## ##' @author Guangchuang Yu
 ## mask <- function(tree_object, field, site, mask_site=FALSE) {
 ##     has_field <- has.field(tree_object, field)
 ##     if (has_field == FALSE) {
 ##         stop("'field' is not available in 'tree_object'...")
 ##     }
 
 ##     has_slot <- attr(has_field, "has_slot")
 ##     is_codeml <- attr(has_field, "is_codeml")
 
 ##     if (has_slot) {
 ##         if (is_codeml) {
 ##             field_info <- slot(tree_object@rst, field)
 ##         } else {
 ##             field_info <- slot(tree_object, field)
 ##         }
 ##         field_data <- field_info[,2]
 ##     } else {
 ##         field_data <- tree_object@extraInfo[, field]
 ##     }
 
 ##     field_data <- sapply(field_data, gsub, pattern="\n", replacement="/")
 
 ##     x <- field_data[field_data != ""]
 ##     x <- x[!is.na(x)]
 ##     pos <- strsplit(x, " / ") %>% unlist %>%
 ##         gsub("^[a-zA-Z]+", "", . ) %>%
 ##         gsub("[a-zA-Z]\\s*$", "", .) %>%
 ##         as.numeric
 
 ##     if (mask_site == FALSE) {
 ##         pos2 <- 1:max(pos)
 ##         pos2 <- pos2[-site]
 ##         site <- pos2
 ##     }
 
 ##     site <- site[site %in% pos]
 
 ##     for (i in seq_along(field_data)) {
 ##         if (is.na(field_data[i]))
 ##             next
 ##         for (j in seq_along(site)) {
 ##             pattern <- paste0("/*\\s*[a-zA-Z]", site[j], "[a-zA-Z]\\s*")
 ##             field_data[i] <- gsub(pattern, "",  field_data[i])
 ##         }
 ##         field_data[i] <- gsub("^/\\s", "", field_data[i]) %>% .add_new_line
 ##     }
 
 ##     if (has_slot) {
 ##         field_info[,2] <- field_data
 ##         if (is_codeml) {
 ##             slot(tree_object@rst, field) <- field_info
 ##         } else {
 ##             slot(tree_object, field) <- field_info
 ##         }
 ##     } else {
 ##         tree_object@extraInfo[, field] <- field_data
 ##     }
 ##     tree_object
 ## }
 
 
 ## read.tip_seq_mlc <- function(mlcfile) {
 ##     info <- getPhyInfo(mlcfile)
 ##     mlc <- readLines(mlcfile)
 ##     ## remove blank lines
 ##     blk <- grep("^\\s*$", mlc)
 ##     if (length(blk) > 0) {
 ##         mlc <- mlc[-blk]
 ##     }
 
 ##     seqs <- mlc[2:(info$num+1)]
 ##     seqs <- gsub("\\s+", "", seqs)
 ##     wd <- info$width
 ##     res <- sapply(seqs, function(x) substring(x, (base::nchar(x) - wd + 1), base::nchar(x)))
 ##     nn <- sapply(seqs, function(x) substring(x, 1, (base::nchar(x) - wd)))
 ##     names(res) <- nn
 ##     return(res)
 ## }
 
 ## read.tip_seq_mlb <- read.tip_seq_mlc
 
 ## read.dnds_mlc <- function(mlcfile) {
 ##     mlc <- readLines(mlcfile)
 ##     i <- grep("dN & dS for each branch", mlc)
 ##     j <- grep("tree length for dN", mlc)
 
 ##     mlc <- mlc[i:j]
 ##     hi <- grep("dN/dS", mlc)
 ##     cn <- strsplit(mlc[hi], " ") %>% unlist %>% `[`(nzchar(.))
 
 ##     ii <- grep("\\d+\\.\\.\\d+", mlc)
 ##     info <- mlc[ii]
 ##     info %<>% sub("^\\s+", "", .)
 ##     info %<>% sub("\\s+$", "", .)
 ##     res <- t(sapply(info, function(x) {
 ##         y <- unlist(strsplit(x, "\\s+"))
 ##         edge <- unlist(strsplit(y[1], "\\.\\."))
 ##         yy <- c(edge, y[-1])
 ##         as.numeric(yy)
 ##     }))
 
 ##     row.names(res) <- NULL
 ##     colnames(res) <- c("parent", "node", cn[-1])
 ##     colnames(res) <- gsub("\\*", "_x_", colnames(res))
 ##     colnames(res) <- gsub("\\/", "_vs_", colnames(res))
 ##     return(res)
 ## }
 
 ## read.treetext_paml_mlc <- function(mlcfile) {
 ##     read.treetext_paml(mlcfile, "mlc")
 ## }
 
 ## read.treetext_paml_rst<- function(rstfile) {
 ##     read.treetext_paml(rstfile, "rst")
 ## }
 
 ## read.treetext_paml <- function(file, by) {
 ##     ## works fine with baseml and codeml
 ##     x <- readLines(file)
 ##     tr.idx <- get_tree_index_paml(x)
 
 ##     if ( by == "rst" ) {
 ##         ii <- 1
 ##     } else if (by == "mlc") {
 ##         ii <- 3
 ##     } else {
 ##         stop("_by_ should be one of 'rst' or 'mlc'")
 ##     }
 
 ##     return(x[tr.idx][ii])
 ## }
 
 ## read.phylo_paml_mlc <- function(mlcfile) {
 ##     parent <- node <- label <- NULL
 
 ##     mlc <- readLines(mlcfile)
 ##     edge <- get_tree_edge_paml(mlc)
 
 ##     tr.idx <- get_tree_index_paml(mlc)
 ##     tr2 <- read.tree(text=mlc[tr.idx[2]])
 ##     tr3 <- read.tree(text=mlc[tr.idx[3]])
 
 ##     treeinfo <- as.data.frame(edge)
 ##     colnames(treeinfo) <- c("parent", "node")
 ##     treeinfo$length <- NA
 ##     treeinfo$label <- NA
 ##     treeinfo$isTip <- FALSE
 ##     ntip <- Ntip(tr3)
 ##     ii <- match(tr2$tip.label, treeinfo[, "node"])
 ##     treeinfo[ii, "label"] <- tr3$tip.label
 ##     treeinfo[ii, "isTip"] <- TRUE
 ##     ## jj <- match(1:ntip, tr3$edge[,2])
 ##     treeinfo[ii, "length"] <- tr2$edge.length[ii] ##tr3$edge.length[jj]
 
 ##     root <- getRoot(tr3) ## always == (Ntip(tr3) + 1)
 ##     currentNode <- treeinfo$label[ii]
 ##     treeinfo.tr3 <- fortify(tr3)
 
 ##     treeinfo$visited <- FALSE
 ##     while(any(treeinfo$visited == FALSE)) {
 ##         pNode <- c()
 ##         for( kk in currentNode ) {
 ##             i <- which(treeinfo$label == kk)
 ##             treeinfo[i, "visited"] <- TRUE
 ##             j <- which(treeinfo.tr3$label == kk)
 ##             ip <- treeinfo[i, "parent"]
 ##             if (ip != root) {
 ##                 ii <- which(treeinfo[, "node"] == ip)
 ##                 if (treeinfo$visited[ii] == FALSE) {
 ##                     jp <- treeinfo.tr3[j, "parent"]
 ##                     jj <- which(treeinfo.tr3[, "node"] == jp)
 ##                     treeinfo[ii, "label"] <- as.character(ip)
 ##                     treeinfo.tr3[jj, "label"] <- as.character(ip)
 ##                     treeinfo[ii, "length"] <- treeinfo.tr3[jj, "branch.length"]
 ##                     pNode <- c(pNode, ip)
 ##                 }
 ##                 treeinfo[ii, "visited"] <- TRUE
 ##             }
 
 ##         }
 ##         currentNode <- unique(pNode)
 ##     }
 
 ##     phylo <- with(treeinfo,
 ##                   list(edge= cbind(as.numeric(parent),
 ##                            as.numeric(node)),
 ##                        edge.length = length,
 ##                        tip.label = label[order(node)][1:ntip],
 ##                        Nnode = tr3$Nnode,
 ##                        node.label = c(root, label[order(node)][-c(1:ntip)])
 ##                        )
 ##                   )
 ##     class(phylo) <- "phylo"
 ##     phylo <- reorder.phylo(phylo, "cladewise")
 ##     return(phylo)
 ## }
 
 ## ##' @importFrom ape reorder.phylo
 ## read.phylo_paml_rst <- function(rstfile) {
 ##     parent <- node <- label <- NULL
 
 ##     ## works fine with baseml and codeml
 ##     rst <- readLines(rstfile)
 ##     tr.idx <- get_tree_index_paml(rst)
 
 ##     tr1 <- read.tree(text=rst[tr.idx][1])
 ##     tr3 <- read.tree(text=rst[tr.idx][3])
 
 ##     edge <- get_tree_edge_paml(rst)
 
 ##     label=c(tr3$tip.label, tr3$node.label)
 ##     root <- getRoot(tr3)
 ##     label %<>% `[`(. != root)
 
 ##     node.length <- data.frame(label=label,
 ##                               length=tr1$edge.length)
 
 ##     ## node.length$node <- sub("_\\w+", "", node.length$label
 ##     node.length$node <- gsub("^(\\d+)_.*", "\\1", node.length$label)
 ##     node.length$label %<>% sub("\\d+_", "", .)
 
 ##     edge <- as.data.frame(edge)
 ##     colnames(edge) <- c("parent", "node")
 
 ##     treeinfo <- merge(edge, node.length, by.x="node", by.y="node")
 ##     edge2 <- treeinfo[, c("parent", "node")]
 ##     edge2 %<>% as.matrix
 
 ##     ntip <- Ntip(tr3)
 
 ##     phylo <- with(treeinfo,
 ##                   list(edge= cbind(as.numeric(parent),
 ##                       as.numeric(node)),
 ##                        edge.length = length,
 ##                        tip.label = label[order(node)][1:ntip],
 ##                        Nnode = tr3$Nnode,
 ##                        node.label = c(root, label[order(node)][-c(1:ntip)])
 ##                        )
 ##                   )
 
 ##     class(phylo) <- "phylo"
 ##     phylo <- reorder.phylo(phylo, "cladewise")
 
 ##     return(phylo)
 ## }
 
 ## read.ancseq_paml_rst <- function(rstfile, by="Marginal") {
 ##     ## works fine with baseml and codeml
 ##     rst <- readLines(rstfile)
 
 ##     by <- match.arg(by, c("Marginal", "Joint"))
 ##     query <- paste(by, "reconstruction of ancestral sequences")
 ##     idx <- grep(query, rst)
 ##     if(length(idx) == 0) {
 ##         ## in some paml setting, joint_ancseq are not available.
 ##         return("")
 ##     }
 ##     si <- grep("reconstructed sequences", rst)
 ##     idx <- si[which.min(abs(si-idx))]
 
 ##     nl <- strsplit(rst[idx+2], split=" ") %>% unlist %<>% `[`(nzchar(.))
 ##     N <- as.numeric(nl[1])
 ##     seq.leng <- as.numeric(nl[2])
 
 ##     seqs <- rst[(idx+4):(idx+3+N)]
 
 ##     seq.name <- character(N)
 ##     res <- character(N)
 ##     for (i in 1:N) {
 ##         ss <- gsub(" ", "", seqs[i])
 ##         nn <- base::nchar(ss)
 ##         res[i] <- substring(ss, nn-seq.leng+1,nn)
 ##         seq.name[i] <- substring(ss, 1, nn-seq.leng)
 ##     }
 ##     seq.name <- sub("\\w+#", "", seq.name)
 ##     names(res) <- seq.name
 
 ##     return(res)
 ## }
 
 
 ## get_tree_index_paml <- function(paml) {
 ##     grep("\\)[ \\.0-9]*;", paml)
 ## }
 
 ## get_tree_edge_index_paml <- function(paml) {
 ##     grep("\\d+\\.\\.\\d+", paml)
 ## }
 
 ## get_tree_edge_paml <- function(paml) {
 ##     tr.idx <- get_tree_index_paml(paml)
 
 ##     edge.idx <- get_tree_edge_index_paml(paml)
 ##     edge.idx <- edge.idx[edge.idx < tr.idx[3]]
 
 ##     nodeNum <- strsplit(paml[edge.idx], split="\\.\\.") %>%
 ##         unlist %>% strsplit(split="[[:space:]]") %>% unlist
 
 ##     nodeNum %<>% `[`(nzchar(.))
 
 ##     edge <- matrix(as.numeric(nodeNum), ncol=2, byrow = TRUE)
 
 ##     return(edge)
 ## }
 
 ## set.paml_rst_ <- function(object) {
 ##     if (!is(object, "paml_rst")) {
 ##         stop("object should be an instance of 'paml_rst'")
 ##     }
 ##     if (length(object@tip_seq) == 0) {
 ##         return(object)
 ##     }
 
 ##     types <- get.fields(object)
 ##     for (type in types) {
 ##         value <- subs_paml_rst(object, type)
 ##         if (all(is.na(value)))
 ##             next
 
 ##         if (type == "marginal_subs") {
 ##             object@marginal_subs <- value
 ##         } else if (type == "marginal_AA_subs") {
 ##             object@marginal_AA_subs <- value
 ##         } else if (type == "joint_subs") {
 ##             object@joint_subs <- value
 ##         } else if (type == "joint_AA_subs") {
 ##             object@joint_AA_subs <- value
 ##         }
 ##     }
 ##     return(object)
 ## }
 
 
 ## get.subs_paml_rst <- function(object, type) {
 ##     if (!is(object, "paml_rst")) {
 ##         stop("object should be an instance of 'paml_rst'")
 ##     }
 ##     if (type == "marginal_subs") {
 ##         res <- object@marginal_subs
 ##     } else if (type == "marginal_AA_subs") {
 ##         res <- object@marginal_AA_subs
 ##     } else if (type == "joint_subs") {
 ##         res <- object@joint_subs
 ##     } else if (type == "joint_AA_subs") {
 ##         res <- object@joint_AA_subs
 ##     } else {
 ##         stop("type should be one of 'marginal_subs',
 ##                              'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ")
 ##     }
 ##     return(res)
 ## }
 
 ## subs_paml_rst <- function(x, type, ...) {
 ##     if (class(x) != "paml_rst") {
 ##         stop("x should be an object of paml_rst...")
 ##     }
 ##     seqs <- x@tip_seq
 ##     if (length(seqs) == 0) {
 ##         stop("tip sequences is not available...")
 ##     }
 ##     if (type %in% c("marginal_subs", "marginal_AA_subs")) {
 ##         ancseq <- x@marginal_ancseq
 ##         ## seqs <- c(seqs, x@marginal_ancseq)
 ##     } else if (type %in% c("joint_subs", "joint_AA_subs")){
 ##         ancseq <- x@joint_ancseq
 ##         ## seqs <- c(seqs, x@joint_ancseq)
 ##     } else {
 ##         stop("type should be one of 'marginal_subs',
 ##                              'marginal_AA_subs', 'joint_subs' or 'joint_AA_subs'. ")
 ##     }
 ##     if( type %in% c("marginal_subs", "joint_subs")) {
 ##         translate <- FALSE
 ##     } else {
 ##         translate <- TRUE
 ##     }
 
 ##     if (all(ancseq == "")) {
 ##         return(NA)
 ##     }
 ##     seqs <- c(seqs, ancseq)
 ##     get.subs_(x@phylo, seqs, translate=translate, ...)
 ## }