## ##' read HYPHY output ## ##' ## ##' ## ##' @title read.hyphy ## ##' @param nwk tree file in nwk format, one of hyphy output ## ##' @param ancseq ancestral sequence file in nexus format, ## ##' one of hyphy output ## ##' @param tip.fasfile tip sequence file ## ##' @return A hyphy object ## ## @importFrom Biostrings readBStringSet ## ## @importFrom Biostrings toString ## ##' @export ## ##' @author Guangchuang Yu \url{http://ygc.name} ## ##' @examples ## ##' nwk <- system.file("extdata/HYPHY", "labelledtree.tree", package="ggtree") ## ##' ancseq <- system.file("extdata/HYPHY", "ancseq.nex", package="ggtree") ## ##' read.hyphy(nwk, ancseq) ## read.hyphy <- function(nwk, ancseq, tip.fasfile=NULL) { ## anc <- scan(ancseq, what="", sep="\n", quiet=TRUE) ## end <- grep("END;", anc, ignore.case=TRUE) ## seq.start <- grep("MATRIX", anc, ignore.case=TRUE) ## seq.end <- end[end > seq.start][1] ## seq <- anc[(seq.start+1):(seq.end-1)] ## seq <- seq[seq != ";"] ## seq <- seq[seq != ""] ## seq <- gsub(" ", "", seq) ## seq <- gsub(";", "", seq) ## ## some files may only contains sequences (should have TAXALABELS block that contains seq names). ## ## some may contains sequence name like phylip format in MATRIX block (no need to have TAXALABELS block). ## ## ## ## extract sequence name if available ## if (all(grepl("\\s+", seq))) { ## ## if contains blank space, may contains seq name ## sn <- gsub("(\\w*)\\s.*", "\\1", seq) ## } ## seq <- gsub("\\w*\\s+", "", seq) ## label.start <- grep("TAXLABELS", anc, ignore.case = TRUE) ## if (length(label.start) == 0) { ## if (all(sn == "")) { ## stop("taxa labels is not available...") ## } ## label <- sn ## } else { ## label.end <- end[end > label.start][1] ## label <- anc[(label.start+1):(label.end-1)] ## label <- sub("^\t+", "", label) ## label <- sub("\\s*;$", "", label) ## label <- unlist(strsplit(label, split="\\s+")) ## label <- gsub("'|\"", "", label) ## } ## names(seq) <- label ## tr <- read.tree(nwk) ## nl <- tr$node.label ## ## root node may missing, which was supposed to be 'Node1' ## ## ## ## from a user's file, which is 'Node0', but it seems the file is not from the output of HYPHY. ## ## ## ## I am not sure. But it's safe to use "label[!label %in% nl]" instead of just assign it to "Node1". ## ## ## ## nl[nl == ""] <- "Node1" ## nl[nl == ""] <- label[!label %in% nl] ## tr$node.label <- nl ## type <- get_seqtype(seq) ## fields <- "subs" ## if (type == "NT") { ## fields <- c(fields, "AA_subs") ## } ## res <- new("hyphy", ## fields = fields, ## treetext = scan(nwk, what='', quiet=TRUE), ## phylo = tr, ## seq_type = type, ## ancseq = seq, ## tree.file = filename(nwk), ## ancseq.file = ancseq ## ) ## if ( !is.null(tip.fasfile) ) { ## readBStringSet <- get_fun_from_pkg("Biostrings", "readBStringSet") ## toString <- get_fun_from_pkg("Biostrings", "toString") ## tip_seq <- readBStringSet(tip.fasfile) ## nn <- names(tip_seq) ## tip_seq <- sapply(seq_along(tip_seq), function(i) { ## toString(tip_seq[i]) ## }) ## names(tip_seq) <- nn ## res@tip_seq <- tip_seq ## res@tip.fasfile <- tip.fasfile ## } ## set.hyphy_(res) ## } ## ##' @rdname groupOTU-methods ## ##' @exportMethod groupOTU ## setMethod("groupOTU", signature(object="hyphy"), ## function(object, focus, group_name="group") { ## groupOTU_(object, focus, group_name) ## } ## ) ## ##' @rdname groupClade-methods ## ##' @exportMethod groupClade ## setMethod("groupClade", signature(object="hyphy"), ## function(object, node, group_name="group") { ## groupClade_(object, node, group_name) ## } ## ) ## ##' @rdname scale_color-methods ## ##' @exportMethod scale_color ## setMethod("scale_color", signature(object="hyphy"), ## function(object, by, ...) { ## scale_color_(object, by, ...) ## }) ## ##' @rdname gzoom-methods ## ##' @exportMethod gzoom ## setMethod("gzoom", signature(object="hyphy"), ## function(object, focus, subtree=FALSE, widths=c(.3, .7)) { ## gzoom.phylo(get.tree(object), focus, subtree, widths) ## }) ## ##' @rdname show-methods ## ##' @exportMethod show ## setMethod("show", signature(object = "hyphy"), ## function(object) { ## cat("'hyphy' S4 object that stored information of \n\t", ## paste0("'", object@tree.file, "'")) ## if (length(object@tip_seq) == 0) { ## cat(paste0("and '", object@ancseq.file, "'"), ".\n") ## } else { ## cat(paste0(", \n\t'", object@ancseq.file, "'"), ## paste0("and \n\t'", object@tip.fasfile, "'."), ## "\n\n") ## } ## cat("...@ tree:") ## print.phylo(get.tree(object)) ## cat("\nwith the following features available:\n") ## cat("\t", paste0("'", ## paste(get.fields(object), collapse="',\t'"), ## "'."), ## "\n") ## }) ## ##' @rdname get.tree-methods ## ##' @exportMethod get.tree ## ##' @examples ## ##' nwk <- system.file("extdata/HYPHY", "labelledtree.tree", package="ggtree") ## ##' ancseq <- system.file("extdata/HYPHY", "ancseq.nex", package="ggtree") ## ##' hy <- read.hyphy(nwk, ancseq) ## ##' get.tree(hy) ## setMethod("get.tree", signature(object = "hyphy"), ## function(object) { ## object@phylo ## } ## ) ## ##' @rdname get.fields-methods ## ##' @exportMethod get.fields ## setMethod("get.fields", signature(object = "hyphy"), ## function(object, ...) { ## if(length(object@tip_seq) == 0) { ## warning("tip sequence not available...\n") ## } else { ## get.fields.tree(object) ## } ## }) ## ##' @rdname get.subs-methods ## ##' @exportMethod get.subs ## ##' @examples ## ##' nwk <- system.file("extdata/HYPHY", "labelledtree.tree", package="ggtree") ## ##' ancseq <- system.file("extdata/HYPHY", "ancseq.nex", package="ggtree") ## ##' tipfas <- system.file("extdata", "pa.fas", package="ggtree") ## ##' hy <- read.hyphy(nwk, ancseq, tipfas) ## ##' get.subs(hy, type="AA_subs") ## setMethod("get.subs", signature(object="hyphy"), ## function(object, type, ...) { ## if (length(object@tip_seq) == 0) { ## stop("tip sequence not available...\n") ## } ## if (type == "subs") { ## return(object@subs) ## } else { ## return(object@AA_subs) ## } ## }) ## set.hyphy_ <- function(object, ...) { ## if (!is(object, "hyphy")) { ## stop("object should be an instance of 'hyphy'") ## } ## if (length(object@tip_seq) == 0) { ## return(object) ## } ## types <- get.fields(object) ## seqs <- c(object@tip_seq, object@ancseq) ## for (type in types) { ## if (type == "subs") { ## translate <- FALSE ## } else { ## translate <- TRUE ## } ## subs <- get.subs_(object@phylo, seqs, translate, ...) ## if (type == "subs") { ## object@subs <- subs ## } else { ## object@AA_subs <- subs ## } ## } ## return(object) ## }