/* -*- mode: c; tab-width: 4; c-basic-offset: 4;  indent-tabs-mode: nil -*- */

/* This a mix of tree functions and data-structures from
 * Bob Edgar's Muscle (version 3.7) ported to pure C, topped up with
 * some of our own stuff.
 *
 * Used files: phy.cpp, tree.h, phytofile.cpp and phyfromclust.cpp
 *
 * Muscle's code is public domain and so is this code here.

 * From http://www.drive5.com/muscle/license.htm:
 * """
 * MUSCLE is public domain software
 *
 * The MUSCLE software, including object and source code and
 * documentation, is hereby donated to the public domain.
 *
 * Disclaimer of warranty
 * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * """
 *
 */

/*
 *  RCS $Id: muscle_tree.c 230 2011-04-09 15:37:50Z andreas $
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <assert.h>
#include <ctype.h>

#include "util.h"
#include "log.h"
#include "muscle_tree.h"


static const double VERY_NEGATIVE_DOUBLE = -9e29;
/*const double dInsane = VERY_NEGATIVE_DOUBLE;*/
static const double dInsane = -9e29;
static const unsigned uInsane = 8888888;

typedef enum 
{
    NTT_Unknown,

    /* Returned from Tree::GetToken: */
    NTT_Lparen,
    NTT_Rparen,
    NTT_Colon,
    NTT_Comma,
    NTT_Semicolon,
    NTT_String,
    
    /* Following are never returned from Tree::GetToken: */
    NTT_SingleQuotedString,
    NTT_DoubleQuotedString,
    NTT_Comment
} NEWICK_TOKEN_TYPE;


static void
InitCache(uint uCacheCount, tree_t *tree);
static void
TreeZero(tree_t *tree);
static uint
GetNeighborCount(unsigned uNodeIndex, tree_t *tree);
static bool
IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
static bool
HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree);
static void
TreeToFileNodeRooted(tree_t *tree, uint m_uRootNodeIndex, FILE *fp);
static void
ValidateNode(uint uNodeIndex, tree_t *tree);
static void
AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
static void
ExpandCache(tree_t *tree);
static void
TreeCreateRooted(tree_t *tree);
static bool
GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree);
static NEWICK_TOKEN_TYPE
GetToken(FILE *fp, char szToken[], uint uBytes);
/* stuff from textfile.cpp */
static void
FileSkipWhite(FILE *fp);
static bool
FileSkipWhiteX(FILE *fp);
static void
SetLeafName(uint uNodeIndex, const char *ptrName, tree_t *tree);
uint
AppendBranch(tree_t *tree, uint uExistingLeafIndex);
static void
SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
              double dLength, tree_t *tree);
static uint
UnrootFromFile(tree_t *tree);
uint
GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *tree);
static void
InitNode(tree_t *prTree, uint uNodeIndex);


/**
 * @param[in] uNodeIndex
 * node to examine
 * @param[in] tree
 * tree to examine
 * @return id of left node
 * @note called GetRight in Muscle3.7
 */
uint
GetLeft(uint uNodeIndex, tree_t *tree) 
{
    assert(NULL != tree);
    assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
    return tree->m_uNeighbor2[uNodeIndex];
}
/***   end: GetLeft   ***/



/**
 * @param[in] uNodeIndex
 * node to examine
 * @param[in] tree
 * tree to examine
 * @return id of right node
 * @note called GetRight in Muscle3.7
 */
uint
GetRight(uint uNodeIndex, tree_t *tree)
{
    assert(NULL != tree);
    assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
    return tree->m_uNeighbor3[uNodeIndex];
}
/***   end: GetRight   ***/



/**
 * @param[in] uNodeIndex
 * node to examine
 * @param[in] tree
 * tree to examine
 * @return leaf id of current node
 */
uint
GetLeafId(uint uNodeIndex, tree_t *tree)
{
    assert(NULL != tree);
    assert(uNodeIndex < tree->m_uNodeCount);
    assert(IsLeaf(uNodeIndex, tree));
    
    return tree->m_Ids[uNodeIndex];
}
/***   end: GetLeafId   ***/



/**
 * @note originally called GetLeafName
 *
 */
char *
GetLeafName(unsigned uNodeIndex, tree_t *tree)
{
    assert(NULL != tree);
    assert(uNodeIndex < tree->m_uNodeCount);
    assert(IsLeaf(uNodeIndex, tree));
    return tree->m_ptrName[uNodeIndex];
}
/***   end: GetLeafName   ***/



/**
 * @brief returns first leaf node for a depth-first traversal of tree
 *
 * @param[in] tree
 * tree to traverse
 *
 * @return node index of first leaf node for depth-first traversal
 *
 * @note called FirstDepthFirstNode in Muscle3.7
 *
 */
uint
FirstDepthFirstNode(tree_t *tree)
{
    uint uNodeIndex;
        
    assert(NULL != tree);
    assert(IsRooted(tree));    
    
    /* Descend via left branches until we hit a leaf */
    uNodeIndex =  tree->m_uRootNodeIndex;
    while (!IsLeaf(uNodeIndex, tree))
        uNodeIndex = GetLeft(uNodeIndex, tree);
    return uNodeIndex;
}
/***   end: FirstDepthFirstNode   ***/


/**
 * @brief returns next leaf node index for depth-first traversal of
 * tree
 *
 * @param[in] tree
 * tree to traverse
 * @param[in] uNodeIndex
 * Current node index
 *
 * @return node index of next leaf node during depth-first traversal
 *
 * @note called NextDepthFirstNode in Muscle3.7
 */
uint
NextDepthFirstNode(uint uNodeIndex, tree_t *tree)
{
    uint uParent;
    
    assert(NULL != tree);
    assert(IsRooted(tree));
    assert(uNodeIndex < tree->m_uNodeCount);
    
    if (IsRoot(uNodeIndex, tree))
    {
        /* Node uNodeIndex is root, end of traversal */
        return NULL_NEIGHBOR;
    }
    
    uParent = GetParent(uNodeIndex, tree);
    if (GetRight(uParent, tree) == uNodeIndex) {
        /* Is right branch, return parent=uParent */
        return uParent;
    }
    
    uNodeIndex = GetRight(uParent, tree);
    /* Descend left from right sibling uNodeIndex */
    while (!IsLeaf(uNodeIndex, tree)) {
        uNodeIndex = GetLeft(uNodeIndex, tree);
    }
    
    /*  bottom out at leaf uNodeIndex */
    return uNodeIndex;
}
/***   end: NextDepthFirstNode   ***/



/**
 * @brief check if tree is a rooted tree
 * @param[in] tree
 * tree to check
 * @return TRUE if given tree is rooted, FALSE otherwise
 *
 */
bool
IsRooted(tree_t *tree) 
{
    assert(NULL != tree);
    return tree->m_bRooted;
}
/***   end: IsRooted   ***/


/**
 *
 */
void
FreeMuscleTree(tree_t *tree)
{
    uint i;

    assert(tree!=NULL);
    

    /* FIXME use GetLeafNodeIndex? or
       for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
       {
       if (tree.IsLeaf(uNodeIndex))
       {
       const char *ptrName =
       tree.GetLeafName(uNodeIndex);
    */
    /* IsLeaf needs m_uNodeCount and all m_uNeighbor's
     * so free first
     */
    for (i=0; i<tree->m_uNodeCount; i++) {
        /* IsLeaf needs neighbour count, so free those guys later */
        if (IsLeaf(i, tree)) {
            CKFREE(tree->m_ptrName[i]);
        }
    }
    CKFREE(tree->m_ptrName);

    CKFREE(tree->m_uNeighbor1);
    CKFREE(tree->m_uNeighbor2);
    CKFREE(tree->m_uNeighbor3);
    
    CKFREE(tree->m_Ids);
    
    CKFREE(tree->m_dEdgeLength1);
    CKFREE(tree->m_dEdgeLength2);
    CKFREE(tree->m_dEdgeLength3);
#if USE_HEIGHT
    CKFREE(tree->m_dHeight);
    CKFREE(tree->m_bHasHeight);
#endif 
    CKFREE(tree->m_bHasEdgeLength1);
    CKFREE(tree->m_bHasEdgeLength2);
    CKFREE(tree->m_bHasEdgeLength3);
    
    TreeZero(tree);

    free(tree);
}
/***   end: FreeMuscleTree   ***/



/**
 * @brief create a muscle tree
 *
 * @note Original comment in Muscle code: "Create rooted tree from a
 * vector description. Node indexes are 0..N-1 for leaves, N..2N-2 for
 * internal nodes. Vector subscripts are i-N and have values for
 * internal nodes only, but those values are node indexes 0..2N-2. So
 * e.g. if N=6 and Left[2]=1, this means that the third internal node
 * (node index 8) has the second leaf (node index 1) as its left
 * child. uRoot gives the vector subscript of the root, so add N to
 * get the node index."
 *
 * @param[out] tree
 * newly created tree
 * @param[in] uLeafCount
 * number of leaf nodes
 * @param[in] uRoot
 * Internal node index of root node
 * @param[in] Left
 * Node index of left sibling of an internal node.
 * Index range: 0--uLeafCount-1
 * @param[in] Right
 * Node index of right sibling of an internal node.
 * Index range: 0--uLeafCount-1
 * @param[in] LeftLength
 * Branch length of left branch of an internal node.
 * Index range: 0--uLeafCount-1
 * @param[in] RightLength
 * Branch length of right branch of an internal node.
 * Index range: 0--uLeafCount-1
 * @param[in] LeafIds
 * Leaf id. Index range: 0--uLeafCount
 * @param[in] LeafNames
 * Leaf label. Index range: 0--uLeafCount
 *
 */
void
MuscleTreeCreate(tree_t *tree,
                  uint uLeafCount, uint uRoot,
                  const uint *Left, const uint *Right,
                  const float *LeftLength, const float* RightLength,
                  const uint *LeafIds, char **LeafNames)
{
    uint uNodeIndex;

	TreeZero(tree);
    tree->m_uNodeCount = 2*uLeafCount - 1;
	InitCache(tree->m_uNodeCount, tree);
    
	for (uNodeIndex = 0; uNodeIndex < uLeafCount; ++uNodeIndex) {
		tree->m_Ids[uNodeIndex] = LeafIds[uNodeIndex];
		tree->m_ptrName[uNodeIndex] = CkStrdup(LeafNames[uNodeIndex]);
    }

	for (uNodeIndex = uLeafCount; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
		uint v = uNodeIndex - uLeafCount;
		uint uLeft = Left[v];
		uint uRight = Right[v];
		float fLeft = LeftLength[v];
		float fRight = RightLength[v];
        
		tree->m_uNeighbor2[uNodeIndex] = uLeft;
		tree->m_uNeighbor3[uNodeIndex] = uRight;
        
		tree->m_bHasEdgeLength2[uNodeIndex] = TRUE;
		tree->m_bHasEdgeLength3[uNodeIndex] = TRUE;

		tree->m_dEdgeLength2[uNodeIndex] = fLeft;
		tree->m_dEdgeLength3[uNodeIndex] = fRight;

		tree->m_uNeighbor1[uLeft] = uNodeIndex;
		tree->m_uNeighbor1[uRight] = uNodeIndex;

		tree->m_dEdgeLength1[uLeft] = fLeft;
		tree->m_dEdgeLength1[uRight] = fRight;

		tree->m_bHasEdgeLength1[uLeft] = TRUE;
		tree->m_bHasEdgeLength1[uRight] = TRUE;
    }

	tree->m_bRooted = TRUE;
	tree->m_uRootNodeIndex = uRoot + uLeafCount;
#ifndef NDEBUG
	TreeValidate(tree);
#endif
}
/***   end: MuscleTreeCreate   ***/



/**
 * @param[in] tree
 * tree to write
 * @param[out] fp
 * filepointer to write to2
 * 
 * @brief write a muscle tree to a file in newick format (distances
 * and all names)
 *
 * 
 */
void
MuscleTreeToFile(FILE *fp, tree_t *tree)
{
    assert(NULL != tree);
    if (IsRooted(tree)) {
        TreeToFileNodeRooted(tree, tree->m_uRootNodeIndex, fp);
        fprintf(fp, ";\n");
        return;
    } else {
        Log(&rLog, LOG_FATAL, "FIXME: output of unrooted muscle trees not implemented");
    }
}
/***   end: MuscleTreeToFile   ***/



/**
 * @brief check if given node is a leaf node
 *
 * @param[in] uNodeIndex
 * the node index
 * @param tree
 * the tree
 *
 * @return TRUE if given node is a leaf, FALSE otherwise
 *
 * @note called IsLeaf in Muscle3.7. See tree.h in original code
 */
bool
IsLeaf(uint uNodeIndex, tree_t *tree)
{
    assert(NULL != tree);
    assert(uNodeIndex < tree->m_uNodeCount);
    if (1 == tree->m_uNodeCount)
        return TRUE;
    return 1 == GetNeighborCount(uNodeIndex, tree);
}
/***   end: IsLeaf   ***/



/**
 */
bool
IsRoot(uint uNodeIndex, tree_t *tree)
{
    assert(NULL != tree);
    return IsRooted(tree) && tree->m_uRootNodeIndex == uNodeIndex;
}
/***   end: IsRoot   ***/



/**
 */
uint
GetNeighborCount(uint uNodeIndex, tree_t *tree)
{
    uint n1, n2, n3;
    assert(uNodeIndex < tree->m_uNodeCount);
    assert(NULL != tree);
    assert(NULL != tree->m_uNeighbor1);
    assert(NULL != tree->m_uNeighbor2);
    assert(NULL != tree->m_uNeighbor3);
    n1 = tree->m_uNeighbor1[uNodeIndex];
    n2 = tree->m_uNeighbor2[uNodeIndex];
    n3 = tree->m_uNeighbor3[uNodeIndex];
    return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);
}
/***   end: GetNeighborCount   ***/


/**
 */
uint
GetParent(unsigned uNodeIndex, tree_t *tree)
{
    assert(NULL != tree);
    assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
    return tree->m_uNeighbor1[uNodeIndex];
}
/***   end: GetParent   ***/



/**
 */
bool
IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
{
    assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
    assert(NULL != tree);
    
    return tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2 ||
        tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2 ||
        tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
}
/***   end: IsEdge   ***/



/**
 */
bool
HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
{
    assert(NULL != tree);
    assert(uNodeIndex1 < tree->m_uNodeCount);
    assert(uNodeIndex2 < tree->m_uNodeCount);
    assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
    
    if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
        return tree->m_bHasEdgeLength1[uNodeIndex1];
    else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
        return tree->m_bHasEdgeLength2[uNodeIndex1];
    assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
    return tree->m_bHasEdgeLength3[uNodeIndex1];
}
/***   end:   ***/


/**
 */
double
GetEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
{
    assert(NULL != tree);
    assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
    if (!HasEdgeLength(uNodeIndex1, uNodeIndex2, tree))
    {
        Log(&rLog, LOG_FATAL, "Missing edge length in tree %u-%u", uNodeIndex1, uNodeIndex2);
    }
    
    if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
        return tree->m_dEdgeLength1[uNodeIndex1];
    else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
        return tree->m_dEdgeLength2[uNodeIndex1];
    assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
    return tree->m_dEdgeLength3[uNodeIndex1];
}
/***   end: GetEdgeLength   ***/


/**
 *
 */
void
InitNode(tree_t *prTree, uint uNodeIndex)
{
    prTree->m_uNeighbor1[uNodeIndex] = NULL_NEIGHBOR;
    prTree->m_uNeighbor2[uNodeIndex] = NULL_NEIGHBOR;
    prTree->m_uNeighbor3[uNodeIndex] = NULL_NEIGHBOR;
    prTree->m_bHasEdgeLength1[uNodeIndex] = FALSE;
    prTree->m_bHasEdgeLength2[uNodeIndex] = FALSE;
    prTree->m_bHasEdgeLength3[uNodeIndex] = FALSE;
#if USE_HEIGHT
    prTree->m_bHasHeight[uNodeIndex] = FALSE;
    prTree->m_dHeight[uNodeIndex] = dInsane;
#endif
    prTree->m_dEdgeLength1[uNodeIndex] = dInsane;
    prTree->m_dEdgeLength2[uNodeIndex] = dInsane;
    prTree->m_dEdgeLength3[uNodeIndex] = dInsane;
    
    prTree->m_ptrName[uNodeIndex] = NULL;
    prTree->m_Ids[uNodeIndex] = uInsane;
}
/***   end: InitNode   ***/


/**
 *
 */
void
InitCache(uint uCacheCount, tree_t *tree)
{
    uint uNodeIndex;
    
    tree->m_uCacheCount = uCacheCount;
    
    tree->m_uNeighbor1 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
    tree->m_uNeighbor2 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
    tree->m_uNeighbor3 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
    
    tree->m_Ids = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
    
    tree->m_dEdgeLength1 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
    tree->m_dEdgeLength2 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
    tree->m_dEdgeLength3 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
#if USE_HEIGHT
    tree->m_dHeight = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
    tree->m_bHasHeight = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
#endif    
    tree->m_bHasEdgeLength1 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
    tree->m_bHasEdgeLength2 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
    tree->m_bHasEdgeLength3 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);

    tree->m_ptrName = (char **) CKMALLOC(sizeof(char *) * tree->m_uCacheCount);
    
    for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
        InitNode(tree, uNodeIndex);
    }
}
/***   end: InitCache   ***/




/**
 *
 * @note Replacing Tree::Clear but no freeing of memory! Just setting
 * everything to 0/NULL
 */
void
TreeZero(tree_t *tree)
{
    assert(NULL != tree);
    tree->m_uNodeCount = 0;
    tree->m_uCacheCount = 0;

    tree->m_uNeighbor1 = NULL;
    tree->m_uNeighbor2 = NULL;
    tree->m_uNeighbor3 = NULL;
    
    tree->m_dEdgeLength1 = NULL;
    tree->m_dEdgeLength2 = NULL;
    tree->m_dEdgeLength3 = NULL;
    
#if USE_HEIGHT
    tree->m_dHeight = NULL;
    tree->m_bHasHeight = NULL;
#endif
    tree->m_bHasEdgeLength1 = NULL;
    tree->m_bHasEdgeLength2 = NULL;
    tree->m_bHasEdgeLength3 = NULL;

    tree->m_ptrName = NULL;
    tree->m_Ids = NULL;
    
    tree->m_bRooted = FALSE;
    tree->m_uRootNodeIndex = 0;
}
/* end: TreeZero */



/**
 *
 */
void
TreeToFileNodeRooted(tree_t *tree, uint uNodeIndex, FILE *fp)
{
    bool bGroup;
    
    assert(IsRooted(tree));
    assert(NULL != tree);
    bGroup = !IsLeaf(uNodeIndex, tree) || IsRoot(uNodeIndex, tree);
    
    if (bGroup) 
        fprintf(fp, "(\n");
    

    if (IsLeaf(uNodeIndex, tree)) {
        /* File.PutString(GetName(uNodeIndex)); */
        fprintf(fp, "%s", tree->m_ptrName[uNodeIndex]);
    } else {
        TreeToFileNodeRooted(tree, GetLeft(uNodeIndex, tree), fp);
        fprintf(fp, ",\n");
        TreeToFileNodeRooted(tree, GetRight(uNodeIndex, tree), fp);
    }

    if (bGroup)
        fprintf(fp, ")");

    if (!IsRoot(uNodeIndex, tree)) {
        uint uParent = GetParent(uNodeIndex, tree);
        if (HasEdgeLength(uNodeIndex, uParent, tree))
            fprintf(fp, ":%g", GetEdgeLength(uNodeIndex, uParent, tree));
    }
    fprintf(fp, "\n");
}
/***   end: TreeToFileNodeRooted   ***/


/**
 *
 *
 */
void
TreeValidate(tree_t *tree)
{
    uint uNodeIndex;
    assert(NULL != tree);
    for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
        ValidateNode(uNodeIndex, tree);
    }
    /* FIXME: maybe set negative length to zero? What impact would
     * that have? */
}
/***   end TreeValidate   ***/



/**
 *
 *
 */
void
ValidateNode(uint uNodeIndex, tree_t *tree)
{
    uint uNeighborCount;
    uint n1, n2, n3;
    assert(NULL != tree);
    
    if (uNodeIndex >= tree->m_uNodeCount)
        Log(&rLog, LOG_FATAL, "ValidateNode(%u), %u nodes", uNodeIndex, tree->m_uNodeCount);

    uNeighborCount = GetNeighborCount(uNodeIndex, tree);
    

    if (2 == uNeighborCount) {
        if (!tree->m_bRooted) {
            Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, tree is not rooted",
                  uNodeIndex);
        }
        if (uNodeIndex != tree->m_uRootNodeIndex) {
            Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, but not root node=%u",
                  uNodeIndex, tree->m_uRootNodeIndex);
        }
    }

    n1 = tree->m_uNeighbor1[uNodeIndex];
    n2 = tree->m_uNeighbor2[uNodeIndex];
    n3 = tree->m_uNeighbor3[uNodeIndex];
    
    if (NULL_NEIGHBOR == n2 && NULL_NEIGHBOR != n3) {
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n2=null, n3!=null", uNodeIndex);
    }
    if (NULL_NEIGHBOR == n3 && NULL_NEIGHBOR != n2) {
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n3=null, n2!=null", uNodeIndex);
    }
    
    if (n1 != NULL_NEIGHBOR)
        AssertAreNeighbors(uNodeIndex, n1, tree);
    if (n2 != NULL_NEIGHBOR)
        AssertAreNeighbors(uNodeIndex, n2, tree);
    if (n3 != NULL_NEIGHBOR)
        AssertAreNeighbors(uNodeIndex, n3, tree);


    
    if (n1 != NULL_NEIGHBOR && (n1 == n2 || n1 == n3)) {
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
    }
    if (n2 != NULL_NEIGHBOR && (n2 == n1 || n2 == n3)) {
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
    }
    if (n3 != NULL_NEIGHBOR && (n3 == n1 || n3 == n2)) {
        Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
    }


    if (IsRooted(tree)) {
        if (NULL_NEIGHBOR == GetParent(uNodeIndex, tree)) {
            if (uNodeIndex != tree->m_uRootNodeIndex) {
                Log(&rLog, LOG_FATAL, "Tree::ValiateNode(%u), no parent", uNodeIndex);
            }
        } else if (GetLeft(GetParent(uNodeIndex, tree), tree) != uNodeIndex &&
                   GetRight(GetParent(uNodeIndex, tree), tree) != uNodeIndex) {
            Log(&rLog, LOG_FATAL, "Tree::ValidateNode(%u), parent / child mismatch", uNodeIndex);
        }
    }
}
/***   end: ValidateNode   ***/


/**
 *
 *
 */
void
AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
{
    bool Has12, Has21;
    assert(NULL != tree);

    if (uNodeIndex1 >= tree->m_uNodeCount || uNodeIndex2 >= tree->m_uNodeCount)
        Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u), are %u nodes",
              uNodeIndex1, uNodeIndex2, tree->m_uNodeCount);

    if (tree->m_uNeighbor1[uNodeIndex1] != uNodeIndex2 &&
        tree->m_uNeighbor2[uNodeIndex1] != uNodeIndex2 &&
        tree->m_uNeighbor3[uNodeIndex1] != uNodeIndex2) {
        Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
    }

    
    if (tree->m_uNeighbor1[uNodeIndex2] != uNodeIndex1 &&
        tree->m_uNeighbor2[uNodeIndex2] != uNodeIndex1 &&
        tree->m_uNeighbor3[uNodeIndex2] != uNodeIndex1) {
        Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
    }


    Has12 = HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
    Has21 = HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
    if (Has12 != Has21) {
        HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
        HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
        Log(&rLog, LOG_ERROR, "HasEdgeLength(%u, %u)=%c HasEdgeLength(%u, %u)=%c\n",
              uNodeIndex1,
              uNodeIndex2,
              Has12 ? 'T' : 'F',
              uNodeIndex2,
              uNodeIndex1,
              Has21 ? 'T' : 'F');
        Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, HasEdgeLength not symmetric");
    }

    
    if (Has12) {
        double d12 = GetEdgeLength(uNodeIndex1, uNodeIndex2, tree);
        double d21 = GetEdgeLength(uNodeIndex2, uNodeIndex1, tree);
        if (d12 != d21) {
            Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, Edge length disagrees %u-%u=%.3g, %u-%u=%.3g",
                  uNodeIndex1, uNodeIndex2, d12,
                  uNodeIndex2, uNodeIndex1, d21);
        }
    }
}
/***   end: AssertAreNeighbors   ***/



/**
 *
 * @note see phyfromfile.cpp in Muscle3.7. Tree has to be a pointer to
 * an already allocated tree structure.
 *
 * return non-Zero on failure
 *
 * leafids will not be set here (FIXME:CHECK if true)
 */
int
MuscleTreeFromFile(tree_t *tree, char *ftree)
{
    double dEdgeLength;
    bool bEdgeLength;
    char szToken[16];
    NEWICK_TOKEN_TYPE NTT;
    unsigned uThirdNode;
    FILE *fp = NULL;

    assert(tree!=NULL);
    assert(ftree!=NULL);

    if (NULL == (fp=fopen(ftree, "r"))) {
        Log(&rLog, LOG_ERROR, "Couldn't open tree-file '%s' for reading. Skipping", ftree);
        return -1;
    }
    
    /* Assume rooted.
     * If we discover that it is unrooted, will convert on the fly.
     */
    TreeCreateRooted(tree);

    bEdgeLength = GetGroupFromFile(fp, 0, &dEdgeLength, tree);


    /* Next token should be either ';' for rooted tree or ',' for
     * unrooted.
     */
    NTT = GetToken(fp, szToken, sizeof(szToken));

    /* If rooted, all done. */
    if (NTT_Semicolon == NTT) {
        if (bEdgeLength)
            Log(&rLog, LOG_WARN, " *** Warning *** edge length on root group in Newick file %s\n", ftree);
        TreeValidate(tree);
        fclose(fp);
        return 0;
    }

    if (NTT_Comma != NTT)
        Log(&rLog, LOG_FATAL, "Tree::FromFile, expected ';' or ',', got '%s'", szToken);

    uThirdNode = UnrootFromFile(tree);
    bEdgeLength = GetGroupFromFile(fp, uThirdNode, &dEdgeLength, tree);
    if (bEdgeLength)
        SetEdgeLength(0, uThirdNode, dEdgeLength, tree);
    TreeValidate(tree);

    fclose(fp);
    return 0;
}
/***   end MuscleTreeFromFile   ***/



/**
 *
 */
void
ExpandCache(tree_t *tree)
{
    const uint uNodeCount = 100;
    uint uNewCacheCount;
    uint *uNewNeighbor1, *uNewNeighbor2, *uNewNeighbor3;
    uint *uNewIds;
    double *dNewEdgeLength1, *dNewEdgeLength2, *dNewEdgeLength3;
#if USE_HEIGHT
    double *dNewHeight;
    bool *bNewHasHeight;
#endif
    bool *bNewHasEdgeLength1, *bNewHasEdgeLength2, *bNewHasEdgeLength3;
    char **ptrNewName;

    assert(NULL != tree);
    uNewCacheCount = tree->m_uCacheCount + uNodeCount;
    uNewNeighbor1 = (uint *) CKMALLOC(
        uNewCacheCount * sizeof(uint));
    uNewNeighbor2 = (uint *) CKMALLOC(
        uNewCacheCount * sizeof(uint));
    uNewNeighbor3 = (uint *) CKMALLOC(
        uNewCacheCount * sizeof(uint));

    uNewIds = (uint *) CKCALLOC(
        uNewCacheCount, sizeof(uint));
    
    dNewEdgeLength1 = (double *) CKMALLOC(
        uNewCacheCount * sizeof(double));
    dNewEdgeLength2 =  (double *) CKMALLOC(
        uNewCacheCount * sizeof(double));
    dNewEdgeLength3 = (double *) CKMALLOC(
        uNewCacheCount * sizeof(double));
#if USE_HEIGHT
    dNewHeight = (double *) CKMALLOC(
        uNewCacheCount * sizeof(double));
    bNewHasHeight = (bool *) CKMALLOC(
        uNewCacheCount * sizeof(bool));
#endif
     
    bNewHasEdgeLength1 = (bool *) CKMALLOC(
        uNewCacheCount * sizeof(bool));
    bNewHasEdgeLength2 = (bool *) CKMALLOC(
        uNewCacheCount * sizeof(bool));
    bNewHasEdgeLength3 = (bool *) CKMALLOC(
        uNewCacheCount * sizeof(bool));
    ptrNewName = (char **) CKCALLOC(uNewCacheCount, sizeof(char*));

    if (tree->m_uCacheCount > 0) {
        uint uUnsignedBytes, uEdgeBytes;
        uint uBoolBytes, uNameBytes;

        uUnsignedBytes = tree->m_uCacheCount*sizeof(uint);
        
        memcpy(uNewNeighbor1, tree->m_uNeighbor1, uUnsignedBytes);
        memcpy(uNewNeighbor2, tree->m_uNeighbor2, uUnsignedBytes);
        memcpy(uNewNeighbor3, tree->m_uNeighbor3, uUnsignedBytes);

        memcpy(uNewIds, tree->m_Ids, uUnsignedBytes);

        uEdgeBytes = tree->m_uCacheCount*sizeof(double);
        memcpy(dNewEdgeLength1, tree->m_dEdgeLength1, uEdgeBytes);
        memcpy(dNewEdgeLength2, tree->m_dEdgeLength2, uEdgeBytes);
        memcpy(dNewEdgeLength3, tree->m_dEdgeLength3, uEdgeBytes);
#if USE_HEIGHT
        memcpy(dNewHeight, tree->m_dHeight, uEdgeBytes);
#endif        
          
        uBoolBytes = tree->m_uCacheCount*sizeof(bool);
        memcpy(bNewHasEdgeLength1, tree->m_bHasEdgeLength1, uBoolBytes);
        memcpy(bNewHasEdgeLength2, tree->m_bHasEdgeLength2, uBoolBytes);
        memcpy(bNewHasEdgeLength3, tree->m_bHasEdgeLength3, uBoolBytes);
#if USE_HEIGHT
        memcpy(bNewHasHeight, tree->m_bHasHeight, uBoolBytes);
#endif
        uNameBytes = tree->m_uCacheCount*sizeof(char *);
        memcpy(ptrNewName, tree->m_ptrName, uNameBytes);

        /* similiar to FreeMuscleTree
         */
        
        /* IsLeaf needs m_uNodeCount and all m_uNeighbor's
         * so free first
         */
#if 0
        for (i=0; i<tree->m_uNodeCount; i++) {
            if (IsLeaf(i, tree)) {
#ifndef NDEBUG
                if (NULL==tree->m_ptrName[i]) {
                    Log(&rLog, LOG_WARN, "FIXME tree->m_ptrName[%d] is already NULL", i);
                }
#endif
                CKFREE(tree->m_ptrName[i]);
            }
        }
#endif
        CKFREE(tree->m_ptrName);

        CKFREE(tree->m_uNeighbor1);
        CKFREE(tree->m_uNeighbor2);
        CKFREE(tree->m_uNeighbor3);

        CKFREE(tree->m_Ids);

        CKFREE(tree->m_dEdgeLength1);
        CKFREE(tree->m_dEdgeLength2);
        CKFREE(tree->m_dEdgeLength3);

        CKFREE(tree->m_bHasEdgeLength1);
        CKFREE(tree->m_bHasEdgeLength2);
        CKFREE(tree->m_bHasEdgeLength3);
#if USE_HEIGHT
        CKFREE(tree->m_bHasHeight);
        CKFREE(tree->m_dHeight);
#endif
    }
    
    tree->m_uCacheCount = uNewCacheCount;
    tree->m_uNeighbor1 = uNewNeighbor1;
    tree->m_uNeighbor2 = uNewNeighbor2;
    tree->m_uNeighbor3 = uNewNeighbor3;
    tree->m_Ids = uNewIds;
    tree->m_dEdgeLength1 = dNewEdgeLength1;
    tree->m_dEdgeLength2 = dNewEdgeLength2;
    tree->m_dEdgeLength3 = dNewEdgeLength3;
        
#ifdef USE_HEIGHT
    tree->m_dHeight = dNewHeight;
    tree->m_bHasHeight = bNewHasHeight;
#endif
    tree->m_bHasEdgeLength1 = bNewHasEdgeLength1;
    tree->m_bHasEdgeLength2 = bNewHasEdgeLength2;
    tree->m_bHasEdgeLength3 = bNewHasEdgeLength3;
        
    tree->m_ptrName = ptrNewName;

}
/***   end: ExpandCache   ***/



/**
 *
 * Tree must be pointer to an already allocated tree structure
 *
 */
void
TreeCreateRooted(tree_t *tree)
{
    TreeZero(tree);
    ExpandCache(tree);
    tree->m_uNodeCount = 1;

    tree->m_uNeighbor1[0] = NULL_NEIGHBOR;
    tree->m_uNeighbor2[0] = NULL_NEIGHBOR;
    tree->m_uNeighbor3[0] = NULL_NEIGHBOR;
    
    tree->m_bHasEdgeLength1[0] = FALSE;
    tree->m_bHasEdgeLength2[0] = FALSE;
    tree->m_bHasEdgeLength3[0] = FALSE;
#if USE_HEIGHT
    tree->m_bHasHeight[0] = FALSE;
#endif
    
    tree->m_uRootNodeIndex = 0;
    tree->m_bRooted = TRUE;
    
#ifndef NDEBUG
    TreeValidate(tree);
#endif
}
/***   end: TreeCreateRooted   ***/


/**
 *
 */
uint
UnrootFromFile(tree_t *tree)
{
    uint uThirdNode;
#if TRACE
    Log("Before unroot:\n");
    LogMe();
#endif
    
    if (!tree->m_bRooted)
        Log(&rLog, LOG_FATAL, "Tree::Unroot, not rooted");
    
    /* Convention: root node is always node zero */
    assert(IsRoot(0, tree));
    assert(NULL_NEIGHBOR == tree->m_uNeighbor1[0]);

    uThirdNode = tree->m_uNodeCount++;
    
    tree->m_uNeighbor1[0] = uThirdNode;
    tree->m_uNeighbor1[uThirdNode] = 0;
    
    tree->m_uNeighbor2[uThirdNode] = NULL_NEIGHBOR;
    tree->m_uNeighbor3[uThirdNode] = NULL_NEIGHBOR;
    
    tree->m_dEdgeLength1[0] = 0;
    tree->m_dEdgeLength1[uThirdNode] = 0;
    tree->m_bHasEdgeLength1[uThirdNode] = TRUE;
    
    tree->m_bRooted = FALSE;
    
#if TRACE
    Log("After unroot:\n");
    LogMe();
#endif

    return uThirdNode;
}
/***   end: UnrootFromFile   ***/



/**
 *
 *
 */
bool
GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree)
{
    char szToken[1024];
    NEWICK_TOKEN_TYPE NTT = GetToken(fp, szToken, sizeof(szToken));
    bool bRightLength;
    bool bEof;
    char c;

    /* Group is either leaf name or (left, right). */
    if (NTT_String == NTT) {
        SetLeafName(uNodeIndex, szToken, tree);
#if TRACE
        Log(&rLog, LOG_FORCED_DEBUG, "Group is leaf '%s'", szToken);
#endif
    } else if (NTT_Lparen == NTT) {
        const unsigned uLeft = AppendBranch(tree, uNodeIndex);
        const unsigned uRight = uLeft + 1;
        double dEdgeLength;
        bool bLeftLength = GetGroupFromFile(fp, uLeft, &dEdgeLength, tree);
        
        /* Left sub-group...
         */
#if TRACE
        Log(&rLog, LOG_FORCED_DEBUG, "%s", "Got '(', group is compound, expect left sub-group");

        if (bLeftLength) {
            Log(&rLog, LOG_FORCED_DEBUG, "Edge length for left sub-group: %.3g", dEdgeLength);
        } else {
            Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for left sub-group");
        }
#endif
        if (bLeftLength)
            SetEdgeLength(uNodeIndex, uLeft, dEdgeLength, tree);

        /* ... then comma ...
         */
#if TRACE
        Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect comma");
#endif
        NTT = GetToken(fp, szToken, sizeof(szToken));
        if (NTT_Comma != NTT)
            Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
        
        /* ...then right sub-group...
         */
#if TRACE
        Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect right sub-group");
#endif
        bRightLength = GetGroupFromFile(fp, uRight, &dEdgeLength, tree);
        if (bRightLength)
            SetEdgeLength(uNodeIndex, uRight, dEdgeLength, tree);
        
#if TRACE
        if (bRightLength)
            Log(&rLog, LOG_FORCED_DEBUG, "Edge length for right sub-group: %.3g", dEdgeLength);
        else
            Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for right sub-group");
#endif

        /* ... then closing parenthesis.
         */
#if TRACE
        Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect closing parenthesis (or comma if > 2-ary)");
#endif
        NTT = GetToken(fp, szToken, sizeof(szToken));
        if (NTT_Rparen == NTT)
            ;
        else if (NTT_Comma == NTT) {
            if (ungetc(',', fp)==EOF)
                Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
            return FALSE;
        } else
            Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
    } else {
        Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
              szToken);
    }
    
    /* Group may optionally be followed by edge length.
     */
    bEof = FileSkipWhiteX(fp);
    if (bEof)
        return FALSE;
    if ((c = fgetc(fp))==EOF) /* GetCharX */
        Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
#if TRACE
    Log(&rLog, LOG_FORCED_DEBUG, "Character following group, could be colon, is '%c'", c);
#endif
    if (':' == c) {
        NTT = GetToken(fp, szToken, sizeof(szToken));
        if (NTT_String != NTT)
            Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
        *ptrdEdgeLength = atof(szToken);
        return TRUE;
    }
    if (ungetc(c, fp)==EOF)
        Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
    
    return FALSE;
}
/***   end: GetGroupFromFile   ***/




/**
 *
 */
void
FileSkipWhite(FILE *fp)
{
    bool bEof = FileSkipWhiteX(fp);
    if (bEof)
        Log(&rLog, LOG_FATAL, "%s", "End-of-file skipping white space");
}
/***   end: FileSkipWhite   ***/




/**
 *
 */
bool
FileSkipWhiteX(FILE *fp)
{
    for (;;) {
        int c;
        bool bEof;
        
        /* GetChar */
        if ((c = fgetc(fp))==EOF) {
            bEof = TRUE;
        } else {
            bEof = FALSE;
        }

        if (bEof)
            return TRUE;
        if (!isspace(c)) {
            if (ungetc(c, fp)==EOF)
                Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
            break;
        }
    }
    return FALSE;
}
/***   end: FileSkipWhiteX   ***/




/**
 *
 */
NEWICK_TOKEN_TYPE
GetToken(FILE *fp, char szToken[], uint uBytes)
{
    char c;
    unsigned uBytesCopied = 0;
    NEWICK_TOKEN_TYPE TT;

    /* Skip leading white space */
    FileSkipWhite(fp);

    if ((c = fgetc(fp))==EOF) /* GetCharX */
        Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
    
    /* In case a single-character token */
    szToken[0] = c;
    szToken[1] = 0;

    switch (c) {

    case '(':
        return NTT_Lparen;
        
    case ')':
        return NTT_Rparen;
        
    case ':':
        return NTT_Colon;
        
    case ';':
        return NTT_Semicolon;
        
    case ',':
        return NTT_Comma;
        
    case '\'':
        TT = NTT_SingleQuotedString;
        if ((c = fgetc(fp))==EOF) /* GetCharX */
            Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
        break;
        
    case '"':
        TT = NTT_DoubleQuotedString;
        if ((c = fgetc(fp))==EOF) /* GetCharX */
            Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
        break;
        
    case '[':
        TT = NTT_Comment;
        break;
        
    default:
        TT = NTT_String;
        break;
    }
    
    for (;;)
    {
        bool bEof;
        if (TT != NTT_Comment) {
            if (uBytesCopied < uBytes - 2)  {
                szToken[uBytesCopied++] = c;
                szToken[uBytesCopied] = 0;
            } else {
                Log(&rLog, LOG_FATAL, "Tree::GetToken: input buffer too small, token so far='%s'", szToken);
            }
        }
        c = fgetc(fp); /* GetChar */
        bEof = (c==EOF ? TRUE : FALSE);
        if (bEof)
            return TT;
        
        switch (TT) {

        case NTT_String:
            if (0 != strchr("():;,", c)) {
                if (ungetc(c, fp)==EOF)
                    Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
                return NTT_String;
            }
            if (isspace(c))
                return NTT_String;
            break;
            
        case NTT_SingleQuotedString:
            if ('\'' == c)
                return NTT_String;
            break;
            
        case NTT_DoubleQuotedString:
            if ('"' == c)
                return NTT_String;
            break;
            
        case NTT_Comment:
            if (']' == c)
                return GetToken(fp, szToken, uBytes);
            break;
            
        default:
            Log(&rLog, LOG_FATAL, "Tree::GetToken, invalid TT=%u", TT);
        }
    }
}
/***   end: GetToken   ***/



/***   SetLeafName
 *
 */
void
SetLeafName(unsigned uNodeIndex, const char *ptrName, tree_t *tree)
{
    assert(uNodeIndex < tree->m_uNodeCount);
    assert(IsLeaf(uNodeIndex, tree));
    free(tree->m_ptrName[uNodeIndex]);
    /*LOG_DEBUG("Setting tree->m_ptrName[uNodeIndex=%d] to %s", uNodeIndex, ptrName);*/
    tree->m_ptrName[uNodeIndex] = CkStrdup(ptrName);
}
/***   end: SetLeafName   ***/




/**
 *
 * Append a new branch. This adds two new nodes and joins them to an
 * existing leaf node. Return value is k, new nodes have indexes k and
 * k+1 respectively.
 *
 */
uint
AppendBranch(tree_t *tree, uint uExistingLeafIndex)
{
    uint uNewLeaf1;
    uint uNewLeaf2;
    
    assert(tree!=NULL);
    if (0 == tree->m_uNodeCount) {
        Log(&rLog, LOG_FATAL, "%s(): %s", __FUNCTION__, "tree has not been created");
    }
    assert(NULL_NEIGHBOR == tree->m_uNeighbor2[uExistingLeafIndex]);
    assert(NULL_NEIGHBOR == tree->m_uNeighbor3[uExistingLeafIndex]);
    assert(uExistingLeafIndex < tree->m_uNodeCount);
#ifndef NDEBUG
    if (!IsLeaf(uExistingLeafIndex, tree)) {
        Log(&rLog, LOG_FATAL, "AppendBranch(%u): not leaf", uExistingLeafIndex);
    }
#endif

    if (tree->m_uNodeCount >= tree->m_uCacheCount - 2) {
        ExpandCache(tree);
    }
    uNewLeaf1 = tree->m_uNodeCount;
    uNewLeaf2 = tree->m_uNodeCount + 1;

    tree->m_uNodeCount += 2;
    
    tree->m_uNeighbor2[uExistingLeafIndex] = uNewLeaf1;
    tree->m_uNeighbor3[uExistingLeafIndex] = uNewLeaf2;
    
    tree->m_uNeighbor1[uNewLeaf1] = uExistingLeafIndex;
    tree->m_uNeighbor1[uNewLeaf2] = uExistingLeafIndex;
    
    tree->m_uNeighbor2[uNewLeaf1] = NULL_NEIGHBOR;
    tree->m_uNeighbor2[uNewLeaf2] = NULL_NEIGHBOR;
    
    tree->m_uNeighbor3[uNewLeaf1] = NULL_NEIGHBOR;
    tree->m_uNeighbor3[uNewLeaf2] = NULL_NEIGHBOR;
    
    tree->m_dEdgeLength2[uExistingLeafIndex] = 0;
    tree->m_dEdgeLength3[uExistingLeafIndex] = 0;
    
    tree->m_dEdgeLength1[uNewLeaf1] = 0;
    tree->m_dEdgeLength2[uNewLeaf1] = 0;
    tree->m_dEdgeLength3[uNewLeaf1] = 0;
    
    tree->m_dEdgeLength1[uNewLeaf2] = 0;
    tree->m_dEdgeLength2[uNewLeaf2] = 0;
    tree->m_dEdgeLength3[uNewLeaf2] = 0;
    
    tree->m_bHasEdgeLength1[uNewLeaf1] = FALSE;
    tree->m_bHasEdgeLength2[uNewLeaf1] = FALSE;
    tree->m_bHasEdgeLength3[uNewLeaf1] = FALSE;
    
    tree->m_bHasEdgeLength1[uNewLeaf2] = FALSE;
    tree->m_bHasEdgeLength2[uNewLeaf2] = FALSE;
    tree->m_bHasEdgeLength3[uNewLeaf2] = FALSE;
    
#if USE_HEIGHT
    tree->m_bHasHeight[uNewLeaf1] = FALSE;
    tree->m_bHasHeight[uNewLeaf2] = FALSE;
#endif 
    tree->m_Ids[uNewLeaf1] = uInsane;
    tree->m_Ids[uNewLeaf2] = uInsane;
    
    return uNewLeaf1;
}
/***   end: AppendBranch   ***/


/**
 *
 *
 */
void
SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
              double dLength, tree_t *tree)
{
    assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
    assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
    
    if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2) {
        tree->m_dEdgeLength1[uNodeIndex1] = dLength;
        tree->m_bHasEdgeLength1[uNodeIndex1] = TRUE;
    } else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2) {
        tree->m_dEdgeLength2[uNodeIndex1] = dLength;
        tree->m_bHasEdgeLength2[uNodeIndex1] = TRUE;
        
    } else {
        assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
        tree->m_dEdgeLength3[uNodeIndex1] = dLength;
        tree->m_bHasEdgeLength3[uNodeIndex1] = TRUE;
    }
    
    if (tree->m_uNeighbor1[uNodeIndex2] == uNodeIndex1) {
        tree->m_dEdgeLength1[uNodeIndex2] = dLength;
        tree->m_bHasEdgeLength1[uNodeIndex2] = TRUE;
    } else if (tree->m_uNeighbor2[uNodeIndex2] == uNodeIndex1) {
        tree->m_dEdgeLength2[uNodeIndex2] = dLength;
        tree->m_bHasEdgeLength2[uNodeIndex2] = TRUE;
    } else {
        assert(tree->m_uNeighbor3[uNodeIndex2] == uNodeIndex1);
        tree->m_dEdgeLength3[uNodeIndex2] = dLength;
        tree->m_bHasEdgeLength3[uNodeIndex2] = TRUE;
    }
}
/***   end: SetEdgeLength   ***/



/**
 *
 * Debug output
 *
 * LogMe in phy.cpp
 *
 */
void
LogTree(tree_t *tree, FILE *fp)
{
    uint uNodeIndex;
    uint n1, n2, n3;
    char *ptrName;
    
    fprintf(fp, "This is a tree with %u nodes, which is ", tree->m_uNodeCount);
    
    if (IsRooted(tree)) {
        fprintf(fp, "rooted:\n");
        fprintf(fp, "Index  Parnt  LengthP  Left   LengthL  Right  LengthR     Id  Name\n");
        fprintf(fp, "-----  -----  -------  ----   -------  -----  -------  -----  ----\n");

    } else {
        fprintf(fp, "unrooted;\n");
        fprintf(fp, "Index  Nbr_1  Length1  Nbr_2  Length2  Nbr_3  Length3     Id  Name\n");
        fprintf(fp, "-----  -----  -------  -----  -------  -----  -------  -----  ----\n");
    }
    
    for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
        fprintf(fp, "%5u  ", uNodeIndex);
        n1 = tree->m_uNeighbor1[uNodeIndex];
        n2 = tree->m_uNeighbor2[uNodeIndex];
        n3 = tree->m_uNeighbor3[uNodeIndex];
        
        if (NULL_NEIGHBOR != n1) {
            fprintf(fp, "%5u  ", n1);
            if (tree->m_bHasEdgeLength1[uNodeIndex])
                fprintf(fp, "%7.3g  ", tree->m_dEdgeLength1[uNodeIndex]);
            else
                fprintf(fp, "      *  ");
        } else {
            fprintf(fp, "                ");
        }
        
        if (NULL_NEIGHBOR != n2) {
            fprintf(fp, "%5u  ", n2);
            if (tree->m_bHasEdgeLength2[uNodeIndex])
                fprintf(fp, "%7.3g  ", tree->m_dEdgeLength2[uNodeIndex]);
            else
                fprintf(fp, "      *  ");
        } else {
            fprintf(fp, "                ");
        }
        
        if (NULL_NEIGHBOR != n3) {
            fprintf(fp, "%5u  ", n3);
            if (tree->m_bHasEdgeLength3[uNodeIndex])
                fprintf(fp, "%7.3g  ", tree->m_dEdgeLength3[uNodeIndex]);
            else
                fprintf(fp, "      *  ");
        } else {
            fprintf(fp, "                ");
        }
        
        if (tree->m_Ids != 0 && IsLeaf(uNodeIndex, tree)) {
            unsigned uId = tree->m_Ids[uNodeIndex];
            if (uId == uInsane)
                fprintf(fp, "    *");
            else
                fprintf(fp, "%5u", uId);
        } else {
            fprintf(fp, "     ");
        }
        
        if (tree->m_bRooted && uNodeIndex == tree->m_uRootNodeIndex)
            fprintf(fp, "  [ROOT] ");
        ptrName = tree->m_ptrName[uNodeIndex];
        if (ptrName != 0)
            fprintf(fp, "  %s", ptrName);
        
        fprintf(fp, "\n");
    }     
}
/***   end: LogTree   ***/



/**
 *
 * replaces m_uLeafCount
 */
uint
GetLeafCount(tree_t *tree)
{
    assert(tree!=NULL);
    
    return (tree->m_uNodeCount+1)/2;
}
/***   GetLeafCount   ***/



/**
 *
 */
uint
GetNodeCount(tree_t *tree)
{
    assert(tree!=NULL);
    
    return 2*(GetLeafCount(tree)) - 1;
}
/***   end: GetNodeCount   ***/


/**
 *
 */
uint
GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *prTree)
{
    assert(uNodeIndex < prTree->m_uNodeCount);
    switch (uNeighborSubscript)
    {
    case 0:
        return prTree->m_uNeighbor1[uNodeIndex];
    case 1:
        return prTree->m_uNeighbor2[uNodeIndex];
    case 2:
        return prTree->m_uNeighbor3[uNodeIndex];
    }
    Log(&rLog, LOG_FATAL, "Internal error in %s: sub=%u", __FUNCTION__, uNeighborSubscript);
    return NULL_NEIGHBOR;
}
/***   end: GetNeighbor   ***/





/**
 *
 */
void
SetLeafId(tree_t *tree, uint uNodeIndex, uint uId)
{
    assert(uNodeIndex < tree->m_uNodeCount);
    assert(IsLeaf(uNodeIndex, tree));
    tree->m_Ids[uNodeIndex] = uId;
}
/***   end: SetLeafId    ***/


/**
 *
 */
uint
GetRootNodeIndex(tree_t *tree)
{
    assert(NULL!=tree);
    return tree->m_uRootNodeIndex;
}
/***   end: GetRootNodeIndex   ***/



/**
 * @note avoid usage if you want to iterate over all indices, because
 * this will be slow
 *
 */
uint
LeafIndexToNodeIndex(uint uLeafIndex, tree_t *prTree) {
    uint uLeafCount = 0;
    unsigned uNodeCount = GetNodeCount(prTree);
    uint uNodeIndex;
    
    for (uNodeIndex = 0; uNodeIndex < uNodeCount; uNodeIndex++) {
        if (IsLeaf(uNodeIndex, prTree)) {
            if (uLeafCount == uLeafIndex) {
                return uNodeIndex;
            } else {
                uLeafCount++;
            }
        }
    }
    Log(&rLog, LOG_FATAL, "Internal error: node index out of range");
    return 0;
}
/***   end: LeafIndexToNodeIndex   ***/




/**
 * @brief Append a (source) tree to a (dest) tree to a given node
 * which will be replaced. All other nodes in that tree will stay the
 * same.
 *
 * @param[out] prDstTree
 * The tree to append to
 * @param[in] uDstTreeReplaceNodeIndex
 * Dest tree node to which source tree will be appended
 * @param[in] prSrcTree
 * The tree to append
 * 
 * @note No nodes inside prDstTree will change except
 * uDstTreeReplaceNodeIndex
 *
 *
 * @warning: Function won't check or touch the m_Ids/leaf-indices!
 * That means if you want to join two trees with leaf indices 1-10 and
 * 1-10 your m_Ids/leaf-indices won't be unique anymore and the
 * association between your sequences and the tree are broken. Make
 * sure m_Ids are unique before calling me.
 *
 * The easiest would have been to do this by recursively calling
 * AppendBranch() (after adding uSrcTreeNodeIndex as extra argument to
 * this function). But recursion is evil. Yet another version would be
 * to setup all the data and call MuscleTreeCreate() to create a third
 * tree, which seems complicated and wasteful. The approach taken here
 * is the following: increase dest tree memory, copy over each src
 * tree node data and update the indices and counters. This is tricky
 * and has a lot of potential for bugs if tree interface is changed
 * (and even if it isn't).
 *
 */
void
AppendTree(tree_t *prDstTree, uint uDstTreeReplaceNodeIndex, tree_t *prSrcTree)
{
    uint uSrcTreeNodeIndex;
    uint uOrgDstTreeNodeCount;

    assert(NULL!=prDstTree);
    assert(NULL!=prSrcTree);
    assert(uDstTreeReplaceNodeIndex < prDstTree->m_uNodeCount);
    assert(IsLeaf(uDstTreeReplaceNodeIndex, prDstTree));
    assert(IsRooted(prDstTree) && IsRooted(prSrcTree));

    
    uOrgDstTreeNodeCount = prDstTree->m_uNodeCount;

    
    /* increase dest tree memory
     */
    while (prDstTree->m_uCacheCount
           <
           (GetNodeCount(prDstTree) + GetNodeCount(prSrcTree))) {
        ExpandCache(prDstTree);
    }


    /* copy all src tree nodes
     *
     */
    for (uSrcTreeNodeIndex=0;
         uSrcTreeNodeIndex<GetNodeCount(prSrcTree); uSrcTreeNodeIndex++) {
        uint uNewDstNodeIndex = prDstTree->m_uNodeCount;
        
        /* distinguish 4 cases for src nodes to copy:
         *
         * 1. src node is the only node, i.e. root & leaf
         *
         * 2. src node is root: set only left & right, but not parent
         * and just replace the given dest index
         *
         * 3. src node is leaf: set only parent
         *
         * 4. src node is internal node: update all three neighbours
         *
         * FIXME: this is messy. Is there a cleaner way to do this by
         * merging all cases?
         *
         */
        if (IsRoot(uSrcTreeNodeIndex, prSrcTree) && IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
            /* special case: if this is the only member in
             * tree, i.e. it's root and leaf. Copy leaf name and leaf
             * id. No neighbours to update
             */

            /* free dst node name if set */
            if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
                CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
            }

            prDstTree->m_ptrName[uDstTreeReplaceNodeIndex] =
                CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));

            prDstTree->m_Ids[uDstTreeReplaceNodeIndex] =
                prSrcTree->m_Ids[uSrcTreeNodeIndex];

            /* no updated of uNodeCount, because we used the replace node */

#if TRACE
            Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the only src leaf node: parent=%d (%f)",
                      uDstTreeReplaceNodeIndex,
                      prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex]);
#endif
            
        } else if (IsRoot(uSrcTreeNodeIndex, prSrcTree)) {
            /* src node is root: replace uDstTreeReplaceNodeIndex
             * (not uNewDstNodeIndex) with src root, i.e. the
             * uDstTreeReplaceNodeIndex becomes an internal node now.
             *
             * We only have two neighbours 2 & 3 (no parent). Keep old
             * parent info (neighbor 1).
             */

            /* free dst node name if set */
            if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
                CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
            }
            
            prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex] =
                prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
            prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex] =
                prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;          
            
            prDstTree->m_bHasEdgeLength2[uDstTreeReplaceNodeIndex] =
                prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
            prDstTree->m_bHasEdgeLength3[uDstTreeReplaceNodeIndex] =
                prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];            

            prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex] =
                prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
            prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex] =
                prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];

            /* make Id invalid */
            prDstTree->m_Ids[uDstTreeReplaceNodeIndex] = uInsane;

            /* no updated of uNodeCount, because we used the replace node */

#if TRACE
            Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the src root node: (untouched) parent=%d (%f) left=%d (%f) right=%d (%f)",
                      uDstTreeReplaceNodeIndex,
                      prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex],
                      prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex],
                      prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex]);
#endif
            
        } else if (IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
            /* src node is a leaf, which means we only have one
             * neighbour, and that is its parent, i.e. n1
             *
             */

            /* initialise/zero new node to default values
             */
            InitNode(prDstTree, uNewDstNodeIndex);

        
            /*  update m_ptrName/leaf name
             */
            prDstTree->m_ptrName[uNewDstNodeIndex] =
                CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));

            /* update parent node (beware of special case: parent was
               src tree root */
            if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
                    prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
                        uDstTreeReplaceNodeIndex;
            } else {
                prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
                    prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
            }

            /* update edge length info to parent
             */
            prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
                prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
            prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
                prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];

            /* update sequence/object id
             */
            prDstTree->m_Ids[uNewDstNodeIndex] =
                prSrcTree->m_Ids[uSrcTreeNodeIndex];            

            /* we used a new node so increase their count */
            prDstTree->m_uNodeCount += 1;
            
#if TRACE
            Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with a src leaf node: parent=%d (%f)",
                      uNewDstNodeIndex,
                      prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex]);
#endif
            
        } else  {
            /* src node is not root neither leaf, means we have an
             * internal node. Update all neighbour info
             * 
             */

            /* initialise/zero node values to default values
             */
            InitNode(prDstTree, uNewDstNodeIndex);
          
            /* update neigbours
             */
            /* parent: special case if parent was src tree root */
            if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
                    prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
                        uDstTreeReplaceNodeIndex;
            } else {
                prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
                    prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
            }
            /* left */
            prDstTree->m_uNeighbor2[uNewDstNodeIndex] =
                prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
            /* right */
            prDstTree->m_uNeighbor3[uNewDstNodeIndex] =
                prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;

            /* update edge length info
             */
            /* parent */
            prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
                prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
            prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
                prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
            /* left */
            prDstTree->m_bHasEdgeLength2[uNewDstNodeIndex] =
                prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
            prDstTree->m_dEdgeLength2[uNewDstNodeIndex] =
                prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
            /* right */
            prDstTree->m_bHasEdgeLength3[uNewDstNodeIndex] =
                prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];
            prDstTree->m_dEdgeLength3[uNewDstNodeIndex] =
                prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];

            /* we used a new node so increase their count */
            prDstTree->m_uNodeCount += 1;
            
#if TRACE
            Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with an internal src node: parent=%d (%f) left=%d (%f) right=%d (%f)",
                      uNewDstNodeIndex,
                      prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex],
                      prDstTree->m_uNeighbor2[uNewDstNodeIndex], prDstTree->m_dEdgeLength2[uNewDstNodeIndex],
                      prDstTree->m_uNeighbor3[uNewDstNodeIndex], prDstTree->m_dEdgeLength3[uNewDstNodeIndex]);
#endif
        }

    }
    /* end for each src tree node */

    
    /*
     * m_uRootNodeIndex stays the same.
     *
     * No need to touch m_uCacheCount.
     *
     */    
#if USE_HEIGHT
    Log(&rLog, LOG_FATAL, "Internal error: Height usage not implemented in %s", __FUNCTION__);
#endif 

    
#ifndef NDEBUG
    TreeValidate(prDstTree);
#endif
        
    return;
}
/***   end: AppendTree()   ***/