```/*
cfunctions.c

Version : 1.1.04
Author  : Niko Beerenwinkel

(c) Max-Planck-Institut fuer Informatik, 2004
*/

#include "include/mtreemix.h"
#include "include/cfunctions.h"
#include "include/Rtreemix_patch.h"
//R includes
#include <R.h>

integer_matrix resample(integer_matrix& pattern, matrix& resp, int S, matrix& bresp)
{
// resample S rows from pattern(,)
// returns resampled data; corresponding responsibilities are in bresp

int N = pattern.dim1();  // sample size
int L = pattern.dim2();  // pattern length
int K = resp.dim1();

integer_vector x(L);
vector gamma(K);  // responsibility of k-th tree
integer_matrix R(S,L);

for (int s=0; s<S; s++)
{
int ri = (int) ((double) N * rand() / (RAND_MAX + 1.0));

// pattern:
x = pattern.row(ri);
for (int j=0; j<L; j++)
R(s,j) = x[j];

// responsibilities:
gamma = resp.col(ri);
for (int k=0; k<K; k++)
bresp(k,s) = gamma[k];
}

return R;
}

vector CI(list<double>& L, double alpha)
{
// calculate (alpha*100)% confidence interval from list of doubles L
// returns a 2-dim vector whose first entry is the lower limit
// and whose second entry is the upper limit of the confidence interval

vector ci(0.0, 1.0);

int len = L.length();
if (len >= 2)
{
L.sort();  // we can be more efficient than this...

// compute indices of confidence limits:
double eps = .5 / (double) len;  // for rounding
int lower_index = std::max(0, int(alpha*(double)len - eps) - 1);
int upper_index = std::min(len - 1, int((1.0-alpha)*(double)len + 1.0));

//ci[0] = L.contents(L.get_item(lower_index));  // lower limit
//ci[1] = L.contents(L.get_item(upper_index));  // upper limit
ci[0] = L.contents(lower_index);  // lower limit
ci[1] = L.contents(upper_index);  // upper limit
}

return ci;
}

array< map<edge,double> > mtreemix_bootstrap(array<string>& profile, integer_matrix& pattern, int K, array<graph>& G, array< map<int,node> >& node_no, matrix& resp, int B, double eps, int special_weighing, int uniform_noise, array< map<edge,double> >& lower, array< map<edge,double> >& upper, vector& alpha_lower, vector& alpha_upper, double confidence_level)
{
// Bootstrap analysis
// modifies the responsibilities resp(,) ! (see below)

int N = pattern.dim1();  // sample size
int L = pattern.dim2();  // pattern length

vector oneN = ones(N);

// for running mtreemix_fit1:
vector                     balpha(1);      // mixture parameter
array< graph >             bG(1);          // digraph
array< map<int,node> >     bnode_no(1);    // node of mutation index
array< map<node,string> >  bevent(1);      // substitution event
array< map<edge,double> >  bcond_prob(1);  // conditional probabilities

// map nodes onto event indices:
array< map<node,int> > no_of_node(K);
for (int k=0; k<K; k++)
for (int j=0; j<L; j++)
no_of_node[k][node_no[k][j]] = j;

// for bootstrapping:
integer_matrix bpat(N,L);  // one bootstrap sample of size N
array< map<edge,double> > supp(K);  // bootstrap edge support
edge e, be;

for (int k=0; k<K; k++)
{

list<double> alpha_distr;  // empirical distribution of alpha as estimated by the bootstrap
array< list<double> > bcond_prob_distr(L);  // empirical distribution of cond_prob as estimated by the bootstrap (edges are indexed by the index of the target node)

for (int b=0; b<B; b++)
{
matrix bresp(K,N);
bpat = resample(pattern, resp, N, bresp);
double bN_k = oneN * bresp.row(k);

// estimate alpha:
alpha_distr.append(bN_k / N);  // == sum(bresp(k,:) / N

// normalize bresp to obtain distribution over samples (instead of over the tree components)
for (int i=0; i<N; i++)
bresp(k,i) /= bN_k;

if (k == 0)
{
mtreemix_fit0(profile, bpat, balpha, bG, bnode_no, bevent, bcond_prob, bresp.row(k), uniform_noise, special_weighing);
}
else  // k >= 1
mtreemix_fit1(profile, bpat, balpha, bG, bnode_no, bevent, bcond_prob, bresp.row(k), eps, special_weighing);

for (int j1=0; j1<L; j1++)
for (int j2=0; j2<L; j2++)
if ((be = edge_between(bnode_no[0][j1], bnode_no[0][j2])) != NULL) //nil)  // edge in bootstrap tree
if ((e = edge_between(node_no[k][j1], node_no[k][j2])) != NULL) //nil)  // edge in k-th tree of model
{
// count edge:
// supp[k][e] += (1.0 / (double) B);  // relative counts
supp[k][e] += 1.0;  // absolute counts

// record cond_prob estimate:
bcond_prob_distr[j2].append(bcond_prob[0][be]);
}
}

// calculate confidence intervals:
vector ci = CI(alpha_distr, confidence_level);
alpha_lower[k] = ci[0];
alpha_upper[k] = ci[1];

forall_edges(e, G[k])
{
list<double> D = bcond_prob_distr[no_of_node[k][target(e)]];  // distrinution of p(e) in G[k]
ci = CI(D, confidence_level);
lower[k][e] = ci[0];
upper[k][e] = ci[1];
}

}

bG.clear();
return supp;
}

void mtreemix_wait(int L, vector& alpha, array<graph>& G, array< map<edge,double> >& lambda, array< map<int,node> >& node_no, array< map<edge,double> >& cond_prob, int n, int sampling_mode, double sampling_param, integer_matrix& pattern, vector& wtime, vector& stime)
{
// simulate patterns and their waiting times

int K = alpha.dim();

integer_vector pat(L);

// map nodes onto event indices:
array< map<node,int> > no_of_node(K);
for (int k=0; k<K; k++)
for (int j=0; j<L; j++)
no_of_node[k][node_no[k][j]] = j;

int i = 0;
while (i < n)
{
// draw branching number k:
int k = discrand(alpha);

// draw/set sampling time:
if (sampling_mode == EXPONENTIAL)
stime[i] = expcdf(sampling_param);
if (sampling_mode == CONSTANT)
stime[i] = sampling_param;

// wait on branching k until sampling time stime[i]:
wtime[i] = mtree_wait(G[k], node_no[k][0], cond_prob[k], lambda[k], no_of_node[k], stime[i], pat);

for (int j=0; j<L; j++)  // store pattern
pattern(i,j) = pat[j];

i++;
}

}

array< list<double> > mtreemix_time(int L, graph& G_k, map<edge,double>& lambda_k, map<int,node>& node_no_k, map<node,int>& no_of_node_k, map<edge,double>& cond_prob_k, int n)
{
// simulate waiting process on a given tree component G_k

int N = pow2(L-1);

array< list<double> > wtime(N);  // lists of waiting times for all patterns

int i = 0;
while (i < n)
{
mtree_time(G_k, node_no_k[0], cond_prob_k, lambda_k, no_of_node_k, wtime); // wait on branching k

i++;
}

return wtime;
}

array< map<edge,double> > rescale_cond_prob(array< graph >& G, array< map<edge,double> >& cond_prob, int sampling_mode, double sampling_param, double output_param)
{
int K = cond_prob.size();
edge e;

array< map<edge,double> > cond_prob_prime(K);

switch(sampling_mode)
{
case CONSTANT: //  t_0  -->  t
// p(t) = 1 - (1 - p)^(t/t_0)
for (int k=0; k<K; k++)
forall_edges(e, G[k])
cond_prob_prime[k][e] = 1.0 - pow(1.0 - cond_prob[k][e], output_param / sampling_param);
break;

case EXPONENTIAL:  //  lambda_0  -->  lambda
// p(lambda) = 1 / (1 + (1/p - 1) * lambda/lambda_0)
for (int k=0; k<K; k++)
forall_edges(e, G[k])
cond_prob_prime[k][e] = 1.0 / (1.0 + ((1.0 / cond_prob[k][e]) - 1.0) * (output_param / sampling_param));
break;

default :
std::cerr << "Unknown sampling_mode -- " << sampling_mode << std::endl;
_Rtreemix_exit(1);
}

return cond_prob_prime;
}

```