326f362a |
|
6c315cf8 |
#include "R_interface.h"
|
8a70266d |
static ScaleHMM* hmm; // declare as static outside the function because we only need one and this enables memory-cleanup on R_CheckUserInterrupt()
static double** multiD;
|
6ad4e3b8 |
|
a0b1f4ae |
// ===================================================================================================================================================
|
fa363ffb |
// This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the EM and returns the result to R.
|
a0b1f4ae |
// ===================================================================================================================================================
|
b1ab269a |
void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm, int* verbosity)
|
a0b1f4ae |
{
|
6ad4e3b8 |
// Define logging level
|
a0b1f4ae |
// FILE* pFile = fopen("chromStar.log", "w");
|
6ad4e3b8 |
// Output2FILE::Stream() = pFile;
|
8a70266d |
// FILELog::ReportingLevel() = FILELog::FromString("NONE");
|
41c75ecc |
// FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
|
6ad4e3b8 |
|
7d8b241d |
// // Parallelization settings
|
a9469ef4 |
// omp_set_num_threads(*num_threads);
|
6ad4e3b8 |
// Print some information
|
8a70266d |
//FILE_LOG(logINFO) << "number of states = " << *N;
|
b1ab269a |
if (*verbosity>=1) Rprintf("number of states = %d\n", *N);
|
8a70266d |
//FILE_LOG(logINFO) << "number of bins = " << *T;
|
b1ab269a |
if (*verbosity>=1) Rprintf("number of bins = %d\n", *T);
|
6ad4e3b8 |
if (*maxiter < 0)
{
|
8a70266d |
//FILE_LOG(logINFO) << "maximum number of iterations = none";
|
b1ab269a |
if (*verbosity>=1) Rprintf("maximum number of iterations = none\n");
|
6ad4e3b8 |
} else {
|
8a70266d |
//FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
|
b1ab269a |
if (*verbosity>=1) Rprintf("maximum number of iterations = %d\n", *maxiter);
|
6ad4e3b8 |
}
if (*maxtime < 0)
{
|
8a70266d |
//FILE_LOG(logINFO) << "maximum running time = none";
|
b1ab269a |
if (*verbosity>=1) Rprintf("maximum running time = none\n");
|
6ad4e3b8 |
} else {
|
8a70266d |
//FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
|
b1ab269a |
if (*verbosity>=1) Rprintf("maximum running time = %d sec\n", *maxtime);
|
6ad4e3b8 |
}
|
8a70266d |
//FILE_LOG(logINFO) << "epsilon = " << *eps;
|
b1ab269a |
if (*verbosity>=1) Rprintf("epsilon = %g\n", *eps);
|
a0b1f4ae |
|
8a70266d |
//FILE_LOG(logDEBUG3) << "observation vector";
|
6ad4e3b8 |
for (int t=0; t<50; t++) {
|
8a70266d |
//FILE_LOG(logDEBUG3) << "O["<<t<<"] = " << O[t];
|
6ad4e3b8 |
}
|
b1ab269a |
// Flush if (*verbosity>=1) Rprintf statements to console
|
a0b1f4ae |
R_FlushConsole();
|
6ad4e3b8 |
// Create the HMM
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Creating a univariate HMM";
hmm = new ScaleHMM(*T, *N);
|
1fe9061a |
// LogHMM* hmm = new LogHMM(*T, *N);
|
a0b1f4ae |
hmm->set_cutoff(*read_cutoff);
|
6ad4e3b8 |
// Initialize the transition probabilities and proba
|
079638e5 |
hmm->initialize_transition_probs(initial_A, *use_initial_params);
hmm->initialize_proba(initial_proba, *use_initial_params);
|
6ad4e3b8 |
// Calculate mean and variance of data
double mean = 0, variance = 0;
for(int t=0; t<*T; t++)
{
mean+= O[t];
}
mean = mean / *T;
for(int t=0; t<*T; t++)
{
variance+= pow(O[t] - mean, 2);
}
variance = variance / *T;
|
8a70266d |
//FILE_LOG(logINFO) << "data mean = " << mean << ", data variance = " << variance;
|
b1ab269a |
if (*verbosity>=1) Rprintf("data mean = %g, data variance = %g\n", mean, variance);
|
6ad4e3b8 |
|
7d8b241d |
// Create the emission densities and initialize
|
f4cdf709 |
for (int i_state=0; i_state<*N; i_state++)
{
if (distr_type[i_state] == 1)
|
a0b1f4ae |
{
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Using delta distribution for state " << i_state;
|
1fe9061a |
ZeroInflation *d = new ZeroInflation(O, *T); // delete is done inside ~ScaleHMM()
|
a0b1f4ae |
hmm->densityFunctions.push_back(d);
}
|
f4cdf709 |
else if (distr_type[i_state] == 2)
|
ecb41d4a |
{
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Using geometric distribution for state " << i_state;
|
7d8b241d |
Geometric *d = new Geometric(O, *T, initial_prob[i_state]); // delete is done inside ~ScaleHMM()
|
1fe9061a |
hmm->densityFunctions.push_back(d);
}
|
f4cdf709 |
else if (distr_type[i_state] == 3)
|
1fe9061a |
{
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Using negative binomial for state " << i_state;
|
1fe9061a |
NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
|
ecb41d4a |
hmm->densityFunctions.push_back(d);
}
|
7d8b241d |
else if (distr_type[i_state] == 4)
{
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Using binomial for state " << i_state;
|
7d8b241d |
NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
hmm->densityFunctions.push_back(d);
}
|
ecb41d4a |
else
{
|
8a70266d |
//FILE_LOG(logWARNING) << "Density not specified, using default negative binomial for state " << i_state;
|
a0b1f4ae |
NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]);
|
ecb41d4a |
hmm->densityFunctions.push_back(d);
}
|
6ad4e3b8 |
}
|
b1ab269a |
// Flush if (*verbosity>=1) Rprintf statements to console
|
a0b1f4ae |
R_FlushConsole();
|
fa363ffb |
// Do the EM to estimate the parameters
|
a0b1f4ae |
try
{
|
fa363ffb |
if (*algorithm == 1)
{
hmm->baumWelch();
}
else if (*algorithm == 3)
{
//FILE_LOG(logDEBUG1) << "Starting EM estimation";
hmm->EM(maxiter, maxtime, eps);
//FILE_LOG(logDEBUG1) << "Finished with EM estimation";
}
|
a0b1f4ae |
}
catch (std::exception& e)
{
|
fa363ffb |
//FILE_LOG(logERROR) << "Error in EM/baumWelch: " << e.what();
|
b1ab269a |
if (*verbosity>=1) Rprintf("Error in EM/baumWelch: %s\n", e.what());
|
8a70266d |
if (strcmp(e.what(),"nan detected")==0) { *error = 1; }
|
a0b1f4ae |
else { *error = 2; }
}
|
631d6738 |
|
fba447b1 |
// // Compute the posteriors and save results directly to the R pointer
// //FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
// #pragma omp parallel for
// for (int iN=0; iN<*N; iN++)
// {
// for (int t=0; t<*T; t++)
// {
// posteriors[t + iN * (*T)] = hmm->get_posterior(iN, t);
// }
// }
|
631d6738 |
// Compute the states from posteriors
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Computing states from posteriors";
int ind_max;
std::vector<double> posterior_per_t(*N);
|
631d6738 |
for (int t=0; t<*T; t++)
|
6ad4e3b8 |
{
|
631d6738 |
for (int iN=0; iN<*N; iN++)
|
6ad4e3b8 |
{
|
631d6738 |
posterior_per_t[iN] = hmm->get_posterior(iN, t);
|
6ad4e3b8 |
}
|
8a70266d |
ind_max = std::distance(posterior_per_t.begin(), std::max_element(posterior_per_t.begin(), posterior_per_t.end()));
states[t] = state_labels[ind_max];
|
fba447b1 |
maxPosterior[t] = posterior_per_t[ind_max];
|
6ad4e3b8 |
}
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Return parameters";
|
6ad4e3b8 |
// also return the estimated transition matrix and the initial probs
|
079638e5 |
for (int i=0; i<*N; i++)
|
6ad4e3b8 |
{
|
079638e5 |
proba[i] = hmm->get_proba(i);
for (int j=0; j<*N; j++)
|
6ad4e3b8 |
{
|
7d8b241d |
A[i * (*N) + j] = hmm->get_A(j,i);
|
6ad4e3b8 |
}
}
// copy the estimated distribution params
|
079638e5 |
for (int i=0; i<*N; i++)
|
6ad4e3b8 |
{
|
a0b1f4ae |
if (hmm->densityFunctions[i]->get_name() == NEGATIVE_BINOMIAL)
{
NegativeBinomial* d = (NegativeBinomial*)(hmm->densityFunctions[i]);
size[i] = d->get_size();
prob[i] = d->get_prob();
}
|
1fe9061a |
else if (hmm->densityFunctions[i]->get_name() == GEOMETRIC)
{
Geometric* d = (Geometric*)(hmm->densityFunctions[i]);
size[i] = 0;
prob[i] = d->get_prob();
}
|
a0b1f4ae |
else if (hmm->densityFunctions[i]->get_name() == ZERO_INFLATION)
{
// These values for a Negative Binomial define a zero-inflation (delta distribution)
size[i] = 0;
prob[i] = 1;
}
|
7d8b241d |
else if (hmm->densityFunctions[i]->get_name() == BINOMIAL)
{
Binomial* d = (Binomial*)(hmm->densityFunctions[i]);
size[i] = d->get_size();
prob[i] = d->get_prob();
}
|
6ad4e3b8 |
}
|
079638e5 |
*loglik = hmm->get_logP();
hmm->calc_weights(weights);
|
6ad4e3b8 |
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Deleting the hmm";
|
079638e5 |
delete hmm;
|
8a70266d |
hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
|
6ad4e3b8 |
}
|
95bcf480 |
// =====================================================================================================================================================
|
fa363ffb |
// This function takes parameters from R, creates a multivariate HMM object, runs the EM and returns the result to R.
|
95bcf480 |
// =====================================================================================================================================================
|
c683cb96 |
void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm, int* verbosity)
|
95bcf480 |
{
// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"}
// FILE* pFile = fopen("chromStar.log", "w");
// Output2FILE::Stream() = pFile;
// FILELog::ReportingLevel() = FILELog::FromString("ITERATION");
// FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
|
8a70266d |
// FILELog::ReportingLevel() = FILELog::FromString("ERROR");
|
95bcf480 |
// Parallelization settings
|
a9469ef4 |
// omp_set_num_threads(*num_threads);
|
95bcf480 |
// Print some information
|
8a70266d |
//FILE_LOG(logINFO) << "number of states = " << *N;
|
b1ab269a |
if (*verbosity>=1) Rprintf("number of states = %d\n", *N);
|
8a70266d |
//FILE_LOG(logINFO) << "number of bins = " << *T;
|
b1ab269a |
if (*verbosity>=1) Rprintf("number of bins = %d\n", *T);
|
95bcf480 |
if (*maxiter < 0)
{
|
8a70266d |
//FILE_LOG(logINFO) << "maximum number of iterations = none";
|
b1ab269a |
if (*verbosity>=1) Rprintf("maximum number of iterations = none\n");
|
95bcf480 |
} else {
|
8a70266d |
//FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
|
b1ab269a |
if (*verbosity>=1) Rprintf("maximum number of iterations = %d\n", *maxiter);
|
95bcf480 |
}
if (*maxtime < 0)
{
|
8a70266d |
//FILE_LOG(logINFO) << "maximum running time = none";
|
b1ab269a |
if (*verbosity>=1) Rprintf("maximum running time = none\n");
|
95bcf480 |
} else {
|
8a70266d |
//FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
|
b1ab269a |
if (*verbosity>=1) Rprintf("maximum running time = %d sec\n", *maxtime);
|
95bcf480 |
}
|
8a70266d |
//FILE_LOG(logINFO) << "epsilon = " << *eps;
|
b1ab269a |
if (*verbosity>=1) Rprintf("epsilon = %g\n", *eps);
|
8a70266d |
//FILE_LOG(logINFO) << "number of modifications = " << *Nmod;
|
b1ab269a |
if (*verbosity>=1) Rprintf("number of modifications = %d\n", *Nmod);
|
95bcf480 |
|
b1ab269a |
// Flush if (*verbosity>=1) Rprintf statements to console
|
95bcf480 |
R_FlushConsole();
// Recode the densities vector to matrix representation
// clock_t clocktime = clock(), dtime;
|
8a70266d |
multiD = CallocDoubleMatrix(*N, *T);
|
95bcf480 |
for (int iN=0; iN<*N; iN++)
{
for (int t=0; t<*T; t++)
{
multiD[iN][t] = D[iN*(*T)+t];
}
}
// dtime = clock() - clocktime;
|
8a70266d |
// //FILE_LOG(logDEBUG1) << "recoding densities vector to matrix representation: " << dtime << " clicks";
|
95bcf480 |
// Create the HMM
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Creating the multivariate HMM";
hmm = new ScaleHMM(*T, *N, *Nmod, multiD);
|
95bcf480 |
// Initialize the transition probabilities and proba
hmm->initialize_transition_probs(initial_A, *use_initial_params);
hmm->initialize_proba(initial_proba, *use_initial_params);
// Print logproba and A
// for (int iN=0; iN<*N; iN++)
// {
|
8a70266d |
// //FILE_LOG(logDEBUG) << "proba["<<iN<<"] = " <<exp(hmm->logproba[iN]);
|
95bcf480 |
// for (int jN=0; jN<*N; jN++)
// {
|
8a70266d |
// //FILE_LOG(logDEBUG) << "A["<<iN<<"]["<<jN<<"] = " << hmm->A[iN][jN];
|
95bcf480 |
// }
// }
|
fa363ffb |
// Do the EM to estimate the parameters
|
95bcf480 |
try
{
|
fa363ffb |
if (*algorithm == 1)
{
hmm->baumWelch();
}
else if (*algorithm == 3)
{
//FILE_LOG(logDEBUG1) << "Starting EM estimation";
hmm->EM(maxiter, maxtime, eps);
//FILE_LOG(logDEBUG1) << "Finished with EM estimation";
}
|
95bcf480 |
}
catch (std::exception& e)
{
|
fa363ffb |
//FILE_LOG(logERROR) << "Error in EM/baumWelch: " << e.what();
|
b1ab269a |
if (*verbosity>=1) Rprintf("Error in EM/baumWelch: %s\n", e.what());
|
8a70266d |
if (strcmp(e.what(),"nan detected")==0) { *error = 1; }
|
95bcf480 |
else { *error = 2; }
}
// // Compute the posteriors and save results directly to the R pointer
|
8a70266d |
// //FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
|
95bcf480 |
// for (int iN=0; iN<*N; iN++)
// {
// for (int t=0; t<*T; t++)
// {
// posteriors[t + iN * (*T)] = hmm->get_posterior(iN, t);
// }
// }
// Compute the states from posteriors
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Computing states from posteriors";
int ind_max;
std::vector<double> posterior_per_t(*N);
|
95bcf480 |
for (int t=0; t<*T; t++)
{
for (int iN=0; iN<*N; iN++)
{
posterior_per_t[iN] = hmm->get_posterior(iN, t);
}
|
8a70266d |
ind_max = std::distance(posterior_per_t.begin(), std::max_element(posterior_per_t.begin(), posterior_per_t.end()));
states[t] = comb_states[ind_max];
|
c683cb96 |
maxPosterior[t] = posterior_per_t[ind_max];
|
95bcf480 |
}
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Return parameters";
|
95bcf480 |
// also return the estimated transition matrix and the initial probs
for (int i=0; i<*N; i++)
{
proba[i] = hmm->get_proba(i);
for (int j=0; j<*N; j++)
{
A[i * (*N) + j] = hmm->get_A(j,i);
}
}
*loglik = hmm->get_logP();
|
8a70266d |
//FILE_LOG(logDEBUG1) << "Deleting the hmm";
|
95bcf480 |
delete hmm;
|
8a70266d |
hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
// FreeDoubleMatrix(multiD, *N);
|
95bcf480 |
}
|
8a70266d |
// =======================================================
// This function make a cleanup if anything was left over
// =======================================================
|
08327ec8 |
void univariate_cleanup()
|
8a70266d |
{
// //FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__; // This message will be shown if interrupt happens before start of C-code
delete hmm;
}
|
08327ec8 |
void multivariate_cleanup(int* N)
|
8a70266d |
{
delete hmm;
FreeDoubleMatrix(multiD, *N);
}
|
fba447b1 |
// ====================================================================
// C version of apply(array2D, 1, which.max) and apply(array2D, 1, max)
// ====================================================================
void array2D_which_max(double* array2D, int* dim, int* ind_max, double* value_max)
{
// array2D is actually a vector, but is intended to originate from a 2D array in R
std::vector<double> value_per_i0(dim[1]);
for (int i0=0; i0<dim[0]; i0++)
{
for (int i1=0; i1<dim[1]; i1++)
{
value_per_i0[i1] = array2D[i1 * dim[0] + i0];
|
b1ab269a |
// if (*verbosity>=1) Rprintf("i0=%d, i1=%d, value_per_i0[%d] = %g\n", i0, i1, i1, value_per_i0[i1]);
|
fba447b1 |
}
ind_max[i0] = 1 + std::distance(value_per_i0.begin(), std::max_element(value_per_i0.begin(), value_per_i0.end()));
value_max[i0] = *std::max_element(value_per_i0.begin(), value_per_i0.end());
}
|
b1ab269a |
}
|