49a5b154 |
#include "GapsRunner.h"
|
0733d393 |
|
16472f10 |
#ifdef __GAPS_R_BUILD__
#include <Rcpp.h>
|
0733d393 |
#endif
|
49a5b154 |
|
e13559eb |
#ifdef __GAPS_OPENMP__
#include <omp.h>
#endif
|
bd6604c5 |
///////////////////////////// RAII wrapper /////////////////////////////////////
GapsRunner::~GapsRunner()
{
delete mRunner;
}
|
7bbb1f81 |
GapsResult GapsRunner::run()
|
dfc276e0 |
{
|
bd6604c5 |
return mRunner->run();
}
///////////////////////// Abstract Interface ///////////////////////////////////
AbstractGapsRunner::AbstractGapsRunner(const GapsParameters ¶ms)
:
mStatistics(params.nGenes, params.nSamples, params.nPatterns),
mCheckpointOutFile(params.checkpointOutFile),
mCurrentIteration(0),
mMaxIterations(params.nIterations),
mMaxThreads(params.maxThreads),
mOutputFrequency(params.outputFrequency),
mCheckpointInterval(params.checkpointInterval),
mNumPatterns(params.nPatterns),
mNumUpdatesA(0),
mNumUpdatesP(0),
mSeed(params.seed),
mPrintMessages(params.printMessages),
mPrintThreadUsage(params.printThreadUsage),
mPhase('C'),
mFixedMatrix(params.whichFixedMatrix)
{}
GapsResult AbstractGapsRunner::run()
{
GAPS_ASSERT(mPhase == 'C' || mPhase == 'S');
|
dfc276e0 |
mStartTime = bpt_now();
|
e13559eb |
// calculate appropiate number of threads if compiled with openmp
#ifdef __GAPS_OPENMP__
|
7bbb1f81 |
if (mPrintMessages && mPrintThreadUsage)
|
e13559eb |
{
unsigned availableThreads = omp_get_max_threads();
|
c5bcadbc |
mMaxThreads = gaps::min(availableThreads, mMaxThreads);
|
e13559eb |
gaps_printf("Running on %d out of %d available threads\n",
mMaxThreads, availableThreads);
}
#endif
// cascade through phases, allows algorithm to be resumed in either phase
switch (mPhase)
{
case 'C':
if (mPrintMessages)
{
gaps_printf("-- Calibration Phase --\n");
}
runOnePhase();
mPhase = 'S';
mCurrentIteration = 0;
case 'S':
if (mPrintMessages)
{
gaps_printf("-- Sampling Phase --\n");
}
runOnePhase();
break;
}
GapsResult result(mStatistics);
|
bd6604c5 |
result.meanChiSq = meanChiSq();
|
e13559eb |
return result;
|
dfc276e0 |
}
|
bd6604c5 |
void AbstractGapsRunner::runOnePhase()
|
49a5b154 |
{
|
e13559eb |
for (; mCurrentIteration < mMaxIterations; ++mCurrentIteration)
|
49a5b154 |
{
|
34779e16 |
createCheckpoint();
|
16472f10 |
#ifdef __GAPS_R_BUILD__
|
e4a86274 |
Rcpp::checkUserInterrupt();
|
16472f10 |
#endif
|
bd6604c5 |
// set annealing temperature in calibration phase
|
e13559eb |
if (mPhase == 'C')
|
16472f10 |
{
|
e13559eb |
float temp = static_cast<float>(2 * mCurrentIteration)
/ static_cast<float>(mMaxIterations);
|
bd6604c5 |
setAnnealingTemp(gaps::min(1.f, temp));
|
16472f10 |
}
// number of updates per iteration is poisson
|
bd6604c5 |
unsigned nA = mRng.poisson(gaps::max(nAtoms('A'), 10));
unsigned nP = mRng.poisson(gaps::max(nAtoms('P'), 10));
|
e13559eb |
updateSampler(nA, nP);
|
0ffa520a |
|
e13559eb |
if (mPhase == 'S')
|
16472f10 |
{
|
bd6604c5 |
updateStatistics();
|
16472f10 |
}
|
e13559eb |
displayStatus();
}
|
2cdb0256 |
}
|
731c4313 |
// sum coef * log(i) for i = 1 to total, fit coef from number of atoms
// approximates sum of number of atoms (stirling approx to factorial)
// this should be proportional to total number of updates
static double estimatedNumUpdates(double current, double total, float nAtoms)
{
double coef = nAtoms / std::log(current);
|
c5bcadbc |
return coef * std::log(std::sqrt(2 * total * gaps::pi)) +
|
731c4313 |
total * coef * std::log(total) - total * coef;
}
|
bd6604c5 |
double AbstractGapsRunner::estimatedPercentComplete() const
|
731c4313 |
{
double nIter = static_cast<double>(mCurrentIteration);
|
bd6604c5 |
double nAtomsA = static_cast<double>(nAtoms('A'));
double nAtomsP = static_cast<double>(nAtoms('P'));
|
731c4313 |
if (mPhase == 'S')
{
nIter += mMaxIterations;
}
double totalIter = 2.0 * static_cast<double>(mMaxIterations);
double estimatedCompleted = estimatedNumUpdates(nIter, nIter, nAtomsA) +
estimatedNumUpdates(nIter, nIter, nAtomsP);
double estimatedTotal = estimatedNumUpdates(nIter, totalIter, nAtomsA) +
estimatedNumUpdates(nIter, totalIter, nAtomsP);
return estimatedCompleted / estimatedTotal;
}
|
bd6604c5 |
void AbstractGapsRunner::displayStatus()
|
49a5b154 |
{
|
e13559eb |
if (mPrintMessages && mOutputFrequency > 0 && ((mCurrentIteration + 1) % mOutputFrequency) == 0)
|
2cdb0256 |
{
|
dfc276e0 |
bpt::time_duration diff = bpt_now() - mStartTime;
|
731c4313 |
double nSecondsCurrent = diff.total_seconds();
double nSecondsTotal = nSecondsCurrent / estimatedPercentComplete();
unsigned elapsedSeconds = static_cast<unsigned>(nSecondsCurrent);
unsigned totalSeconds = static_cast<unsigned>(nSecondsTotal);
unsigned elapsedHours = elapsedSeconds / 3600;
elapsedSeconds -= elapsedHours * 3600;
unsigned elapsedMinutes = elapsedSeconds / 60;
elapsedSeconds -= elapsedMinutes * 60;
|
dfc276e0 |
|
731c4313 |
unsigned totalHours = totalSeconds / 3600;
totalSeconds -= totalHours * 3600;
unsigned totalMinutes = totalSeconds / 60;
totalSeconds -= totalMinutes * 60;
|
dfc276e0 |
|
731c4313 |
gaps_printf("%d of %d, Atoms: %lu(%lu), ChiSq: %.0f, Time: %02d:%02d:%02d / %02d:%02d:%02d\n",
|
bd6604c5 |
mCurrentIteration + 1, mMaxIterations, nAtoms('A'),
nAtoms('P'), chiSq(), elapsedHours, elapsedMinutes,
|
731c4313 |
elapsedSeconds, totalHours, totalMinutes, totalSeconds);
|
b525c989 |
gaps_flush();
|
2cdb0256 |
}
|
aca38e04 |
}
|
dfc276e0 |
|
bd6604c5 |
void AbstractGapsRunner::createCheckpoint()
|
e13559eb |
{
if (mCheckpointInterval > 0 && ((mCurrentIteration + 1) % mCheckpointInterval) == 0)
{
// create backup file
std::rename(mCheckpointOutFile.c_str(), (mCheckpointOutFile + ".backup").c_str());
// create checkpoint file
Archive ar(mCheckpointOutFile, ARCHIVE_WRITE);
|
7bbb1f81 |
ar << mNumPatterns << mSeed << mMaxIterations << mFixedMatrix << mPhase
|
bd6604c5 |
<< mCurrentIteration << mNumUpdatesA << mNumUpdatesP << mRng;
writeSamplers(ar);
|
2ec5cfe6 |
GapsRng::save(ar);
|
e13559eb |
// delete backup file
std::remove((mCheckpointOutFile + ".backup").c_str());
}
}
|
bd6604c5 |
///////////////////// DenseGapsRunner Implementation ///////////////////////////
float DenseGapsRunner::chiSq() const
{
// doesn't matter which sampler is called
return mPSampler.chiSq();
}
float DenseGapsRunner::meanChiSq() const
{
// need to pass P sampler (due to configuration of internal data)
return mStatistics.meanChiSq(mPSampler);
}
unsigned DenseGapsRunner::nAtoms(char which) const
{
return which == 'A' ? mASampler.nAtoms() : mPSampler.nAtoms();
}
void DenseGapsRunner::setAnnealingTemp(float temp)
{
mASampler.setAnnealingTemp(temp);
mPSampler.setAnnealingTemp(temp);
}
void DenseGapsRunner::updateStatistics()
{
mStatistics.update(mASampler, mPSampler);
}
Archive& DenseGapsRunner::readSamplers(Archive &ar)
{
ar >> mASampler >> mPSampler;
return ar;
}
Archive& DenseGapsRunner::writeSamplers(Archive &ar)
{
ar << mASampler << mPSampler;
return ar;
}
void DenseGapsRunner::updateSampler(unsigned nA, unsigned nP)
{
if (mFixedMatrix != 'A')
{
mNumUpdatesA += nA;
mASampler.update(nA, mMaxThreads);
if (mFixedMatrix != 'P')
{
mPSampler.sync(mASampler, mMaxThreads);
}
GAPS_ASSERT(mASampler.internallyConsistent());
}
if (mFixedMatrix != 'P')
{
mNumUpdatesP += nP;
mPSampler.update(nP, mMaxThreads);
if (mFixedMatrix != 'A')
{
mASampler.sync(mPSampler, mMaxThreads);
}
GAPS_ASSERT(mPSampler.internallyConsistent());
}
}
void DenseGapsRunner::setUncertainty(const Matrix &unc, const GapsParameters ¶ms)
{
mASampler.setUncertainty(unc, !params.transposeData, !params.subsetGenes, params);
mPSampler.setUncertainty(unc, params.transposeData, params.subsetGenes, params);
}
void DenseGapsRunner::setUncertainty(const std::string &unc, const GapsParameters ¶ms)
{
mASampler.setUncertainty(unc, !params.transposeData, !params.subsetGenes, params);
mPSampler.setUncertainty(unc, params.transposeData, params.subsetGenes, params);
}
///////////////////// SparseGapsRunner Implementation //////////////////////////
float SparseGapsRunner::chiSq() const
{
// doesn't matter which sampler is called
return mPSampler.chiSq();
}
float SparseGapsRunner::meanChiSq() const
{
// need to pass P sampler (due to configuration of internal data)
return mStatistics.meanChiSq(mPSampler);
}
unsigned SparseGapsRunner::nAtoms(char which) const
{
return which == 'A' ? mASampler.nAtoms() : mPSampler.nAtoms();
}
void SparseGapsRunner::setAnnealingTemp(float temp)
{
mASampler.setAnnealingTemp(temp);
mPSampler.setAnnealingTemp(temp);
}
void SparseGapsRunner::updateStatistics()
{
mStatistics.update(mASampler, mPSampler);
}
Archive& SparseGapsRunner::readSamplers(Archive &ar)
{
ar >> mASampler >> mPSampler;
return ar;
}
Archive& SparseGapsRunner::writeSamplers(Archive &ar)
{
ar << mASampler << mPSampler;
return ar;
}
void SparseGapsRunner::updateSampler(unsigned nA, unsigned nP)
{
if (mFixedMatrix != 'A')
{
mNumUpdatesA += nA;
mASampler.update(nA, mMaxThreads);
if (mFixedMatrix != 'P')
{
mPSampler.sync(mASampler, mMaxThreads);
}
GAPS_ASSERT(mASampler.internallyConsistent());
}
if (mFixedMatrix != 'P')
{
mNumUpdatesP += nP;
mPSampler.update(nP, mMaxThreads);
if (mFixedMatrix != 'A')
{
mASampler.sync(mPSampler, mMaxThreads);
}
GAPS_ASSERT(mPSampler.internallyConsistent());
}
}
void SparseGapsRunner::setUncertainty(const Matrix &unc, const GapsParameters ¶ms)
{
// nothing happens - SparseGibbsSampler assumes default uncertainty always
}
void SparseGapsRunner::setUncertainty(const std::string &unc, const GapsParameters ¶ms)
{
// nothing happens - SparseGibbsSampler assumes default uncertainty always
}
|