src/GapsRunner.cpp
49a5b154
 #include "GapsRunner.h"
0733d393
 
16472f10
 #ifdef __GAPS_R_BUILD__
 #include <Rcpp.h>
0733d393
 #endif
49a5b154
 
e13559eb
 #ifdef __GAPS_OPENMP__
 #include <omp.h>
 #endif
 
bd6604c5
 ///////////////////////////// RAII wrapper /////////////////////////////////////
 
 GapsRunner::~GapsRunner()
 {
     delete mRunner;
 }
 
7bbb1f81
 GapsResult GapsRunner::run()
dfc276e0
 {
bd6604c5
     return mRunner->run();
 }
 
 ///////////////////////// Abstract Interface ///////////////////////////////////
 
 AbstractGapsRunner::AbstractGapsRunner(const GapsParameters &params)
     :
 mStatistics(params.nGenes, params.nSamples, params.nPatterns),
 mCheckpointOutFile(params.checkpointOutFile),
 mCurrentIteration(0),
 mMaxIterations(params.nIterations),
 mMaxThreads(params.maxThreads),
 mOutputFrequency(params.outputFrequency),
 mCheckpointInterval(params.checkpointInterval),
 mNumPatterns(params.nPatterns),
 mNumUpdatesA(0),
 mNumUpdatesP(0),
 mSeed(params.seed),
 mPrintMessages(params.printMessages),
 mPrintThreadUsage(params.printThreadUsage),
 mPhase('C'),
 mFixedMatrix(params.whichFixedMatrix)
 {}
 
 GapsResult AbstractGapsRunner::run()
 {
     GAPS_ASSERT(mPhase == 'C' || mPhase == 'S');
 
dfc276e0
     mStartTime = bpt_now();
e13559eb
 
     // calculate appropiate number of threads if compiled with openmp
     #ifdef __GAPS_OPENMP__
7bbb1f81
     if (mPrintMessages && mPrintThreadUsage)
e13559eb
     {
         unsigned availableThreads = omp_get_max_threads();
c5bcadbc
         mMaxThreads = gaps::min(availableThreads, mMaxThreads);
e13559eb
         gaps_printf("Running on %d out of %d available threads\n",
             mMaxThreads, availableThreads);
     }
     #endif
 
     // cascade through phases, allows algorithm to be resumed in either phase
     switch (mPhase)
     {
         case 'C':
             if (mPrintMessages)
             {
                 gaps_printf("-- Calibration Phase --\n");
             }
             runOnePhase();
             mPhase = 'S';
             mCurrentIteration = 0;
 
         case 'S':
             if (mPrintMessages)
             {
                 gaps_printf("-- Sampling Phase --\n");
             }
             runOnePhase();
             break;
     }
     GapsResult result(mStatistics);
bd6604c5
     result.meanChiSq = meanChiSq();
e13559eb
     return result;    
dfc276e0
 }
 
bd6604c5
 void AbstractGapsRunner::runOnePhase()
49a5b154
 {
e13559eb
     for (; mCurrentIteration < mMaxIterations; ++mCurrentIteration)
49a5b154
     {
34779e16
         createCheckpoint();
 
16472f10
         #ifdef __GAPS_R_BUILD__
e4a86274
         Rcpp::checkUserInterrupt();
16472f10
         #endif
         
bd6604c5
         // set annealing temperature in calibration phase
e13559eb
         if (mPhase == 'C')
16472f10
         {        
e13559eb
             float temp = static_cast<float>(2 * mCurrentIteration)
                 / static_cast<float>(mMaxIterations);
bd6604c5
             setAnnealingTemp(gaps::min(1.f, temp));
16472f10
         }
     
         // number of updates per iteration is poisson 
bd6604c5
         unsigned nA = mRng.poisson(gaps::max(nAtoms('A'), 10));
         unsigned nP = mRng.poisson(gaps::max(nAtoms('P'), 10));
e13559eb
         updateSampler(nA, nP);
0ffa520a
 
e13559eb
         if (mPhase == 'S')
16472f10
         {
bd6604c5
             updateStatistics();
16472f10
         }
e13559eb
         displayStatus();
     }
2cdb0256
 }
 
731c4313
 // sum coef * log(i) for i = 1 to total, fit coef from number of atoms
 // approximates sum of number of atoms (stirling approx to factorial)
 // this should be proportional to total number of updates
 static double estimatedNumUpdates(double current, double total, float nAtoms)
 {
     double coef = nAtoms / std::log(current);
c5bcadbc
     return coef * std::log(std::sqrt(2 * total * gaps::pi)) +
731c4313
         total * coef * std::log(total) - total * coef;
 }
 
bd6604c5
 
 double AbstractGapsRunner::estimatedPercentComplete() const
731c4313
 {
     double nIter = static_cast<double>(mCurrentIteration);
bd6604c5
     double nAtomsA = static_cast<double>(nAtoms('A'));
     double nAtomsP = static_cast<double>(nAtoms('P'));
731c4313
     
     if (mPhase == 'S')
     {
         nIter += mMaxIterations;
     }
 
     double totalIter = 2.0 * static_cast<double>(mMaxIterations);
 
     double estimatedCompleted = estimatedNumUpdates(nIter, nIter, nAtomsA) + 
         estimatedNumUpdates(nIter, nIter, nAtomsP);
 
     double estimatedTotal = estimatedNumUpdates(nIter, totalIter, nAtomsA) + 
         estimatedNumUpdates(nIter, totalIter, nAtomsP);
 
     return estimatedCompleted / estimatedTotal;
 }
 
bd6604c5
 void AbstractGapsRunner::displayStatus()
49a5b154
 {
e13559eb
     if (mPrintMessages && mOutputFrequency > 0 && ((mCurrentIteration + 1) % mOutputFrequency) == 0)
2cdb0256
     {
dfc276e0
         bpt::time_duration diff = bpt_now() - mStartTime;
731c4313
         double nSecondsCurrent = diff.total_seconds();
         double nSecondsTotal = nSecondsCurrent / estimatedPercentComplete();
 
         unsigned elapsedSeconds = static_cast<unsigned>(nSecondsCurrent);
         unsigned totalSeconds = static_cast<unsigned>(nSecondsTotal);
 
         unsigned elapsedHours = elapsedSeconds / 3600;
         elapsedSeconds -= elapsedHours * 3600;
         unsigned elapsedMinutes = elapsedSeconds / 60;
         elapsedSeconds -= elapsedMinutes * 60;
dfc276e0
 
731c4313
         unsigned totalHours = totalSeconds / 3600;
         totalSeconds -= totalHours * 3600;
         unsigned totalMinutes = totalSeconds / 60;
         totalSeconds -= totalMinutes * 60;
dfc276e0
 
731c4313
         gaps_printf("%d of %d, Atoms: %lu(%lu), ChiSq: %.0f, Time: %02d:%02d:%02d / %02d:%02d:%02d\n",
bd6604c5
             mCurrentIteration + 1, mMaxIterations, nAtoms('A'),
             nAtoms('P'), chiSq(), elapsedHours, elapsedMinutes,
731c4313
             elapsedSeconds, totalHours, totalMinutes, totalSeconds);
b525c989
         gaps_flush();
2cdb0256
     }
aca38e04
 }
dfc276e0
 
bd6604c5
 void AbstractGapsRunner::createCheckpoint()
e13559eb
 {
     if (mCheckpointInterval > 0 && ((mCurrentIteration + 1) % mCheckpointInterval) == 0)
     {
         // create backup file
         std::rename(mCheckpointOutFile.c_str(), (mCheckpointOutFile + ".backup").c_str());
     
         // create checkpoint file
         Archive ar(mCheckpointOutFile, ARCHIVE_WRITE);
7bbb1f81
         ar << mNumPatterns << mSeed << mMaxIterations << mFixedMatrix << mPhase
bd6604c5
             << mCurrentIteration << mNumUpdatesA << mNumUpdatesP << mRng;
         writeSamplers(ar);
2ec5cfe6
         GapsRng::save(ar);
e13559eb
 
         // delete backup file
         std::remove((mCheckpointOutFile + ".backup").c_str());
     }
 }
bd6604c5
 
 ///////////////////// DenseGapsRunner Implementation ///////////////////////////
 
 float DenseGapsRunner::chiSq() const
 {
     // doesn't matter which sampler is called
     return mPSampler.chiSq();
 }
 
 float DenseGapsRunner::meanChiSq() const
 {
     // need to pass P sampler (due to configuration of internal data)
     return mStatistics.meanChiSq(mPSampler);
 }
 
 unsigned DenseGapsRunner::nAtoms(char which) const
 {
     return which == 'A' ? mASampler.nAtoms() : mPSampler.nAtoms();
 }
 
 void DenseGapsRunner::setAnnealingTemp(float temp)
 {
     mASampler.setAnnealingTemp(temp);
     mPSampler.setAnnealingTemp(temp);
 }
 
 void DenseGapsRunner::updateStatistics()
 {
     mStatistics.update(mASampler, mPSampler);
 }
 
 Archive& DenseGapsRunner::readSamplers(Archive &ar)
 {
     ar >> mASampler >> mPSampler;
     return ar;
 }
 
 Archive& DenseGapsRunner::writeSamplers(Archive &ar)
 {
     ar << mASampler << mPSampler;
     return ar;
 }
 
 void DenseGapsRunner::updateSampler(unsigned nA, unsigned nP)
 {
     if (mFixedMatrix != 'A')
     {
         mNumUpdatesA += nA;
         mASampler.update(nA, mMaxThreads);
         if (mFixedMatrix != 'P')
         {
             mPSampler.sync(mASampler, mMaxThreads);
         }
         GAPS_ASSERT(mASampler.internallyConsistent());
     }
 
     if (mFixedMatrix != 'P')
     {
         mNumUpdatesP += nP;
         mPSampler.update(nP, mMaxThreads);
         if (mFixedMatrix != 'A')
         {
             mASampler.sync(mPSampler, mMaxThreads);
         }
         GAPS_ASSERT(mPSampler.internallyConsistent());
     }
 }
 
 void DenseGapsRunner::setUncertainty(const Matrix &unc, const GapsParameters &params)
 {
     mASampler.setUncertainty(unc, !params.transposeData, !params.subsetGenes, params);
     mPSampler.setUncertainty(unc, params.transposeData, params.subsetGenes, params);
 }
 
 void DenseGapsRunner::setUncertainty(const std::string &unc, const GapsParameters &params)
 {
     mASampler.setUncertainty(unc, !params.transposeData, !params.subsetGenes, params);
     mPSampler.setUncertainty(unc, params.transposeData, params.subsetGenes, params);
 }
 
 ///////////////////// SparseGapsRunner Implementation //////////////////////////
 
 float SparseGapsRunner::chiSq() const
 {
     // doesn't matter which sampler is called
     return mPSampler.chiSq();
 }
 
 float SparseGapsRunner::meanChiSq() const
 {
     // need to pass P sampler (due to configuration of internal data)
     return mStatistics.meanChiSq(mPSampler);
 }
 
 unsigned SparseGapsRunner::nAtoms(char which) const
 {
     return which == 'A' ? mASampler.nAtoms() : mPSampler.nAtoms();
 }
 
 void SparseGapsRunner::setAnnealingTemp(float temp)
 {
     mASampler.setAnnealingTemp(temp);
     mPSampler.setAnnealingTemp(temp);
 }
 
 void SparseGapsRunner::updateStatistics()
 {
     mStatistics.update(mASampler, mPSampler);
 }
 
 Archive& SparseGapsRunner::readSamplers(Archive &ar)
 {
     ar >> mASampler >> mPSampler;
     return ar;
 }
 
 Archive& SparseGapsRunner::writeSamplers(Archive &ar)
 {
     ar << mASampler << mPSampler;
     return ar;
 }
 
 void SparseGapsRunner::updateSampler(unsigned nA, unsigned nP)
 {
     if (mFixedMatrix != 'A')
     {
         mNumUpdatesA += nA;
         mASampler.update(nA, mMaxThreads);
         if (mFixedMatrix != 'P')
         {
             mPSampler.sync(mASampler, mMaxThreads);
         }
         GAPS_ASSERT(mASampler.internallyConsistent());
     }
 
     if (mFixedMatrix != 'P')
     {
         mNumUpdatesP += nP;
         mPSampler.update(nP, mMaxThreads);
         if (mFixedMatrix != 'A')
         {
             mASampler.sync(mPSampler, mMaxThreads);
         }
         GAPS_ASSERT(mPSampler.internallyConsistent());
     }
 }
 
 void SparseGapsRunner::setUncertainty(const Matrix &unc, const GapsParameters &params)
 {
     // nothing happens - SparseGibbsSampler assumes default uncertainty always
 }
 
 void SparseGapsRunner::setUncertainty(const std::string &unc, const GapsParameters &params)
 {
     // nothing happens - SparseGibbsSampler assumes default uncertainty always
 }