Browse code

dense sampler appears to be working

Tom Sherman authored on 15/10/2018 20:52:28
Showing73 changed files

... ...
@@ -45,19 +45,21 @@ static Rcpp::NumericMatrix createRMatrix(const GenericMatrix &mat)
45 45
 
46 46
 ////////// converts R parameters to single GapsParameters struct ///////////////
47 47
 
48
-GapsParameters getGapsParameters(const Rpp::List &allParams, bool isMaster,
48
+template <class DataType>
49
+GapsParameters getGapsParameters(const DataType &data,
50
+const Rcpp::List &allParams, bool isMaster,
49 51
 const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix,
50 52
 const Rcpp::Nullable<Rcpp::IntegerVector> &indices)
51 53
 {
52 54
     // Standard CoGAPS parameters struct
53
-    GapsParameters params;
55
+    GapsParameters params(data);
54 56
 
55 57
     // get configuration parameters
56 58
     params.maxThreads = allParams["nThreads"];
57 59
     params.printMessages = allParams["messages"] && isMaster;
58 60
     params.transposeData = allParams["transposeData"];
59 61
     params.outputFrequency = allParams["outputFrequency"];
60
-    params.checkpointOutFile = allParams["checkpointOutFile"];
62
+    params.checkpointOutFile = Rcpp::as<std::string>(allParams["checkpointOutFile"]);
61 63
     params.checkpointInterval = allParams["checkpointInterval"];
62 64
 
63 65
     // extract model specific parameters from list
... ...
@@ -103,7 +105,6 @@ const Rcpp::Nullable<Rcpp::IntegerVector> &indices)
103 105
     return params;
104 106
 }
105 107
 
106
-
107 108
 ////////// main function that creates a GapsRunner and runs CoGAPS /////////////
108 109
 
109 110
 // note uncertainty matrix gets special treatment since it's the same size as
... ...
@@ -116,11 +117,12 @@ const DataType &uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector> &indices,
116 117
 const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
117 118
 {
118 119
     // convert R parameters to GapsParameters struct
119
-    GapsParameters gapsParams(getGapsParameters(allParams, isMaster,
120
+    GapsParameters gapsParams(getGapsParameters(data, allParams, isMaster,
120 121
         fixedMatrix, indices));
121 122
 
122 123
     // create GapsRunner, note we must first initialize the random generator
123
-    GapsRng::setSeed(params.seed);
124
+    GapsRng::setSeed(gapsParams.seed);
125
+    gaps_printf("Loading Data...");
124 126
     GapsRunner runner(data, gapsParams);
125 127
 
126 128
     // set uncertainty
... ...
@@ -128,6 +130,7 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
128 130
     {
129 131
         runner.setUncertainty(uncertainty, gapsParams);
130 132
     }
133
+    gaps_printf("Done!\n");
131 134
     
132 135
     // run cogaps
133 136
     GapsResult result(runner.run());
... ...
@@ -144,7 +147,7 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
144 147
         Rcpp::Named("Pmean") = createRMatrix(result.Pmean),
145 148
         Rcpp::Named("Asd") = createRMatrix(result.Asd),
146 149
         Rcpp::Named("Psd") = createRMatrix(result.Psd),
147
-        Rcpp::Named("seed") = params.seed,
150
+        Rcpp::Named("seed") = gapsParams.seed,
148 151
         Rcpp::Named("meanChiSq") = result.meanChiSq,
149 152
         Rcpp::Named("geneNames") = allParams["geneNames"],
150 153
         Rcpp::Named("sampleNames") = allParams["sampleNames"],
151 154
new file mode 100644
... ...
@@ -0,0 +1,41 @@
1
+#include "GapsParameters.h"
2
+
3
+void GapsParameters::peekCheckpoint(const std::string &file)
4
+{
5
+    Archive ar(file, ARCHIVE_READ);
6
+    ar >> nPatterns >> seed >> nIterations >> whichFixedMatrix;
7
+}
8
+    
9
+void GapsParameters::calculateDataDimensions(const std::string &file)
10
+{
11
+    FileParser fp(file);
12
+    
13
+    nGenes = transposeData ? fp.nCol() : fp.nRow();
14
+    nSamples = transposeData ? fp.nRow() : fp.nCol();
15
+
16
+    if (subsetData && subsetGenes)
17
+    {
18
+        nGenes = dataIndicesSubset.size();
19
+    }
20
+    
21
+    if (subsetData && !subsetGenes)
22
+    {
23
+        nSamples = dataIndicesSubset.size();
24
+    }
25
+}
26
+
27
+void GapsParameters::calculateDataDimensions(const Matrix &mat)
28
+{
29
+    nGenes = transposeData ? mat.nCol() : mat.nRow();
30
+    nSamples = transposeData ? mat.nRow() : mat.nCol();
31
+
32
+    if (subsetData && subsetGenes)
33
+    {
34
+        nGenes = dataIndicesSubset.size();
35
+    }
36
+    
37
+    if (subsetData && !subsetGenes)
38
+    {
39
+        nSamples = dataIndicesSubset.size();
40
+    }
41
+}
... ...
@@ -1,8 +1,21 @@
1 1
 #ifndef __COGAPS_GAPS_PARAMETERS_H__
2 2
 #define __COGAPS_GAPS_PARAMETERS_H__
3 3
 
4
+#include "data_structures/Matrix.h"
5
+#include "file_parser/FileParser.h"
6
+
7
+#include <vector>
8
+#include <string>
9
+
4 10
 struct GapsParameters
5 11
 {
12
+public:
13
+
14
+    template <class DataType>
15
+    GapsParameters(const DataType &data);
16
+
17
+    void peekCheckpoint(const std::string &file);
18
+
6 19
     Matrix fixedMatrix;
7 20
 
8 21
     std::vector<unsigned> dataIndicesSubset;
... ...
@@ -12,6 +25,8 @@ struct GapsParameters
12 25
 
13 26
     uint32_t seed;
14 27
 
28
+    unsigned nGenes;
29
+    unsigned nSamples;
15 30
     unsigned nPatterns;
16 31
     unsigned nIterations;
17 32
     unsigned maxThreads;
... ...
@@ -31,36 +46,47 @@ struct GapsParameters
31 46
     bool printMessages;
32 47
     bool subsetGenes;
33 48
     bool printThreadUsage;
49
+    bool useSparseOptimization;
34 50
 
35 51
     char whichFixedMatrix;
36 52
 
37
-    GapsParameters() :
38
-        checkpointOutFile("gaps_checkpoint.out"),
39
-        seed(0),
40
-        nIterations(1000),
41
-        maxThreads(1),
42
-        outputFrequency(500),
43
-        checkpointInterval(250),
44
-        alphaA(0.01f),
45
-        alphaP(0.01f),
46
-        maxGibbsMassA(100.f),
47
-        maxGibbsMassP(100.f),
48
-        useFixedMatrix(false),
49
-        subsetData(false),
50
-        useCheckpoint(false),
51
-        transposeData(false),
52
-        singleCell(false),
53
-        printMessages(true),
54
-        subsetGenes(false),
55
-        printThreadUsage(true),
56
-        whichFixedMatrix('N')
57
-    {}
58
-
59
-    void peekCheckpoint(const std::string &file)
60
-    {
61
-        Archive ar(file, ARCHIVE_READ);
62
-        ar >> nPatterns >> seed >> nIterations >> whichFixedMatrix;
63
-    }
53
+private:
54
+
55
+    void calculateDataDimensions(const std::string &file);
56
+    void calculateDataDimensions(const Matrix &mat);
64 57
 };
65 58
 
59
+template <class DataType>
60
+GapsParameters::GapsParameters(const DataType &data)
61
+    :
62
+fixedMatrix(Matrix()),
63
+dataIndicesSubset(std::vector<unsigned>()),
64
+checkpointFile(std::string()),
65
+checkpointOutFile("gaps_checkpoint.out"),
66
+seed(0),
67
+nGenes(0),
68
+nSamples(0),
69
+nPatterns(3),
70
+nIterations(1000),
71
+maxThreads(1),
72
+outputFrequency(500),
73
+checkpointInterval(250),
74
+alphaA(0.01f),
75
+alphaP(0.01f),
76
+maxGibbsMassA(100.f),
77
+maxGibbsMassP(100.f),
78
+useFixedMatrix(false),
79
+subsetData(false),
80
+useCheckPoint(false),
81
+transposeData(false),
82
+singleCell(false),
83
+printMessages(true),
84
+subsetGenes(false),
85
+printThreadUsage(true),
86
+useSparseOptimization(false),
87
+whichFixedMatrix('N')
88
+{
89
+    calculateDataDimensions(data);
90
+}
91
+
66 92
 #endif
67 93
\ No newline at end of file
... ...
@@ -3,6 +3,14 @@
3 3
 #include "GapsStatistics.h"
4 4
 #include "file_parser/FileParser.h"
5 5
 
6
+template <class T>
7
+static std::string to_string(T a)
8
+{
9
+    std::stringstream ss;
10
+    ss << a;
11
+    return ss.str();
12
+}
13
+
6 14
 GapsResult::GapsResult(const GapsStatistics &stat)
7 15
     :
8 16
 Amean(stat.Amean()), Asd(stat.Asd()), Pmean(stat.Pmean()),
... ...
@@ -26,7 +34,7 @@ void GapsResult::writeToFile(const std::string &fullPath)
26 34
 void GapsResult::writeCsv(const std::string &path)
27 35
 {
28 36
     unsigned nPatterns = Amean.nCol();
29
-    std::string label("_" + gaps::to_string(nPatterns) + "_");
37
+    std::string label("_" + to_string(nPatterns) + "_");
30 38
     FileParser::writeToCsv(path + label + "Amean.csv", Amean);
31 39
     FileParser::writeToCsv(path + label + "Pmean.csv", Pmean);
32 40
     FileParser::writeToCsv(path + label + "Asd.csv", Asd);
... ...
@@ -36,7 +44,7 @@ void GapsResult::writeCsv(const std::string &path)
36 44
 void GapsResult::writeTsv(const std::string &path)
37 45
 {
38 46
     unsigned nPatterns = Amean.nCol();
39
-    std::string label("_" + gaps::to_string(nPatterns) + "_");
47
+    std::string label("_" + to_string(nPatterns) + "_");
40 48
     FileParser::writeToCsv(path + label + "Amean.tsv", Amean);
41 49
     FileParser::writeToCsv(path + label + "Pmean.tsv", Pmean);
42 50
     FileParser::writeToCsv(path + label + "Asd.tsv", Asd);
... ...
@@ -46,7 +54,7 @@ void GapsResult::writeTsv(const std::string &path)
46 54
 void GapsResult::writeGct(const std::string &path)
47 55
 {
48 56
     unsigned nPatterns = Amean.nCol();
49
-    std::string label("_" + gaps::to_string(nPatterns) + "_");
57
+    std::string label("_" + to_string(nPatterns) + "_");
50 58
     FileParser::writeToCsv(path + label + "Amean.gct", Amean);
51 59
     FileParser::writeToCsv(path + label + "Pmean.gct", Pmean);
52 60
     FileParser::writeToCsv(path + label + "Asd.gct", Asd);
... ...
@@ -9,10 +9,10 @@
9 9
 
10 10
 struct GapsResult
11 11
 {
12
-    ColMatrix Amean;
13
-    ColMatrix Asd;
14
-    ColMatrix Pmean;
15
-    ColMatrix Psd;
12
+    Matrix Amean;
13
+    Matrix Asd;
14
+    Matrix Pmean;
15
+    Matrix Psd;
16 16
     
17 17
     float meanChiSq;
18 18
     uint32_t seed;
... ...
@@ -1,10 +1,4 @@
1 1
 #include "GapsRunner.h"
2
-#include "math/Algorithms.h"
3
-#include "math/Random.h"
4
-#include "math/SIMD.h"
5
-#include "utils/GapsAssert.h"
6
-#include "utils/GapsPrint.h"
7
-#include "utils/GlobalConfig.h"
8 2
 
9 3
 #ifdef __GAPS_R_BUILD__
10 4
 #include <Rcpp.h>
... ...
@@ -14,8 +8,43 @@
14 8
 #include <omp.h>
15 9
 #endif
16 10
 
11
+///////////////////////////// RAII wrapper /////////////////////////////////////
12
+
13
+GapsRunner::~GapsRunner()
14
+{
15
+    delete mRunner;
16
+}
17
+
17 18
 GapsResult GapsRunner::run()
18 19
 {
20
+    return mRunner->run();
21
+}
22
+
23
+///////////////////////// Abstract Interface ///////////////////////////////////
24
+
25
+AbstractGapsRunner::AbstractGapsRunner(const GapsParameters &params)
26
+    :
27
+mStatistics(params.nGenes, params.nSamples, params.nPatterns),
28
+mCheckpointOutFile(params.checkpointOutFile),
29
+mCurrentIteration(0),
30
+mMaxIterations(params.nIterations),
31
+mMaxThreads(params.maxThreads),
32
+mOutputFrequency(params.outputFrequency),
33
+mCheckpointInterval(params.checkpointInterval),
34
+mNumPatterns(params.nPatterns),
35
+mNumUpdatesA(0),
36
+mNumUpdatesP(0),
37
+mSeed(params.seed),
38
+mPrintMessages(params.printMessages),
39
+mPrintThreadUsage(params.printThreadUsage),
40
+mPhase('C'),
41
+mFixedMatrix(params.whichFixedMatrix)
42
+{}
43
+
44
+GapsResult AbstractGapsRunner::run()
45
+{
46
+    GAPS_ASSERT(mPhase == 'C' || mPhase == 'S');
47
+
19 48
     mStartTime = bpt_now();
20 49
 
21 50
     // calculate appropiate number of threads if compiled with openmp
... ...
@@ -30,7 +59,6 @@ GapsResult GapsRunner::run()
30 59
     #endif
31 60
 
32 61
     // cascade through phases, allows algorithm to be resumed in either phase
33
-    GAPS_ASSERT(mPhase == 'C' || mPhase == 'S');
34 62
     switch (mPhase)
35 63
     {
36 64
         case 'C':
... ...
@@ -51,11 +79,11 @@ GapsResult GapsRunner::run()
51 79
             break;
52 80
     }
53 81
     GapsResult result(mStatistics);
54
-    result.meanChiSq = mStatistics.meanChiSq(mPSampler);
82
+    result.meanChiSq = meanChiSq();
55 83
     return result;    
56 84
 }
57 85
 
58
-void GapsRunner::runOnePhase()
86
+void AbstractGapsRunner::runOnePhase()
59 87
 {
60 88
     for (; mCurrentIteration < mMaxIterations; ++mCurrentIteration)
61 89
     {
... ...
@@ -65,53 +93,27 @@ void GapsRunner::runOnePhase()
65 93
         Rcpp::checkUserInterrupt();
66 94
         #endif
67 95
         
96
+        // set annealing temperature in calibration phase
68 97
         if (mPhase == 'C')
69 98
         {        
70 99
             float temp = static_cast<float>(2 * mCurrentIteration)
71 100
                 / static_cast<float>(mMaxIterations);
72
-            mASampler->setAnnealingTemp(gaps::min(1.f, temp));
73
-            mPSampler->setAnnealingTemp(gaps::min(1.f, temp));
101
+            setAnnealingTemp(gaps::min(1.f, temp));
74 102
         }
75 103
     
76 104
         // number of updates per iteration is poisson 
77
-        unsigned nA = mRng.poisson(gaps::max(mASampler->nAtoms(), 10));
78
-        unsigned nP = mRng.poisson(gaps::max(mPSampler->nAtoms(), 10));
105
+        unsigned nA = mRng.poisson(gaps::max(nAtoms('A'), 10));
106
+        unsigned nP = mRng.poisson(gaps::max(nAtoms('P'), 10));
79 107
         updateSampler(nA, nP);
80 108
 
81 109
         if (mPhase == 'S')
82 110
         {
83
-            mStatistics.update(mASampler, mPSampler);
111
+            updateStatistics();
84 112
         }
85
-
86 113
         displayStatus();
87 114
     }
88 115
 }
89 116
 
90
-void GapsRunner::updateSampler(unsigned nA, unsigned nP)
91
-{
92
-    if (mFixedMatrix != 'A')
93
-    {
94
-        mNumUpdatesA += nA;
95
-        mASampler->update(nA, mMaxThreads);
96
-        if (mFixedMatrix != 'P')
97
-        {
98
-            mPSampler->sync(mASampler, mMaxThreads);
99
-        }
100
-        GAPS_ASSERT(mASampler->internallyConsistent());
101
-    }
102
-
103
-    if (mFixedMatrix != 'P')
104
-    {
105
-        mNumUpdatesP += nP;
106
-        mPSampler->update(nP, mMaxThreads);
107
-        if (mFixedMatrix != 'A')
108
-        {
109
-            mASampler->sync(mPSampler, mMaxThreads);
110
-        }
111
-        GAPS_ASSERT(mPSampler->internallyConsistent());
112
-    }
113
-}
114
-
115 117
 // sum coef * log(i) for i = 1 to total, fit coef from number of atoms
116 118
 // approximates sum of number of atoms (stirling approx to factorial)
117 119
 // this should be proportional to total number of updates
... ...
@@ -122,11 +124,12 @@ static double estimatedNumUpdates(double current, double total, float nAtoms)
122 124
         total * coef * std::log(total) - total * coef;
123 125
 }
124 126
 
125
-double GapsRunner::estimatedPercentComplete() const
127
+
128
+double AbstractGapsRunner::estimatedPercentComplete() const
126 129
 {
127 130
     double nIter = static_cast<double>(mCurrentIteration);
128
-    double nAtomsA = static_cast<double>(mASampler->nAtoms());
129
-    double nAtomsP = static_cast<double>(mPSampler->nAtoms());
131
+    double nAtomsA = static_cast<double>(nAtoms('A'));
132
+    double nAtomsP = static_cast<double>(nAtoms('P'));
130 133
     
131 134
     if (mPhase == 'S')
132 135
     {
... ...
@@ -144,7 +147,7 @@ double GapsRunner::estimatedPercentComplete() const
144 147
     return estimatedCompleted / estimatedTotal;
145 148
 }
146 149
 
147
-void GapsRunner::displayStatus()
150
+void AbstractGapsRunner::displayStatus()
148 151
 {
149 152
     if (mPrintMessages && mOutputFrequency > 0 && ((mCurrentIteration + 1) % mOutputFrequency) == 0)
150 153
     {
... ...
@@ -166,14 +169,14 @@ void GapsRunner::displayStatus()
166 169
         totalSeconds -= totalMinutes * 60;
167 170
 
168 171
         gaps_printf("%d of %d, Atoms: %lu(%lu), ChiSq: %.0f, Time: %02d:%02d:%02d / %02d:%02d:%02d\n",
169
-            mCurrentIteration + 1, mMaxIterations, mASampler->nAtoms(),
170
-            mPSampler->nAtoms(), mPSampler->chi2(), elapsedHours, elapsedMinutes,
172
+            mCurrentIteration + 1, mMaxIterations, nAtoms('A'),
173
+            nAtoms('P'), chiSq(), elapsedHours, elapsedMinutes,
171 174
             elapsedSeconds, totalHours, totalMinutes, totalSeconds);
172 175
         gaps_flush();
173 176
     }
174 177
 }
175 178
 
176
-void GapsRunner::createCheckpoint()
179
+void AbstractGapsRunner::createCheckpoint()
177 180
 {
178 181
     if (mCheckpointInterval > 0 && ((mCurrentIteration + 1) % mCheckpointInterval) == 0)
179 182
     {
... ...
@@ -183,11 +186,167 @@ void GapsRunner::createCheckpoint()
183 186
         // create checkpoint file
184 187
         Archive ar(mCheckpointOutFile, ARCHIVE_WRITE);
185 188
         ar << mNumPatterns << mSeed << mMaxIterations << mFixedMatrix << mPhase
186
-            << mCurrentIteration << mNumUpdatesA << mNumUpdatesP << mRng
187
-            << *mASampler << *mPSampler;
189
+            << mCurrentIteration << mNumUpdatesA << mNumUpdatesP << mRng;
190
+        writeSamplers(ar);
188 191
         GapsRng::save(ar);
189 192
 
190 193
         // delete backup file
191 194
         std::remove((mCheckpointOutFile + ".backup").c_str());
192 195
     }
193 196
 }
197
+
198
+///////////////////// DenseGapsRunner Implementation ///////////////////////////
199
+
200
+float DenseGapsRunner::chiSq() const
201
+{
202
+    // doesn't matter which sampler is called
203
+    return mPSampler.chiSq();
204
+}
205
+
206
+float DenseGapsRunner::meanChiSq() const
207
+{
208
+    // need to pass P sampler (due to configuration of internal data)
209
+    return mStatistics.meanChiSq(mPSampler);
210
+}
211
+
212
+unsigned DenseGapsRunner::nAtoms(char which) const
213
+{
214
+    return which == 'A' ? mASampler.nAtoms() : mPSampler.nAtoms();
215
+}
216
+
217
+void DenseGapsRunner::setAnnealingTemp(float temp)
218
+{
219
+    mASampler.setAnnealingTemp(temp);
220
+    mPSampler.setAnnealingTemp(temp);
221
+}
222
+
223
+void DenseGapsRunner::updateStatistics()
224
+{
225
+    mStatistics.update(mASampler, mPSampler);
226
+}
227
+
228
+Archive& DenseGapsRunner::readSamplers(Archive &ar)
229
+{
230
+    ar >> mASampler >> mPSampler;
231
+    return ar;
232
+}
233
+
234
+Archive& DenseGapsRunner::writeSamplers(Archive &ar)
235
+{
236
+    ar << mASampler << mPSampler;
237
+    return ar;
238
+}
239
+
240
+void DenseGapsRunner::updateSampler(unsigned nA, unsigned nP)
241
+{
242
+    if (mFixedMatrix != 'A')
243
+    {
244
+        mNumUpdatesA += nA;
245
+        mASampler.update(nA, mMaxThreads);
246
+        if (mFixedMatrix != 'P')
247
+        {
248
+            mPSampler.sync(mASampler, mMaxThreads);
249
+        }
250
+        GAPS_ASSERT(mASampler.internallyConsistent());
251
+    }
252
+
253
+    if (mFixedMatrix != 'P')
254
+    {
255
+        mNumUpdatesP += nP;
256
+        mPSampler.update(nP, mMaxThreads);
257
+        if (mFixedMatrix != 'A')
258
+        {
259
+            mASampler.sync(mPSampler, mMaxThreads);
260
+        }
261
+        GAPS_ASSERT(mPSampler.internallyConsistent());
262
+    }
263
+}
264
+
265
+void DenseGapsRunner::setUncertainty(const Matrix &unc, const GapsParameters &params)
266
+{
267
+    mASampler.setUncertainty(unc, !params.transposeData, !params.subsetGenes, params);
268
+    mPSampler.setUncertainty(unc, params.transposeData, params.subsetGenes, params);
269
+}
270
+
271
+void DenseGapsRunner::setUncertainty(const std::string &unc, const GapsParameters &params)
272
+{
273
+    mASampler.setUncertainty(unc, !params.transposeData, !params.subsetGenes, params);
274
+    mPSampler.setUncertainty(unc, params.transposeData, params.subsetGenes, params);
275
+}
276
+
277
+///////////////////// SparseGapsRunner Implementation //////////////////////////
278
+
279
+float SparseGapsRunner::chiSq() const
280
+{
281
+    // doesn't matter which sampler is called
282
+    return mPSampler.chiSq();
283
+}
284
+
285
+float SparseGapsRunner::meanChiSq() const
286
+{
287
+    // need to pass P sampler (due to configuration of internal data)
288
+    return mStatistics.meanChiSq(mPSampler);
289
+}
290
+
291
+unsigned SparseGapsRunner::nAtoms(char which) const
292
+{
293
+    return which == 'A' ? mASampler.nAtoms() : mPSampler.nAtoms();
294
+}
295
+
296
+void SparseGapsRunner::setAnnealingTemp(float temp)
297
+{
298
+    mASampler.setAnnealingTemp(temp);
299
+    mPSampler.setAnnealingTemp(temp);
300
+}
301
+
302
+void SparseGapsRunner::updateStatistics()
303
+{
304
+    mStatistics.update(mASampler, mPSampler);
305
+}
306
+
307
+Archive& SparseGapsRunner::readSamplers(Archive &ar)
308
+{
309
+    ar >> mASampler >> mPSampler;
310
+    return ar;
311
+}
312
+
313
+Archive& SparseGapsRunner::writeSamplers(Archive &ar)
314
+{
315
+    ar << mASampler << mPSampler;
316
+    return ar;
317
+}
318
+
319
+void SparseGapsRunner::updateSampler(unsigned nA, unsigned nP)
320
+{
321
+    if (mFixedMatrix != 'A')
322
+    {
323
+        mNumUpdatesA += nA;
324
+        mASampler.update(nA, mMaxThreads);
325
+        if (mFixedMatrix != 'P')
326
+        {
327
+            mPSampler.sync(mASampler, mMaxThreads);
328
+        }
329
+        GAPS_ASSERT(mASampler.internallyConsistent());
330
+    }
331
+
332
+    if (mFixedMatrix != 'P')
333
+    {
334
+        mNumUpdatesP += nP;
335
+        mPSampler.update(nP, mMaxThreads);
336
+        if (mFixedMatrix != 'A')
337
+        {
338
+            mASampler.sync(mPSampler, mMaxThreads);
339
+        }
340
+        GAPS_ASSERT(mPSampler.internallyConsistent());
341
+    }
342
+}
343
+
344
+void SparseGapsRunner::setUncertainty(const Matrix &unc, const GapsParameters &params)
345
+{
346
+    // nothing happens - SparseGibbsSampler assumes default uncertainty always
347
+}
348
+
349
+void SparseGapsRunner::setUncertainty(const std::string &unc, const GapsParameters &params)
350
+{
351
+    // nothing happens - SparseGibbsSampler assumes default uncertainty always
352
+}
... ...
@@ -4,13 +4,27 @@
4 4
 #include "GapsParameters.h"
5 5
 #include "GapsResult.h"
6 6
 #include "GapsStatistics.h"
7
-#include "GibbsSampler.h"
7
+#include "gibbs_sampler/GibbsSampler.h"
8
+#include "gibbs_sampler/DenseGibbsSampler.h"
9
+#include "gibbs_sampler/SparseGibbsSampler.h"
10
+
11
+#include <string>
8 12
 
9 13
 // boost time helpers
10 14
 #include <boost/date_time/posix_time/posix_time.hpp>
11 15
 namespace bpt = boost::posix_time;
12 16
 #define bpt_now() bpt::microsec_clock::local_time()
13 17
 
18
+// forward declarations
19
+class AbstractGapsRunner;
20
+
21
+///////////////////////////// RAII wrapper /////////////////////////////////////
22
+
23
+// This is the class that is exposed to the top-level CoGAPS routine - all 
24
+// aspects of CoGAPS can be managed through this class. The class itself is
25
+// just a lightweight wrapper around an abstract interface, which allows for
26
+// multiple types of GapsRunner to be declared. Which implementation is used
27
+// depends on the parameters passed to the GapsRunner constructor.
14 28
 class GapsRunner
15 29
 {
16 30
 public:
... ...
@@ -18,15 +32,44 @@ public:
18 32
     template <class DataType>
19 33
     GapsRunner(const DataType &data, const GapsParameters &params);
20 34
 
35
+    ~GapsRunner();
36
+
21 37
     template <class DataType>
22 38
     void setUncertainty(const DataType &unc, const GapsParameters &params);
23 39
 
24 40
     GapsResult run();
25 41
 
26 42
 private:
27
-    
28
-    GibbsSampler *mASampler;
29
-    GibbsSampler *mPSampler;
43
+
44
+    AbstractGapsRunner *mRunner;
45
+
46
+    GapsRunner(const GapsRunner &p); // don't allow copies
47
+    GapsRunner& operator=(const GapsRunner &p); // don't allow copies    
48
+};
49
+
50
+///////////////////////// Abstract Interface ///////////////////////////////////
51
+
52
+// This class is the abstract interface that any implementation of GapsRunner
53
+// must satisfy. It provides a factory method that will create the appropiate
54
+// derived class depending on the parameters passed in.
55
+class AbstractGapsRunner
56
+{
57
+public:
58
+
59
+    AbstractGapsRunner(const GapsParameters &params);
60
+    virtual ~AbstractGapsRunner() {}
61
+
62
+    template <class DataType>
63
+    static AbstractGapsRunner* create(const DataType &data, const GapsParameters &params);
64
+
65
+    // can't use template with virtual function
66
+    virtual void setUncertainty(const Matrix &unc, const GapsParameters &params) = 0;
67
+    virtual void setUncertainty(const std::string &unc, const GapsParameters &params) = 0;
68
+
69
+    GapsResult run();
70
+
71
+protected:
72
+
30 73
     GapsStatistics mStatistics;
31 74
 
32 75
     mutable GapsRng mRng;
... ...
@@ -52,40 +95,119 @@ private:
52 95
     char mFixedMatrix;
53 96
         
54 97
     void runOnePhase();
55
-    void updateSampler(unsigned nA, unsigned nP);
56 98
     double estimatedPercentComplete() const;
57 99
     void displayStatus();
58 100
     void createCheckpoint();
101
+
102
+    virtual float chiSq() const = 0;
103
+    virtual float meanChiSq() const = 0;
104
+    virtual unsigned nAtoms(char which) const = 0;
105
+    virtual void setAnnealingTemp(float temp) = 0;
106
+    virtual void updateStatistics() = 0;
107
+    virtual Archive& readSamplers(Archive &ar) = 0;
108
+    virtual Archive& writeSamplers(Archive &ar) = 0;
109
+    virtual void updateSampler(unsigned nA, unsigned nP) = 0;
110
+};
111
+
112
+///////////////////// GapsRunner Implementations ///////////////////////////////
113
+
114
+// This implementation uses a DenseGibbsSampler internally
115
+class DenseGapsRunner : public AbstractGapsRunner
116
+{
117
+public:
118
+
119
+    ~DenseGapsRunner() {}
120
+
121
+    template <class DataType>
122
+    DenseGapsRunner(const DataType &data, const GapsParameters &params);
123
+
124
+    void setUncertainty(const Matrix &unc, const GapsParameters &params);
125
+    void setUncertainty(const std::string &unc, const GapsParameters &params);
126
+
127
+private:
128
+
129
+    DenseGibbsSampler mASampler;
130
+    DenseGibbsSampler mPSampler;
131
+
132
+    float chiSq() const;
133
+    float meanChiSq() const;
134
+    unsigned nAtoms(char which) const;
135
+    void setAnnealingTemp(float temp);
136
+    void updateStatistics();
137
+    Archive& readSamplers(Archive &ar);
138
+    Archive& writeSamplers(Archive &ar);
139
+    void updateSampler(unsigned nA, unsigned nP);
140
+};
141
+
142
+// This implementation uses a SparseGibbsSampler internally
143
+class SparseGapsRunner : public AbstractGapsRunner
144
+{
145
+public:
146
+
147
+    ~SparseGapsRunner() {}
148
+
149
+    template <class DataType>
150
+    SparseGapsRunner(const DataType &data, const GapsParameters &params);
151
+
152
+    void setUncertainty(const Matrix &unc, const GapsParameters &params);
153
+    void setUncertainty(const std::string &unc, const GapsParameters &params);
154
+
155
+private:
156
+
157
+    SparseGibbsSampler mASampler;
158
+    SparseGibbsSampler mPSampler;
159
+
160
+    float chiSq() const;
161
+    float meanChiSq() const;
162
+    unsigned nAtoms(char which) const;
163
+    void setAnnealingTemp(float temp);
164
+    void updateStatistics();
165
+    Archive& readSamplers(Archive &ar);
166
+    Archive& writeSamplers(Archive &ar);
167
+    void updateSampler(unsigned nA, unsigned nP);
59 168
 };
60 169
 
170
+/////////////////////// GapsRunner - templated functions ///////////////////////
171
+
61 172
 template <class DataType>
62 173
 GapsRunner::GapsRunner(const DataType &data, const GapsParameters &params)
63
-    :
64
-mASampler(new DenseGibbsSampler(data, !params.transposeData, params.nPatterns, params.subsetGenes, params.dataIndicesSubset)),
65
-mPSampler(new DenseGibbsSampler(data, params.transposeData, params.nPatterns, params.subsetGenes, params.dataIndicesSubset)),
66
-mStatistics(mPSampler->dataRows(), mPSampler->dataCols(), params.nPatterns),
67
-mCheckpointOutFile(params.checkpointOutFile),
68
-mMaxIterations(params.nIterations),
69
-mMaxThreads(params.mMaxThreads),
70
-mOutputFrequency(params.mOutputFrequency),
71
-mCheckpointInterval(params.mCheckpointInterval),
72
-mNumPatterns(params.nPatterns),
73
-mNumUpdatesA(0),
74
-mNumUpdatesP(0),
75
-mSeed(params.seed),
76
-mPrintMessages(params.printMessages),
77
-mPrintThreadUsage(params.printThreadUsage),
78
-mPhase('C'),
79
-mFixedMatrix(params.whichFixedMatrix)
174
+    : mRunner(AbstractGapsRunner::create(data, params))
175
+{}
176
+
177
+template <class DataType>
178
+void GapsRunner::setUncertainty(const DataType &unc, const GapsParameters &params)
179
+{
180
+    mRunner->setUncertainty(unc, params);
181
+}
182
+
183
+/////////////////// AbstractGapsRunner - templated functions ///////////////////
184
+
185
+template <class DataType>
186
+AbstractGapsRunner* AbstractGapsRunner::create(const DataType &data,
187
+const GapsParameters &params)
80 188
 {
81
-    mASampler->setSparsity(params.alphaA, params.maxGibbsMassA, params.singleCell);
82
-    mPSampler->setSparsity(params.alphaP, params.maxGibbsMassP, params.singleCell);
189
+    if (params.useSparseOptimization)
190
+    {
191
+        return new SparseGapsRunner(data, params);
192
+    }
193
+    return new DenseGapsRunner(data, params);
194
+}
195
+
196
+//////////////////// DenseGapsRunner - templated functions /////////////////////
83 197
 
198
+template <class DataType>
199
+DenseGapsRunner::DenseGapsRunner(const DataType &data,
200
+const GapsParameters &params)
201
+    :
202
+AbstractGapsRunner(params),
203
+mASampler(data, !params.transposeData, !params.subsetGenes, params.alphaA, params.maxGibbsMassA, params),
204
+mPSampler(data, params.transposeData, params.subsetGenes, params.alphaP, params.maxGibbsMassP, params)
205
+{
84 206
     switch (mFixedMatrix)
85 207
     {
86
-        case 'A' : mASampler->setMatrix(params.fixedMatrix); break;
87
-        case 'P' : mPSampler->setMatrix(params.fixedMatrix); break;
88
-        default: break;
208
+        case 'A' : mASampler.setMatrix(params.fixedMatrix); break;
209
+        case 'P' : mPSampler.setMatrix(params.fixedMatrix); break;
210
+        default: break; // 'N' for none
89 211
     }
90 212
 
91 213
     // overwrite with info from checkpoint file
... ...
@@ -93,24 +215,51 @@ mFixedMatrix(params.whichFixedMatrix)
93 215
     {
94 216
         Archive ar(params.checkpointFile, ARCHIVE_READ);
95 217
         ar >> mNumPatterns >> mSeed >> mMaxIterations >> mFixedMatrix >> mPhase
96
-            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng
97
-            >> *mASampler >> *mPSampler;
218
+            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng;
219
+        readSamplers(ar);
98 220
         GapsRng::load(ar);
99 221
     }
100 222
 
101
-    mASampler->sync(mPSampler);
102
-    mPSampler->sync(mASampler);
103
-    mASampler->recalculateAPMatrix();
104
-    mPSampler->recalculateAPMatrix();
223
+    mASampler.sync(mPSampler);
224
+    mPSampler.sync(mASampler);
225
+
226
+    // AP matrix not stored in checkpoint
227
+    if (params.useCheckPoint)
228
+    {
229
+        mASampler.recalculateAPMatrix();
230
+        mPSampler.recalculateAPMatrix();
231
+    }
105 232
 }
106 233
 
234
+//////////////////// SparseGapsRunner - templated functions ////////////////////
235
+
107 236
 template <class DataType>
108
-void GapsRunner::setUncertainty(const DataType &unc, const GapsParameters &params)
237
+SparseGapsRunner::SparseGapsRunner(const DataType &data,
238
+const GapsParameters &params)
239
+    :
240
+AbstractGapsRunner(params),
241
+mASampler(data, !params.transposeData, !params.subsetGenes, params.alphaA, params.maxGibbsMassA, params),
242
+mPSampler(data, params.transposeData, params.subsetGenes, params.alphaP, params.maxGibbsMassP, params)
109 243
 {
110
-    mASampler->setUncertainty(unc, !params.transposeData, params.nPatterns,
111
-        params.subsetGenes, params.dataIndicesSubset);
112
-    mPSampler->setUncertainty(unc, params.transposeData, params.nPatterns,
113
-        params.subsetGenes, params.dataIndicesSubset);
244
+    switch (mFixedMatrix)
245
+    {
246
+        case 'A' : mASampler.setMatrix(params.fixedMatrix); break;
247
+        case 'P' : mPSampler.setMatrix(params.fixedMatrix); break;
248
+        default: break;
249
+    }
250
+
251
+    // overwrite with info from checkpoint file
252
+    if (params.useCheckPoint)
253
+    {
254
+        Archive ar(params.checkpointFile, ARCHIVE_READ);
255
+        ar >> mNumPatterns >> mSeed >> mMaxIterations >> mFixedMatrix >> mPhase
256
+            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng;
257
+        readSamplers(ar);
258
+        GapsRng::load(ar);
259
+    }
260
+
261
+    mASampler.sync(mPSampler);
262
+    mPSampler.sync(mASampler);
114 263
 }
115 264
 
116
-#endif // __COGAPS_GAPS_RUNNER_H__
117 265
\ No newline at end of file
266
+#endif // __COGAPS_GAPS_RUNNER_H__
... ...
@@ -1,5 +1,5 @@
1 1
 #include "GapsStatistics.h"
2
-#include "math/Algorithms.h"
2
+#include "math/Math.h"
3 3
 
4 4
 GapsStatistics::GapsStatistics(unsigned nRow, unsigned nCol, unsigned nPatterns)
5 5
     :
... ...
@@ -8,56 +8,72 @@ mPMeanMatrix(nCol, nPatterns), mPStdMatrix(nCol, nPatterns),
8 8
 mStatUpdates(0), mNumPatterns(nPatterns)
9 9
 {}
10 10
 
11
-void GapsStatistics::update(const GibbsSampler &ASampler,
12
-const GibbsSampler &PSampler)
11
+Matrix GapsStatistics::Amean() const
13 12
 {
14
-    mStatUpdates++;
15
-
16
-    // update     
17
-    for (unsigned j = 0; j < mNumPatterns; ++j)
18
-    {
19
-        float norm = gaps::algo::max(PSampler.mMatrix.getCol(j));
20
-        norm = norm == 0.f ? 1.f : norm;
21
-
22
-        Vector quot(PSampler.mMatrix.getCol(j) / norm);
23
-        mPMeanMatrix.getCol(j) += quot;
24
-        mPStdMatrix.getCol(j) += gaps::algo::elementSq(quot);
25
-
26
-        Vector prod(ASampler.mMatrix.getCol(j) * norm);
27
-        mAMeanMatrix.getCol(j) += prod;
28
-        mAStdMatrix.getCol(j) += gaps::algo::elementSq(prod); // precision loss? use double?
29
-    }
13
+    return mAMeanMatrix / static_cast<float>(mStatUpdates);
30 14
 }
31 15
 
32
-ColMatrix GapsStatistics::Amean() const
16
+Matrix GapsStatistics::Asd() const
33 17
 {
34
-    return mAMeanMatrix / mStatUpdates;
18
+    Matrix mat(mAStdMatrix.nRow(), mAStdMatrix.nCol());
19
+    for (unsigned i = 0; i < mat.nRow(); ++i)
20
+    {
21
+        for (unsigned j = 0; j < mat.nCol(); ++j)
22
+        {
23
+            float meanTerm = GAPS_SQ(mAMeanMatrix(i,j)) / static_cast<float>(mStatUpdates);
24
+            float numer = gaps::max(0.f, mAStdMatrix(i,j) - meanTerm);
25
+            mat(i,j) = std::sqrt(numer / (static_cast<float>(mStatUpdates) - 1.f));
26
+        }
27
+    }
28
+    return mat;
35 29
 }
36 30
 
37
-ColMatrix GapsStatistics::Asd() const
31
+Matrix GapsStatistics::Pmean() const
38 32
 {
39
-    return gaps::algo::computeStdDev(mAStdMatrix, mAMeanMatrix,
40
-        mStatUpdates);
33
+    return mPMeanMatrix / static_cast<float>(mStatUpdates);
41 34
 }
42 35
 
43
-ColMatrix GapsStatistics::Pmean() const
36
+Matrix GapsStatistics::Psd() const
44 37
 {
45
-    return mPMeanMatrix / mStatUpdates;
38
+    Matrix mat(mPStdMatrix.nRow(), mPStdMatrix.nCol());
39
+    for (unsigned i = 0; i < mat.nRow(); ++i)
40
+    {
41
+        for (unsigned j = 0; j < mat.nCol(); ++j)
42
+        {
43
+            float meanTerm = GAPS_SQ(mPMeanMatrix(i,j)) / static_cast<float>(mStatUpdates);
44
+            float numer = gaps::max(0.f, mPStdMatrix(i,j) - meanTerm);
45
+            mat(i,j) = std::sqrt(numer / (static_cast<float>(mStatUpdates) - 1.f));
46
+        }
47
+    }
48
+    return mat;
46 49
 }
47 50
 
48
-ColMatrix GapsStatistics::Psd() const
51
+float GapsStatistics::meanChiSq(const DenseGibbsSampler &PSampler) const
49 52
 {
50
-    return gaps::algo::computeStdDev(mPStdMatrix, mPMeanMatrix,
51
-        mStatUpdates);
53
+    Matrix A(mAMeanMatrix / static_cast<float>(mStatUpdates));
54
+    Matrix P(mPMeanMatrix / static_cast<float>(mStatUpdates));
55
+    
56
+    float chisq = 0.f;
57
+    for (unsigned i = 0; i < PSampler.mDMatrix.nRow(); ++i)
58
+    {
59
+        for (unsigned j = 0; j < PSampler.mDMatrix.nCol(); ++j)
60
+        {
61
+            float m = 0.f;
62
+            for (unsigned k = 0; k < A.nCol(); ++k)
63
+            {
64
+                m += A(i,k) * P(j,k);
65
+            }
66
+            chisq += GAPS_SQ((PSampler.mDMatrix(i,j) - m) / PSampler.mSMatrix(i,j));
67
+        }
68
+    }
69
+    return chisq;
52 70
 }
53 71
 
54
-float GapsStatistics::meanChiSq(const GibbsSampler &PSampler) const
72
+float GapsStatistics::meanChiSq(const SparseGibbsSampler &PSampler) const
55 73
 {
56
-    ColMatrix A = mAMeanMatrix / mStatUpdates;
57
-    ColMatrix P = mPMeanMatrix / mStatUpdates;
58
-    ColMatrix M(gaps::algo::matrixMultiplication(A, P));
59
-    return 2.f * gaps::algo::loglikelihood(PSampler.mDMatrix, PSampler.mSMatrix,
60
-        M);
74
+    Matrix A(mAMeanMatrix / static_cast<float>(mStatUpdates));
75
+    Matrix P(mPMeanMatrix / static_cast<float>(mStatUpdates));
76
+    return 0.f; // TODO
61 77
 }
62 78
 
63 79
 Archive& operator<<(Archive &ar, GapsStatistics &stat)
... ...
@@ -1,8 +1,14 @@
1 1
 #ifndef __COGAPS_GAPS_STATISTICS_H__
2 2
 #define __COGAPS_GAPS_STATISTICS_H__
3 3
 
4
-#include "GibbsSampler.h"
4
+#include "math/Math.h"
5
+#include "math/MatrixMath.h"
6
+#include "math/VectorMath.h"
5 7
 #include "data_structures/Matrix.h"
8
+#include "gibbs_sampler/DenseGibbsSampler.h"
9
+#include "gibbs_sampler/SparseGibbsSampler.h"
10
+
11
+#define GAPS_SQ(x) ((x) * (x))
6 12
 
7 13
 class GapsStatistics
8 14
 {
... ...
@@ -18,20 +24,43 @@ private:
18 24
 
19 25
 public:
20 26
 
21
-    GapsStatistics(unsigned nRow, unsigned nCol, unsigned nPatterns);
27
+    GapsStatistics(unsigned nGenes, unsigned nSamples, unsigned nPatterns);
22 28
 
23
-    void update(const GibbsSampler *ASampler, const GibbsSampler *PSampler);
29
+    template <class Sampler>
30
+    void update(const Sampler &ASampler, const Sampler &PSampler);
24 31
 
25 32
     Matrix Amean() const;
26 33
     Matrix Pmean() const;
27 34
     Matrix Asd() const;
28 35
     Matrix Psd() const;
29
-
30
-    float meanChiSq(const GibbsSampler *PSampler) const;
36
+    
37
+    float meanChiSq(const DenseGibbsSampler &PSampler) const;
38
+    float meanChiSq(const SparseGibbsSampler &PSampler) const;
31 39
 
32 40
     // serialization
33 41
     friend Archive& operator<<(Archive &ar, GapsStatistics &stat);
34 42
     friend Archive& operator>>(Archive &ar, GapsStatistics &stat);
35 43
 };
36 44
 
45
+template <class Sampler>
46
+void GapsStatistics::update(const Sampler &ASampler, const Sampler &PSampler)
47
+{
48
+    mStatUpdates++;
49
+
50
+    // update     
51
+    for (unsigned j = 0; j < mNumPatterns; ++j)
52
+    {
53
+        float norm = gaps::max(PSampler.mMatrix.getCol(j));
54
+        norm = norm == 0.f ? 1.f : norm;
55
+
56
+        Vector quot(PSampler.mMatrix.getCol(j) / norm);
57
+        mPMeanMatrix.getCol(j) += quot;
58
+        mPStdMatrix.getCol(j) += gaps::elementSq(quot);
59
+
60
+        Vector prod(ASampler.mMatrix.getCol(j) * norm);
61
+        mAMeanMatrix.getCol(j) += prod;
62
+        mAStdMatrix.getCol(j) += gaps::elementSq(prod); // precision loss? use double?
63
+    }
64
+}
65
+
37 66
 #endif
38 67
\ No newline at end of file
39 68
deleted file mode 100644
... ...
@@ -1,392 +0,0 @@
1
-#include "GibbsSampler.h"
2
-#include "math/Algorithms.h"
3
-#include "math/SIMD.h"
4
-
5
-static float getDeltaLL(AlphaParameters alpha, float mass)
6
-{
7
-    return mass * (alpha.s_mu - alpha.s * mass / 2.f);
8
-}
9
-
10
-static OptionalFloat gibbsMass(AlphaParameters alpha, float a, float b,
11
-float lambda, GapsRng *rng)
12
-{
13
-    if (alpha.s > gaps::epsilon)
14
-    {
15
-        float mean = (alpha.s_mu - lambda) / alpha.s;
16
-        float sd = 1.f / std::sqrt(alpha.s);
17
-        return rng->truncNormal(a, b, mean, sd);
18
-    }
19
-    return OptionalFloat();
20
-}
21
-
22
-unsigned GibbsSampler::dataRows() const
23
-{
24
-    return mDMatrix.nRow();
25
-}
26
-
27
-unsigned GibbsSampler::dataCols() const
28
-{
29
-    return mDMatrix.nCol();
30
-}
31
-
32
-void GibbsSampler::setSparsity(float alpha, float maxGibbsMass, bool singleCell)
33
-{
34
-    float meanD = singleCell ? gaps::algo::nonZeroMean(mDMatrix) :
35
-        gaps::algo::mean(mDMatrix);
36
-
37
-    mAlpha = alpha;
38
-    mLambda = alpha * std::sqrt(mNumPatterns / meanD);
39
-    mQueue.setAlpha(alpha);
40
-    mQueue.setLambda(mLambda);
41
-    mMaxGibbsMass = maxGibbsMass / mLambda;
42
-}
43
-
44
-void GibbsSampler::setAnnealingTemp(float temp)
45
-{
46
-    mAnnealingTemp = temp;
47
-}
48
-
49
-void GibbsSampler::setMatrix(const Matrix &mat)
50
-{   
51
-    GAPS_ASSERT(mat.nRow() == mMatrix.nRow());
52
-    GAPS_ASSERT(mat.nCol() == mMatrix.nCol());
53
-
54
-    mMatrix = mat;
55
-}
56
-
57
-float GibbsSampler::chi2() const
58
-{   
59
-    return 2.f * gaps::algo::loglikelihood(mDMatrix, mSMatrix, mAPMatrix);
60
-}
61
-  
62
-uint64_t GibbsSampler::nAtoms() const
63
-{   
64
-    return mDomain.size();
65
-}
66
-
67
-void GibbsSampler::recalculateAPMatrix()
68
-{
69
-    mAPMatrix = gaps::algo::matrixMultiplication(*mOtherMatrix, mMatrix);
70
-}
71
-
72
-void GibbsSampler::sync(const GibbsSampler &sampler, unsigned nThreads)
73
-{
74
-    mOtherMatrix = &(sampler.mMatrix);
75
-    gaps::algo::copyTranspose(&mAPMatrix, sampler.mAPMatrix, nThreads);
76
-}
77
-
78
-void GibbsSampler::update(unsigned nSteps, unsigned nCores)
79
-{
80
-    unsigned n = 0;
81
-    while (n < nSteps)
82
-    {
83
-        // populate queue, prepare domain for this queue
84
-        mQueue.populate(mDomain, nSteps - n);
85
-        n += mQueue.size();
86
-        
87
-        // process all proposed updates
88
-        #pragma omp parallel for num_threads(nCores)
89
-        for (unsigned i = 0; i < mQueue.size(); ++i)
90
-        {
91
-            processProposal(mQueue[i]);
92
-        }
93
-        mQueue.clear();
94
-    }
95
-
96
-    GAPS_ASSERT(internallyConsistent());
97
-    GAPS_ASSERT(mDomain.isSorted());
98
-}
99
-
100
-void GibbsSampler::processProposal(const AtomicProposal &prop)
101
-{
102
-    switch (prop.type)
103
-    {
104
-        case 'B':
105
-            birth(prop);
106
-            break;
107
-        case 'D':
108
-            death(prop);
109
-            break;
110
-        case 'M':
111
-            move(prop);
112
-            break;
113
-        case 'E':
114
-            exchange(prop);
115
-            break;
116
-    }
117
-}
118
-
119
-// add an atom at a random position, calculate mass either with an
120
-// exponential distribution or with the gibbs mass distribution
121
-void GibbsSampler::birth(const AtomicProposal &prop)
122
-{
123
-    // try to get mass using gibbs, resort to exponential if needed
124
-    OptionalFloat mass = canUseGibbs(prop.c1)
125
-        ? gibbsMass(alphaParameters(prop.r1, prop.c1) * mAnnealingTemp, 0.f,
126
-            mMaxGibbsMass, mLambda, &(prop.rng))
127
-        : prop.rng.exponential(mLambda);
128
-
129
-    // accept mass as long as gibbs succeded or it's non-zero
130
-    if (mass.hasValue() && mass.value() >= gaps::epsilon)
131
-    {
132
-        mQueue.acceptBirth();
133
-        prop.atom1->mass = mass.value();
134
-        changeMatrix(prop.r1, prop.c1, mass.value());
135
-    }
136
-    else
137
-    {
138
-        mQueue.rejectBirth();
139
-        mDomain.erase(prop.atom1->pos);
140
-    }
141
-}
142
-
143
-// automatically accept death, attempt a rebirth at the same position, using
144
-// the original mass or the gibbs mass distribution
145
-void GibbsSampler::death(const AtomicProposal &prop)
146
-{
147
-    // calculate alpha parameters assuming atom dies
148
-    AlphaParameters alpha = alphaParametersWithChange(prop.r1, prop.c1,
149
-        -prop.atom1->mass);
150
-
151
-    float rebirthMass = prop.atom1->mass;
152
-    if (canUseGibbs(prop.c1))
153
-    {
154
-        OptionalFloat gMass = gibbsMass(alpha * mAnnealingTemp, 0.f,
155
-            mMaxGibbsMass, mLambda, &(prop.rng));
156
-        if (gMass.hasValue())
157
-        {
158
-            rebirthMass = gMass.value();
159
-        }
160
-    }
161
-
162
-    // accept/reject rebirth
163
-    float deltaLL = getDeltaLL(alpha, rebirthMass) * mAnnealingTemp;
164
-    if (std::log(prop.rng.uniform()) < deltaLL) // accept rebirth
165
-    {
166
-        mQueue.rejectDeath();
167
-        if (rebirthMass != prop.atom1->mass)
168
-        {
169
-            safelyChangeMatrix(prop.r1, prop.c1, rebirthMass - prop.atom1->mass);
170
-        }
171
-        prop.atom1->mass = rebirthMass;
172
-    }
173
-    else // reject rebirth
174
-    {
175
-        mQueue.acceptDeath();
176
-        safelyChangeMatrix(prop.r1, prop.c1, -prop.atom1->mass);
177
-        mDomain.erase(prop.atom1->pos);
178
-    }
179
-}
180
-
181
-// move mass from src to dest in the atomic domain
182
-void GibbsSampler::move(const AtomicProposal &prop)
183
-{
184
-    AlphaParameters alpha = alphaParameters(prop.r1, prop.c1, prop.r2, prop.c2);
185
-    if (std::log(prop.rng.uniform()) < getDeltaLL(alpha, -prop.atom1->mass) * mAnnealingTemp)
186
-    {
187
-        prop.atom1->pos = prop.pos;
188
-        safelyChangeMatrix(prop.r1, prop.c1, -prop.atom1->mass);
189
-        changeMatrix(prop.r2, prop.c2, prop.atom1->mass);
190
-    }
191
-}
192
-
193
-// exchange some amount of mass between two positions, note it is possible
194
-// for one of the atoms to be deleted if it's mass becomes too small
195
-void GibbsSampler::exchange(const AtomicProposal &prop)
196
-{
197
-    // attempt gibbs distribution exchange
198
-    AlphaParameters alpha = alphaParameters(prop.r1, prop.c1, prop.r2, prop.c2);
199
-    if (canUseGibbs(prop.c1, prop.c2))
200
-    {
201
-        OptionalFloat gMass = gibbsMass(alpha * mAnnealingTemp,
202
-            -prop.atom1->mass, prop.atom2->mass, 0.f, &(prop.rng));
203
-        if (gMass.hasValue())
204
-        {
205
-            acceptExchange(prop, gMass.value());
206
-            return;
207
-        }
208
-    }
209
-
210
-    // resort to metropolis-hastings if gibbs fails
211
-    exchangeUsingMetropolisHastings(prop, alpha);
212
-}
213
-
214
-void GibbsSampler::exchangeUsingMetropolisHastings(const AtomicProposal &prop,
215
-AlphaParameters alpha)
216
-{
217
-    // compute amount of mass to be exchanged
218
-    float totalMass = prop.atom1->mass + prop.atom2->mass;
219
-    float newMass = prop.rng.truncGammaUpper(totalMass, 1.f / mLambda);
220
-
221
-    // compute amount to change atom1 by - always change larger mass to newMass
222
-    float delta = (prop.atom1->mass > prop.atom2->mass)
223
-        ? newMass - prop.atom1->mass
224
-        : prop.atom2->mass - newMass;
225
-
226
-    // choose mass for priorLL calculation
227
-    float oldMass = (2.f * newMass > totalMass)
228
-        ? gaps::max(prop.atom1->mass, prop.atom2->mass)
229
-        : gaps::min(prop.atom1->mass, prop.atom2->mass);
230
-
231
-    // calculate priorLL
232
-    float pNew = gaps::d_gamma(newMass, 2.f, 1.f / mLambda);
233
-    float pOld = gaps::d_gamma(oldMass, 2.f, 1.f / mLambda);
234
-    float priorLL = (pNew == 0.f) ? 1.f : pOld / pNew;
235
-
236
-    // accept/reject
237
-    float deltaLL = getDeltaLL(alpha, delta) * mAnnealingTemp;
238
-    if (priorLL == 0.f || std::log(prop.rng.uniform() * priorLL) < deltaLL)
239
-    {
240
-        acceptExchange(prop, delta);
241
-        return;
242
-    }
243
-}
244
-
245
-// helper function for exchange step
246
-void GibbsSampler::acceptExchange(const AtomicProposal &prop, float delta)
247
-{
248
-    if (prop.atom1->mass + delta > gaps::epsilon && prop.atom2->mass - delta > gaps::epsilon)
249
-    {
250
-        prop.atom1->mass += delta;
251
-        prop.atom2->mass -= delta;
252
-
253
-        changeMatrix(prop.r1, prop.c1, delta);
254
-        changeMatrix(prop.r2, prop.c2, -delta);
255
-    }
256
-}
257
-
258
-// here mass + delta is guaranteed to be positive
259
-void GibbsSampler::changeMatrix(unsigned row, unsigned col, float delta)
260
-{
261
-    mMatrix(row, col) += delta;
262
-    updateAPMatrix(row, col, delta);
263
-
264
-    GAPS_ASSERT(mMatrix(row, col) >= 0.f);
265
-}
266
-
267
-// delta could be negative, this is needed to prevent negative values in matrix
268
-void GibbsSampler::safelyChangeMatrix(unsigned row, unsigned col, float delta)
269
-{
270
-    float newVal = gaps::max(mMatrix(row, col) + delta, 0.f);
271
-    updateAPMatrix(row, col, newVal - mMatrix(row, col));
272
-    mMatrix(row, col) = newVal;
273
-}
274
-
275
-void GibbsSampler::updateAPMatrix(unsigned row, unsigned col, float delta)
276
-{
277
-    const float *other = mOtherMatrix->colPtr(col);
278
-    float *ap = mAPMatrix.colPtr(row);
279
-    unsigned size = mAPMatrix.nRow();
280
-
281
-    gaps::simd::PackedFloat pOther, pAP;
282
-    gaps::simd::Index i(0);
283
-    gaps::simd::PackedFloat pDelta(delta);
284
-    for (; i <= size - i.increment(); ++i)
285
-    {
286
-        pOther.load(other + i);
287
-        pAP.load(ap + i);
288
-        pAP += pDelta * pOther;
289
-        pAP.store(ap + i);
290
-    }
291
-
292
-    for (unsigned j = i.value(); j < size; ++j)
293
-    {
294
-        ap[j] += delta * other[j];
295
-    }
296
-}
297
-
298
-bool GibbsSampler::canUseGibbs(unsigned col) const
299
-{
300
-    return !gaps::algo::isVectorZero(mOtherMatrix->colPtr(col),
301
-        mOtherMatrix->nRow());
302
-}
303
-
304
-bool GibbsSampler::canUseGibbs(unsigned c1, unsigned c2) const
305
-{
306
-    return canUseGibbs(c1) || canUseGibbs(c2);
307
-}
308
-
309
-AlphaParameters GibbsSampler::alphaParameters(unsigned row, unsigned col)
310
-{
311
-    return gaps::algo::alphaParameters(mDMatrix.nRow(), mDMatrix.colPtr(row),
312
-        mSMatrix.colPtr(row), mAPMatrix.colPtr(row), mOtherMatrix->colPtr(col));
313
-}
314
-
315
-AlphaParameters GibbsSampler::alphaParameters(unsigned r1, unsigned c1,
316
-unsigned r2, unsigned c2)
317
-{
318
-    if (r1 == r2)
319
-    {
320
-        return gaps::algo::alphaParameters(mDMatrix.nRow(), mDMatrix.colPtr(r1),
321
-            mSMatrix.colPtr(r1), mAPMatrix.colPtr(r1), mOtherMatrix->colPtr(c1),
322
-            mOtherMatrix->colPtr(c2));
323
-    }
324
-    return alphaParameters(r1, c1) + alphaParameters(r2, c2);
325
-}
326
-
327
-AlphaParameters GibbsSampler::alphaParametersWithChange(unsigned row,
328
-unsigned col, float ch)
329
-{
330
-    return gaps::algo::alphaParametersWithChange(mDMatrix.nRow(),
331
-        mDMatrix.colPtr(row), mSMatrix.colPtr(row), mAPMatrix.colPtr(row),
332
-        mOtherMatrix->colPtr(col), ch);
333
-}
334
-
335
-Archive& operator<<(Archive &ar, GibbsSampler &s)
336
-{
337
-    ar << s.mMatrix << s.mDomain << s.mQueue << s.mAlpha
338
-        << s.mLambda << s.mMaxGibbsMass << s.mAnnealingTemp << s.mNumPatterns
339
-        << s.mNumBins << s.mBinLength << s.mDomainLength;
340
-    return ar;
341
-}
342
-
343
-Archive& operator>>(Archive &ar, GibbsSampler &s)
344
-{
345
-    ar >> s.mMatrix >> s.mDomain >> s.mQueue >> s.mAlpha
346
-        >> s.mLambda >> s.mMaxGibbsMass >> s.mAnnealingTemp >> s.mNumPatterns
347
-        >> s.mNumBins >> s.mBinLength >> s.mDomainLength;
348
-    return ar;
349
-}
350
-
351
-#ifdef GAPS_DEBUG
352
-bool GibbsSampler::internallyConsistent()
353
-{
354
-    if (mDomain.begin() == mDomain.end())
355
-    {
356
-        float sum = gaps::algo::sum(mMatrix);
357
-        if (sum != 0.f)
358
-        {
359
-            gaps_printf("non-zero matrix (%f) with zero domain\n", sum);
360
-        }        
361
-        return sum == 0.f;
362
-    }
363
-
364
-    std::vector<Atom*>::iterator it = mDomain.begin();
365
-    float current = (*it)->mass;
366
-    unsigned row = ((*it)->pos / mBinLength) / mNumPatterns;
367
-    unsigned col = ((*it)->pos / mBinLength) % mNumPatterns;
368
-    ++it;
369
-    
370
-    for (; it != mDomain.end(); ++it)
371
-    {
372
-        if (((*it)->pos / mBinLength) / mNumPatterns != row
373
-        || ((*it)->pos / mBinLength) % mNumPatterns != col)
374
-        {
375
-            if (std::abs(current - mMatrix(row, col)) > 0.1f)
376
-            {
377
-                Rprintf("mass difference detected at row %lu, column %lu: %f %f\n",
378
-                    row, col, current, mMatrix(row, col));
379
-                return false;
380
-            }
381
-            current = (*it)->mass;
382
-            row = ((*it)->pos / mBinLength) / mNumPatterns;
383
-            col = ((*it)->pos / mBinLength) % mNumPatterns;
384
-        }
385
-        else
386
-        {
387
-            current += (*it)->mass;
388
-        }
389
-    }   
390
-    return true;  
391
-}
392
-#endif // GAPS_DEBUG
393 0
deleted file mode 100644
... ...
@@ -1,200 +0,0 @@
1
-#ifndef __COGAPS_GIBBS_SAMPLER_H__
2
-#define __COGAPS_GIBBS_SAMPLER_H__
3
-
4
-#define DEFAULT_ALPHA           0.01f
5
-#define DEFAULT_MAX_GIBBS_MASS  100.f
6
-
7
-#include "AtomicDomain.h"
8
-#include "ProposalQueue.h"
9
-#include "data_structures/Matrix.h"
10
-#include "math/Algorithms.h"
11
-#include "math/Random.h"
12
-
13
-#include <vector>
14
-
15
-////////////////////////////// CLASS DEFINITIONS ///////////////////////////////
16
-
17
-// These classes provide the various implementations of a GibbsSampler. Compile
18
-// time polymorphism is used to reduce code duplication.
19
-
20
-// can't be constructed, interface is avaiable through derived classes
21
-template <class T>
22
-class GibbsSamplerImplementation
23
-{
24
-private:
25
-
26
-    friend class T;         
27
-
28
-    GibbsSamplerImplementation();
29
-
30
-    void setAnnealingTemp(float temp);
31
-
32
-    void update(unsigned nSteps, unsigned nThreads);
33
-    void processProposal(const AtomicProposal &prop);
34
-    void birth(const AtomicProposal &prop);
35
-    void death(const AtomicProposal &prop);
36
-    void move(const AtomicProposal &prop);
37
-    void exchange(const AtomicProposal &prop);
38
-    void exchangeUsingMetropolisHastings(const AtomicProposal &prop,
39
-        AlphaParameters alpha);
40
-    void acceptExchange(const AtomicProposal &prop, float delta);
41
-    bool updateAtomMass(Atom *atom, float delta);
42
-
43
-    AtomicDomain mDomain; // data structure providing access to atoms
44
-    ProposalQueue mQueue; // creates queue of proposals that get evaluated by sampler
45
-
46
-    float mAlpha;
47
-    float mLambda;
48
-    float mMaxGibbsMass;
49
-    float mAnnealingTemp;
50
-    
51
-    unsigned mNumPatterns;
52
-    uint64_t mBinLength;
53
-};
54
-
55
-class DenseGibbsSamplerImplementation : public GibbsSamplerImplementation<DenseGibbsSamplerImplementation>
56
-{
57
-private:
58
-
59
-    friend class DenseGibbsSampler;
60
-
61
-    DenseColMatrix mDMatrix; // samples by genes for A, genes by samples for P
62
-    DenseColMatrix mSMatrix; // same configuration as D
63
-    DenseColMatrix mAPMatrix; // cached product of A and P, same configuration as D
64
-    DenseColMatrix mMatrix; // genes by patterns for A, samples by patterns for P
65
-    const DenseColMatrix *mOtherMatrix; // pointer to P if this is A, and vice versa
66
-    
67
-    // private constructor allows only DenseGibbsSampler to construct this
68
-    DenseGibbsSamplerImplementation(const DataType &data, bool transpose,
69
-        unsigned nPatterns, bool partitionRows,
70
-        const std::vector<unsigned> &indices);
71
-
72
-    template <class DataType>
73
-    void setUncertainty(const DataType &data, bool transpose, unsigned nPatterns,
74
-        bool partitionRows, const std::vector<unsigned> &indices);
75
-
76
-    unsigned dataRows() const;
77
-    unsigned dataCols() const;
78
-
79
-    void setSparsity(float alpha, float maxGibbsMass, bool singleCell);
80
-    void setMatrix(const Matrix &mat);
81
-
82
-    void sync(const GibbsSampler &sampler)
83
-
84
-    void changeMatrix(unsigned row, unsigned col, float delta);
85
-    void safelyChangeMatrix(unsigned row, unsigned col, float delta);
86
-    void updateAPMatrix(unsigned row, unsigned col, float delta);
87
-
88
-    bool canUseGibbs(unsigned col) const;
89
-    bool canUseGibbs(unsigned c1, unsigned c2) const;
90
-    AlphaParameters alphaParameters(unsigned row, unsigned col);
91
-    AlphaParameters alphaParameters(unsigned r1, unsigned c1, unsigned r2, unsigned c2);
92
-    AlphaParameters alphaParametersWithChange(unsigned row, unsigned col, float ch);
93
-};
94
-
95
-class SparseGibbsSamplerImplementation : public GibbsSamplerImplementation<SparseGibbsSamplerImplementation>
96
-{
97
-private :
98
-
99
-    friend class SparseGibbsSampler;
100
-
101
-    SparseColMatrix mDMatrix;
102
-    HybridColMatrix mSparseMatrix;
103
-    DenseRowMatrix mDenseMatrix;
104
-    const HybridColMatrix *mOtherSparseMatrix;
105
-    const DenseRowMatrix *mOtherDenseMatrix;
106
-
107
-    // private constructor allows only SparseGibbsSampler to construct this
108
-    SparseGibbsSamplerImplementation(const DataType &data, bool transpose,
109
-        unsigned nPatterns, bool partitionRows,
110
-        const std::vector<unsigned> &indices);
111
-
112
-    unsigned dataRows() const;
113
-    unsigned dataCols() const;
114
-
115
-    void setSparsity(float alpha, float maxGibbsMass, bool singleCell);
116
-    void setMatrix(const Matrix &mat);
117
-
118
-    float chiSq() const;
119
-
120
-    bool canUseGibbs(unsigned col) const;
121
-    bool canUseGibbs(unsigned c1, unsigned c2) const;
122
-    AlphaParameters alphaParameters(unsigned row, unsigned col);
123
-    AlphaParameters alphaParameters(unsigned r1, unsigned c1, unsigned r2, unsigned c2);
124
-    AlphaParameters alphaParametersWithChange(unsigned row, unsigned col, float ch);
125
-};
126
-
127
-//////////////////////IMPLEMENTATION OF TEMPLATED FUNCTIONS ////////////////////
128
-
129
-template <class T>
130
-void GibbsSamplerImplementation<T>::update(unsigned nSteps, unsigned nThreads)
131
-{
132
-    unsigned n = 0;
133
-    while (n < nSteps)
134
-    {
135
-        // populate queue, prepare domain for this queue
136
-        mQueue.populate(mDomain, nSteps - n);
137
-        n += mQueue.size();
138
-        
139
-        // process all proposed updates
140
-        #pragma omp parallel for num_threads(nThreads)
141
-        for (unsigned i = 0; i < mQueue.size(); ++i)
142
-        {
143
-            processProposal(mQueue[i]);
144
-        }
145
-        mQueue.clear();
146
-    }
147
-
148
-    GAPS_ASSERT(internallyConsistent());
149
-    GAPS_ASSERT(mDomain.isSorted());
150
-}
151
-
152
-template <class T>
153
-void GibbsSamplerImplementation<T>::processProposal(const AtomicProposal &prop)
154
-{
155
-    switch (prop.type)
156
-    {
157
-        case 'B':
158
-            birth(prop);
159
-            break;
160
-        case 'D':
161
-            death(prop);
162
-            break;
163
-        case 'M':
164
-            move(prop);
165
-            break;
166
-        case 'E':
167
-            exchange(prop);
168
-            break;
169
-    }
170
-}
171
-
172
-// add an atom at a random position, calculate mass either with an
173
-// exponential distribution or with the gibbs mass distribution
174
-void GibbsSampler::birth(const AtomicProposal &prop)
175
-{
176
-    // calculate proposed mass
177
-    float mass = canUseGibbs(prop.c1)
178
-        ? gibbsMass(alphaParameters(prop.r1, prop.c1), &(prop.rng)).value()
179
-        : prop.rng.exponential(mLambda);
180
-
181
-    // accept mass as long as it's non-zero
182
-    if (mass >= gaps::epsilon)
183
-    {
184
-        mQueue.acceptBirth();
185
-        prop.atom1->mass = mass;
186
-        changeMatrix(prop.r1, prop.c1, mass);
187
-    }
188
-    else
189
-    {
190
-        mQueue.rejectBirth();
191
-        mDomain.erase(prop.atom1->pos);
192
-    }
193
-}
194
-
195
-
196
-
197
-
198
-
199
-
200
-#endif // __COGAPS_GIBBS_SAMPLER_H__
201 0
\ No newline at end of file
202 1
deleted file mode 100644
... ...
@@ -1,116 +0,0 @@
1
-#ifndef __COGAPS_GIBBS_SAMPLER_H__
2
-#define __COGAPS_GIBBS_SAMPLER_H__
3
-
4
-#define DEFAULT_ALPHA           0.01f
5
-#define DEFAULT_MAX_GIBBS_MASS  100.f
6
-
7
-#include "AtomicDomain.h"
8
-#include "ProposalQueue.h"
9
-#include "data_structures/Matrix.h"
10
-#include "math/Algorithms.h"
11
-#include "math/Random.h"
12
-
13
-#include <vector>
14
-
15
-// This is a polymorphic wrapper to an underlying implementation. The purpose of
16
-// this is to move the virtual functions to the highest level possible where
17
-// they are called infrequently, rather than pay the performance cost of
18
-// dynamic dispatch of frequently called, inexpensive functions.
19
-
20
-// interface
21
-class GibbsSampler
22
-{
23
-public:
24
-
25
-    virtual unsigned dataRows() const = 0;
26
-    virtual unsigned dataCols() const = 0;
27
-    
28
-    virtual void setSparsity(float alpha, float maxGibbsMass, bool singleCell) = 0;
29
-    virtual void setAnnealingTemp(float temp) = 0;
30
-    virtual void setMatrix(const Matrix &mat) = 0;
31
-
32
-    virtual float chi2() const = 0;
33
-    virtual uint64_t nAtoms() const = 0;
34
-
35
-    virtual void recalculateAPMatrix() = 0;
36
-    virtual void sync(const GibbsSampler *sampler, unsigned nThreads=1) = 0;
37
-    virtual void update(unsigned nSteps, unsigned nCores) = 0;
38
-};
39
-
40
-// wrapper for a dense GibbsSampler implementation - all data in this class
41
-// is stored as a dense matrix
42
-class DenseGibbsSampler : public GibbsSampler
43
-{
44
-public:
45
-
46
-    template <class DataType>
47
-    DenseGibbsSampler(const DataType &data, bool transpose, unsigned nPatterns,
48
-    bool partitionRows, const std::vector<unsigned> &indices)
49
-        : mImplementation(data, transpose, nPatterns, partitionRows, indices);
50
-    {}
51
-
52
-    template <class DataType>
53
-    void setUncertainty(const DataType &data, bool transpose, unsigned nPatterns,
54
-    bool partitionRows, const std::vector<unsigned> &indices)
55
-    {
56
-        mImplementation.setUncertainty(data, transpose, nPatterns, partitionRows, indices);
57
-    }
58
-
59
-    unsigned dataRows() const { return mImplementation.dataRows(); }
60
-    unsigned dataCols() const { return mImplementation.dataCols(); }
61
-
62
-    void setSparsity(float alpha, float maxGibbsMass, bool singleCell) { mImplementation.setSparsity(alpha, maxGibbsMass, singleCell); }
63
-    void setAnnealingTemp(float temp) { mImplementation.setAnnealingTemp(temp); }
64
-    void setMatrix(const Matrix &mat) { mImplementation.setMatrix(mat); }
65
-
66
-    float chi2() const { return mImplementation.chi2(); }
67
-    uint64_t nAtoms() const { return mImplementation.nAtoms(); }
68
-
69
-    void recalculateAPMatrix() { mImplementation.recalculateAPMatrix(); }
70
-    void sync(const GibbsSampler *sampler, unsigned nThreads=1) { mImplementation.sync(sampler->mImplementation, nThreads); }
71
-    void update(unsigned nSteps, unsigned nCores) { mImplementation.update(nSteps, nCores); }
72
-
73
-private:
74
-
75
-    DenseGibbsSamplerImplementation mImplementation;
76
-};
77
-
78
-// wrapper for a sparse GibbsSampler implementation - all data in this class
79
-// is stored as a sparse matrix
80
-class SparseGibbsSampler : public GibbsSampler
81
-{
82
-public:
83
-
84
-    template <class DataType>
85
-    SparseGibbsSampler(const DataType &data, bool transpose, unsigned nPatterns,
86
-    bool partitionRows, const std::vector<unsigned> &indices)
87
-        : mImplementation(data, transpose, nPatterns, partitionRows, indices);
88
-    {}
89
-
90
-    template <class DataType>
91
-    void setUncertainty(const DataType &data, bool transpose, unsigned nPatterns,
92
-    bool partitionRows, const std::vector<unsigned> &indices)
93
-    {
94
-        GAPS_ASSERT(false); // should never reach
95
-    }
96
-
97
-    unsigned dataRows() const { return mImplementation.dataRows(); }
98
-    unsigned dataCols() const { return mImplementation.dataCols(); }
99
-
100
-    void setSparsity(float alpha, float maxGibbsMass, bool singleCell) { mImplementation.setSparsity(alpha, maxGibbsMass, singleCell); }
101
-    void setAnnealingTemp(float temp) { mImplementation.setAnnealingTemp(temp); }
102
-    void setMatrix(const Matrix &mat) { mImplementation.setMatrix(mat); }
103
-
104
-    float chi2() const { return mImplementation.chi2(); }
105
-    uint64_t nAtoms() const { return mImplementation.nAtoms(); }
106
-
107
-    void recalculateAPMatrix() { GAPS_ASSERT(false); /* should never reach */}
108
-    void sync(const GibbsSampler &sampler, unsigned nThreads=1) { mImplementation.sync(sampler); }
109
-    void update(unsigned nSteps, unsigned nCores) { mImplementation.update(nSteps, nCores); }
110
-
111
-private:
112
-    
113
-    SparseGibbsSamplerImplementation mImplementation;
114
-};
115
-
116
-#endif // __COGAPS_GIBBS_SAMPLER_H__
117 0
\ No newline at end of file
... ...
@@ -2,30 +2,49 @@ PKG_CPPFLAGS = @GAPS_CPP_FLAGS@
2 2
 PKG_CXXFLAGS = @GAPS_CXX_FLAGS@
3 3
 PKG_LIBS = @GAPS_LIBS@
4 4
 
5
-OBJECTS =   AtomAllocator.o \
6
-            AtomicDomain.o \
7
-            Cogaps.o \
5
+OBJECTS =   Cogaps.o \
6
+            GapsParameters.o \
8 7
             GapsResult.o \
9 8
             GapsRunner.o \
10 9
             GapsStatistics.o \
11
-            GibbsSampler.o \
12
-            ProposalQueue.o \
13 10
             RcppExports.o \
14
-            test-runner.o \
11
+\
12
+            atomic/AtomAllocator.o \
13
+            atomic/AtomicDomain.o \
14
+            atomic/ProposalQueue.o \
15
+\
15 16
             data_structures/HashSets.o \
17
+            data_structures/HybridMatrix.o \
18
+            data_structures/HybridVector.o \
16 19
             data_structures/Matrix.o \
20
+            data_structures/SparseIterator.o \
21
+            data_structures/SparseMatrix.o \
22
+            data_structures/SparseVector.o \
17 23
             data_structures/Vector.o \
24
+\
18 25
             file_parser/CsvParser.o \
19 26
             file_parser/GctParser.o \
20 27
             file_parser/FileParser.o \
21 28
             file_parser/TsvParser.o \
22 29
             file_parser/MtxParser.o \
23
-            math/Algorithms.o \
30
+\
31
+            gibbs_sampler/DenseGibbsSampler.o \
32
+            gibbs_sampler/SparseGibbsSampler.o \
33
+\
34
+            math/Math.o \
35
+            math/MatrixMath.o \
24 36
             math/Random.o \
25
-            cpp_tests/testAlgorithms.o \
37
+            math/VectorMath.o \
38
+\
39
+            cpp_tests/test-runner.o \
26 40
             cpp_tests/testAtomicDomain.o \
27
-            cpp_tests/testFileParsers.o \
41
+            cpp_tests/testHashSets.o \
42
+            cpp_tests/testHybridMatrix.o \
43
+            cpp_tests/testHybridVector.o \
28 44
             cpp_tests/testMatrix.o \
29
-            cpp_tests/testRandom.o \
30
-            cpp_tests/testSerialization.o
31
-
45
+            cpp_tests/testSparseIterator.o \
46
+            cpp_tests/testSparseMatrix.o \
47
+            cpp_tests/testSparseVector.o \
48
+            cpp_tests/testVector.o \
49
+            cpp_tests/testFileParsers.o \
50
+            cpp_tests/testDenseGibbsSampler.o
32 51
\ No newline at end of file
33 52
similarity index 64%
34 53
rename from src/AtomAllocator.cpp
35 54
rename to src/atomic/AtomAllocator.cpp
... ...
@@ -76,76 +76,75 @@ void IndexFlagSet::release(uint64_t n)
76 76
 
77 77
 AtomPool::AtomPool()
78 78
 {
79
-    //mPool = new Atom[POOL_SIZE];
79
+    mPool = new Atom[POOL_SIZE];
80 80
 }
81 81
 
82 82
 AtomPool::~AtomPool()
83 83
 {
84
-    //delete[] mPool;
84
+    delete[] mPool;
85 85
 }
86 86
 
87 87
 Atom* AtomPool::alloc()
88 88
 {
89
-    //unsigned n = mIndexFlags.getFirstFree();
90
-    //mIndexFlags.set(n);
91
-    //Atom *a = &(mPool[n]);
92
-    //a->poolIndex = n;
93
-    //return a;
94
-    return new Atom();
89
+    unsigned n = mIndexFlags.getFirstFree();
90
+    mIndexFlags.set(n);
91
+    Atom *a = &(mPool[n]);
92
+    a->poolIndex = n;
93
+    return a;
95 94
 }
96 95
 
97 96
 void AtomPool::free(Atom* a)
98 97
 {
99
-    //mIndexFlags.release(a->poolIndex);
100
-    delete a;
98
+    mIndexFlags.release(a->poolIndex);
101 99
 }
102 100
 
103 101
 bool AtomPool::depleted() const
104 102
 {
105
-    //return !mIndexFlags.isAnyFree();
106
-    return false;
103
+    return !mIndexFlags.isAnyFree();
107 104
 }
108 105
 
109 106
 ////////////////////////////// AtomAllocator ///////////////////////////////////
110 107
 
111 108
 AtomAllocator::AtomAllocator()
112 109
 {
113
-    mIndex = 0;
114
-    mPools.push_back(new AtomPool());
110
+    //mIndex = 0;
111
+    //mPools.push_back(new AtomPool());
115 112
 }
116 113
 
117 114
 AtomAllocator::~AtomAllocator()
118 115
 {
119
-    std::vector<AtomPool*>::iterator it = mPools.begin();
120
-    for (; it != mPools.end(); ++it)
121
-    {
122
-        delete *it;
123
-    }
116
+    //std::vector<AtomPool*>::iterator it = mPools.begin();
117
+    //for (; it != mPools.end(); ++it)
118
+    //{
119
+    //    delete *it;
120
+    //}
124 121
 }
125 122
 
126 123
 Atom* AtomAllocator::createAtom(uint64_t pos, float mass)
127 124
 {
128
-    GAPS_ASSERT(mPools.size() < 65536);
125
+    //GAPS_ASSERT(mPools.size() < 65536);
129 126
 
130
-    if (mPools[mIndex]->depleted() && mIndex == mPools.size() - 1)
131
-    {
132
-        mPools.push_back(new AtomPool());
133
-        mIndex = 0; // loop back around all pools before getting to new one
134
-    }
127
+    //if (mPools[mIndex]->depleted() && mIndex == mPools.size() - 1)
128
+    //{
129
+    //    mPools.push_back(new AtomPool());
130
+    //    mIndex = 0; // loop back around all pools before getting to new one
131
+    //}
135 132
 
136
-    while (mPools[mIndex]->depleted())
137
-    {
138
-        ++mIndex;
139
-    }
133
+    //while (mPools[mIndex]->depleted())
134
+    //{
135
+    //    ++mIndex;
136
+    //}
140 137
 
141
-    Atom *a = mPools[mIndex]->alloc();
142
-    a->allocatorIndex = mIndex;
143
-    a->pos = pos;
144
-    a->mass = mass;
145
-    return a;
138
+    //Atom *a = mPools[mIndex]->alloc();
139
+    //a->allocatorIndex = mIndex;
140
+    //a->pos = pos;
141
+    //a->mass = mass;
142
+    //return a;
143
+    return new Atom(pos, mass);
146 144
 }
147 145
 
148 146
 void AtomAllocator::destroyAtom(Atom *a)
149 147
 {
150
-    mPools[a->allocatorIndex]->free(a);
148
+    //mPools[a->allocatorIndex]->free(a);