Browse code

fix compiler warnings

sherman5 authored on 25/06/2019 22:21:06
Showing 1 changed file
... ...
@@ -1,9 +1,9 @@
1 1
 #ifndef __COGAPS_GAPS_RUNNER_H__
2 2
 #define __COGAPS_GAPS_RUNNER_H__
3 3
 
4
-class GapsResult;
4
+struct GapsResult;
5
+struct GapsParameters;
5 6
 class Matrix;
6
-class GapsParameters;
7 7
 class GapsRandomState;
8 8
 
9 9
 #include <string>
Browse code

clean up linter warnings

Tom Sherman authored on 24/06/2019 19:44:02
Showing 1 changed file
... ...
@@ -1,10 +1,12 @@
1 1
 #ifndef __COGAPS_GAPS_RUNNER_H__
2 2
 #define __COGAPS_GAPS_RUNNER_H__
3 3
 
4
-#include "GapsResult.h"
5
-#include "GapsParameters.h"
6
-#include "data_structures/Matrix.h"
7
-#include "math/Random.h"
4
+class GapsResult;
5
+class Matrix;
6
+class GapsParameters;
7
+class GapsRandomState;
8
+
9
+#include <string>
8 10
 
9 11
 // these two functions are the top-level functions exposed to the C++
10 12
 // code that is being wrapped by any given language
Browse code

implemented policy based design for sparse/dense storage in the GibbsSampler

Tom Sherman authored on 08/01/2019 22:16:52
Showing 1 changed file
... ...
@@ -18,6 +18,7 @@ namespace gaps
18 18
     // data stored in file
19 19
     GapsResult run(const std::string &data, GapsParameters &params,
20 20
         const std::string &uncertainty, GapsRandomState *randState);
21
+
21 22
 }; // namespace gaps
22 23
 
23 24
 #endif // __COGAPS_GAPS_RUNNER_H__
24 25
\ No newline at end of file
Browse code

cleaned up version and regenerated vignette

Tom Sherman authored on 02/11/2018 20:05:05
Showing 1 changed file
... ...
@@ -18,6 +18,6 @@ namespace gaps
18 18
     // data stored in file
19 19
     GapsResult run(const std::string &data, GapsParameters &params,
20 20
         const std::string &uncertainty, GapsRandomState *randState);
21
-};
21
+}; // namespace gaps
22 22
 
23 23
 #endif // __COGAPS_GAPS_RUNNER_H__
24 24
\ No newline at end of file
Browse code

updated config to commit file permissions

Tom Sherman authored on 29/10/2018 19:56:14
Showing 1 changed file
1 1
old mode 100644
2 2
new mode 100755
Browse code

use free functions instead of class for gaps runner

Tom Sherman authored on 23/10/2018 21:37:09
Showing 1 changed file
... ...
@@ -1,265 +1,23 @@
1 1
 #ifndef __COGAPS_GAPS_RUNNER_H__
2 2
 #define __COGAPS_GAPS_RUNNER_H__
3 3
 
4
-#include "GapsParameters.h"
5 4
 #include "GapsResult.h"
6
-#include "GapsStatistics.h"
7
-#include "gibbs_sampler/GibbsSampler.h"
8
-#include "gibbs_sampler/DenseGibbsSampler.h"
9
-#include "gibbs_sampler/SparseGibbsSampler.h"
10
-
11
-#include <string>
12
-
13
-// boost time helpers
14
-#include <boost/date_time/posix_time/posix_time.hpp>
15
-namespace bpt = boost::posix_time;
16
-#define bpt_now() bpt::microsec_clock::local_time()
17
-
18
-// forward declarations
19
-class AbstractGapsRunner;
20
-
21
-///////////////////////////// RAII wrapper /////////////////////////////////////
22
-
23
-// This is the class that is exposed to the top-level CoGAPS routine - all 
24
-// aspects of CoGAPS can be managed through this class. The class itself is
25
-// just a lightweight wrapper around an abstract interface, which allows for
26
-// multiple types of GapsRunner to be declared. Which implementation is used
27
-// depends on the parameters passed to the GapsRunner constructor.
28
-class GapsRunner
29
-{
30
-public:
31
-
32
-    template <class DataType>
33
-    GapsRunner(const DataType &data, const GapsParameters &params);
34
-
35
-    ~GapsRunner();
36
-
37
-    template <class DataType>
38
-    void setUncertainty(const DataType &unc, const GapsParameters &params);
39
-
40
-    GapsResult run();
41
-
42
-private:
43
-
44
-    AbstractGapsRunner *mRunner;
45
-
46
-    GapsRunner(const GapsRunner &p); // don't allow copies
47
-    GapsRunner& operator=(const GapsRunner &p); // don't allow copies    
48
-};
49
-
50
-///////////////////////// Abstract Interface ///////////////////////////////////
51
-
52
-// This class is the abstract interface that any implementation of GapsRunner
53
-// must satisfy. It provides a factory method that will create the appropriate
54
-// derived class depending on the parameters passed in.
55
-class AbstractGapsRunner
56
-{
57
-public:
58
-
59
-    AbstractGapsRunner(const GapsParameters &params);
60
-    virtual ~AbstractGapsRunner() {}
61
-
62
-    template <class DataType>
63
-    static AbstractGapsRunner* create(const DataType &data, const GapsParameters &params);
64
-
65
-    // can't use template with virtual function
66
-    virtual void setUncertainty(const Matrix &unc, const GapsParameters &params) = 0;
67
-    virtual void setUncertainty(const std::string &unc, const GapsParameters &params) = 0;
68
-
69
-    GapsResult run();
70
-
71
-protected:
72
-
73
-    GapsStatistics mStatistics;
74
-
75
-    mutable GapsRng mRng;
76
-
77
-    std::string mCheckpointOutFile;
78
-
79
-    bpt::ptime mStartTime;
80
-
81
-    unsigned mCurrentIteration;
82
-    unsigned mMaxIterations;
83
-    unsigned mMaxThreads;
84
-    unsigned mOutputFrequency;
85
-    unsigned mCheckpointInterval;
86
-    unsigned mNumPatterns;
87
-    unsigned mNumUpdatesA;
88
-    unsigned mNumUpdatesP;
89
-    uint32_t mSeed;
90
-
91
-    bool mPrintMessages;
92
-    bool mPrintThreadUsage;
93
-
94
-    char mPhase;
95
-    char mFixedMatrix;
96
-        
97
-    void runOnePhase();
98
-    double estimatedPercentComplete() const;
99
-    void displayStatus();
100
-    void createCheckpoint();
101
-
102
-    virtual float chiSq() const = 0;
103
-    virtual float meanChiSq() const = 0;
104
-    virtual unsigned nAtoms(char which) const = 0;
105
-    virtual void setAnnealingTemp(float temp) = 0;
106
-    virtual void updateStatistics() = 0;
107
-    virtual Archive& readSamplers(Archive &ar) = 0;
108
-    virtual Archive& writeSamplers(Archive &ar) = 0;
109
-    virtual void updateSampler(unsigned nA, unsigned nP) = 0;
110
-};
111
-
112
-///////////////////// GapsRunner Implementations ///////////////////////////////
113
-
114
-// This implementation uses a DenseGibbsSampler internally
115
-class DenseGapsRunner : public AbstractGapsRunner
116
-{
117
-public:
118
-
119
-    ~DenseGapsRunner() {}
120
-
121
-    template <class DataType>
122
-    DenseGapsRunner(const DataType &data, const GapsParameters &params);
123
-
124
-    void setUncertainty(const Matrix &unc, const GapsParameters &params);
125
-    void setUncertainty(const std::string &unc, const GapsParameters &params);
126
-
127
-private:
128
-
129
-    DenseGibbsSampler mASampler;
130
-    DenseGibbsSampler mPSampler;
5
+#include "GapsParameters.h"
6
+#include "data_structures/Matrix.h"
7
+#include "math/Random.h"
131 8
 
132
-    float chiSq() const;
133
-    float meanChiSq() const;
134
-    unsigned nAtoms(char which) const;
135
-    void setAnnealingTemp(float temp);
136
-    void updateStatistics();
137
-    Archive& readSamplers(Archive &ar);
138
-    Archive& writeSamplers(Archive &ar);
139
-    void updateSampler(unsigned nA, unsigned nP);
140
-};
9
+// these two functions are the top-level functions exposed to the C++
10
+// code that is being wrapped by any given language
141 11
 
142
-// This implementation uses a SparseGibbsSampler internally
143
-class SparseGapsRunner : public AbstractGapsRunner
12
+namespace gaps
144 13
 {
145
-public:
146
-
147
-    ~SparseGapsRunner() {}
148
-
149
-    template <class DataType>
150
-    SparseGapsRunner(const DataType &data, const GapsParameters &params);
151
-
152
-    void setUncertainty(const Matrix &unc, const GapsParameters &params);
153
-    void setUncertainty(const std::string &unc, const GapsParameters &params);
14
+    // data stored in matrix
15
+    GapsResult run(const Matrix &data, GapsParameters &params,
16
+        const Matrix &uncertainty, GapsRandomState *randState);
154 17
 
155
-private:
156
-
157
-    SparseGibbsSampler mASampler;
158
-    SparseGibbsSampler mPSampler;
159
-
160
-    float chiSq() const;
161
-    float meanChiSq() const;
162
-    unsigned nAtoms(char which) const;
163
-    void setAnnealingTemp(float temp);
164
-    void updateStatistics();
165
-    Archive& readSamplers(Archive &ar);
166
-    Archive& writeSamplers(Archive &ar);
167
-    void updateSampler(unsigned nA, unsigned nP);
18
+    // data stored in file
19
+    GapsResult run(const std::string &data, GapsParameters &params,
20
+        const std::string &uncertainty, GapsRandomState *randState);
168 21
 };
169 22
 
170
-/////////////////////// GapsRunner - templated functions ///////////////////////
171
-
172
-template <class DataType>
173
-GapsRunner::GapsRunner(const DataType &data, const GapsParameters &params)
174
-    : mRunner(AbstractGapsRunner::create(data, params))
175
-{}
176
-
177
-template <class DataType>
178
-void GapsRunner::setUncertainty(const DataType &unc, const GapsParameters &params)
179
-{
180
-    mRunner->setUncertainty(unc, params);
181
-}
182
-
183
-/////////////////// AbstractGapsRunner - templated functions ///////////////////
184
-
185
-template <class DataType>
186
-AbstractGapsRunner* AbstractGapsRunner::create(const DataType &data,
187
-const GapsParameters &params)
188
-{
189
-    if (params.useSparseOptimization)
190
-    {
191
-        return new SparseGapsRunner(data, params);
192
-    }
193
-    return new DenseGapsRunner(data, params);
194
-}
195
-
196
-//////////////////// DenseGapsRunner - templated functions /////////////////////
197
-
198
-template <class DataType>
199
-DenseGapsRunner::DenseGapsRunner(const DataType &data,
200
-const GapsParameters &params)
201
-    :
202
-AbstractGapsRunner(params),
203
-mASampler(data, !params.transposeData, !params.subsetGenes, params.alphaA, params.maxGibbsMassA, params),
204
-mPSampler(data, params.transposeData, params.subsetGenes, params.alphaP, params.maxGibbsMassP, params)
205
-{
206
-    switch (mFixedMatrix)
207
-    {
208
-        case 'A' : mASampler.setMatrix(params.fixedMatrix); break;
209
-        case 'P' : mPSampler.setMatrix(params.fixedMatrix); break;
210
-        default: break; // 'N' for none
211
-    }
212
-
213
-    // overwrite with info from checkpoint file
214
-    if (params.useCheckPoint)
215
-    {
216
-        Archive ar(params.checkpointFile, ARCHIVE_READ);
217
-        ar >> mNumPatterns >> mSeed >> mMaxIterations >> mFixedMatrix >> mPhase
218
-            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng;
219
-        readSamplers(ar);
220
-        GapsRng::load(ar);
221
-    }
222
-
223
-    mASampler.sync(mPSampler);
224
-    mPSampler.sync(mASampler);
225
-
226
-    // AP matrix not stored in checkpoint
227
-    if (params.useCheckPoint)
228
-    {
229
-        mASampler.recalculateAPMatrix();
230
-        mPSampler.recalculateAPMatrix();
231
-    }
232
-}
233
-
234
-//////////////////// SparseGapsRunner - templated functions ////////////////////
235
-
236
-template <class DataType>
237
-SparseGapsRunner::SparseGapsRunner(const DataType &data,
238
-const GapsParameters &params)
239
-    :
240
-AbstractGapsRunner(params),
241
-mASampler(data, !params.transposeData, !params.subsetGenes, params.alphaA, params.maxGibbsMassA, params),
242
-mPSampler(data, params.transposeData, params.subsetGenes, params.alphaP, params.maxGibbsMassP, params)
243
-{
244
-    switch (mFixedMatrix)
245
-    {
246
-        case 'A' : mASampler.setMatrix(params.fixedMatrix); break;
247
-        case 'P' : mPSampler.setMatrix(params.fixedMatrix); break;
248
-        default: break;
249
-    }
250
-
251
-    // overwrite with info from checkpoint file
252
-    if (params.useCheckPoint)
253
-    {
254
-        Archive ar(params.checkpointFile, ARCHIVE_READ);
255
-        ar >> mNumPatterns >> mSeed >> mMaxIterations >> mFixedMatrix >> mPhase
256
-            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng;
257
-        readSamplers(ar);
258
-        GapsRng::load(ar);
259
-    }
260
-
261
-    mASampler.sync(mPSampler);
262
-    mPSampler.sync(mASampler);
263
-}
264
-
265
-#endif // __COGAPS_GAPS_RUNNER_H__
23
+#endif // __COGAPS_GAPS_RUNNER_H__
266 24
\ No newline at end of file
Browse code

dense sampler appears to be working

Tom Sherman authored on 15/10/2018 20:52:28
Showing 1 changed file
... ...
@@ -4,13 +4,27 @@
4 4
 #include "GapsParameters.h"
5 5
 #include "GapsResult.h"
6 6
 #include "GapsStatistics.h"
7
-#include "GibbsSampler.h"
7
+#include "gibbs_sampler/GibbsSampler.h"
8
+#include "gibbs_sampler/DenseGibbsSampler.h"
9
+#include "gibbs_sampler/SparseGibbsSampler.h"
10
+
11
+#include <string>
8 12
 
9 13
 // boost time helpers
10 14
 #include <boost/date_time/posix_time/posix_time.hpp>
11 15
 namespace bpt = boost::posix_time;
12 16
 #define bpt_now() bpt::microsec_clock::local_time()
13 17
 
18
+// forward declarations
19
+class AbstractGapsRunner;
20
+
21
+///////////////////////////// RAII wrapper /////////////////////////////////////
22
+
23
+// This is the class that is exposed to the top-level CoGAPS routine - all 
24
+// aspects of CoGAPS can be managed through this class. The class itself is
25
+// just a lightweight wrapper around an abstract interface, which allows for
26
+// multiple types of GapsRunner to be declared. Which implementation is used
27
+// depends on the parameters passed to the GapsRunner constructor.
14 28
 class GapsRunner
15 29
 {
16 30
 public:
... ...
@@ -18,15 +32,44 @@ public:
18 32
     template <class DataType>
19 33
     GapsRunner(const DataType &data, const GapsParameters &params);
20 34
 
35
+    ~GapsRunner();
36
+
21 37
     template <class DataType>
22 38
     void setUncertainty(const DataType &unc, const GapsParameters &params);
23 39
 
24 40
     GapsResult run();
25 41
 
26 42
 private:
27
-    
28
-    GibbsSampler *mASampler;
29
-    GibbsSampler *mPSampler;
43
+
44
+    AbstractGapsRunner *mRunner;
45
+
46
+    GapsRunner(const GapsRunner &p); // don't allow copies
47
+    GapsRunner& operator=(const GapsRunner &p); // don't allow copies    
48
+};
49
+
50
+///////////////////////// Abstract Interface ///////////////////////////////////
51
+
52
+// This class is the abstract interface that any implementation of GapsRunner
53
+// must satisfy. It provides a factory method that will create the appropriate
54
+// derived class depending on the parameters passed in.
55
+class AbstractGapsRunner
56
+{
57
+public:
58
+
59
+    AbstractGapsRunner(const GapsParameters &params);
60
+    virtual ~AbstractGapsRunner() {}
61
+
62
+    template <class DataType>
63
+    static AbstractGapsRunner* create(const DataType &data, const GapsParameters &params);
64
+
65
+    // can't use template with virtual function
66
+    virtual void setUncertainty(const Matrix &unc, const GapsParameters &params) = 0;
67
+    virtual void setUncertainty(const std::string &unc, const GapsParameters &params) = 0;
68
+
69
+    GapsResult run();
70
+
71
+protected:
72
+
30 73
     GapsStatistics mStatistics;
31 74
 
32 75
     mutable GapsRng mRng;
... ...
@@ -52,40 +95,119 @@ private:
52 95
     char mFixedMatrix;
53 96
         
54 97
     void runOnePhase();
55
-    void updateSampler(unsigned nA, unsigned nP);
56 98
     double estimatedPercentComplete() const;
57 99
     void displayStatus();
58 100
     void createCheckpoint();
101
+
102
+    virtual float chiSq() const = 0;
103
+    virtual float meanChiSq() const = 0;
104
+    virtual unsigned nAtoms(char which) const = 0;
105
+    virtual void setAnnealingTemp(float temp) = 0;
106
+    virtual void updateStatistics() = 0;
107
+    virtual Archive& readSamplers(Archive &ar) = 0;
108
+    virtual Archive& writeSamplers(Archive &ar) = 0;
109
+    virtual void updateSampler(unsigned nA, unsigned nP) = 0;
110
+};
111
+
112
+///////////////////// GapsRunner Implementations ///////////////////////////////
113
+
114
+// This implementation uses a DenseGibbsSampler internally
115
+class DenseGapsRunner : public AbstractGapsRunner
116
+{
117
+public:
118
+
119
+    ~DenseGapsRunner() {}
120
+
121
+    template <class DataType>
122
+    DenseGapsRunner(const DataType &data, const GapsParameters &params);
123
+
124
+    void setUncertainty(const Matrix &unc, const GapsParameters &params);
125
+    void setUncertainty(const std::string &unc, const GapsParameters &params);
126
+
127
+private:
128
+
129
+    DenseGibbsSampler mASampler;
130
+    DenseGibbsSampler mPSampler;
131
+
132
+    float chiSq() const;
133
+    float meanChiSq() const;
134
+    unsigned nAtoms(char which) const;
135
+    void setAnnealingTemp(float temp);
136
+    void updateStatistics();
137
+    Archive& readSamplers(Archive &ar);
138
+    Archive& writeSamplers(Archive &ar);
139
+    void updateSampler(unsigned nA, unsigned nP);
140
+};
141
+
142
+// This implementation uses a SparseGibbsSampler internally
143
+class SparseGapsRunner : public AbstractGapsRunner
144
+{
145
+public:
146
+
147
+    ~SparseGapsRunner() {}
148
+
149
+    template <class DataType>
150
+    SparseGapsRunner(const DataType &data, const GapsParameters &params);
151
+
152
+    void setUncertainty(const Matrix &unc, const GapsParameters &params);
153
+    void setUncertainty(const std::string &unc, const GapsParameters &params);
154
+
155
+private:
156
+
157
+    SparseGibbsSampler mASampler;
158
+    SparseGibbsSampler mPSampler;
159
+
160
+    float chiSq() const;
161
+    float meanChiSq() const;
162
+    unsigned nAtoms(char which) const;
163
+    void setAnnealingTemp(float temp);
164
+    void updateStatistics();
165
+    Archive& readSamplers(Archive &ar);
166
+    Archive& writeSamplers(Archive &ar);
167
+    void updateSampler(unsigned nA, unsigned nP);
59 168
 };
60 169
 
170
+/////////////////////// GapsRunner - templated functions ///////////////////////
171
+
61 172
 template <class DataType>
62 173
 GapsRunner::GapsRunner(const DataType &data, const GapsParameters &params)
63
-    :
64
-mASampler(new DenseGibbsSampler(data, !params.transposeData, params.nPatterns, params.subsetGenes, params.dataIndicesSubset)),
65
-mPSampler(new DenseGibbsSampler(data, params.transposeData, params.nPatterns, params.subsetGenes, params.dataIndicesSubset)),
66
-mStatistics(mPSampler->dataRows(), mPSampler->dataCols(), params.nPatterns),
67
-mCheckpointOutFile(params.checkpointOutFile),
68
-mMaxIterations(params.nIterations),
69
-mMaxThreads(params.mMaxThreads),
70
-mOutputFrequency(params.mOutputFrequency),
71
-mCheckpointInterval(params.mCheckpointInterval),
72
-mNumPatterns(params.nPatterns),
73
-mNumUpdatesA(0),
74
-mNumUpdatesP(0),
75
-mSeed(params.seed),
76
-mPrintMessages(params.printMessages),
77
-mPrintThreadUsage(params.printThreadUsage),
78
-mPhase('C'),
79
-mFixedMatrix(params.whichFixedMatrix)
174
+    : mRunner(AbstractGapsRunner::create(data, params))
175
+{}
176
+
177
+template <class DataType>
178
+void GapsRunner::setUncertainty(const DataType &unc, const GapsParameters &params)
179
+{
180
+    mRunner->setUncertainty(unc, params);
181
+}
182
+
183
+/////////////////// AbstractGapsRunner - templated functions ///////////////////
184
+
185
+template <class DataType>
186
+AbstractGapsRunner* AbstractGapsRunner::create(const DataType &data,
187
+const GapsParameters &params)
80 188
 {
81
-    mASampler->setSparsity(params.alphaA, params.maxGibbsMassA, params.singleCell);
82
-    mPSampler->setSparsity(params.alphaP, params.maxGibbsMassP, params.singleCell);
189
+    if (params.useSparseOptimization)
190
+    {
191
+        return new SparseGapsRunner(data, params);
192
+    }
193
+    return new DenseGapsRunner(data, params);
194
+}
195
+
196
+//////////////////// DenseGapsRunner - templated functions /////////////////////
83 197
 
198
+template <class DataType>
199
+DenseGapsRunner::DenseGapsRunner(const DataType &data,
200
+const GapsParameters &params)
201
+    :
202
+AbstractGapsRunner(params),
203
+mASampler(data, !params.transposeData, !params.subsetGenes, params.alphaA, params.maxGibbsMassA, params),
204
+mPSampler(data, params.transposeData, params.subsetGenes, params.alphaP, params.maxGibbsMassP, params)
205
+{
84 206
     switch (mFixedMatrix)
85 207
     {
86
-        case 'A' : mASampler->setMatrix(params.fixedMatrix); break;
87
-        case 'P' : mPSampler->setMatrix(params.fixedMatrix); break;
88
-        default: break;
208
+        case 'A' : mASampler.setMatrix(params.fixedMatrix); break;
209
+        case 'P' : mPSampler.setMatrix(params.fixedMatrix); break;
210
+        default: break; // 'N' for none
89 211
     }
90 212
 
91 213
     // overwrite with info from checkpoint file
... ...
@@ -93,24 +215,51 @@ mFixedMatrix(params.whichFixedMatrix)
93 215
     {
94 216
         Archive ar(params.checkpointFile, ARCHIVE_READ);
95 217
         ar >> mNumPatterns >> mSeed >> mMaxIterations >> mFixedMatrix >> mPhase
96
-            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng
97
-            >> *mASampler >> *mPSampler;
218
+            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng;
219
+        readSamplers(ar);
98 220
         GapsRng::load(ar);
99 221
     }
100 222
 
101
-    mASampler->sync(mPSampler);
102
-    mPSampler->sync(mASampler);
103
-    mASampler->recalculateAPMatrix();
104
-    mPSampler->recalculateAPMatrix();
223
+    mASampler.sync(mPSampler);
224
+    mPSampler.sync(mASampler);
225
+
226
+    // AP matrix not stored in checkpoint
227
+    if (params.useCheckPoint)
228
+    {
229
+        mASampler.recalculateAPMatrix();
230
+        mPSampler.recalculateAPMatrix();
231
+    }
105 232
 }
106 233
 
234
+//////////////////// SparseGapsRunner - templated functions ////////////////////
235
+
107 236
 template <class DataType>
108
-void GapsRunner::setUncertainty(const DataType &unc, const GapsParameters &params)
237
+SparseGapsRunner::SparseGapsRunner(const DataType &data,
238
+const GapsParameters &params)
239
+    :
240
+AbstractGapsRunner(params),
241
+mASampler(data, !params.transposeData, !params.subsetGenes, params.alphaA, params.maxGibbsMassA, params),
242
+mPSampler(data, params.transposeData, params.subsetGenes, params.alphaP, params.maxGibbsMassP, params)
109 243
 {
110
-    mASampler->setUncertainty(unc, !params.transposeData, params.nPatterns,
111
-        params.subsetGenes, params.dataIndicesSubset);
112
-    mPSampler->setUncertainty(unc, params.transposeData, params.nPatterns,
113
-        params.subsetGenes, params.dataIndicesSubset);
244
+    switch (mFixedMatrix)
245
+    {
246
+        case 'A' : mASampler.setMatrix(params.fixedMatrix); break;
247
+        case 'P' : mPSampler.setMatrix(params.fixedMatrix); break;
248
+        default: break;
249
+    }
250
+
251
+    // overwrite with info from checkpoint file
252
+    if (params.useCheckPoint)
253
+    {
254
+        Archive ar(params.checkpointFile, ARCHIVE_READ);
255
+        ar >> mNumPatterns >> mSeed >> mMaxIterations >> mFixedMatrix >> mPhase
256
+            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng;
257
+        readSamplers(ar);
258
+        GapsRng::load(ar);
259
+    }
260
+
261
+    mASampler.sync(mPSampler);
262
+    mPSampler.sync(mASampler);
114 263
 }
115 264
 
116
-#endif // __COGAPS_GAPS_RUNNER_H__
117 265
\ No newline at end of file
266
+#endif // __COGAPS_GAPS_RUNNER_H__
Browse code

polymorphic structure

Tom Sherman authored on 02/10/2018 20:54:21
Showing 1 changed file
... ...
@@ -1,12 +1,11 @@
1 1
 #ifndef __COGAPS_GAPS_RUNNER_H__
2 2
 #define __COGAPS_GAPS_RUNNER_H__
3 3
 
4
+#include "GapsParameters.h"
4 5
 #include "GapsResult.h"
5 6
 #include "GapsStatistics.h"
6 7
 #include "GibbsSampler.h"
7 8
 
8
-#include "data_structures/Matrix.h"
9
-
10 9
 // boost time helpers
11 10
 #include <boost/date_time/posix_time/posix_time.hpp>
12 11
 namespace bpt = boost::posix_time;
... ...
@@ -14,94 +13,104 @@ namespace bpt = boost::posix_time;
14 13
 
15 14
 class GapsRunner
16 15
 {
16
+public:
17
+
18
+    template <class DataType>
19
+    GapsRunner(const DataType &data, const GapsParameters &params);
20
+
21
+    template <class DataType>
22
+    void setUncertainty(const DataType &unc, const GapsParameters &params);
23
+
24
+    GapsResult run();
25
+
17 26
 private:
18 27
     
19
-    GibbsSampler mASampler;
20
-    GibbsSampler mPSampler;
28
+    GibbsSampler *mASampler;
29
+    GibbsSampler *mPSampler;
21 30
     GapsStatistics mStatistics;
22 31
 
23
-    char mFixedMatrix;
24
-    unsigned mMaxIterations;
25
-    
26
-    unsigned mMaxThreads;
27
-    bool mPrintMessages;
28
-    unsigned mOutputFrequency;
32
+    mutable GapsRng mRng;
33
+
29 34
     std::string mCheckpointOutFile;
30
-    unsigned mCheckpointInterval;
31 35
 
32 36
     bpt::ptime mStartTime;
33
-    char mPhase;
34
-    unsigned mCurrentIteration;
35 37
 
36
-    // only kept since they need to be written to the start of every checkpoint
38
+    unsigned mCurrentIteration;
39
+    unsigned mMaxIterations;
40
+    unsigned mMaxThreads;
41
+    unsigned mOutputFrequency;
42
+    unsigned mCheckpointInterval;
37 43
     unsigned mNumPatterns;
38
-    uint32_t mSeed;
39
-
40 44
     unsigned mNumUpdatesA;
41 45
     unsigned mNumUpdatesP;
46
+    uint32_t mSeed;
42 47
 
43
-    mutable GapsRng mRng;
48
+    bool mPrintMessages;
49
+    bool mPrintThreadUsage;
50
+
51
+    char mPhase;
52
+    char mFixedMatrix;
44 53
         
45 54
     void runOnePhase();
46 55
     void updateSampler(unsigned nA, unsigned nP);
47 56
     double estimatedPercentComplete() const;
48 57
     void displayStatus();
49 58
     void createCheckpoint();
50
-
51
-public:
52
-
53
-    template <class DataType>
54
-    GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns,
55
-        bool partitionRows, const std::vector<unsigned> &indices);
56
-
57
-    template <class DataType>
58
-    void setUncertainty(const DataType &unc, bool transposeData,
59
-        bool partitionRows, const std::vector<unsigned> &indices);
60
-
61
-    void setFixedMatrix(char which, const Matrix &mat);
62
-
63
-    void recordSeed(uint32_t seed);
64
-    uint32_t getSeed() const;
65
-
66
-    void setMaxIterations(unsigned nIterations);
67
-    void setSparsity(float alphaA, float alphaP, float maxA, float maxP,
68
-        bool singleCell);
69
-    
70
-    void setMaxThreads(unsigned nThreads);
71
-    void setPrintMessages(bool print);
72
-    void setOutputFrequency(unsigned n);
73
-    void setCheckpointOutFile(const std::string &outFile);
74
-    void setCheckpointInterval(unsigned interval);
75
-
76
-    GapsResult run(bool printThreads=true);
77
-
78
-    // serialization
79
-    friend Archive& operator>>(Archive &ar, GapsRunner &runner);
80 59
 };
81 60
 
82
-// problem with passing file parser - need to read it twice
83 61
 template <class DataType>
84
-GapsRunner::GapsRunner(const DataType &data, bool transposeData,
85
-unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
62
+GapsRunner::GapsRunner(const DataType &data, const GapsParameters &params)
86 63
     :
87
-mASampler(data, !transposeData, nPatterns, !partitionRows, indices),
88
-mPSampler(data, transposeData, nPatterns, partitionRows, indices),
89
-mStatistics(mPSampler.dataRows(), mPSampler.dataCols(), nPatterns),
90
-mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
91
-mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
92
-mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
93
-mNumPatterns(nPatterns), mSeed(0), mNumUpdatesA(0), mNumUpdatesP(0)
64
+mASampler(new DenseGibbsSampler(data, !params.transposeData, params.nPatterns, params.subsetGenes, params.dataIndicesSubset)),
65
+mPSampler(new DenseGibbsSampler(data, params.transposeData, params.nPatterns, params.subsetGenes, params.dataIndicesSubset)),
66
+mStatistics(mPSampler->dataRows(), mPSampler->dataCols(), params.nPatterns),
67
+mCheckpointOutFile(params.checkpointOutFile),
68
+mMaxIterations(params.nIterations),
69
+mMaxThreads(params.mMaxThreads),
70
+mOutputFrequency(params.mOutputFrequency),
71
+mCheckpointInterval(params.mCheckpointInterval),
72
+mNumPatterns(params.nPatterns),
73
+mNumUpdatesA(0),
74
+mNumUpdatesP(0),
75
+mSeed(params.seed),
76
+mPrintMessages(params.printMessages),
77
+mPrintThreadUsage(params.printThreadUsage),
78
+mPhase('C'),
79
+mFixedMatrix(params.whichFixedMatrix)
94 80
 {
95
-    mASampler.sync(mPSampler);
96
-    mPSampler.sync(mASampler);
81
+    mASampler->setSparsity(params.alphaA, params.maxGibbsMassA, params.singleCell);
82
+    mPSampler->setSparsity(params.alphaP, params.maxGibbsMassP, params.singleCell);
83
+
84
+    switch (mFixedMatrix)
85
+    {
86
+        case 'A' : mASampler->setMatrix(params.fixedMatrix); break;
87
+        case 'P' : mPSampler->setMatrix(params.fixedMatrix); break;
88
+        default: break;
89
+    }
90
+
91
+    // overwrite with info from checkpoint file
92
+    if (params.useCheckPoint)
93
+    {
94
+        Archive ar(params.checkpointFile, ARCHIVE_READ);
95
+        ar >> mNumPatterns >> mSeed >> mMaxIterations >> mFixedMatrix >> mPhase
96
+            >> mCurrentIteration >> mNumUpdatesA >> mNumUpdatesP >> mRng
97
+            >> *mASampler >> *mPSampler;
98
+        GapsRng::load(ar);
99
+    }
100
+
101
+    mASampler->sync(mPSampler);
102
+    mPSampler->sync(mASampler);
103
+    mASampler->recalculateAPMatrix();
104
+    mPSampler->recalculateAPMatrix();
97 105
 }
98 106
 
99 107
 template <class DataType>
100
-void GapsRunner::setUncertainty(const DataType &unc, bool transposeData,
101
-bool partitionRows, const std::vector<unsigned> &indices)
108
+void GapsRunner::setUncertainty(const DataType &unc, const GapsParameters &params)
102 109
 {
103
-    mASampler.setUncertainty(unc, !transposeData, !partitionRows, indices);
104
-    mPSampler.setUncertainty(unc, transposeData, partitionRows, indices);
110
+    mASampler->setUncertainty(unc, !params.transposeData, params.nPatterns,
111
+        params.subsetGenes, params.dataIndicesSubset);
112
+    mPSampler->setUncertainty(unc, params.transposeData, params.nPatterns,
113
+        params.subsetGenes, params.dataIndicesSubset);
105 114
 }
106 115
 
107 116
 #endif // __COGAPS_GAPS_RUNNER_H__
108 117
\ No newline at end of file
Browse code

no longer crashing with checkpoints; still not consistent

Tom Sherman authored on 01/10/2018 17:41:04
Showing 1 changed file
... ...
@@ -84,7 +84,7 @@ template <class DataType>
84 84
 GapsRunner::GapsRunner(const DataType &data, bool transposeData,
85 85
 unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
86 86
     :
87
-mASampler(data, !transposeData, nPatterns,!partitionRows, indices),
87
+mASampler(data, !transposeData, nPatterns, !partitionRows, indices),
88 88
 mPSampler(data, transposeData, nPatterns, partitionRows, indices),
89 89
 mStatistics(mPSampler.dataRows(), mPSampler.dataCols(), nPatterns),
90 90
 mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
Browse code

simplified gibbs calculation

Tom Sherman authored on 26/09/2018 23:09:37
Showing 1 changed file
... ...
@@ -64,8 +64,8 @@ public:
64 64
     uint32_t getSeed() const;
65 65
 
66 66
     void setMaxIterations(unsigned nIterations);
67
-    void setSparsity(float alphaA, float alphaP, bool singleCell);
68
-    void setMaxGibbsMass(float maxA, float maxP);
67
+    void setSparsity(float alphaA, float alphaP, float maxA, float maxP,
68
+        bool singleCell);
69 69
     
70 70
     void setMaxThreads(unsigned nThreads);
71 71
     void setPrintMessages(bool print);
Browse code

more work on consistency of queue

Tom Sherman authored on 09/09/2018 18:52:57
Showing 1 changed file
... ...
@@ -52,8 +52,7 @@ public:
52 52
 
53 53
     template <class DataType>
54 54
     GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns,
55
-        bool partitionRows, const std::vector<unsigned> &indices,
56
-        uint32_t seed);
55
+        bool partitionRows, const std::vector<unsigned> &indices);
57 56
 
58 57
     template <class DataType>
59 58
     void setUncertainty(const DataType &unc, bool transposeData,
... ...
@@ -61,6 +60,7 @@ public:
61 60
 
62 61
     void setFixedMatrix(char which, const Matrix &mat);
63 62
 
63
+    void recordSeed(uint32_t seed);
64 64
     uint32_t getSeed() const;
65 65
 
66 66
     void setMaxIterations(unsigned nIterations);
... ...
@@ -82,8 +82,7 @@ public:
82 82
 // problem with passing file parser - need to read it twice
83 83
 template <class DataType>
84 84
 GapsRunner::GapsRunner(const DataType &data, bool transposeData,
85
-unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices,
86
-uint32_t seed)
85
+unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
87 86
     :
88 87
 mASampler(data, !transposeData, nPatterns,!partitionRows, indices),
89 88
 mPSampler(data, transposeData, nPatterns, partitionRows, indices),
... ...
@@ -91,14 +90,10 @@ mStatistics(mPSampler.dataRows(), mPSampler.dataCols(), nPatterns),
91 90
 mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
92 91
 mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
93 92
 mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
94
-mNumPatterns(nPatterns), mSeed(seed), mNumUpdatesA(0), mNumUpdatesP(0),
95
-mRng(seed)
93
+mNumPatterns(nPatterns), mSeed(0), mNumUpdatesA(0), mNumUpdatesP(0)
96 94
 {
97 95
     mASampler.sync(mPSampler);
98 96
     mPSampler.sync(mASampler);
99
-
100
-    mASampler.setSeed(mRng.uniform64());
101
-    mPSampler.setSeed(mRng.uniform64());
102 97
 }
103 98
 
104 99
 template <class DataType>
Browse code

basic framework in place for full async

Tom Sherman authored on 29/08/2018 21:41:05
Showing 1 changed file
... ...
@@ -52,7 +52,8 @@ public:
52 52
 
53 53
     template <class DataType>
54 54
     GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns,
55
-        bool partitionRows, const std::vector<unsigned> &indices);
55
+        bool partitionRows, const std::vector<unsigned> &indices,
56
+        uint32_t seed);
56 57
 
57 58
     template <class DataType>
58 59
     void setUncertainty(const DataType &unc, bool transposeData,
... ...
@@ -60,7 +61,6 @@ public:
60 61
 
61 62
     void setFixedMatrix(char which, const Matrix &mat);
62 63
 
63
-    void recordSeed(uint32_t seed);
64 64
     uint32_t getSeed() const;
65 65
 
66 66
     void setMaxIterations(unsigned nIterations);
... ...
@@ -82,7 +82,8 @@ public:
82 82
 // problem with passing file parser - need to read it twice
83 83
 template <class DataType>
84 84
 GapsRunner::GapsRunner(const DataType &data, bool transposeData,
85
-unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
85
+unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices,
86
+uint32_t seed)
86 87
     :
87 88
 mASampler(data, !transposeData, nPatterns,!partitionRows, indices),
88 89
 mPSampler(data, transposeData, nPatterns, partitionRows, indices),
... ...
@@ -90,10 +91,14 @@ mStatistics(mPSampler.dataRows(), mPSampler.dataCols(), nPatterns),
90 91
 mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
91 92
 mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
92 93
 mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
93
-mNumPatterns(nPatterns), mSeed(0), mNumUpdatesA(0), mNumUpdatesP(0)
94
+mNumPatterns(nPatterns), mSeed(seed), mNumUpdatesA(0), mNumUpdatesP(0),
95
+mRng(seed)
94 96
 {
95 97
     mASampler.sync(mPSampler);
96 98
     mPSampler.sync(mASampler);
99
+
100
+    mASampler.setSeed(mRng.uniform64());
101
+    mPSampler.setSeed(mRng.uniform64());
97 102
 }
98 103
 
99 104
 template <class DataType>
Browse code

Single Gibbs Sampler - consistent with old version

Tom Sherman authored on 29/08/2018 15:12:10
Showing 1 changed file
... ...
@@ -16,8 +16,8 @@ class GapsRunner
16 16
 {
17 17
 private:
18 18
     
19
-    AmplitudeGibbsSampler mASampler;
20
-    PatternGibbsSampler mPSampler;
19
+    GibbsSampler mASampler;
20
+    GibbsSampler mPSampler;
21 21
     GapsStatistics mStatistics;
22 22
 
23 23
     char mFixedMatrix;
... ...
@@ -84,9 +84,9 @@ template <class DataType>
84 84
 GapsRunner::GapsRunner(const DataType &data, bool transposeData,
85 85
 unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
86 86
     :
87
-mASampler(data, transposeData, nPatterns, partitionRows, indices),
87
+mASampler(data, !transposeData, nPatterns,!partitionRows, indices),
88 88
 mPSampler(data, transposeData, nPatterns, partitionRows, indices),
89
-mStatistics(mASampler.dataRows(), mASampler.dataCols(), nPatterns),
89
+mStatistics(mPSampler.dataRows(), mPSampler.dataCols(), nPatterns),
90 90
 mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
91 91
 mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
92 92
 mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
... ...
@@ -100,7 +100,7 @@ template <class DataType>
100 100
 void GapsRunner::setUncertainty(const DataType &unc, bool transposeData,
101 101
 bool partitionRows, const std::vector<unsigned> &indices)
102 102
 {
103
-    mASampler.setUncertainty(unc, transposeData, partitionRows, indices);
103
+    mASampler.setUncertainty(unc, !transposeData, !partitionRows, indices);
104 104
     mPSampler.setUncertainty(unc, transposeData, partitionRows, indices);
105 105
 }
106 106
 
Browse code

started making changes

Tom Sherman authored on 28/08/2018 19:53:08
Showing 1 changed file
... ...
@@ -1,6 +1,7 @@
1 1
 #ifndef __COGAPS_GAPS_RUNNER_H__
2 2
 #define __COGAPS_GAPS_RUNNER_H__
3 3
 
4
+#include "GapsResult.h"
4 5
 #include "GapsStatistics.h"
5 6
 #include "GibbsSampler.h"
6 7
 
... ...
@@ -11,66 +12,6 @@
11 12
 namespace bpt = boost::posix_time;
12 13
 #define bpt_now() bpt::microsec_clock::local_time()
13 14
 
14
-struct GapsResult
15
-{
16
-    ColMatrix Amean;
17
-    ColMatrix Asd;
18
-    RowMatrix Pmean;
19
-    RowMatrix Psd;
20
-    
21
-    float meanChiSq;
22
-    uint32_t seed;
23
-
24
-    GapsResult(const GapsStatistics &stat) :
25
-        Amean(stat.Amean()), Asd(stat.Asd()), Pmean(stat.Pmean()),
26
-        Psd(stat.Psd()), meanChiSq(0.f), seed(0)
27
-    {}
28
-
29
-    void writeToFile(const std::string &fullPath)
30
-    {
31
-        std::size_t pos = fullPath.find_last_of('.');
32
-        std::string base = fullPath.substr(0, pos);
33
-
34
-        switch (FileParser::fileType(fullPath))
35
-        {
36
-            case GAPS_CSV: return writeCsv(base);
37
-            case GAPS_TSV: return writeTsv(base);
38
-            case GAPS_GCT: return writeGct(base);
39
-            default: GAPS_ERROR("Invalid file type\n");
40
-        }
41
-    }
42
-
43
-    void writeCsv(const std::string &path)
44
-    {
45
-        unsigned nPatterns = Amean.nCol();
46
-        std::string label("_" + gaps::to_string(nPatterns) + "_");
47
-        FileParser::writeToCsv(path + label + "Amean.csv", Amean);
48
-        FileParser::writeToCsv(path + label + "Pmean.csv", Pmean);
49
-        FileParser::writeToCsv(path + label + "Asd.csv", Asd);
50
-        FileParser::writeToCsv(path + label + "Psd.csv", Psd);
51
-    }
52
-
53
-    void writeTsv(const std::string &path)
54
-    {
55
-        unsigned nPatterns = Amean.nCol();
56
-        std::string label("_" + gaps::to_string(nPatterns) + "_");
57
-        FileParser::writeToCsv(path + label + "Amean.tsv", Amean);
58
-        FileParser::writeToCsv(path + label + "Pmean.tsv", Pmean);
59
-        FileParser::writeToCsv(path + label + "Asd.tsv", Asd);
60
-        FileParser::writeToCsv(path + label + "Psd.tsv", Psd);
61
-    }
62
-
63
-    void writeGct(const std::string &path)
64
-    {
65
-        unsigned nPatterns = Amean.nCol();
66
-        std::string label("_" + gaps::to_string(nPatterns) + "_");
67
-        FileParser::writeToCsv(path + label + "Amean.gct", Amean);
68
-        FileParser::writeToCsv(path + label + "Pmean.gct", Pmean);
69
-        FileParser::writeToCsv(path + label + "Asd.gct", Asd);
70
-        FileParser::writeToCsv(path + label + "Psd.gct", Psd);
71
-    }
72
-};
73
-
74 15
 class GapsRunner
75 16
 {
76 17
 private:
Browse code

fixed clang compiler error (hopefully)

Tom Sherman authored on 24/08/2018 15:53:56
Showing 1 changed file
... ...
@@ -36,6 +36,7 @@ struct GapsResult
36 36
             case GAPS_CSV: return writeCsv(base);
37 37
             case GAPS_TSV: return writeTsv(base);
38 38
             case GAPS_GCT: return writeGct(base);
39
+            default: GAPS_ERROR("Invalid file type\n");
39 40
         }
40 41
     }
41 42
 
Browse code

lazily bundled rng

Tom Sherman authored on 15/08/2018 16:25:00
Showing 1 changed file
... ...
@@ -97,6 +97,8 @@ private:
97 97
 
98 98
     unsigned mNumUpdatesA;
99 99
     unsigned mNumUpdatesP;
100
+
101
+    mutable GapsRng mRng;
100 102
         
101 103
     void runOnePhase();
102 104
     void updateSampler(unsigned nA, unsigned nP);
Browse code

added more features to GWCoGAPS and scCoGAPS

Tom Sherman authored on 08/08/2018 22:34:56
Showing 1 changed file
... ...
@@ -26,9 +26,48 @@ struct GapsResult
26 26
         Psd(stat.Psd()), meanChiSq(0.f), seed(0)
27 27
     {}
28 28
 
29
-    void writeCsv(const std::string &path);
30
-    void writeTsv(const std::string &path);
31
-    void writeGct(const std::string &path);
29
+    void writeToFile(const std::string &fullPath)
30
+    {
31
+        std::size_t pos = fullPath.find_last_of('.');
32
+        std::string base = fullPath.substr(0, pos);
33
+
34
+        switch (FileParser::fileType(fullPath))
35
+        {
36
+            case GAPS_CSV: return writeCsv(base);
37
+            case GAPS_TSV: return writeTsv(base);
38
+            case GAPS_GCT: return writeGct(base);
39
+        }
40
+    }
41
+
42
+    void writeCsv(const std::string &path)
43
+    {
44
+        unsigned nPatterns = Amean.nCol();
45
+        std::string label("_" + gaps::to_string(nPatterns) + "_");
46
+        FileParser::writeToCsv(path + label + "Amean.csv", Amean);
47
+        FileParser::writeToCsv(path + label + "Pmean.csv", Pmean);
48
+        FileParser::writeToCsv(path + label + "Asd.csv", Asd);
49
+        FileParser::writeToCsv(path + label + "Psd.csv", Psd);
50
+    }
51
+
52
+    void writeTsv(const std::string &path)
53
+    {
54
+        unsigned nPatterns = Amean.nCol();
55
+        std::string label("_" + gaps::to_string(nPatterns) + "_");
56
+        FileParser::writeToCsv(path + label + "Amean.tsv", Amean);
57
+        FileParser::writeToCsv(path + label + "Pmean.tsv", Pmean);
58
+        FileParser::writeToCsv(path + label + "Asd.tsv", Asd);
59
+        FileParser::writeToCsv(path + label + "Psd.tsv", Psd);
60
+    }
61
+
62
+    void writeGct(const std::string &path)
63
+    {
64
+        unsigned nPatterns = Amean.nCol();
65
+        std::string label("_" + gaps::to_string(nPatterns) + "_");
66
+        FileParser::writeToCsv(path + label + "Amean.gct", Amean);
67
+        FileParser::writeToCsv(path + label + "Pmean.gct", Pmean);
68
+        FileParser::writeToCsv(path + label + "Asd.gct", Asd);
69
+        FileParser::writeToCsv(path + label + "Psd.gct", Psd);
70
+    }
32 71
 };
33 72
 
34 73
 class GapsRunner
Browse code

clean up output

Tom Sherman authored on 02/08/2018 16:52:31
Showing 1 changed file
... ...
@@ -90,7 +90,7 @@ public:
90 90
     void setCheckpointOutFile(const std::string &outFile);
91 91
     void setCheckpointInterval(unsigned interval);
92 92
 
93
-    GapsResult run();
93
+    GapsResult run(bool printThreads=true);
94 94
 
95 95
     // serialization
96 96
     friend Archive& operator>>(Archive &ar, GapsRunner &runner);
Browse code

vignette coming together

Tom Sherman authored on 01/08/2018 20:53:40
Showing 1 changed file
... ...
@@ -61,6 +61,7 @@ private:
61 61
         
62 62
     void runOnePhase();
63 63
     void updateSampler(unsigned nA, unsigned nP);
64
+    double estimatedPercentComplete() const;
64 65
     void displayStatus();
65 66
     void createCheckpoint();
66 67
 
Browse code

moved file writers to FileParser

Tom Sherman authored on 30/07/2018 02:34:58
Showing 1 changed file
... ...
@@ -68,8 +68,7 @@ public:
68 68
 
69 69
     template <class DataType>
70 70
     GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns,
71
-        uint32_t seed, bool partitionRows,
72
-        const std::vector<unsigned> &indices);
71
+        bool partitionRows, const std::vector<unsigned> &indices);
73 72
 
74 73
     template <class DataType>
75 74
     void setUncertainty(const DataType &unc, bool transposeData,
... ...
@@ -77,6 +76,9 @@ public:
77 76
 
78 77
     void setFixedMatrix(char which, const Matrix &mat);
79 78
 
79
+    void recordSeed(uint32_t seed);
80
+    uint32_t getSeed() const;
81
+
80 82
     void setMaxIterations(unsigned nIterations);
81 83
     void setSparsity(float alphaA, float alphaP, bool singleCell);
82 84
     void setMaxGibbsMass(float maxA, float maxP);
... ...
@@ -95,7 +97,7 @@ public:
95 97
 
96 98
 // problem with passing file parser - need to read it twice
97 99
 template <class DataType>
98
-GapsRunner::GapsRunner(const DataType &data, bool transposeData, uint32_t seed,
100
+GapsRunner::GapsRunner(const DataType &data, bool transposeData,
99 101
 unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
100 102
     :
101 103
 mASampler(data, transposeData, nPatterns, partitionRows, indices),
... ...
@@ -104,7 +106,7 @@ mStatistics(mASampler.dataRows(), mASampler.dataCols(), nPatterns),
104 106
 mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
105 107
 mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
106 108
 mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
107
-mNumPatterns(nPatterns), mSeed(seed), mNumUpdatesA(0), mNumUpdatesP(0)
109
+mNumPatterns(nPatterns), mSeed(0), mNumUpdatesA(0), mNumUpdatesP(0)
108 110
 {
109 111
     mASampler.sync(mPSampler);
110 112
     mPSampler.sync(mASampler);
Browse code

compiling

Tom Sherman authored on 26/07/2018 00:41:45
Showing 1 changed file
... ...
@@ -11,6 +11,26 @@
11 11
 namespace bpt = boost::posix_time;
12 12
 #define bpt_now() bpt::microsec_clock::local_time()
13 13
 
14
+struct GapsResult
15
+{
16
+    ColMatrix Amean;
17
+    ColMatrix Asd;
18
+    RowMatrix Pmean;
19
+    RowMatrix Psd;
20
+    
21
+    float meanChiSq;
22
+    uint32_t seed;
23
+
24
+    GapsResult(const GapsStatistics &stat) :
25
+        Amean(stat.Amean()), Asd(stat.Asd()), Pmean(stat.Pmean()),
26
+        Psd(stat.Psd()), meanChiSq(0.f), seed(0)
27
+    {}
28
+
29
+    void writeCsv(const std::string &path);
30
+    void writeTsv(const std::string &path);
31
+    void writeGct(const std::string &path);
32
+};
33
+
14 34
 class GapsRunner
15 35
 {
16 36
 private:
... ...
@@ -19,96 +39,77 @@ private:
19 39
     PatternGibbsSampler mPSampler;
20 40
     GapsStatistics mStatistics;
21 41
 
42
+    char mFixedMatrix;
43
+    unsigned mMaxIterations;
44
+    
45
+    unsigned mMaxThreads;
22 46
     bool mPrintMessages;
23 47
     unsigned mOutputFrequency;
24
-    char mFixedMatrix;
25
-    bool mSamplePhase;
48
+    std::string mCheckpointOutFile;
49
+    unsigned mCheckpointInterval;
26 50
 
27
-    unsigned mNumUpdatesA;
28
-    unsigned mNumUpdatesP;
29
-        
30 51
     bpt::ptime mStartTime;
52
+    char mPhase;
53
+    unsigned mCurrentIteration;
31 54
 
32
-    void updateSampler(unsigned nA, unsigned nP, unsigned nCores);
33
-    void displayStatus(unsigned current, unsigned total);
55
+    // only kept since they need to be written to the start of every checkpoint
56
+    unsigned mNumPatterns;
57
+    uint32_t mSeed;
34 58
 
35
-    double estimatePercentComplete();
59
+    unsigned mNumUpdatesA;
60
+    unsigned mNumUpdatesP;
61
+        
62
+    void runOnePhase();
63
+    void updateSampler(unsigned nA, unsigned nP);
64
+    void displayStatus();
65
+    void createCheckpoint();
36 66
 
37 67
 public:
38 68
 
39 69
     template <class DataType>
40
-    GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns);
41
-
42
-    template <class DataType>
43
-    GapsRunner(const DataType &data, bool transposeData, bool partitionRows,
44
-        const std::vector<unsigned> &indices, unsigned nPatterns);
45
-
46
-    template <class DataType>
47
-    void setUncertainty(const DataType &unc, bool transposeData);
70
+    GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns,
71
+        uint32_t seed, bool partitionRows,
72
+        const std::vector<unsigned> &indices);
48 73
 
49 74
     template <class DataType>
50 75
     void setUncertainty(const DataType &unc, bool transposeData,
51 76
         bool partitionRows, const std::vector<unsigned> &indices);
52 77
 
53
-    void printMessages(bool print);
54
-    void setOutputFrequency(unsigned n);
78
+    void setFixedMatrix(char which, const Matrix &mat);
79
+
80
+    void setMaxIterations(unsigned nIterations);
55 81
     void setSparsity(float alphaA, float alphaP, bool singleCell);
56 82
     void setMaxGibbsMass(float maxA, float maxP);
83
+    
84
+    void setMaxThreads(unsigned nThreads);
85
+    void setPrintMessages(bool print);
86
+    void setOutputFrequency(unsigned n);
87
+    void setCheckpointOutFile(const std::string &outFile);
88
+    void setCheckpointInterval(unsigned interval);
57 89
 
58
-    void setFixedMatrix(char which, const RowMatrix &mat);
59
-
60
-    void startSampling();
61
-
62
-    void run(unsigned nIter, unsigned nCores);
63
-
64
-    unsigned nRow() const;
65
-    unsigned nCol() const;
66
-
67
-    ColMatrix Amean() const;
68
-    RowMatrix Pmean() const;
69
-    ColMatrix Asd() const;
70
-    RowMatrix Psd() const;
71
-    float meanChiSq() const;
72
-
73
-    void startClock();
90
+    GapsResult run();
74 91
 
75 92
     // serialization
76
-    friend Archive& operator<<(Archive &ar, GapsRunner &runner);
77 93
     friend Archive& operator>>(Archive &ar, GapsRunner &runner);
78 94
 };
79 95
 
96
+// problem with passing file parser - need to read it twice
80 97
 template <class DataType>
81
-GapsRunner::GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns)
82
-    :
83
-mASampler(data, transposeData, nPatterns),
84
-mPSampler(data, transposeData, nPatterns),
85
-mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
86
-mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
87
-{
88
-    mASampler.sync(mPSampler);
89
-    mPSampler.sync(mASampler);
90
-}
91
-
92
-template <class DataType>
93
-GapsRunner::GapsRunner(const DataType &data, bool transposeData,
94
-bool partitionRows, const std::vector<unsigned> &indices, unsigned nPatterns)
98
+GapsRunner::GapsRunner(const DataType &data, bool transposeData, uint32_t seed,
99
+unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
95 100
     :
96 101
 mASampler(data, transposeData, nPatterns, partitionRows, indices),
97 102
 mPSampler(data, transposeData, nPatterns, partitionRows, indices),
98
-mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
99
-mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
103
+mStatistics(mASampler.dataRows(), mASampler.dataCols(), nPatterns),
104
+mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
105
+mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
106
+mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
107
+mNumPatterns(nPatterns), mSeed(seed), mNumUpdatesA(0), mNumUpdatesP(0)
100 108
 {
101 109
     mASampler.sync(mPSampler);
102 110
     mPSampler.sync(mASampler);
103 111
 }
104 112
 
105
-template <class DataType>
106
-void GapsRunner::setUncertainty(const DataType &unc, bool transposeData)
107
-{
108
-    mASampler.setUncertainty(unc, transposeData);
109
-    mPSampler.setUncertainty(unc, transposeData);
110
-}
111
-
112 113
 template <class DataType>
113 114
 void GapsRunner::setUncertainty(const DataType &unc, bool transposeData,
114 115
 bool partitionRows, const std::vector<unsigned> &indices)
Browse code

clean compiler errors

Tom Sherman authored on 25/07/2018 18:47:20
Showing 1 changed file
... ...
@@ -44,7 +44,11 @@ public:
44 44
         const std::vector<unsigned> &indices, unsigned nPatterns);
45 45
 
46 46
     template <class DataType>
47
-    void setUncertainty(const DataType &unc);
47
+    void setUncertainty(const DataType &unc, bool transposeData);
48
+
49
+    template <class DataType>
50
+    void setUncertainty(const DataType &unc, bool transposeData,
51
+        bool partitionRows, const std::vector<unsigned> &indices);
48 52
 
49 53
     void printMessages(bool print);
50 54
     void setOutputFrequency(unsigned n);
... ...
@@ -89,8 +93,8 @@ template <class DataType>
89 93
 GapsRunner::GapsRunner(const DataType &data, bool transposeData,
90 94
 bool partitionRows, const std::vector<unsigned> &indices, unsigned nPatterns)
91 95
     :
92
-mASampler(data, transposeData, partitionRows, indices, nPatterns),
93
-mPSampler(data, transposeData, partitionRows, indices, nPatterns),
96
+mASampler(data, transposeData, nPatterns, partitionRows, indices),
97
+mPSampler(data, transposeData, nPatterns, partitionRows, indices),
94 98
 mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
95 99
 mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
96 100
 {
Browse code

matrix CRTP

Tom Sherman authored on 23/07/2018 20:21:55
Showing 1 changed file
... ...
@@ -37,7 +37,11 @@ private:
37 37
 public:
38 38
 
39 39
     template <class DataType>
40
-    GapsRunner(const DataType &data, unsigned nPatterns);
40
+    GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns);
41
+
42
+    template <class DataType>
43
+    GapsRunner(const DataType &data, bool transposeData, bool partitionRows,
44
+        const std::vector<unsigned> &indices, unsigned nPatterns);
41 45
 
42 46
     template <class DataType>
43 47
     void setUncertainty(const DataType &unc);
... ...
@@ -70,9 +74,10 @@ public:
70 74
 };
71 75
 
72 76
 template <class DataType>
73
-GapsRunner::GapsRunner(const DataType &data, unsigned nPatterns)
77
+GapsRunner::GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns)
74 78
     :
75
-mASampler(data, nPatterns), mPSampler(data, nPatterns),
79
+mASampler(data, transposeData, nPatterns),
80
+mPSampler(data, transposeData, nPatterns),
76 81
 mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
77 82
 mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
78 83
 {
... ...
@@ -81,10 +86,31 @@ mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
81 86
 }
82 87
 
83 88
 template <class DataType>
84
-void GapsRunner::setUncertainty(const DataType &unc)
89
+GapsRunner::GapsRunner(const DataType &data, bool transposeData,
90
+bool partitionRows, const std::vector<unsigned> &indices, unsigned nPatterns)
91
+    :
92
+mASampler(data, transposeData, partitionRows, indices, nPatterns),
93
+mPSampler(data, transposeData, partitionRows, indices, nPatterns),
94
+mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
95
+mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
96
+{
97
+    mASampler.sync(mPSampler);
98
+    mPSampler.sync(mASampler);
99
+}
100
+
101
+template <class DataType>
102
+void GapsRunner::setUncertainty(const DataType &unc, bool transposeData)
103
+{
104
+    mASampler.setUncertainty(unc, transposeData);
105
+    mPSampler.setUncertainty(unc, transposeData);
106
+}
107
+
108
+template <class DataType>
109
+void GapsRunner::setUncertainty(const DataType &unc, bool transposeData,
110
+bool partitionRows, const std::vector<unsigned> &indices)
85 111
 {
86
-    mASampler.setUncertainty(unc);
87
-    mPSampler.setUncertainty(unc);
112
+    mASampler.setUncertainty(unc, transposeData, partitionRows, indices);
113
+    mPSampler.setUncertainty(unc, transposeData, partitionRows, indices);
88 114
 }
89 <