Browse code

basic framework in place for full async

Tom Sherman authored on 29/08/2018 21:41:05
Showing22 changed files

... ...
@@ -93,40 +93,40 @@ Atom* AtomicDomain::front()
93 93
     return &(mAtoms.front());
94 94
 }
95 95
 
96
-Atom* AtomicDomain::randomAtom()
96
+Atom* AtomicDomain::randomAtom(GapsRng *rng)
97 97
 {
98 98
     GAPS_ASSERT(size() > 0);
99 99
     GAPS_ASSERT(isSorted(mAtoms));
100 100
 
101
-    unsigned index = mRng.uniform32(0, mAtoms.size() - 1);
101
+    unsigned index = rng->uniform32(0, mAtoms.size() - 1);
102 102
     return &(mAtoms[index]);
103 103
 }
104 104
 
105
-AtomNeighborhood AtomicDomain::randomAtomWithNeighbors()
105
+AtomNeighborhood AtomicDomain::randomAtomWithNeighbors(GapsRng *rng)
106 106
 {
107 107
     GAPS_ASSERT(size() > 0);
108 108
 
109
-    unsigned index = mRng.uniform32(0, mAtoms.size() - 1);
109
+    unsigned index = rng->uniform32(0, mAtoms.size() - 1);
110 110
     Atom* left = (index == 0) ? NULL : &(mAtoms[index - 1]);
111 111
     Atom* right = (index == mAtoms.size() - 1) ? NULL : &(mAtoms[index + 1]);
112 112
     return AtomNeighborhood(left, &(mAtoms[index]), right);
113 113
 }
114 114
 
115
-AtomNeighborhood AtomicDomain::randomAtomWithRightNeighbor()
115
+AtomNeighborhood AtomicDomain::randomAtomWithRightNeighbor(GapsRng *rng)
116 116
 {
117 117
     GAPS_ASSERT(size() > 0);
118 118
 
119
-    unsigned index = mRng.uniform32(0, mAtoms.size() - 1);
119
+    unsigned index = rng->uniform32(0, mAtoms.size() - 1);
120 120
     Atom* right = (index == mAtoms.size() - 1) ? NULL : &(mAtoms[index + 1]);
121 121
     return AtomNeighborhood(NULL, &(mAtoms[index]), right);
122 122
 }
123 123
 
124
-uint64_t AtomicDomain::randomFreePosition() const
124
+uint64_t AtomicDomain::randomFreePosition(GapsRng *rng) const
125 125
 {
126
-    uint64_t pos = mRng.uniform64(0, mDomainLength);
126
+    uint64_t pos = rng->uniform64(0, mDomainLength);
127 127
     while (vecContains(mAtoms, pos))
128 128
     {
129
-        pos = mRng.uniform64(0, mDomainLength);
129
+        pos = rng->uniform64(0, mDomainLength);
130 130
     } 
131 131
     return pos;
132 132
 }
... ...
@@ -155,7 +155,7 @@ void AtomicDomain::insert(uint64_t pos, float mass)
155 155
 
156 156
 Archive& operator<<(Archive &ar, AtomicDomain &domain)
157 157
 {
158
-    ar << domain.mDomainLength << domain.mRng << domain.mAtoms.size();
158
+    ar << domain.mDomainLength << domain.mAtoms.size();
159 159
     
160 160
     for (unsigned i = 0; i < domain.mAtoms.size(); ++i)
161 161
     {
... ...
@@ -168,7 +168,7 @@ Archive& operator>>(Archive &ar, AtomicDomain &domain)
168 168
 {
169 169
     Atom temp;
170 170
     uint64_t size = 0;
171
-    ar >> domain.mDomainLength >> domain.mRng >> size;
171
+    ar >> domain.mDomainLength >> size;
172 172
     for (unsigned i = 0; i < size; ++i)
173 173
     {
174 174
         ar >> temp;
... ...
@@ -40,11 +40,11 @@ public:
40 40
 
41 41
     // access atoms
42 42
     Atom* front();
43
-    Atom* randomAtom();
44
-    AtomNeighborhood randomAtomWithNeighbors();
45
-    AtomNeighborhood randomAtomWithRightNeighbor();
43
+    Atom* randomAtom(GapsRng *rng);
44
+    AtomNeighborhood randomAtomWithNeighbors(GapsRng *rng);
45
+    AtomNeighborhood randomAtomWithRightNeighbor(GapsRng *rng);
46 46
 
47
-    uint64_t randomFreePosition() const;
47
+    uint64_t randomFreePosition(GapsRng *rng) const;
48 48
     uint64_t size() const;
49 49
 
50 50
     void erase(uint64_t pos);
... ...
@@ -63,9 +63,6 @@ private:
63 63
 
64 64
     // domain storage, sorted vector
65 65
     std::vector<Atom> mAtoms;
66
-
67
-    // random number generator
68
-    mutable GapsRng mRng;
69 66
 };
70 67
 
71 68
 #endif
72 69
\ No newline at end of file
... ...
@@ -62,7 +62,6 @@ unsigned getNumPatterns(const Rcpp::List &allParams)
62 62
     {
63 63
         std::string file(Rcpp::as<std::string>(allParams["checkpointInFile"]));
64 64
         Archive ar(file, ARCHIVE_READ);
65
-        GapsRng::load(ar);
66 65
         ar >> nPatterns;
67 66
         ar.close();
68 67
     }
... ...
@@ -100,7 +99,6 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
100 99
 {
101 100
     // calculate essential parameters needed for constructing GapsRunner
102 101
     const Rcpp::S4 &gapsParams(allParams["gaps"]);
103
-    GapsRng::setSeed(gapsParams.slot("seed"));
104 102
     unsigned nPatterns = getNumPatterns(allParams); // TODO clarify this sets the checkpoint seed as well
105 103
     bool printThreads = !processDistributedParameters(allParams).first;
106 104
     bool partitionRows = processDistributedParameters(allParams).second;
... ...
@@ -108,7 +106,7 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
108 106
 
109 107
     // construct GapsRunner
110 108
     GapsRunner runner(data, allParams["transposeData"], nPatterns,
111
-        partitionRows, cIndices);
109
+        partitionRows, cIndices, gapsParams.slot("seed"));
112 110
 
113 111
     // set uncertainty
114 112
     if (!isNull(uncertainty))
... ...
@@ -122,7 +120,6 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
122 120
     {
123 121
         std::string file(Rcpp::as<std::string>(allParams["checkpointInFile"]));
124 122
         Archive ar(file, ARCHIVE_READ);
125
-        GapsRng::load(ar);
126 123
         ar >> runner;
127 124
         ar.close();
128 125
     }
... ...
@@ -137,7 +134,6 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
137 134
         }
138 135
 
139 136
         // set parameters that would be saved in the checkpoint
140
-        runner.recordSeed(gapsParams.slot("seed"));
141 137
         runner.setMaxIterations(gapsParams.slot("nIterations"));
142 138
         runner.setSparsity(gapsParams.slot("alphaA"),
143 139
             gapsParams.slot("alphaP"), gapsParams.slot("singleCell"));
... ...
@@ -1,8 +1,10 @@
1 1
 #include "GapsRunner.h"
2
+#include "math/Algorithms.h"
2 3
 #include "math/Random.h"
3 4
 #include "math/SIMD.h"
4 5
 #include "utils/GapsAssert.h"
5 6
 #include "utils/GapsPrint.h"
7
+#include "utils/GlobalConfig.h"
6 8
 
7 9
 #ifdef __GAPS_R_BUILD__
8 10
 #include <Rcpp.h>
... ...
@@ -25,11 +27,6 @@ void GapsRunner::setFixedMatrix(char which, const Matrix &mat)
25 27
     }
26 28
 }
27 29
 
28
-void GapsRunner::recordSeed(uint32_t seed)
29
-{
30
-    mSeed = seed;
31
-}
32
-
33 30
 uint32_t GapsRunner::getSeed() const
34 31
 {
35 32
     return mSeed;
... ...
@@ -245,7 +242,6 @@ void GapsRunner::createCheckpoint()
245 242
     
246 243
         // create checkpoint file
247 244
         Archive ar(mCheckpointOutFile, ARCHIVE_WRITE);
248
-        GapsRng::save(ar);
249 245
         ar << mNumPatterns << mSeed << mASampler << mPSampler << mStatistics
250 246
             << mFixedMatrix << mMaxIterations << mPhase << mCurrentIteration
251 247
             << mNumUpdatesA << mNumUpdatesP << mRng;
... ...
@@ -263,5 +259,12 @@ Archive& operator>>(Archive &ar, GapsRunner &gr)
263 259
         >> gr.mStatistics >> gr.mFixedMatrix >> gr.mMaxIterations >> gr.mPhase
264 260
         >> gr.mCurrentIteration >> gr.mNumUpdatesA >> gr.mNumUpdatesP
265 261
         >> gr.mRng;
262
+
263
+    gr.mASampler.sync(gr.mPSampler);
264
+    gr.mPSampler.sync(gr.mASampler);
265
+
266
+    gr.mASampler.recalculateAPMatrix();
267
+    gr.mPSampler.recalculateAPMatrix();
268
+
266 269
     return ar;
267 270
 }
... ...
@@ -52,7 +52,8 @@ public:
52 52
 
53 53
     template <class DataType>
54 54
     GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns,
55
-        bool partitionRows, const std::vector<unsigned> &indices);
55
+        bool partitionRows, const std::vector<unsigned> &indices,
56
+        uint32_t seed);
56 57
 
57 58
     template <class DataType>
58 59
     void setUncertainty(const DataType &unc, bool transposeData,
... ...
@@ -60,7 +61,6 @@ public:
60 61
 
61 62
     void setFixedMatrix(char which, const Matrix &mat);
62 63
 
63
-    void recordSeed(uint32_t seed);
64 64
     uint32_t getSeed() const;
65 65
 
66 66
     void setMaxIterations(unsigned nIterations);
... ...
@@ -82,7 +82,8 @@ public:
82 82
 // problem with passing file parser - need to read it twice
83 83
 template <class DataType>
84 84
 GapsRunner::GapsRunner(const DataType &data, bool transposeData,
85
-unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
85
+unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices,
86
+uint32_t seed)
86 87
     :
87 88
 mASampler(data, !transposeData, nPatterns,!partitionRows, indices),
88 89
 mPSampler(data, transposeData, nPatterns, partitionRows, indices),
... ...
@@ -90,10 +91,14 @@ mStatistics(mPSampler.dataRows(), mPSampler.dataCols(), nPatterns),
90 91
 mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
91 92
 mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
92 93
 mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
93
-mNumPatterns(nPatterns), mSeed(0), mNumUpdatesA(0), mNumUpdatesP(0)
94
+mNumPatterns(nPatterns), mSeed(seed), mNumUpdatesA(0), mNumUpdatesP(0),
95
+mRng(seed)
94 96
 {
95 97
     mASampler.sync(mPSampler);
96 98
     mPSampler.sync(mASampler);
99
+
100
+    mASampler.setSeed(mRng.uniform64());
101
+    mPSampler.setSeed(mRng.uniform64());
97 102
 }
98 103
 
99 104
 template <class DataType>
... ...
@@ -1,9 +1,10 @@
1 1
 #include "GibbsSampler.h"
2
+#include "math/Algorithms.h"
2 3
 #include "math/SIMD.h"
3 4
 
4 5
 static float getDeltaLL(AlphaParameters alpha, float mass)
5 6
 {
6
-    return mass * (alpha.su - alpha.s * mass / 2.f);
7
+    return mass * (alpha.s_mu - alpha.s * mass / 2.f);
7 8
 }
8 9
 
9 10
 unsigned GibbsSampler::dataRows() const
... ...
@@ -43,6 +44,11 @@ void GibbsSampler::setMatrix(const Matrix &mat)
43 44
     mMatrix = mat;
44 45
 }
45 46
 
47
+void GibbsSampler::setSeed(uint64_t seed)
48
+{
49
+    mSeeder.seed(seed);
50
+}
51
+
46 52
 float GibbsSampler::chi2() const
47 53
 {   
48 54
     return 2.f * gaps::algo::loglikelihood(mDMatrix, mSMatrix, mAPMatrix);
... ...
@@ -53,6 +59,11 @@ uint64_t GibbsSampler::nAtoms() const
53 59
     return mDomain.size();
54 60
 }
55 61
 
62
+void GibbsSampler::recalculateAPMatrix()
63
+{
64
+    mAPMatrix = gaps::algo::matrixMultiplication(*mOtherMatrix, mMatrix);
65
+}
66
+
56 67
 void GibbsSampler::sync(const GibbsSampler &sampler)
57 68
 {
58 69
     mOtherMatrix = &(sampler.mMatrix);
... ...
@@ -61,25 +72,27 @@ void GibbsSampler::sync(const GibbsSampler &sampler)
61 72
 
62 73
 void GibbsSampler::update(unsigned nSteps, unsigned nCores)
63 74
 {
75
+    uint64_t seed = mSeeder.next();
64 76
     for (unsigned n = 0; n < nSteps; ++n)
65 77
     {
66
-        makeAndProcessProposal();
78
+        GapsRng rng(seed, n);
79
+        makeAndProcessProposal(&rng);
67 80
     }
68 81
 }
69 82
 
70
-void GibbsSampler::makeAndProcessProposal()
83
+void GibbsSampler::makeAndProcessProposal(GapsRng *rng)
71 84
 {
72 85
     if (mDomain.size() < 2)
73 86
     {
74
-        return birth();
87
+        return birth(rng);
75 88
     }
76 89
 
77
-    float u1 = mPropRng.uniform();
90
+    float u1 = rng->uniform();
78 91
     if (u1 < 0.5f)
79 92
     {
80
-        return mPropRng.uniform() < deathProb(mDomain.size()) ? death() : birth();
93
+        return rng->uniform() < deathProb(mDomain.size()) ? death(rng) : birth(rng);
81 94
     }
82
-    return (u1 < 0.75f) ? move() : exchange();
95
+    return (u1 < 0.75f) ? move(rng) : exchange(rng);
83 96
 }
84 97
 
85 98
 float GibbsSampler::deathProb(uint64_t nAtoms) const
... ...
@@ -92,17 +105,16 @@ float GibbsSampler::deathProb(uint64_t nAtoms) const
92 105
 
93 106
 // add an atom at a random position, calculate mass either with an
94 107
 // exponential distribution or with the gibbs mass distribution
95
-void GibbsSampler::birth()
108
+void GibbsSampler::birth(GapsRng *rng)
96 109
 {
97
-    uint64_t pos = mDomain.randomFreePosition();
110
+    uint64_t pos = mDomain.randomFreePosition(rng);
98 111
     unsigned row = getRow(pos);
99 112
     unsigned col = getCol(pos);
100
-    GapsRng rng;
101 113
 
102 114
     // calculate proposed mass
103 115
     float mass = canUseGibbs(col)
104
-        ? gibbsMass(alphaParameters(row, col), &rng).value()
105
-        : rng.exponential(mLambda);
116
+        ? gibbsMass(alphaParameters(row, col), rng).value()
117
+        : rng->exponential(mLambda);
106 118
 
107 119
     // accept mass as long as it's non-zero
108 120
     if (mass >= gaps::epsilon)
... ...
@@ -115,32 +127,31 @@ void GibbsSampler::birth()
115 127
 
116 128
 // automatically accept death, attempt a rebirth at the same position, using
117 129
 // the original mass or the gibbs mass distribution
118
-void GibbsSampler::death()
130
+void GibbsSampler::death(GapsRng *rng)
119 131
 {
120 132
     // get random atom
121
-    Atom* atom = mDomain.randomAtom();
133
+    Atom* atom = mDomain.randomAtom(rng);
122 134
     unsigned row = getRow(atom->pos);
123 135
     unsigned col = getCol(atom->pos);
124
-    GapsRng rng;
125 136
 
126 137
     // calculate alpha parameters assuming atom dies
127 138
     AlphaParameters alpha = alphaParametersWithChange(row, col, -atom->mass);
128 139
     float rebirthMass = atom->mass;
129
-    bool useSame = true;
140
+    bool useOriginalMass = true;
130 141
     if (canUseGibbs(col))
131 142
     {
132
-        OptionalFloat gMass = gibbsMass(alpha, &rng);
143
+        OptionalFloat gMass = gibbsMass(alpha, rng);
133 144
         if (gMass.hasValue())
134 145
         {
135 146
             rebirthMass = gMass.value();
136
-            useSame = false;
147
+            useOriginalMass = false;
137 148
         }
138 149
     }
139 150
 
140 151
     // accept/reject rebirth
141
-    if (std::log(rng.uniform()) < getDeltaLL(alpha, rebirthMass) * mAnnealingTemp)
152
+    if (std::log(rng->uniform()) < getDeltaLL(alpha, rebirthMass) * mAnnealingTemp)
142 153
     {
143
-        if (!useSame)
154
+        if (!useOriginalMass)
144 155
         {
145 156
             safelyChangeMatrix(row, col, rebirthMass - atom->mass);
146 157
             atom->mass = rebirthMass;
... ...
@@ -154,12 +165,12 @@ void GibbsSampler::death()
154 165
 }
155 166
 
156 167
 // move mass from src to dest in the atomic domain
157
-void GibbsSampler::move()
168
+void GibbsSampler::move(GapsRng *rng)
158 169
 {
159
-    AtomNeighborhood hood = mDomain.randomAtomWithNeighbors();
170
+    AtomNeighborhood hood = mDomain.randomAtomWithNeighbors(rng);
160 171
     uint64_t lbound = hood.hasLeft() ? hood.left->pos : 0;
161 172
     uint64_t rbound = hood.hasRight() ? hood.right->pos : mDomainLength;
162
-    uint64_t newLocation = mPropRng.uniform64(lbound + 1, rbound - 1);
173
+    uint64_t newLocation = rng->uniform64(lbound + 1, rbound - 1);
163 174
 
164 175
     unsigned r1 = getRow(hood.center->pos);
165 176
     unsigned c1 = getCol(hood.center->pos);
... ...
@@ -168,9 +179,8 @@ void GibbsSampler::move()
168 179
 
169 180
     if (r1 != r2 || c1 != c2)
170 181
     {
171
-        GapsRng rng;
172 182
         AlphaParameters alpha = alphaParameters(r1, c1, r2, c2);
173
-        if (std::log(rng.uniform()) < getDeltaLL(alpha, -hood.center->mass) * mAnnealingTemp)
183
+        if (std::log(rng->uniform()) < getDeltaLL(alpha, -hood.center->mass) * mAnnealingTemp)
174 184
         {
175 185
             hood.center->pos = newLocation;
176 186
             safelyChangeMatrix(r1, c1, -hood.center->mass);
... ...
@@ -187,9 +197,9 @@ void GibbsSampler::move()
187 197
 // exchange some amount of mass between two positions, note it is possible
188 198
 // for one of the atoms to be deleted if it's mass becomes too small after
189 199
 // the exchange
190
-void GibbsSampler::exchange()
200
+void GibbsSampler::exchange(GapsRng *rng)
191 201
 {
192
-    AtomNeighborhood hood = mDomain.randomAtomWithRightNeighbor();
202
+    AtomNeighborhood hood = mDomain.randomAtomWithRightNeighbor(rng);
193 203
     Atom* a1 = hood.center;
194 204
     Atom* a2 = hood.hasRight() ? hood.right : mDomain.front();
195 205
 
... ...
@@ -200,11 +210,10 @@ void GibbsSampler::exchange()
200 210
 
201 211
     if (r1 != r2 || c1 != c2)
202 212
     {
203
-        GapsRng rng;
204 213
         AlphaParameters alpha = alphaParameters(r1, c1, r2, c2);
205 214
         if (canUseGibbs(c1, c2))
206 215
         {
207
-            OptionalFloat gMass = gibbsMass(alpha, a1->mass, a2->mass, &rng);
216
+            OptionalFloat gMass = gibbsMass(alpha, a1->mass, a2->mass, rng);
208 217
             if (gMass.hasValue())
209 218
             {
210 219
                 acceptExchange(a1, a2, gMass.value(), r1, c1, r2, c2);
... ...
@@ -212,7 +221,7 @@ void GibbsSampler::exchange()
212 221
             }
213 222
         }
214 223
 
215
-        float newMass = rng.truncGammaUpper(a1->mass + a2->mass, 2.f, 1.f / mLambda);
224
+        float newMass = rng->truncGammaUpper(a1->mass + a2->mass, 2.f, 1.f / mLambda);
216 225
 
217 226
         // change larger mass
218 227
         float delta = a1->mass > a2->mass ? newMass - a1->mass : a2->mass - newMass;
... ...
@@ -225,7 +234,7 @@ void GibbsSampler::exchange()
225 234
 
226 235
         float deltaLL = getDeltaLL(alpha, delta);
227 236
         float priorLL = (pNew == 0.f) ? 1.f : pOld / pNew;
228
-        if (priorLL == 0.f || std::log(rng.uniform() * priorLL) < deltaLL * mAnnealingTemp)
237
+        if (priorLL == 0.f || std::log(rng->uniform() * priorLL) < deltaLL * mAnnealingTemp)
229 238
         {
230 239
             acceptExchange(a1, a2, delta, r1, c1, r2, c2);
231 240
             return;
... ...
@@ -261,11 +270,11 @@ OptionalFloat GibbsSampler::gibbsMass(AlphaParameters alpha,
261 270
 GapsRng *rng)
262 271
 {        
263 272
     alpha.s *= mAnnealingTemp;
264
-    alpha.su *= mAnnealingTemp;
273
+    alpha.s_mu *= mAnnealingTemp;
265 274
 
266 275
     if (alpha.s > gaps::epsilon)
267 276
     {
268
-        float mean = (alpha.su - mLambda) / alpha.s;
277
+        float mean = (alpha.s_mu - mLambda) / alpha.s;
269 278
         float sd = 1.f / std::sqrt(alpha.s);
270 279
         float pLower = gaps::p_norm(0.f, mean, sd);
271 280
 
... ...
@@ -286,11 +295,11 @@ OptionalFloat GibbsSampler::gibbsMass(AlphaParameters alpha,
286 295
 float m1, float m2, GapsRng *rng)
287 296
 {
288 297
     alpha.s *= mAnnealingTemp;
289
-    alpha.su *= mAnnealingTemp;
298
+    alpha.s_mu *= mAnnealingTemp;
290 299
 
291 300
     if (alpha.s > gaps::epsilon)
292 301
     {
293
-        float mean = alpha.su / alpha.s; // lambda cancels out
302
+        float mean = alpha.s_mu / alpha.s; // lambda cancels out
294 303
         float sd = 1.f / std::sqrt(alpha.s);
295 304
         float pLower = gaps::p_norm(-m1, mean, sd);
296 305
         float pUpper = gaps::p_norm(m2, mean, sd);
... ...
@@ -319,9 +328,9 @@ void GibbsSampler::updateAPMatrix(unsigned row, unsigned col, float delta)
319 328
     float *ap = mAPMatrix.colPtr(row);
320 329
     unsigned size = mAPMatrix.nRow();
321 330
 
322
-    gaps::simd::packedFloat pOther, pAP;
331
+    gaps::simd::PackedFloat pOther, pAP;
323 332
     gaps::simd::Index i(0);
324
-    gaps::simd::packedFloat pDelta(delta);
333
+    gaps::simd::PackedFloat pDelta(delta);
325 334
     for (; i <= size - i.increment(); ++i)
326 335
     {
327 336
         pOther.load(other + i);
... ...
@@ -27,10 +27,12 @@ public:
27 27
     void setMaxGibbsMass(float max);
28 28
     void setAnnealingTemp(float temp);
29 29
     void setMatrix(const Matrix &mat);
30
+    void setSeed(uint64_t seed);
30 31
 
31 32
     float chi2() const;
32 33
     uint64_t nAtoms() const;
33 34
 
35
+    void recalculateAPMatrix();
34 36
     void sync(const GibbsSampler &sampler);
35 37
     void update(unsigned nSteps, unsigned nCores);
36 38
 
... ...
@@ -53,9 +55,10 @@ private:
53 55
     ColMatrix mMatrix; // genes by patterns for A, samples by patterns for P
54 56
     const ColMatrix *mOtherMatrix; // pointer to P if this is A, and vice versa
55 57
 
56
-    GapsRng mPropRng;
57 58
     AtomicDomain mDomain; // data structure providing access to atoms
58 59
 
60
+    Xoroshiro128plus mSeeder; // used to generate seeds for individual proposals
61
+
59 62
     float mAlpha;
60 63
     float mLambda;
61 64
     float mMaxGibbsMass;
... ...
@@ -66,13 +69,13 @@ private:
66 69
     uint64_t mBinSize;
67 70
     uint64_t mDomainLength;
68 71
 
69
-    void makeAndProcessProposal();
72
+    void makeAndProcessProposal(GapsRng *rng);
70 73
     float deathProb(uint64_t nAtoms) const;
71 74
 
72
-    void birth();
73
-    void death();
74
-    void move();
75
-    void exchange();
75
+    void birth(GapsRng *rng);
76
+    void death(GapsRng *rng);
77
+    void move(GapsRng *rng);
78
+    void exchange(GapsRng *rng);
76 79
 
77 80
     void acceptExchange(Atom *a1, Atom *a2, float d1, unsigned r1,
78 81
         unsigned c1, unsigned r2, unsigned c2);
... ...
@@ -18,7 +18,6 @@ OBJECTS =   AtomicDomain.o \
18 18
             file_parser/TsvParser.o \
19 19
             file_parser/MtxParser.o \
20 20
             math/Algorithms.o \
21
-            math/Math.o \
22 21
             math/Random.o \
23 22
             cpp_tests/testAlgorithms.o \
24 23
             cpp_tests/testAtomicDomain.o \
... ...
@@ -1,5 +1,5 @@
1 1
 #include "catch.h"
2
-#include "../math/Math.h"
2
+#include "../math/Algorithms.h"
3 3
 #include "../math/Random.h"
4 4
 
5 5
 #define TEST_APPROX(x) Approx(x).epsilon(0.001)
... ...
@@ -2,16 +2,10 @@
2 2
 #define __COGAPS_EFFICIENT_SETS_H__
3 3
 
4 4
 #include <vector>
5
-
6 5
 #include <stdint.h>
7 6
 
8 7
 class IntFixedHashSet
9 8
 {
10
-private:
11
-
12
-    std::vector<uint64_t> mSet;
13
-    uint64_t mCurrentKey;
14
-
15 9
 public:
16 10
 
17 11
     IntFixedHashSet() : mCurrentKey(1) {}
... ...
@@ -20,6 +14,11 @@ public:
20 14
     void clear() {++mCurrentKey;}
21 15
     bool contains(unsigned n) {return mSet[n] == mCurrentKey;}
22 16
     void insert(unsigned n) {mSet[n] = mCurrentKey;}
17
+
18
+private:
19
+
20
+    std::vector<uint64_t> mSet;
21
+    uint64_t mCurrentKey;
23 22
 };
24 23
 
25 24
 // TODO have sorted vector with at least some % of holes
... ...
@@ -27,10 +26,6 @@ public:
27 26
 // when shift happens, should be minimal
28 27
 class IntDenseOrderedSet
29 28
 {
30
-private:
31
-
32
-    std::vector<uint64_t> mVec;
33
-
34 29
 public:
35 30
 
36 31
     void insert(uint64_t p) {mVec.push_back(p);}
... ...
@@ -48,6 +43,10 @@ public:
48 43
         }
49 44
         return true;
50 45
     }
46
+
47
+private:
48
+
49
+    std::vector<uint64_t> mVec;
51 50
 };
52 51
 
53
-#endif
54 52
\ No newline at end of file
53
+#endif // __COGAPS_EFFICIENT_SETS_H__
55 54
\ No newline at end of file
... ...
@@ -4,7 +4,8 @@
4 4
 #include "../utils/GapsAssert.h"
5 5
 
6 6
 ColMatrix::ColMatrix(unsigned nrow, unsigned ncol)
7
-: mNumRows(nrow), mNumCols(ncol)
7
+    :
8
+mNumRows(nrow), mNumCols(ncol)
8 9
 {
9 10
     allocate();
10 11
 }
... ...
@@ -1,21 +1,49 @@
1 1
 #include "Vector.h"
2 2
 
3
+#include "../utils/GapsAssert.h"
4
+
5
+Vector::Vector(unsigned size)
6
+    : mValues(aligned_vector(size, 0.f))
7
+{}
8
+
3 9
 Vector::Vector(const std::vector<float> &v) : mValues(v.size())
4 10
 {
5
-    for (unsigned i = 0; i < v.size(); ++i)
11
+    unsigned sz = v.size();
12
+    for (unsigned i = 0; i < sz; ++i)
6 13
     {
7 14
         mValues[i] = v[i];
8 15
     }
9 16
 }
10 17
 
11
-void Vector::concat(const Vector& vec)
18
+unsigned Vector::size() const
19
+{
20
+    return mValues.size();
21
+}
22
+
23
+float* Vector::ptr()
24
+{
25
+    return &mValues[0];
26
+}
27
+
28
+const float* Vector::ptr() const
12 29
 {
13
-    mValues.insert(mValues.end(), vec.mValues.begin(), vec.mValues.end());
30
+    return &mValues[0];
31
+}
32
+
33
+float& Vector::operator[](unsigned i)
34
+{
35
+    return mValues[i];
36
+}
37
+
38
+float Vector::operator[](unsigned i) const
39
+{
40
+    return mValues[i];
14 41
 }
15 42
 
16 43
 void Vector::operator+=(const Vector &vec)
17 44
 {
18
-    for (unsigned i = 0; i < size(); ++i)
45
+    unsigned sz = size();
46
+    for (unsigned i = 0; i < sz; ++i)
19 47
     {
20 48
         mValues[i] += vec[i];
21 49
     }
... ...
@@ -23,7 +51,8 @@ void Vector::operator+=(const Vector &vec)
23 51
 
24 52
 Vector Vector::operator-(Vector v) const
25 53
 {
26
-    for (unsigned i = 0; i < size(); ++i)
54
+    unsigned sz = size();
55
+    for (unsigned i = 0; i < sz; ++i)
27 56
     {
28 57
         v[i] = mValues[i] - v[i];
29 58
     }
... ...
@@ -46,7 +75,8 @@ Vector Vector::operator/(float val) const
46 75
 
47 76
 void Vector::operator*=(float val)
48 77
 {
49
-    for (unsigned i = 0; i < mValues.size(); ++i)
78
+    unsigned sz = size();
79
+    for (unsigned i = 0; i < sz; ++i)
50 80
     {
51 81
         mValues[i] *= val;
52 82
     }
... ...
@@ -54,7 +84,8 @@ void Vector::operator*=(float val)
54 84
 
55 85
 void Vector::operator/=(float val)
56 86
 {
57
-    for (unsigned i = 0; i < size(); ++i)
87
+    unsigned sz = size();
88
+    for (unsigned i = 0; i < sz; ++i)
58 89
     {
59 90
         mValues[i] /= val;
60 91
     }
... ...
@@ -62,7 +93,9 @@ void Vector::operator/=(float val)
62 93
 
63 94
 Archive& operator<<(Archive &ar, Vector &vec)
64 95
 {
65
-    for (unsigned i = 0; i < vec.size(); ++i)
96
+    unsigned sz = vec.size();
97
+    ar << sz;
98
+    for (unsigned i = 0; i < sz; ++i)
66 99
     {
67 100
         ar << vec[i];
68 101
     }
... ...
@@ -71,7 +104,11 @@ Archive& operator<<(Archive &ar, Vector &vec)
71 104
 
72 105
 Archive& operator>>(Archive &ar, Vector &vec)
73 106
 {
74
-    for (unsigned i = 0; i < vec.size(); ++i)
107
+    unsigned sz = 0;
108
+    ar >> sz;
109
+    GAPS_ASSERT(sz == vec.size());
110
+
111
+    for (unsigned i = 0; i < sz; ++i)
75 112
     {
76 113
         ar >> vec.mValues[i];
77 114
     }
... ...
@@ -12,22 +12,18 @@ typedef std::vector<float, bal::aligned_allocator<float,32> > aligned_vector;
12 12
 
13 13
 class Vector
14 14
 {
15
-private:
16
-
17
-    aligned_vector mValues;
18
-
19 15
 public:
20 16
 
21
-    explicit Vector(unsigned size) : mValues(aligned_vector(size, 0.f)) {}
17
+    explicit Vector(unsigned size);
22 18
     explicit Vector(const std::vector<float> &v);
23 19
 
24
-    unsigned size() const {return mValues.size();}
20
+    unsigned size() const;
25 21
 
26
-    float* ptr() {return &mValues[0];}
27
-    const float* ptr() const {return &mValues[0];}
22
+    float* ptr();
23
+    const float* ptr() const;
28 24
 
29
-    float& operator[](unsigned i) {return mValues[i];}
30
-    float operator[](unsigned i) const {return mValues[i];}
25
+    float& operator[](unsigned i);
26
+    float operator[](unsigned i) const;
31 27
 
32 28
     void operator+=(const Vector &vec);
33 29
     Vector operator-(Vector v) const;
... ...
@@ -37,10 +33,12 @@ public:
37 33
     void operator*=(float val);
38 34
     void operator/=(float val);
39 35
 
40
-    void concat(const Vector& vec);
41
-
42 36
     friend Archive& operator<<(Archive &ar, Vector &vec);
43 37
     friend Archive& operator>>(Archive &ar, Vector &vec);
38
+
39
+private:
40
+
41
+    aligned_vector mValues;
44 42
 };
45 43
 
46
-#endif
47 44
\ No newline at end of file
45
+#endif // __COGAPS_VECTOR_H__
48 46
\ No newline at end of file
... ...
@@ -5,151 +5,236 @@
5 5
 
6 6
 #include <algorithm>
7 7
 
8
-ColMatrix gaps::algo::pmax(const ColMatrix &mat, float factor)
8
+#define GAPS_SQ(x) ((x) * (x))
9
+
10
+////////////////////////////// AlphaParameters /////////////////////////////////
11
+
12
+AlphaParameters::AlphaParameters(float inS, float inS_mu)
13
+    : s(inS), s_mu(inS_mu) 
14
+{}
15
+
16
+AlphaParameters AlphaParameters::operator+(const AlphaParameters &other) const
9 17
 {
10
-    ColMatrix temp(mat.nRow(), mat.nCol());
11
-    for (unsigned i = 0; i < mat.nRow(); ++i)
12
-    {
13
-        for (unsigned j = 0; j < mat.nCol(); ++j)
14
-        {
15
-            temp(i,j) = gaps::max(mat(i,j) * factor, factor);
16
-        }
17
-    }
18
-    return temp;
18
+    return AlphaParameters(s + other.s, s_mu - other.s_mu); // not a typo
19 19
 }
20 20
 
21
-float gaps::algo::sum(const ColMatrix &mat, bool transposeOrder)
21
+void AlphaParameters::operator*=(float v)
22 22
 {
23
-    float sum = 0.f;
24
-    unsigned outer = transposeOrder ? mat.nCol() : mat.nRow();
25
-    unsigned inner = transposeOrder ? mat.nRow() : mat.nCol();
26
-    for (unsigned i = 0; i < outer; ++i)
23
+    s *= v;
24
+    s_mu *= v;
25
+}
26
+
27
+//////////////////////////////// Algorithms ////////////////////////////////////
28
+
29
+float gaps::min(float a, float b)
30
+{
31
+    return a < b ? a : b;
32
+}
33
+
34
+unsigned gaps::min(unsigned a, unsigned b)
35
+{
36
+    return a < b ? a : b;
37
+}
38
+
39
+uint64_t gaps::min(uint64_t a, uint64_t b)
40
+{
41
+    return a < b ? a : b;
42
+}
43
+
44
+float gaps::max(float a, float b)
45
+{
46
+    return a < b ? b : a;
47
+}
48
+
49
+unsigned gaps::max(unsigned a, unsigned b)
50
+{
51
+    return a < b ? b : a;
52
+}
53
+
54
+uint64_t gaps::max(uint64_t a, uint64_t b)
55
+{
56
+    return a < b ? b : a;
57
+}
58
+
59
+bool gaps::algo::isVectorZero(const float *vec, unsigned size)
60
+{
61
+    for (unsigned i = 0; i < size; ++i)
27 62
     {
28
-        for (unsigned j = 0; j < inner; ++j)
63
+        if (vec[i] != 0.f)
29 64
         {
30
-            sum += transposeOrder ? mat(j,i) : mat(i,j);
65
+            return false;
31 66
         }
32 67
     }
33
-    return sum;
68
+    return true;
34 69
 }
35 70
 
36
-float gaps::algo::mean(const ColMatrix &mat, bool transposeOrder)
71
+Vector gaps::algo::elementSq(Vector vec)
37 72
 {
38
-    return gaps::algo::sum(mat, transposeOrder) / (mat.nRow() * mat.nCol());
73
+    unsigned sz = vec.size();
74
+
75
+    for (unsigned i = 0; i < sz; ++i)
76
+    {
77
+        vec[i] *= vec[i];
78
+    }
79
+    return vec;
39 80
 }
40 81
 
41
-void gaps::algo::copyTranspose(ColMatrix *dest, const ColMatrix &src)
82
+float gaps::algo::max(const Vector &vec)
42 83
 {
43
-    GAPS_ASSERT(dest->nRow() == src.nCol());
44
-    GAPS_ASSERT(dest->nCol() == src.nRow());
84
+    unsigned sz = vec.size();
45 85
 
46
-    for (unsigned j = 0; j < src.nCol(); ++j)
86
+    float max = vec[0];
87
+    for (unsigned i = 0; i < sz; ++i)
47 88
     {
48
-        for (unsigned i = 0; i < src.nRow(); ++i)
49
-        {
50
-            dest->operator()(j,i) = src(i,j); // TODO test which order is better
51
-        }
89
+        max = gaps::max(max, vec[i]);
52 90
     }
91
+    return max;
53 92
 }
54 93
 
55 94
 float gaps::algo::sum(const Vector &vec)
56 95
 {
96
+    unsigned sz = vec.size();
97
+
57 98
     float sum = 0.f;
58
-    for (unsigned i = 0; i < vec.size(); ++i)
99
+    for (unsigned i = 0; i < sz; ++i)
59 100
     {
60 101
         sum += vec[i];
61 102
     }
62 103
     return sum;
63 104
 }
64 105
 
65
-float gaps::algo::min(const Vector &vec)
106
+float gaps::algo::sum(const ColMatrix &mat)
66 107
 {
67
-    float min = vec[0];
68
-    for (unsigned i = 0; i < vec.size(); ++i)
108
+    unsigned nc = mat.nCol();
109
+
110
+    float sum = 0.f;
111
+    for (unsigned j = 0; j < nc; ++j)
69 112
     {
70
-        min = gaps::min(min, vec[i]);
113
+        sum += gaps::algo::sum(mat.getCol(j));
71 114
     }
72
-    return min;
115
+    return sum;
73 116
 }
74 117
 
75
-float gaps::algo::max(const Vector &vec)
118
+float gaps::algo::mean(const ColMatrix &mat)
76 119
 {
77
-    float max = vec[0];
78
-    for (unsigned i = 0; i < vec.size(); ++i)
79
-    {
80
-        max = gaps::max(max, vec[i]);
81
-    }
82
-    return max;
120
+    return gaps::algo::sum(mat) / static_cast<float>(mat.nRow() * mat.nCol());
83 121
 }
84 122
 
85
-float gaps::algo::dot(const Vector &A, const Vector &B)
123
+float gaps::algo::nonZeroMean(const ColMatrix &mat)
86 124
 {
87
-    float dotProd = 0.f;
88
-    for (unsigned i = 0; i < A.size(); ++i)
125
+    unsigned nr = mat.nRow();
126
+    unsigned nc = mat.nCol();
127
+
128
+    float sum = 0.f;
129
+    unsigned nNonZeros = 0;
130
+    for (unsigned j = 0; j < nc; ++j)
89 131
     {
90
-        dotProd += A[i] * B[i];
132
+        for (unsigned i = 0; i < nr; ++i)
133
+        {
134
+            if (mat(i,j) != 0.f)
135
+            {
136
+                nNonZeros++;
137
+                sum += mat(i,j);
138
+            }
139
+        }
91 140
     }
92
-    return dotProd;
141
+    return sum / static_cast<float>(nNonZeros);
93 142
 }
94 143
 
95
-unsigned gaps::algo::whichMin(const Vector &vec)
144
+ColMatrix gaps::algo::pmax(ColMatrix mat, float factor)
96 145
 {
97
-    float min = vec[0];
98
-    unsigned minNdx = 0;
99
-    for (unsigned i = 1; i < vec.size(); ++i)
146
+    unsigned nr = mat.nRow();
147
+    unsigned nc = mat.nCol();
148
+
149
+    for (unsigned j = 0; j < nc; ++j)
100 150
     {
101
-        if (vec[i] < min)
151
+        for (unsigned i = 0; i < nr; ++i)
102 152
         {
103
-            min = vec[i];
104
-            minNdx = i;
153
+            mat(i,j) = gaps::max(mat(i,j) * factor, factor);
105 154
         }
106 155
     }
107
-    return minNdx;
156
+    return mat;
108 157
 }
109 158
 
110
-Vector gaps::algo::rank(Vector vec)
159
+ColMatrix gaps::algo::matrixMultiplication(const ColMatrix &A, const ColMatrix &BT)
111 160
 {
112
-    std::vector< std::pair<float, float> > sortVec(vec.size());
113
-    for (unsigned i = 0; i < vec.size(); ++i)
161
+    GAPS_ASSERT_MSG(A.nCol() == BT.nCol(), A.nCol() << " " << BT.nCol());
162
+    unsigned nr = A.nRow();
163
+    unsigned nc = BT.nRow();
164
+    unsigned nf = A.nCol(); // inner dimension
165
+
166
+    ColMatrix temp(nr, nc);
167
+    for (unsigned i = 0; i < nr; ++i)
114 168
     {
115
-        sortVec[i] = std::pair<float, float>(vec[i], i);
169
+        for (unsigned j = 0; j < nc; ++j)
170
+        {
171
+            temp(i,j) = 0.f;
172
+            for (unsigned k = 0; k < nf; ++k)
173
+            {
174
+                temp(i,j) += A(i,k) * BT(j,k);
175
+            }
176
+        }
116 177
     }
117
-    
118
-    std::sort(sortVec.begin(), sortVec.end());
119
-    Vector ranks(vec.size());
120
-    for (unsigned i = 0; i < vec.size(); ++i)
178
+    return temp;
179
+}
180
+
181
+void gaps::algo::copyTranspose(ColMatrix *dest, const ColMatrix &src)
182
+{
183
+    GAPS_ASSERT(dest->nRow() == src.nCol());
184
+    GAPS_ASSERT(dest->nCol() == src.nRow());
185
+    unsigned nc = src.nCol();
186
+    unsigned nr = src.nRow();
187
+
188
+    for (unsigned j = 0; j < nc; ++j)
121 189
     {
122
-        ranks[i] = sortVec[i].second;
190
+        for (unsigned i = 0; i < nr; ++i)
191
+        {
192
+            dest->operator()(j,i) = src(i,j); // TODO test which order is better
193
+        }
123 194
     }
124
-    return ranks;
125 195
 }
126 196
 
127
-Vector gaps::algo::elementSq(Vector vec)
197
+float gaps::algo::loglikelihood(const ColMatrix &D, const ColMatrix &S,
198
+const ColMatrix &AP)
128 199
 {
129
-    for (unsigned i = 0; i < vec.size(); ++i)
200
+    unsigned nr = D.nRow();
201
+    unsigned nc = D.nCol();
202
+
203
+    float chi2 = 0.f;
204
+    for (unsigned i = 0; i < nr; ++i)
130 205
     {
131
-        vec[i] *= vec[i];
206
+        for (unsigned j = 0; j < nc; ++j)
207
+        {
208
+            chi2 += GAPS_SQ(D(i,j) - AP(i,j)) / GAPS_SQ(S(i,j));
209
+        }
132 210
     }
133
-    return vec;
211
+    return chi2 / 2.f;
134 212
 }
135 213
 
136
-bool gaps::algo::isVectorZero(const float *vec, unsigned size)
214
+ColMatrix gaps::algo::computeStdDev(ColMatrix stdMat, const ColMatrix &meanMat,
215
+unsigned nUpdates)
137 216
 {
138
-    for (unsigned i = 0; i < size; ++i)
217
+    GAPS_ASSERT(nUpdates > 1);
218
+    unsigned nr = stdMat.nRow();
219
+    unsigned nc = stdMat.nCol();
220
+
221
+    for (unsigned j = 0; j < nc; ++j)
139 222
     {
140
-        if (vec[i] != 0.f)
223
+        for (unsigned i = 0; i < nr; ++i)
141 224
         {
142
-            return false;
225
+            float meanTerm = meanMat(i,j) * meanMat(i,j) / static_cast<float>(nUpdates);
226
+            float numer = gaps::max(0.f, stdMat(i,j) - meanTerm);
227
+            stdMat(i,j) = std::sqrt(numer / (static_cast<float>(nUpdates) - 1.f));
143 228
         }
144 229
     }
145
-    return true;
230
+    return stdMat;
146 231
 }
147
-    
232
+
148 233
 AlphaParameters gaps::algo::alphaParameters(unsigned size, const float *D,
149 234
 const float *S, const float *AP, const float *mat)
150 235
 {
151
-    gaps::simd::packedFloat pMat, pD, pAP, pS;
152
-    gaps::simd::packedFloat partialS(0.f), partialSU(0.f);
236
+    gaps::simd::PackedFloat pMat, pD, pAP, pS;
237
+    gaps::simd::PackedFloat partialS(0.f), partialS_mu(0.f);
153 238
     gaps::simd::Index i(0);
154 239
     for (; i <= size - i.increment(); ++i)
155 240
     {   
... ...
@@ -157,25 +242,26 @@ const float *S, const float *AP, const float *mat)
157 242
         pD.load(D + i);
158 243
         pAP.load(AP + i);
159 244
         pS.load(S + i);
160
-        gaps::simd::packedFloat ratio(pMat / (pS * pS));
245
+        gaps::simd::PackedFloat ratio(pMat / (pS * pS));
161 246
         partialS += pMat * ratio;
162
-        partialSU += ratio * (pD - pAP);
247
+        partialS_mu += ratio * (pD - pAP);
163 248
     }
164
-    float s = partialS.scalar(), su = partialSU.scalar();
249
+
250
+    float s = partialS.scalar(), s_mu = partialS_mu.scalar();
165 251
     for (unsigned j = i.value(); j < size; ++j)
166 252
     {
167 253
         float ratio = mat[j] / (S[j] * S[j]);
168 254
         s += mat[j] * ratio;
169
-        su += ratio * (D[j] - AP[j]);
255
+        s_mu += ratio * (D[j] - AP[j]);
170 256
     }
171
-    return AlphaParameters(s,su);
257
+    return AlphaParameters(s, s_mu);
172 258
 }
173 259
 
174 260
 AlphaParameters gaps::algo::alphaParameters(unsigned size, const float *D,
175 261
 const float *S, const float *AP, const float *mat1, const float *mat2)
176 262
 {
177
-    gaps::simd::packedFloat pMat1, pMat2, pD, pAP, pS;
178
-    gaps::simd::packedFloat partialS(0.f), partialSU(0.f);
263
+    gaps::simd::PackedFloat pMat1, pMat2, pD, pAP, pS;
264
+    gaps::simd::PackedFloat partialS(0.f), partialS_mu(0.f);
179 265
     gaps::simd::Index i(0);
180 266
     for (; i <= size - i.increment(); ++i)
181 267
     {   
... ...
@@ -184,27 +270,27 @@ const float *S, const float *AP, const float *mat1, const float *mat2)
184 270
         pD.load(D + i);
185 271
         pAP.load(AP + i);
186 272
         pS.load(S + i);
187
-        gaps::simd::packedFloat ratio((pMat1 - pMat2) / (pS * pS));
273
+        gaps::simd::PackedFloat ratio((pMat1 - pMat2) / (pS * pS));
188 274
         partialS += (pMat1 - pMat2) * ratio;
189
-        partialSU += ratio * (pD - pAP);
275
+        partialS_mu += ratio * (pD - pAP);
190 276
     }
191 277
 
192
-    float s = partialS.scalar(), su = partialSU.scalar();
278
+    float s = partialS.scalar(), s_mu = partialS_mu.scalar();
193 279
     for (unsigned j = i.value(); j < size; ++j)
194 280
     {
195 281
         float ratio = (mat1[j] - mat2[j]) / (S[j] * S[j]);
196 282
         s += (mat1[j] - mat2[j]) * ratio;
197
-        su += ratio * (D[j] - AP[j]);
283
+        s_mu += ratio * (D[j] - AP[j]);
198 284
     }
199
-    return AlphaParameters(s,su);
285
+    return AlphaParameters(s, s_mu);
200 286
 }
201 287
 
202 288
 AlphaParameters gaps::algo::alphaParametersWithChange(unsigned size,
203 289
 const float *D, const float *S, const float *AP, const float *mat, float ch)
204 290
 {
205
-    gaps::simd::packedFloat pCh(ch);
206
-    gaps::simd::packedFloat pMat, pD, pAP, pS;
207
-    gaps::simd::packedFloat partialS(0.f), partialSU(0.f);
291
+    gaps::simd::PackedFloat pCh(ch);
292
+    gaps::simd::PackedFloat pMat, pD, pAP, pS;
293
+    gaps::simd::PackedFloat partialS(0.f), partialS_mu(0.f);
208 294
     gaps::simd::Index i(0);
209 295
     for (; i <= size - i.increment(); ++i)
210 296
     {   
... ...
@@ -212,36 +298,18 @@ const float *D, const float *S, const float *AP, const float *mat, float ch)
212 298
         pD.load(D + i);
213 299
         pAP.load(AP + i);
214 300
         pS.load(S + i);
215
-        gaps::simd::packedFloat ratio(pMat / (pS * pS));
301
+        gaps::simd::PackedFloat ratio(pMat / (pS * pS));
216 302
         partialS += pMat * ratio;
217
-        partialSU += ratio * (pD - (pAP + pCh * pMat));
303
+        partialS_mu += ratio * (pD - (pAP + pCh * pMat));
218 304
     }
219
-    float s = partialS.scalar(), su = partialSU.scalar();
305
+
306
+    float s = partialS.scalar(), s_mu = partialS_mu.scalar();
220 307
     for (unsigned j = i.value(); j < size; ++j)
221 308
     {
222 309
         float ratio = mat[j] / (S[j] * S[j]);
223 310
         s += mat[j] * ratio;
224
-        su += ratio * (D[j] - (AP[j] + ch * mat[j]));
311
+        s_mu += ratio * (D[j] - (AP[j] + ch * mat[j]));
225 312
     }
226
-    return AlphaParameters(s,su);
313
+    return AlphaParameters(s, s_mu);
227 314
 }
228 315
 
229
-// horribly slow, don't call often
230
-ColMatrix gaps::algo::matrixMultiplication(const ColMatrix &A, const ColMatrix &BT)
231
-{
232
-    GAPS_ASSERT_MSG(A.nCol() == BT.nCol(), A.nCol() << " " << BT.nCol());
233
-    ColMatrix temp(A.nRow(), BT.nRow());
234
-    for (unsigned i = 0; i < A.nRow(); ++i)
235
-    {
236
-        for (unsigned j = 0; j < BT.nRow(); ++j)
237
-        {
238
-            float sum = 0.0;
239
-            for (unsigned k = 0; k < A.nCol(); ++k)
240
-            {
241
-                sum += A(i,k) * BT(j,k);
242
-            }
243
-            temp(i,j) = sum;
244
-        }
245
-    }
246
-    return temp;
247
-}
248 316
\ No newline at end of file
... ...
@@ -2,135 +2,78 @@
2 2
 #define __COGAPS_ALGORITHMS_H__
3 3
 
4 4
 #include "../data_structures/Matrix.h"
5
-#include "Math.h"
6 5
 
7
-#include <cmath>
8
-
9
-#define GAPS_SQ(x) ((x) * (x))
6
+#include <string>
7
+#include <sstream>
10 8
 
11 9
 struct AlphaParameters
12 10
 {
13 11
     float s;
14
-    float su;
12
+    float s_mu;
15 13
     
16
-    AlphaParameters(float inS, float inSU)
17
-        : s(inS), su(inSU)
18
-    {}
19
-
20
-    AlphaParameters operator+(const AlphaParameters &other) const
21
-    {
22
-        float rs = s + other.s;
23
-        float rsu = su - other.su; // not a typo
24
-        return AlphaParameters(rs, rsu);
25
-    }
26
-
27
-    void operator*=(float v)
28
-    {
29
-        s *= v;
30
-        su *= v;
31
-    }
14
+    AlphaParameters(float inS, float inS_mu);
15
+
16
+    AlphaParameters operator+(const AlphaParameters &other) const;
17
+    void operator*=(float v);
32 18
 };
33 19
 
34 20
 namespace gaps
35 21
 {
36
-namespace algo
37
-{
38
-    bool isVectorZero(const float *vec, unsigned size);
39
-    
40
-    // vector algorithms    
41
-    unsigned whichMin(const Vector &vec);
42
-    float sum(const Vector &vec);
43
-    float min(const Vector &vec);
44
-    float max(const Vector &vec);
45
-    float dot(const Vector &A, const Vector &B);
46
-    Vector rank(Vector vec);
47
-    Vector elementSq(Vector vec);
48
-    
49
-    // generic matrix algorithms
50
-    float sum(const ColMatrix &mat, bool transposeOrder=false);
51
-    float mean(const ColMatrix &mat, bool transposeOrder=false);
22
+    template <class T>
23
+    std::string to_string(T a);
52 24
 
53
-    template<class GenericMatrix>
54
-    float nonZeroMean(const GenericMatrix &mat);
25
+    const float epsilon = 1.0e-10f;
26
+    const float pi = 3.1415926535897932384626433832795f;
27
+    const float pi_double = 3.1415926535897932384626433832795;
55 28
 
56
-    template<class GenericMatrix>
57
-    GenericMatrix computeStdDev(const GenericMatrix &stdMat,
58
-        const GenericMatrix &meanMat, unsigned nUpdates);
29
+    float min(float a, float b);
30
+    unsigned min(unsigned a, unsigned b);
31
+    uint64_t min(uint64_t a, uint64_t b);
59 32
 
60
-    ColMatrix pmax(const ColMatrix &mat, float factor);
33
+    float max(float a, float b);
34
+    unsigned max(unsigned a, unsigned b);
35
+    uint64_t max(uint64_t a, uint64_t b);
61 36
 
62
-    // specific matrix algorithms
63
-    ColMatrix matrixMultiplication(const ColMatrix &A, const ColMatrix &B);
37
+namespace algo
38
+{
39
+    // vector algorithms
40
+    bool isVectorZero(const float *vec, unsigned size);
41
+    Vector elementSq(Vector vec);
42
+    float max(const Vector &vec);
43
+    float sum(const Vector &vec);
64 44
 
45
+    // matrix algorithms
46
+    float sum(const ColMatrix &mat);
47
+    float mean(const ColMatrix &mat);
48
+    float nonZeroMean(const ColMatrix &mat);
49
+    ColMatrix pmax(ColMatrix mat, float factor);
50
+    ColMatrix matrixMultiplication(const ColMatrix &A, const ColMatrix &BT);
65 51
     void copyTranspose(ColMatrix *dest, const ColMatrix &src);
52
+    ColMatrix computeStdDev(ColMatrix stdMat, const ColMatrix &meanMat,
53
+        unsigned nUpdates);
66 54
 
67
-    // chiSq / 2
68
-    template <class MatA, class MatB, class MatC>
69
-    float loglikelihood(const MatA &D, const MatB &S,
70
-        const MatC &AP);
55
+    // Cogaps Algorithms
56
+    float loglikelihood(const ColMatrix &D, const ColMatrix &S,
57
+        const ColMatrix &AP);
71 58
 
72 59
     AlphaParameters alphaParameters(unsigned size, const float *D,
73 60
         const float *S, const float *AP, const float *mat);
74 61
 
75
-    AlphaParameters alphaParametersWithChange(unsigned size, const float *D,
76
-        const float *S, const float *AP, const float *mat, float d);
77
-
78 62
     AlphaParameters alphaParameters(unsigned size, const float *D,
79 63
         const float *S, const float *AP, const float *mat1, const float *mat2);
80 64
 
65
+    AlphaParameters alphaParametersWithChange(unsigned size, const float *D,
66
+        const float *S, const float *AP, const float *mat, float ch);
67
+
81 68
 } // namespace algo
82 69
 } // namespace gaps
83 70
 
84
-template<class GenericMatrix>
85
-float gaps::algo::nonZeroMean(const GenericMatrix &mat)
86
-{
87
-    float sum = 0.f;
88
-    unsigned nNonZeros = 0;
89
-    for (unsigned i = 0; i < mat.nRow(); ++i)
90
-    {
91
-        for (unsigned j = 0; j < mat.nCol(); ++j)
92
-        {
93
-            if (mat(i,j) != 0.f)
94
-            {
95
-                nNonZeros++;
96
-                sum += mat(i,j);
97
-            }
98
-        }
99
-    }
100
-    return sum / static_cast<float>(nNonZeros);
101
-}
102
-
103
-template<class GenericMatrix>
104
-GenericMatrix gaps::algo::computeStdDev(const GenericMatrix &stdMat,
105
-const GenericMatrix &meanMat, unsigned nUpdates)
106
-{
107
-    GAPS_ASSERT(nUpdates > 1);
108
-    GenericMatrix retMat(stdMat.nRow(), stdMat.nCol());
109
-    for (unsigned r = 0; r < retMat.nRow(); ++r)
110
-    {
111
-        for (unsigned c = 0; c < retMat.nCol(); ++c)
112
-        {
113
-            float meanTerm = meanMat(r,c) * meanMat(r,c) / static_cast<float>(nUpdates);
114
-            float numer = gaps::max(0.f, stdMat(r,c) - meanTerm);
115
-            retMat(r,c) = std::sqrt(numer / (static_cast<float>(nUpdates) - 1.f));
116
-        }
117
-    }
118
-    return retMat;
119
-}
120
-
121
-template <class MatA, class MatB, class MatC>
122
-float gaps::algo::loglikelihood(const MatA &D, const MatB &S,
123
-const MatC &AP)
71
+template <class T>
72
+std::string gaps::to_string(T a)
124 73
 {
125
-    float chi2 = 0.f;
126
-    for (unsigned i = 0; i < D.nRow(); ++i)
127
-    {
128
-        for (unsigned j = 0; j < D.nCol(); ++j)
129
-        {
130
-            chi2 += GAPS_SQ(D(i,j) - AP(i,j)) / GAPS_SQ(S(i,j));
131
-        }
132
-    }
133
-    return chi2 / 2.f;
74
+    std::stringstream ss;
75
+    ss << a;
76
+    return ss.str();
134 77
 }
135 78
 
136
-#endif
137 79
\ No newline at end of file
80
+#endif // __COGAPS_ALGORITHMS_H__
138 81
\ No newline at end of file
139 82
deleted file mode 100644
... ...
@@ -1,31 +0,0 @@
1
-#include "Math.h"
2
-
3
-float gaps::min(float a, float b)
4
-{
5
-    return a < b ? a : b;
6
-}
7
-
8
-unsigned gaps::min(unsigned a, unsigned b)
9
-{
10
-    return a < b ? a : b;
11
-}
12
-
13
-uint64_t gaps::min(uint64_t a, uint64_t b)
14
-{
15
-    return a < b ? a : b;
16
-}
17
-
18
-float gaps::max(float a, float b)
19
-{
20
-    return a < b ? b : a;
21
-}
22
-
23
-unsigned gaps::max(unsigned a, unsigned b)
24
-{
25
-    return a < b ? b : a;
26
-}
27
-
28
-uint64_t gaps::max(uint64_t a, uint64_t b)
29
-{
30
-    return a < b ? b : a;
31
-}
32 0
\ No newline at end of file
33 1
deleted file mode 100644
... ...
@@ -1,34 +0,0 @@
1
-#ifndef __COGAPS_MATH_H__
2
-#define __COGAPS_MATH_H__
3
-
4
-#include <stdint.h>
5
-#include <string>
6
-#include <sstream>
7
-
8
-namespace gaps
9
-{
10
-    const float epsilon = 1.0e-10f;
11
-    const float pi = 3.1415926535897932384626433832795f;
12
-    const float pi_double = 3.1415926535897932384626433832795;
13
-
14
-    float min(float a, float b);
15
-    unsigned min(unsigned a, unsigned b);
16
-    uint64_t min(uint64_t a, uint64_t b);
17
-
18
-    float max(float a, float b);
19
-    unsigned max(unsigned a, unsigned b);
20
-    uint64_t max(uint64_t a, uint64_t b);
21
-
22
-    template <class T>
23
-    std::string to_string(T a);
24
-}
25
-
26
-template <class T>
27
-std::string gaps::to_string(T a)
28
-{
29
-    std::stringstream ss;
30
-    ss << a;
31
-    return ss.str();
32
-}
33
-
34
-#endif
35 0
\ No newline at end of file
... ...
@@ -1,10 +1,9 @@
1 1
 // [[Rcpp::depends(BH)]]
2 2
 
3
-#include "Math.h"
4 3
 #include "Random.h"
5
-#include "../utils/GapsAssert.h"
6 4
 
7
-// TODO remove boost dependency
5
+#include "Algorithms.h"
6
+#include "../utils/GapsAssert.h"
8 7
 
9 8
 #include <boost/math/distributions/exponential.hpp>
10 9
 #include <boost/math/distributions/gamma.hpp>
... ...
@@ -14,28 +13,29 @@
14 13
 #include <stdint.h>
15 14
 
16 15
 #define Q_GAMMA_THRESHOLD 0.000001f
17
-#define Q_GAMMA_MIN_VALUE 0.0
16
+#define Q_GAMMA_MIN_VALUE 0.f
18 17
 
19 18
 const float maxU32AsFloat = static_cast<float>(std::numeric_limits<uint32_t>::max());
20 19
 const double maxU32AsDouble = static_cast<double>(std::numeric_limits<uint32_t>::max());
21 20
 
22
-static Xoroshiro128plus seeder;
21
+/////////////////////////////// OptionalFloat //////////////////////////////////
23 22
 
24
-void GapsRng::save(Archive &ar)
25
-{
26
-    ar << seeder;
27
-}
23
+OptionalFloat::OptionalFloat() : mHasValue(false), mValue(0.f) {}
24
+
25
+OptionalFloat::OptionalFloat(float f) : mHasValue(true), mValue(f) {}
28 26
 
29
-void GapsRng::load(Archive &ar)
27
+float OptionalFloat::value()
30 28
 {
31
-    ar >> seeder;
29
+    return mValue;
32 30
 }
33 31
 
34
-void GapsRng::setSeed(uint64_t seed)
32
+bool OptionalFloat::hasValue() const
35 33
 {
36
-    seeder.seed(seed);
34
+    return mHasValue;
37 35
 }
38 36
 
37
+///////////////////////////// Xoroshiro128plus /////////////////////////////////
38
+
39 39
 static uint64_t rotl(const uint64_t x, int k)
40 40
 {
41 41
     return (x << k) | (x >> (64 - k));
... ...
@@ -83,7 +83,16 @@ Archive& operator>>(Archive &ar, Xoroshiro128plus &gen)
83 83
     return ar;
84 84
 }
85 85
 
86
-GapsRng::GapsRng() : mState(seeder.next()) {}
86
+////////////////////////////////// GapsRng /////////////////////////////////////
87
+
88
+GapsRng::GapsRng(uint64_t seed, uint64_t stream)
89
+    :
90
+mState(0u), mStream(stream << 1u | 1u)
91
+{
92
+    next();
93
+    mState += seed;
94
+    next();
95
+}
87 96
 
88 97
 uint32_t GapsRng::next()
89 98
 {
... ...
@@ -93,7 +102,7 @@ uint32_t GapsRng::next()
93 102
 
94 103
 void GapsRng::advance()
95 104
 {
96
-    mState = mState * 6364136223846793005ull + (54u|1);
105
+    mState = mState * 6364136223846793005ull + (mStream|1);
97 106
 }
98 107
 
99 108
 uint32_t GapsRng::get() const
... ...
@@ -240,7 +249,6 @@ float GapsRng::inverseNormSample(float a, float b, float mean, float sd)
240 249
 float GapsRng::truncGammaUpper(float b, float shape, float scale)
241 250
 {
242 251
     float upper = gaps::p_gamma(b, shape, scale);
243
-
244 252
     float u = uniform(0.f, upper);
245 253
     while (u == 0.f || u == 1.f)
246 254
     {
... ...
@@ -251,16 +259,18 @@ float GapsRng::truncGammaUpper(float b, float shape, float scale)
251 259
 
252 260
 Archive& operator<<(Archive &ar, GapsRng &gen)
253 261
 {
254
-    ar << gen.mState;
262
+    ar << gen.mState << gen.mStream;
255 263
     return ar;
256 264
 }
257 265
 
258 266
 Archive& operator>>(Archive &ar, GapsRng &gen)
259 267
 {
260
-    ar >> gen.mState;
268
+    ar >> gen.mState >> gen.mStream;
261 269
     return ar;
262 270
 }
263 271
 
272
+////////////////////////// Distribution Calculations ///////////////////////////
273
+
264 274
 float gaps::d_gamma(float d, float shape, float scale)
265 275
 {
266 276
     boost::math::gamma_distribution<> gam(shape, scale);
... ...
@@ -11,11 +11,11 @@ struct OptionalFloat
11 11
 {
12 12
 public :
13 13
 
14
-    OptionalFloat() : mHasValue(false), mValue(0.f) {}
15
-    OptionalFloat(float f) : mHasValue(true), mValue(f) {}
14
+    OptionalFloat();
15
+    OptionalFloat(float f);
16 16
 
17
-    float value() { return mValue; }
18
-    bool hasValue() const { return mHasValue; }
17
+    float value();
18
+    bool hasValue() const;
19 19
 
20 20
 private :
21 21
 
... ...
@@ -65,7 +65,7 @@ class GapsRng
65 65
 {
66 66
 public:
67 67
 
68
-    GapsRng();
68
+    GapsRng(uint64_t seed, uint64_t stream=54);
69 69
 
70 70
     float uniform();
71 71
     float uniform(float a, float b);
... ...
@@ -82,13 +82,10 @@ public:
82 82
     float inverseNormSample(float a, float b, float mean, float sd);
83 83
     float truncGammaUpper(float b, float shape, float scale);
84 84
 
85
-    static void setSeed(uint64_t seed);
86
-    static void save(Archive &ar);
87
-    static void load(Archive &ar);
88
-  
89 85
 private:
90 86
 
91 87
     uint64_t mState;
88
+    uint64_t mStream;
92 89
 
93 90
     uint32_t next();
94 91
     void advance();
... ...
@@ -1,34 +1,10 @@
1 1
 #ifndef __COGAPS_SIMD_H__
2 2
 #define __COGAPS_SIMD_H__
3 3
 
4
-#ifndef SSE_INSTR_SET
5
-    #if defined ( __AVX2__ )
6
-        #define SSE_INSTR_SET 8
7
-    #elif defined ( __AVX__ )
8
-        #define SSE_INSTR_SET 7
9
-    #elif defined ( __SSE4_2__ )
10
-        #define SSE_INSTR_SET 6
11
-    #elif defined ( __SSE4_1__ )
12
-        #define SSE_INSTR_SET 5
13
-    #else
14
-        #define SSE_INSTR_SET 0
15
-    #endif
16
-#endif
4
+#if defined ( __AVX2__ ) || defined ( __AVX__ )
17 5
 
18
-#if SSE_INSTR_SET > 6
19 6
     #define __GAPS_AVX__
20 7
     #include <immintrin.h>
21
-#elif SSE_INSTR_SET == 6 || SSE_INSTR_SET == 5
22
-    #define __GAPS_SSE__
23
-    #include <nmmintrin.h>
24
-#endif
25
-
26
-namespace gaps
27
-{
28
-namespace simd
29
-{
30
-
31
-#if defined( __GAPS_AVX__ )
32 8
     typedef __m256 gaps_packed_t;
33 9
     const unsigned index_increment = 8;
34 10
     #define SET_SCALAR(x) _mm256_set1_ps(x)
... ...
@@ -38,7 +14,11 @@ namespace simd
38 14
     #define SUB_PACKED(a,b) _mm256_sub_ps(a,b)
39 15
     #define MUL_PACKED(a,b) _mm256_mul_ps(a,b)
40 16
     #define DIV_PACKED(a,b) _mm256_div_ps(a,b)
41
-#elif defined( __GAPS_SSE__ )
17
+
18
+#elif defined ( __SSE4_2__ ) || defined ( __SSE4_1__ )
19
+
20
+    #define __GAPS_SSE__
21
+    #include <nmmintrin.h>
42 22
     typedef __m128 gaps_packed_t;
43 23
     const unsigned index_increment = 4;
44 24
     #define SET_SCALAR(x) _mm_set1_ps(x)
... ...
@@ -48,7 +28,9 @@ namespace simd
48 28
     #define SUB_PACKED(a,b) _mm_sub_ps(a,b)
49 29
     #define MUL_PACKED(a,b) _mm_mul_ps(a,b)
50 30
     #define DIV_PACKED(a,b) _mm_div_ps(a,b)
31
+
51 32
 #else
33
+
52 34
     typedef float gaps_packed_t;
53 35
     const unsigned index_increment = 1;
54 36
     #define SET_SCALAR(x) x
... ...
@@ -58,73 +40,80 @@ namespace simd
58 40
     #define SUB_PACKED(a,b) ((a)-(b))
59 41
     #define MUL_PACKED(a,b) ((a)*(b))
60 42
     #define DIV_PACKED(a,b) ((a)/(b))
43
+
61 44
 #endif
62 45
 
63
-class Index
46
+namespace gaps
47
+{
48
+namespace simd
64 49
 {
65
-private:
66
-
67
-    unsigned index;
68 50
 
51
+class Index
52
+{
69 53
 public:
70 54
 
71 55
     explicit Index(unsigned i) : index(i) {}
72 56
     Index& operator=(unsigned val) { index = val; return *this; }
73
-    bool operator<(unsigned comp) { return index < comp; }
74
-    bool operator<=(unsigned comp) { return index <= comp; }
57
+    bool operator<(unsigned comp) const { return index < comp; }
58
+    bool operator<=(unsigned comp) const { return index <= comp; }
75 59
     void operator++() { index += index_increment; }
76 60
     unsigned value() const { return index; }
77 61
     unsigned increment() const { return index_increment; }
62
+
78 63
     friend const float* operator+(const float *ptr, Index ndx);
79 64
     friend float* operator+(float *ptr, Index ndx);
65
+
66
+private:
67
+
68
+    unsigned index;
80 69
 };
81 70
 
82 71
 inline const float* operator+(const float *ptr, Index ndx) { return ptr + ndx.index; }
83 72
 inline float* operator+(float *ptr, Index ndx) { return ptr + ndx.index; }
84 73
 
85
-class packedFloat
74
+class PackedFloat
86 75
 {
87
-private:
88
-
89
-    gaps_packed_t mData;
90
-
91 76
 public:
92 77
 
93
-    packedFloat() : mData() {}
94
-    explicit packedFloat(float val) : mData(SET_SCALAR(val)) {}
95
-#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ ) || defined( __GAPS_AVX512__ )
96
-    explicit packedFloat(gaps_packed_t val) : mData(val) {}
78
+    PackedFloat() : mData() {}
79
+    explicit PackedFloat(float val) : mData(SET_SCALAR(val)) {}
80
+#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ )
81
+    explicit PackedFloat(gaps_packed_t val) : mData(val) {}
97 82
 #endif
98 83
 
99
-    packedFloat operator+(packedFloat b) const { return packedFloat(ADD_PACKED(mData, b.mData)); }
100
-    packedFloat operator-(packedFloat b) const { return packedFloat(SUB_PACKED(mData, b.mData)); }
101
-    packedFloat operator*(packedFloat b) const { return packedFloat(MUL_PACKED(mData, b.mData)); }
102
-    packedFloat operator/(packedFloat b) const { return packedFloat(DIV_PACKED(mData, b.mData)); }
84
+    PackedFloat operator+(PackedFloat b) const { return PackedFloat(ADD_PACKED(mData, b.mData)); }
85
+    PackedFloat operator-(PackedFloat b) const { return PackedFloat(SUB_PACKED(mData, b.mData)); }
86
+    PackedFloat operator*(PackedFloat b) const { return PackedFloat(MUL_PACKED(mData, b.mData)); }
87
+    PackedFloat operator/(PackedFloat b) const { return PackedFloat(DIV_PACKED(mData, b.mData)); }
103 88
 
104
-    void operator+=(packedFloat val) { mData = ADD_PACKED(mData, val.mData); }
89
+    void operator+=(PackedFloat val) { mData = ADD_PACKED(mData, val.mData); }
105 90
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
106 91
     void store(float *ptr) { STORE_PACKED(ptr, mData); }
107 92
 
108
-#if defined( __GAPS_AVX__ )
109 93
     float scalar()
110 94
     {
95
+    #if defined( __GAPS_AVX__ )
96
+
111 97
         float* ra = reinterpret_cast<float*>(&mData); // NOLINT
112 98
         mData = _mm256_hadd_ps(mData, mData);
113 99
         mData = _mm256_hadd_ps(mData, mData);
114 100
         return ra[0] + ra[4];
115
-    }
116
-#elif defined( __GAPS_SSE__ )
117
-    float scalar()
118
-    {
101
+
102
+    #elif defined( __GAPS_SSE__ )
103
+
119 104
         float* ra = reinterpret_cast<float*>(&mData); // NOLINT
120 105
         return ra[0] + ra[1] + ra[2] + ra[3];
121
-    }
122
-#else
123
-    float scalar()
124
-    {
106
+
107
+    #else
108
+
125 109
         return mData;
110
+
111
+    #endif
126 112
     }
127
-#endif
113
+
114
+private:
115
+
116
+    gaps_packed_t mData;
128 117
 };
129 118
 
130 119
 } // namespace simd
... ...
@@ -12,7 +12,8 @@
12 12
 
13 13
 // magic number written to beginning of archive files
14 14
 // needs to be updated everytime the method of checkpointing changes
15
-#define ARCHIVE_MAGIC_NUM 0xCE45D32B // v3.3.22
15
+//#define ARCHIVE_MAGIC_NUM 0xCE45D32B // v3.3.22
16
+#define ARCHIVE_MAGIC_NUM 0xB123AA4D // v3.3.30
16 17
 
17 18
 class Archive
18 19
 {
... ...
@@ -36,7 +36,7 @@
36 36
             if (!(cond))                                                \
37 37
             {                                                           \
38 38
                 gaps_cout << "assert failed " << __FILE__ << " " <<     \
39
-                    __LINE__ << ", " << msg << '\n';                   \
39
+                    __LINE__ << ", " << msg << '\n';                    \
40 40
                 gaps_stop();                                            \
41 41
             }                                                           \
42 42
         } while(0)