Browse code

consistent with or without queue

Tom Sherman authored on 19/09/2018 17:33:40
Showing10 changed files

... ...
@@ -111,7 +111,7 @@ Atom* AtomicDomain::front()
111 111
     return mAtoms.front();
112 112
 }
113 113
 
114
-Atom* AtomicDomain::randomAtom(GapsRng *rng)
114
+Atom* AtomicDomain::randomAtom(GapsRng *rng, const SmallPairedHashSetU64 &moves)
115 115
 {
116 116
     GAPS_ASSERT(size() > 0);
117 117
     GAPS_ASSERT(isSorted());
... ...
@@ -126,6 +126,12 @@ Atom* AtomicDomain::randomAtom(GapsRng *rng)
126 126
     unsigned index = std::distance(mAtoms.begin(), it);
127 127
     unsigned leftIndex = index == 0 ? 0 : index - 1;
128 128
     index = (index == mAtoms.size()) ? index - 1 : index;
129
+
130
+    if (moves.contains(mAtoms[leftIndex]->pos) || moves.contains(mAtoms[index]->pos))
131
+    {
132
+        return NULL;
133
+    }
134
+
129 135
     if (std::abs(mAtoms[leftIndex]->pos - pos) < std::abs(mAtoms[index]->pos - pos))
130 136
     {
131 137
         index = leftIndex;
... ...
@@ -151,12 +157,14 @@ AtomNeighborhood AtomicDomain::randomAtomWithNeighbors(GapsRng *rng)
151 157
         index = leftIndex;
152 158
     }
153 159
 
160
+    //gaps_printf("index: %d, size: %d\n", index, mAtoms.size());
154 161
     Atom* left = (index == 0) ? NULL : mAtoms[index - 1];
155 162
     Atom* right = (index == mAtoms.size() - 1) ? NULL : mAtoms[index + 1];
156 163
     return AtomNeighborhood(left, mAtoms[index], right);
157 164
 }
158 165
 
159
-AtomNeighborhood AtomicDomain::randomAtomWithRightNeighbor(GapsRng *rng)
166
+AtomNeighborhood AtomicDomain::randomAtomWithRightNeighbor(GapsRng *rng,
167
+const SmallPairedHashSetU64 &moves)
160 168
 {
161 169
     GAPS_ASSERT(size() > 0);
162 170
 
... ...
@@ -167,6 +175,12 @@ AtomNeighborhood AtomicDomain::randomAtomWithRightNeighbor(GapsRng *rng)
167 175
     unsigned index = std::distance(mAtoms.begin(), it);
168 176
     unsigned leftIndex = index == 0 ? 0 : index - 1;
169 177
     index = (index == mAtoms.size()) ? index - 1 : index;
178
+
179
+    if (moves.contains(mAtoms[leftIndex]->pos) || moves.contains(mAtoms[index]->pos))
180
+    {
181
+        return AtomNeighborhood(NULL, NULL, NULL);
182
+    }
183
+
170 184
     if (std::abs(mAtoms[leftIndex]->pos - pos) < std::abs(mAtoms[index]->pos - pos))
171 185
     {
172 186
         index = leftIndex;
... ...
@@ -200,6 +214,22 @@ uint64_t AtomicDomain::size() const
200 214
     return mAtoms.size();
201 215
 }
202 216
 
217
+Atom* AtomicDomain::insert(uint64_t pos, float mass)
218
+{
219
+    Atom *newAtom = new Atom(pos, mass);
220
+    std::vector<Atom*>::iterator it;
221
+    #pragma omp critical(AtomicInsertOrErase)
222
+    {
223
+        GAPS_ASSERT(!vecContains(mAtoms, pos));
224
+
225
+        it = std::lower_bound(mAtoms.begin(), mAtoms.end(), pos, compareAtomLower);
226
+        it = mAtoms.insert(it, newAtom);
227
+
228
+        GAPS_ASSERT(vecContains(mAtoms, pos));
229
+    }
230
+    return *it;
231
+}
232
+
203 233
 void AtomicDomain::erase(uint64_t pos)
204 234
 {
205 235
     Atom *a = NULL;
... ...
@@ -210,7 +240,7 @@ void AtomicDomain::erase(uint64_t pos)
210 240
 
211 241
         std::vector<Atom*>::iterator it = std::lower_bound(mAtoms.begin(),
212 242
             mAtoms.end(), pos, compareAtomLower);
213
-        Atom *a = *it;
243
+        a = *it;
214 244
         mAtoms.erase(it);
215 245
 
216 246
         GAPS_ASSERT(!vecContains(mAtoms, pos));
... ...
@@ -218,7 +248,8 @@ void AtomicDomain::erase(uint64_t pos)
218 248
     delete a;
219 249
 }
220 250
 
221
-void AtomicDomain::move(uint64_t src, uint64_t dest)
251
+// for moving across a later birth (already present in domain)
252
+/*void AtomicDomain::move(uint64_t src, uint64_t dest)
222 253
 {
223 254
     #pragma omp critical(AtomicInsertOrErase)
224 255
     {
... ...
@@ -248,23 +279,7 @@ void AtomicDomain::move(uint64_t src, uint64_t dest)
248 279
         GAPS_ASSERT(!vecContains(mAtoms, src));
249 280
         GAPS_ASSERT(vecContains(mAtoms, dest));
250 281
     }
251
-}
252
-
253
-Atom* AtomicDomain::insert(uint64_t pos, float mass)
254
-{
255
-    Atom *newAtom = new Atom(pos, mass);
256
-    std::vector<Atom*>::iterator it;
257
-    #pragma omp critical(AtomicInsertOrErase)
258
-    {
259
-        GAPS_ASSERT(!vecContains(mAtoms, pos));
260
-
261
-        it = std::lower_bound(mAtoms.begin(), mAtoms.end(), pos, compareAtomLower);
262
-        it = mAtoms.insert(it, newAtom);
263
-
264
-        GAPS_ASSERT(vecContains(mAtoms, pos));
265
-    }
266
-    return *it;
267
-}
282
+}*/
268 283
 
269 284
 Archive& operator<<(Archive &ar, AtomicDomain &domain)
270 285
 {
... ...
@@ -1,8 +1,9 @@
1 1
 #ifndef __COGAPS_ATOMIC_DOMAIN_H__
2 2
 #define __COGAPS_ATOMIC_DOMAIN_H__
3 3
 
4
-#include "utils/Archive.h"
4
+#include "data_structures/HashSets.h"
5 5
 #include "math/Random.h"
6
+#include "utils/Archive.h"
6 7
 
7 8
 #include <vector>
8 9
 
... ...
@@ -44,9 +45,9 @@ public:
44 45
 
45 46
     // access atoms
46 47
     Atom* front();
47
-    Atom* randomAtom(GapsRng *rng);
48
+    Atom* randomAtom(GapsRng *rng, const SmallPairedHashSetU64 &moves);
48 49
     AtomNeighborhood randomAtomWithNeighbors(GapsRng *rng);
49
-    AtomNeighborhood randomAtomWithRightNeighbor(GapsRng *rng);
50
+    AtomNeighborhood randomAtomWithRightNeighbor(GapsRng *rng, const SmallPairedHashSetU64 &moves);
50 51
 
51 52
     Atom* getLeftNeighbor(uint64_t pos);
52 53
     Atom* getRightNeighbor(uint64_t pos);
... ...
@@ -57,7 +58,7 @@ public:
57 58
 
58 59
     // these need to happen concurrently without invalidating pointers
59 60
     void erase(uint64_t pos);
60
-    void move(uint64_t src, uint64_t dest);
61
+    //void move(uint64_t src, uint64_t dest);
61 62
 
62 63
     // iterators
63 64
     std::vector<Atom*>::iterator begin() { return mAtoms.begin(); }
... ...
@@ -94,6 +94,10 @@ GapsResult GapsRunner::run(bool printThreads)
94 94
     }
95 95
     #endif
96 96
 
97
+#ifdef __GAPS_DEBUG_NO_QUEUE__
98
+    gaps_printf("Running without a queue\n");
99
+#endif
100
+
97 101
     // cascade through phases, allows algorithm to be resumed in either phase
98 102
     GAPS_ASSERT(mPhase == 'C' || mPhase == 'S');
99 103
     switch (mPhase)
... ...
@@ -23,8 +23,9 @@ void GibbsSampler::setSparsity(float alpha, bool singleCell)
23 23
         gaps::algo::mean(mDMatrix);
24 24
 
25 25
     mAlpha = alpha;
26
-    mQueue.setAlpha(alpha);
27 26
     mLambda = alpha * std::sqrt(mNumPatterns / meanD);
27
+    mQueue.setAlpha(alpha);
28
+    mQueue.setLambda(mLambda);
28 29
 }
29 30
 
30 31
 void GibbsSampler::setMaxGibbsMass(float max)
... ...
@@ -72,8 +73,11 @@ void GibbsSampler::update(unsigned nSteps, unsigned nCores)
72 73
     while (n < nSteps)
73 74
     {
74 75
         // populate queue, prepare domain for this queue
76
+        #ifdef __GAPS_DEBUG_NO_QUEUE__
77
+        mQueue.populate(mDomain, 1);
78
+        #else
75 79
         mQueue.populate(mDomain, nSteps - n);
76
-        //mQueue.populate(mDomain, 1);
80
+        #endif
77 81
         n += mQueue.size();
78 82
         
79 83
         // process all proposed updates
... ...
@@ -117,7 +121,6 @@ void GibbsSampler::birth(const AtomicProposal &prop)
117 121
         : prop.rng.exponential(mLambda);
118 122
 
119 123
     // accept mass as long as it's non-zero
120
-    //gaps_printf("mass: %.16f, accept: %d\n", mass, mass >= gaps::epsilon);
121 124
     if (mass >= gaps::epsilon)
122 125
     {
123 126
         mQueue.acceptBirth();
... ...
@@ -152,7 +155,6 @@ void GibbsSampler::death(const AtomicProposal &prop)
152 155
 
153 156
     // accept/reject rebirth
154 157
     float deltaLL = getDeltaLL(alpha, rebirthMass) * mAnnealingTemp;
155
-    //gaps_printf("deltaLL: %.16f\n", deltaLL);
156 158
     if (std::log(prop.rng.uniform()) < deltaLL)
157 159
     {
158 160
         mQueue.rejectDeath();
... ...
@@ -176,7 +178,7 @@ void GibbsSampler::move(const AtomicProposal &prop)
176 178
     AlphaParameters alpha = alphaParameters(prop.r1, prop.c1, prop.r2, prop.c2);
177 179
     if (std::log(prop.rng.uniform()) < getDeltaLL(alpha, -prop.atom1->mass) * mAnnealingTemp)
178 180
     {
179
-        mDomain.move(prop.atom1->pos, prop.pos);
181
+        prop.atom1->pos = prop.pos;
180 182
         safelyChangeMatrix(prop.r1, prop.c1, -prop.atom1->mass);
181 183
         changeMatrix(prop.r2, prop.c2, prop.atom1->mass);
182 184
     }
... ...
@@ -1,4 +1,4 @@
1
-PKG_CPPFLAGS = @GAPS_CPP_FLAGS@ 
1
+PKG_CPPFLAGS = @GAPS_CPP_FLAGS@ -D__GAPS_DEBUG_NO_QUEUE__
2 2
 PKG_CXXFLAGS = @GAPS_CXX_FLAGS@
3 3
 PKG_LIBS = @GAPS_LIBS@
4 4
 
... ...
@@ -1,4 +1,5 @@
1 1
 #include "ProposalQueue.h"
2
+#include "math/Algorithms.h"
2 3
 #include "math/Random.h"
3 4
 #include "utils/GapsAssert.h"
4 5
 
... ...
@@ -30,11 +31,17 @@ void ProposalQueue::setAlpha(float alpha)
30 31
     mAlpha = static_cast<double>(alpha);
31 32
 }
32 33
 
34
+void ProposalQueue::setLambda(float lambda)
35
+{
36
+    mLambda = lambda;
37
+}
38
+
33 39
 void ProposalQueue::populate(AtomicDomain &domain, unsigned limit)
34 40
 {
35 41
     GAPS_ASSERT(mQueue.empty());
36 42
     GAPS_ASSERT(mUsedAtoms.isEmpty());
37 43
     GAPS_ASSERT(mUsedMatrixIndices.isEmpty());
44
+    GAPS_ASSERT(mProposedMoves.isEmpty());
38 45
     GAPS_ASSERT(mMinAtoms == mMaxAtoms);
39 46
     GAPS_ASSERT(mMaxAtoms == domain.size());
40 47
 
... ...
@@ -47,10 +54,6 @@ void ProposalQueue::populate(AtomicDomain &domain, unsigned limit)
47 54
             success = false;
48 55
             mUseCachedRng = true;
49 56
         }
50
-        else
51
-        {
52
-            //gaps_printf("%c %llu\n", mQueue.back().type, mQueue.back().atom1->pos);
53
-        }
54 57
     }
55 58
 }
56 59
 
... ...
@@ -61,6 +64,7 @@ void ProposalQueue::clear()
61 64
     mQueue.clear();
62 65
     mUsedMatrixIndices.clear();
63 66
     mUsedAtoms.clear();
67
+    mProposedMoves.clear();
64 68
 }
65 69
 
66 70
 unsigned ProposalQueue::size() const
... ...
@@ -110,7 +114,6 @@ bool ProposalQueue::makeProposal(AtomicDomain &domain)
110 114
 {
111 115
     mU1 = mUseCachedRng ? mU1 : mRng.uniform();
112 116
     mU2 = mUseCachedRng ? mU2: mRng.uniform();
113
-    //gaps_printf("u1=%f, u2=%f\n", mU1, mU2);
114 117
     mUseCachedRng = false;
115 118
 
116 119
     if (mMinAtoms < 2 && mMaxAtoms >= 2)
... ...
@@ -138,15 +141,14 @@ bool ProposalQueue::makeProposal(AtomicDomain &domain)
138 141
         }
139 142
         return false; // can't determine B/D since range is too wide
140 143
     }
141
-    //return false;
142
-    //return (mU1 < 0.75f) ? move(domain) : exchange(domain);
143
-    return exchange(domain);
144
+    return (mU1 < 0.75f) ? move(domain) : exchange(domain);
144 145
 }
145 146
 
146 147
 bool ProposalQueue::birth(AtomicDomain &domain)
147 148
 {
148 149
     AtomicProposal prop('B');
149 150
     uint64_t pos = domain.randomFreePosition(&(prop.rng), mUsedAtoms.vec());
151
+
150 152
     if (pos == 0)
151 153
     {
152 154
         DEBUG_PING // want to notify since this event should have near 0 probability
... ...
@@ -154,6 +156,12 @@ bool ProposalQueue::birth(AtomicDomain &domain)
154 156
         return false; // atom conflict, might have open spot if atom moves/dies
155 157
     }
156 158
 
159
+    if (mProposedMoves.overlap(pos))
160
+    {
161
+        GapsRng::rollBackOnce(); // ensure same proposal next time
162
+        return false; // this birth would break assumption moves doesn't re-order domain
163
+    }
164
+
157 165
     prop.r1 = (pos / mBinLength) / mNumCols;
158 166
     prop.c1 = (pos / mBinLength) % mNumCols;
159 167
     if (mUsedMatrixIndices.contains(prop.r1))
... ...
@@ -173,7 +181,14 @@ bool ProposalQueue::birth(AtomicDomain &domain)
173 181
 bool ProposalQueue::death(AtomicDomain &domain)
174 182
 {
175 183
     AtomicProposal prop('D');
176
-    prop.atom1 = domain.randomAtom(&(prop.rng));
184
+    prop.atom1 = domain.randomAtom(&(prop.rng), mProposedMoves);
185
+
186
+    if (prop.atom1 == NULL)
187
+    {
188
+        GapsRng::rollBackOnce(); // ensure same proposal next time
189
+        return false; // can't select atom
190
+    }
191
+        
177 192
     prop.r1 = (prop.atom1->pos / mBinLength) / mNumCols;
178 193
     prop.c1 = (prop.atom1->pos / mBinLength) % mNumCols;
179 194
 
... ...
@@ -196,7 +211,8 @@ bool ProposalQueue::move(AtomicDomain &domain)
196 211
     AtomNeighborhood hood = domain.randomAtomWithNeighbors(&(prop.rng));
197 212
 
198 213
     uint64_t lbound = hood.hasLeft() ? hood.left->pos : 0;
199
-    uint64_t rbound = hood.hasRight() ? hood.right->pos : mDomainLength;
214
+    uint64_t rbound = hood.hasRight() ? hood.right->pos : static_cast<uint64_t>(mDomainLength);
215
+
200 216
     if (mUsedAtoms.contains(lbound) || mUsedAtoms.contains(rbound))
201 217
     {
202 218
         GapsRng::rollBackOnce(); // ensure same proposal next time
... ...
@@ -218,25 +234,31 @@ bool ProposalQueue::move(AtomicDomain &domain)
218 234
 
219 235
     if (prop.r1 == prop.r2 && prop.c1 == prop.c2)
220 236
     {
221
-        //prop.atom1->pos = prop.pos; // automatically accept moves in same bin
222
-        return true;
237
+        prop.atom1->pos = prop.pos;
238
+        return true; // automatically accept moves in same bin
223 239
     }
224 240
 
225 241
     mQueue.push_back(prop);
226 242
     mUsedMatrixIndices.insert(prop.r1);
227 243
     mUsedMatrixIndices.insert(prop.r2);
228 244
     mUsedAtoms.insert(prop.atom1->pos);
229
-    mUsedAtoms.insert(prop.pos);
245
+    mProposedMoves.insert(prop.atom1->pos, prop.pos);
230 246
     return true;
231 247
 }
232 248
 
233 249
 bool ProposalQueue::exchange(AtomicDomain &domain)
234 250
 {
235 251
     AtomicProposal prop('E');
236
-    AtomNeighborhood hood = domain.randomAtomWithRightNeighbor(&(prop.rng));
252
+    AtomNeighborhood hood = domain.randomAtomWithRightNeighbor(&(prop.rng), mProposedMoves);
253
+
254
+    if (hood.center == NULL)
255
+    {
256
+        GapsRng::rollBackOnce(); // ensure same proposal next time
257
+        return false; // can't select atom
258
+    }
259
+
237 260
     prop.atom1 = hood.center;
238 261
     prop.atom2 = hood.hasRight() ? hood.right : domain.front();
239
-
240 262
     prop.r1 = (prop.atom1->pos / mBinLength) / mNumCols;
241 263
     prop.c1 = (prop.atom1->pos / mBinLength) % mNumCols;
242 264
     prop.r2 = (prop.atom2->pos / mBinLength) / mNumCols;
... ...
@@ -250,14 +272,14 @@ bool ProposalQueue::exchange(AtomicDomain &domain)
250 272
 
251 273
     if (prop.r1 == prop.r2 && prop.c1 == prop.c2)
252 274
     {
253
-        //float newMass = prop.rng.truncGammaUpper(a1->mass + a2->mass, 2.f, 1.f / mLambda);
254
-        //float delta = (a1->mass > a2->mass) ? newMass - a1->mass : a2->mass - newMass;
255
-        //if (a1->mass + delta > gaps::epsilon && a2->mass - delta > gaps::epsilon)
256
-        //{
257
-        //    a1->mass += delta;
258
-        //    a2->mass -= delta;
259
-        //}        
260
-        return true; // TODO automatically accept exchanges in same bin
275
+        float newMass = prop.rng.truncGammaUpper(prop.atom1->mass + prop.atom2->mass, 2.f, 1.f / mLambda);
276
+        float delta = (prop.atom1->mass > prop.atom2->mass) ? newMass - prop.atom1->mass : prop.atom2->mass - newMass;
277
+        if (prop.atom1->mass + delta > gaps::epsilon && prop.atom2->mass - delta > gaps::epsilon)
278
+        {
279
+            prop.atom1->mass += delta;
280
+            prop.atom2->mass -= delta;
281
+        }        
282
+        return true;
261 283
     }
262 284
 
263 285
     mQueue.push_back(prop);
... ...
@@ -35,6 +35,7 @@ public:
35 35
     // initialize
36 36
     ProposalQueue(unsigned nrow, unsigned ncol);
37 37
     void setAlpha(float alpha);
38
+    void setLambda(float lambda);
38 39
 
39 40
     // modify/access queue
40 41
     void populate(AtomicDomain &domain, unsigned limit);
... ...
@@ -58,6 +59,7 @@ private:
58 59
     
59 60
     FixedHashSetU32 mUsedMatrixIndices;
60 61
     SmallHashSetU64 mUsedAtoms;
62
+    SmallPairedHashSetU64 mProposedMoves;
61 63
 
62 64
     mutable GapsRng mRng;
63 65
 
... ...
@@ -70,6 +72,7 @@ private:
70 72
     double mDomainLength; // length of entire atomic domain
71 73
     double mNumBins; // number of matrix elements
72 74
 
75
+    float mLambda;
73 76
     float mU1;
74 77
     float mU2;
75 78
 
... ...
@@ -70,3 +70,49 @@ const std::vector<uint64_t>& SmallHashSetU64::vec()
70 70
 {
71 71
     return mSet;
72 72
 }
73
+
74
+///////////////////////////// SmallPairedHashSetU64 ////////////////////////////
75
+
76
+SmallPairedHashSetU64::SmallPairedHashSetU64() {}
77
+
78
+
79
+void SmallPairedHashSetU64::insert(uint64_t a, uint64_t b)
80
+{
81
+    mSet.push_back(a < b ? PositionPair(a, b) : PositionPair(b, a));
82
+}
83
+
84
+void SmallPairedHashSetU64::clear()
85
+{
86
+    mSet.clear();
87
+}
88
+
89
+bool SmallPairedHashSetU64::overlap(uint64_t pos)
90
+{
91
+    unsigned sz = mSet.size();
92
+    for (unsigned i = 0; i < sz; ++i)
93
+    {
94
+        if (mSet[i].a < pos && pos < mSet[i].b)
95
+        {
96
+            return true;
97
+        }
98
+    }
99
+    return false;
100
+}
101
+
102
+bool SmallPairedHashSetU64::contains(uint64_t pos) const
103
+{
104
+    unsigned sz = mSet.size();
105
+    for (unsigned i = 0; i < sz; ++i)
106
+    {
107
+        if (mSet[i].a == pos || mSet[i].b == pos)
108
+        {
109
+            return true;
110
+        }
111
+    }
112
+    return false;
113
+}
114
+
115
+bool SmallPairedHashSetU64::isEmpty()
116
+{
117
+    return mSet.empty();
118
+}
73 119
\ No newline at end of file
... ...
@@ -8,7 +8,7 @@
8 8
 
9 9
 #include <boost/unordered_set.hpp>
10 10
 
11
-#ifdef __GAPS_OPENMP__
11
+#ifdef __GAPS_OPENMP__ // defined in global config
12 12
 #include <omp.h>
13 13
 #endif
14 14
 
... ...
@@ -46,4 +46,29 @@ private:
46 46
     std::vector<uint64_t> mSet;
47 47
 };
48 48
 
49
+struct PositionPair
50
+{
51
+    uint64_t a;
52
+    uint64_t b;
53
+
54
+    PositionPair(uint64_t inA, uint64_t inB) : a(inA), b(inB) {}
55
+};
56
+
57
+class SmallPairedHashSetU64
58
+{
59
+public:
60
+
61
+    SmallPairedHashSetU64();
62
+
63
+    void insert(uint64_t a, uint64_t b);
64
+    void clear();
65
+    bool contains(uint64_t pos) const;
66
+    bool overlap(uint64_t pos);
67
+    bool isEmpty();
68
+
69
+private:
70
+
71
+    std::vector<PositionPair> mSet;
72
+};
73
+
49 74
 #endif // __COGAPS_HASH_SETS_H__
... ...
@@ -22,9 +22,9 @@ static Xoroshiro128plus seeder;
22 22
 
23 23
 /////////////////////////////// OptionalFloat //////////////////////////////////
24 24
 
25
-OptionalFloat::OptionalFloat() : mHasValue(false), mValue(0.f) {}
25
+OptionalFloat::OptionalFloat() : mValue(0.f), mHasValue(false) {}
26 26
 
27
-OptionalFloat::OptionalFloat(float f) : mHasValue(true), mValue(f) {}
27
+OptionalFloat::OptionalFloat(float f) : mValue(f), mHasValue(true) {}
28 28
 
29 29
 float OptionalFloat::value()
30 30
 {