... | ... |
@@ -5,7 +5,10 @@ export(CoGapsFromCheckpoint) |
5 | 5 |
export(GWCoGAPS) |
6 | 6 |
export(GWCoGapsFromCheckpoint) |
7 | 7 |
export(binaryA) |
8 |
+export(calcCoGAPSStat) |
|
9 |
+export(calcGeneGSStat) |
|
8 | 10 |
export(calcZ) |
11 |
+export(computeGeneGSProb) |
|
9 | 12 |
export(createGWCoGAPSSets) |
10 | 13 |
export(displayBuildReport) |
11 | 14 |
export(gapsMapRun) |
... | ... |
@@ -1,3 +1,13 @@ |
1 |
+# regenerate `SimpSim.result` with correct row/col names |
|
2 |
+# need to pass whole GSets list |
|
3 |
+# not one element |
|
4 |
+# smooth patterns nbd |
|
5 |
+# bluered not quotes - it's a function |
|
6 |
+# make sure import heatmap.2 from gplots |
|
7 |
+# make sure GWCoGAPS vignette is included |
|
8 |
+# look into conference poster/abstract submission |
|
9 |
+# write first draft of abstract |
|
10 |
+ |
|
1 | 11 |
#' CoGAPS Matrix Factorization Algorithm |
2 | 12 |
#' |
3 | 13 |
#' @details calls the C++ MCMC code and performs Bayesian |
... | ... |
@@ -8,6 +8,11 @@ |
8 | 8 |
#' @param GStoGenes data.frame or list with gene sets |
9 | 9 |
#' @param numPerm number of permutations for null |
10 | 10 |
#' @return gene set statistics for each column of A |
11 |
+#' @examples |
|
12 |
+#' data('SimpSim') |
|
13 |
+#' calcCoGAPSStat(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets, |
|
14 |
+#' numPerm=500) |
|
15 |
+#' @export |
|
11 | 16 |
calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500) |
12 | 17 |
{ |
13 | 18 |
# test for std dev of zero, possible mostly in simple simulations |
... | ... |
@@ -10,6 +10,11 @@ |
10 | 10 |
#' @param Pw weight on genes |
11 | 11 |
#' @param nullGenes logical indicating gene adjustment |
12 | 12 |
#' @return gene similiarity statistic |
13 |
+#' @examples |
|
14 |
+#' data('SimpSim') |
|
15 |
+#' calcGeneGSStat(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets[[1]], |
|
16 |
+#' numPerm=500) |
|
17 |
+#' @export |
|
13 | 18 |
calcGeneGSStat <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), |
14 | 19 |
nullGenes=FALSE) |
15 | 20 |
{ |
... | ... |
@@ -61,6 +66,11 @@ nullGenes=FALSE) |
61 | 66 |
#' @param PwNull - logical indicating gene adjustment |
62 | 67 |
#' @return A vector of length GSGenes containing the p-values of set membership |
63 | 68 |
#' for each gene containined in the set specified in GSGenes. |
69 |
+#' @examples |
|
70 |
+#' data('SimpSim') |
|
71 |
+#' computeGeneGSProb(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets[[1]], |
|
72 |
+#' numPerm=500) |
|
73 |
+#' @export |
|
64 | 74 |
computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)), |
65 | 75 |
numPerm=500, PwNull=FALSE) |
66 | 76 |
{ |
... | ... |
@@ -26,4 +26,9 @@ calculates the gene set statistics for each |
26 | 26 |
column of A using a Z-score from the elements of the A matrix, |
27 | 27 |
the input gene set, and permutation tests |
28 | 28 |
} |
29 |
+\examples{ |
|
30 |
+data('SimpSim') |
|
31 |
+calcCoGAPSStat(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets, |
|
32 |
+numPerm=500) |
|
33 |
+} |
|
29 | 34 |
|
... | ... |
@@ -31,4 +31,9 @@ calculates the probability that a gene |
31 | 31 |
listed in a gene set behaves like other genes in the set within |
32 | 32 |
the given data set |
33 | 33 |
} |
34 |
+\examples{ |
|
35 |
+data('SimpSim') |
|
36 |
+calcGeneGSStat(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets[[1]], |
|
37 |
+numPerm=500) |
|
38 |
+} |
|
34 | 39 |
|
... | ... |
@@ -34,4 +34,9 @@ membership for each candidate gene in a set specified in \code{GSGenes} by |
34 | 34 |
comparing the inferred activity of that gene to the average activity of the |
35 | 35 |
set. |
36 | 36 |
} |
37 |
+\examples{ |
|
38 |
+data('SimpSim') |
|
39 |
+computeGeneGSProb(SimpSim.result$Amean, SimpSim.result$Asd, GStoGenes=GSets[[1]], |
|
40 |
+numPerm=500) |
|
41 |
+} |
|
37 | 42 |
|
... | ... |
@@ -274,6 +274,7 @@ const float *S, const float *AP, const float *other1, const float *other2) |
274 | 274 |
return AlphaParameters(s,su); |
275 | 275 |
} |
276 | 276 |
|
277 |
+// should these always be positive? could explain ordering of atoms in exchange |
|
277 | 278 |
AlphaParameters gaps::algo::alphaParameters(const MatrixChange &ch, |
278 | 279 |
const TwoWayMatrix &D, const TwoWayMatrix &S, const ColMatrix &A, |
279 | 280 |
const RowMatrix &P, const TwoWayMatrix &AP) |
280 | 281 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,60 @@ |
1 |
+#ifndef __GAPS_ATOMIC_DOMAIN_H__ |
|
2 |
+#define __GAPS_ATOMIC_DOMAIN_H__ |
|
3 |
+ |
|
4 |
+// data structure that holds atoms |
|
5 |
+class AtomicDomain |
|
6 |
+{ |
|
7 |
+private: |
|
8 |
+ |
|
9 |
+public: |
|
10 |
+ |
|
11 |
+ AtomicDomain(); |
|
12 |
+ |
|
13 |
+ uint64_t randomAtomPosition(); |
|
14 |
+ uint64_t randomFreePosition(); |
|
15 |
+ |
|
16 |
+ float updateMass(uint64_t pos, float delta); |
|
17 |
+ |
|
18 |
+}; |
|
19 |
+ |
|
20 |
+#endif |
|
21 |
+ |
|
22 |
+ |
|
23 |
+struct Atom |
|
24 |
+{ |
|
25 |
+ uint64_t pos; |
|
26 |
+ float mass; |
|
27 |
+ |
|
28 |
+ Atom* left; |
|
29 |
+ Atom* right; |
|
30 |
+ |
|
31 |
+ Atom(uint64_t p, float m) |
|
32 |
+ : pos(p), mass(m), left(nullptr), right(nullptr) |
|
33 |
+ {} |
|
34 |
+}; |
|
35 |
+ |
|
36 |
+void insertAtom(uint64_t p, float m) |
|
37 |
+{ |
|
38 |
+ std::map<uint64_t, Atom>::const_iterator it, left, right; |
|
39 |
+ it = mAtoms.insert(std::pair<uint64_t, Atom>(p, Atom(p,m))).first; |
|
40 |
+ |
|
41 |
+ std::map<uint64_t, Atom>::const_iterator left(it), right(it); |
|
42 |
+ if (it != mAtoms.begin()) |
|
43 |
+ { |
|
44 |
+ --left; |
|
45 |
+ } |
|
46 |
+ if (++it != mAtoms.end()) |
|
47 |
+ { |
|
48 |
+ ++right; |
|
49 |
+ } |
|
50 |
+ it->left = &left; |
|
51 |
+ it->right = &right; |
|
52 |
+} |
|
53 |
+ |
|
54 |
+ |
|
55 |
+ |
|
56 |
+void removeAtom(const Atom &atom) |
|
57 |
+{ |
|
58 |
+ atom.left.right = atom.right; |
|
59 |
+ atom.right.left = atom.left; |
|
60 |
+} |
0 | 61 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,381 @@ |
1 |
+#include "AtomicQueue.h" |
|
2 |
+ |
|
3 |
+// TODO have private random number generator for consistency with benchmark |
|
4 |
+ |
|
5 |
+// consider having separate class that generates the proposal sequence |
|
6 |
+// and selects the atoms, but does not calculate the mass .... separate |
|
7 |
+// class with matrix access computes the mass - also allows for entire |
|
8 |
+// proposal specific code to be together (no more conditionals on nChanges) |
|
9 |
+ |
|
10 |
+// acceptance of proposals needs it's own unit as well - considering |
|
11 |
+// how much time is spent updating matrices - the updates would be independent |
|
12 |
+// of any currently running calculations by design. Find a way to lock |
|
13 |
+// the individual vectors of the matrix without locking the entire matrix. |
|
14 |
+ |
|
15 |
+// consider changing the data layout to have the row matrices (D,AP,S) near |
|
16 |
+// the P matrix and the col matrices (D,AP,S) near the A matrix |
|
17 |
+ |
|
18 |
+// consider allocating a single continuous chunk for each matric instead |
|
19 |
+// of individual vectors - does this mess up locking? |
|
20 |
+ |
|
21 |
+// look into delaying the A/P matrix update until after the full update step |
|
22 |
+// - only the AP updates are neccesary |
|
23 |
+ |
|
24 |
+// no need to separate alphaParameters and deltaLL into separate workers - |
|
25 |
+// they are serial and touch much of the same memory |
|
26 |
+ |
|
27 |
+// need to ensure proposals dont effect each other |
|
28 |
+// BIRTH - mark off locations of birth |
|
29 |
+// DEATH - can't free up death locations, neglible effect |
|
30 |
+// MOVE - must move within neighbor boundary, if a neighbor could be moved |
|
31 |
+// or deleted then this is held up - also know where birth locations are |
|
32 |
+// EXCHANGE - selects right most atom - uneffected by move, by death of this |
|
33 |
+// atom or birth of an atom in between can effect |
|
34 |
+ |
|
35 |
+// Neighbors |
|
36 |
+// DEATH - merge neighbors |
|
37 |
+// MOVE/RESIZE - nothing changes |
|
38 |
+// EXCHANGE - nothing changes |
|
39 |
+// BIRTH - need to find left or right neighbor |
|
40 |
+// store neighbor information, then everything constant besides insertion |
|
41 |
+// store in map makes insertion logN |
|
42 |
+ |
|
43 |
+AtomicQueue::AtomicQueue(char label, unsigned nrow, unsigned ncol) |
|
44 |
+ : |
|
45 |
+mLabel(label), mNumAtoms(0), |
|
46 |
+mMaxNumAtoms(std::numeric_limits<uint64_t>::max()), |
|
47 |
+mNumRows(nrow), mNumCols(ncol), mNumBins(nrow * ncol), |
|
48 |
+mBinSize(std::numeric_limits<uint64_t>::max() / (nrow * ncol)), |
|
49 |
+mAlpha(alpha), mLambda(lambda) |
|
50 |
+{} |
|
51 |
+ |
|
52 |
+// O(logN) |
|
53 |
+void AtomicQueue::addAtom(Atom atom) |
|
54 |
+{ |
|
55 |
+ mAtomPositions.insert(std::pair<uint64_t, uint64_t>(atom.pos, mNumAtoms)); |
|
56 |
+ mAtoms.push_back(atom); |
|
57 |
+ mNumAtoms++; |
|
58 |
+} |
|
59 |
+ |
|
60 |
+// O(logN) |
|
61 |
+void AtomicSupport::removeAtom(uint64_t pos) |
|
62 |
+{ |
|
63 |
+ uint64_t index = mAtomPositions.at(pos); |
|
64 |
+ mAtomPositions.erase(pos); |
|
65 |
+ |
|
66 |
+ // update key of object about to be moved (last one doesn't need to move) |
|
67 |
+ if (index < mNumAtoms - 1) |
|
68 |
+ { |
|
69 |
+ mAtomPositions.erase(mAtoms.back().pos); |
|
70 |
+ mAtomPositions.insert(std::pair<uint64_t, uint64_t>(mAtoms.back().pos, |
|
71 |
+ index)); |
|
72 |
+ } |
|
73 |
+ |
|
74 |
+ // delete atom from vector in O(1) |
|
75 |
+ mAtoms[index] = mAtoms.back(); |
|
76 |
+ mAtoms.pop_back(); |
|
77 |
+ mNumAtoms--; |
|
78 |
+} |
|
79 |
+ |
|
80 |
+// O(logN) |
|
81 |
+AtomNeighbors AtomicSupport::getNeighbors(uint64_t pos) const |
|
82 |
+{ |
|
83 |
+ // get an iterator to this atom and it's neighbors |
|
84 |
+ std::map<uint64_t, uint64_t>::const_iterator it, left, right; |
|
85 |
+ it = mAtomPositions.find(pos); |
|
86 |
+ left = it; right = it; |
|
87 |
+ if (it != mAtomPositions.begin()) |
|
88 |
+ { |
|
89 |
+ --left; |
|
90 |
+ } |
|
91 |
+ if (++it != mAtomPositions.end()) |
|
92 |
+ { |
|
93 |
+ ++right; |
|
94 |
+ } |
|
95 |
+ return AtomNeighbors(left->first, mAtoms[left->second].mass, |
|
96 |
+ right->first, mAtoms[right->second].mass); |
|
97 |
+} |
|
98 |
+ |
|
99 |
+// O(1) |
|
100 |
+uint64_t AtomicSupport::getRow(uint64_t pos) const |
|
101 |
+{ |
|
102 |
+ uint64_t binNum = std::min(pos / mBinSize, mNumBins - 1); |
|
103 |
+ return mLabel == 'A' ? binNum / mNumCols : binNum % mNumRows; |
|
104 |
+} |
|
105 |
+ |
|
106 |
+// O(1) |
|
107 |
+uint64_t AtomicSupport::getCol(uint64_t pos) const |
|
108 |
+{ |
|
109 |
+ uint64_t binNum = std::min(pos / mBinSize, mNumBins - 1); |
|
110 |
+ return mLabel == 'A' ? binNum % mNumCols : binNum / mNumRows; |
|
111 |
+} |
|
112 |
+ |
|
113 |
+// O(logN) |
|
114 |
+uint64_t AtomicSupport::randomFreePosition() const |
|
115 |
+{ |
|
116 |
+ uint64_t pos = 0; |
|
117 |
+ do |
|
118 |
+ { |
|
119 |
+ pos = gaps::random::uniform64(); |
|
120 |
+ } while (mAtomPositions.count(pos) > 0); |
|
121 |
+ return pos; |
|
122 |
+} |
|
123 |
+ |
|
124 |
+// O(1) |
|
125 |
+uint64_t AtomicSupport::randomAtomPosition() const |
|
126 |
+{ |
|
127 |
+ uint64_t num = gaps::random::uniform64(0, mNumAtoms - 1); |
|
128 |
+ return mAtoms[num].pos; |
|
129 |
+} |
|
130 |
+ |
|
131 |
+// O(logN) |
|
132 |
+AtomicProposal AtomicSupport::proposeDeath() const |
|
133 |
+{ |
|
134 |
+ uint64_t location = randomAtomPosition(); |
|
135 |
+ float mass = mAtoms[mAtomPositions.at(location)].mass; |
|
136 |
+ return AtomicProposal(mLabel, 'D', location, -mass); |
|
137 |
+} |
|
138 |
+ |
|
139 |
+// O(logN) |
|
140 |
+AtomicProposal AtomicSupport::proposeBirth() const |
|
141 |
+{ |
|
142 |
+ uint64_t location = randomFreePosition(); |
|
143 |
+ float mass = gaps::random::exponential(mLambda); |
|
144 |
+ return AtomicProposal(mLabel, 'B', location, mass); |
|
145 |
+} |
|
146 |
+ |
|
147 |
+// move atom between adjacent atoms |
|
148 |
+// O(logN) |
|
149 |
+AtomicProposal AtomicSupport::proposeMove() const |
|
150 |
+{ |
|
151 |
+ uint64_t pos = randomAtomPosition(); |
|
152 |
+ AtomNeighbors nbs = getNeighbors(pos); |
|
153 |
+ float mass = at(pos); |
|
154 |
+ |
|
155 |
+ uint64_t lbound = nbs.left.pos != pos ? nbs.left.pos : 0; |
|
156 |
+ uint64_t rbound = nbs.right.pos != pos ? nbs.right.pos : mMaxNumAtoms - 1; |
|
157 |
+ |
|
158 |
+ uint64_t newLocation = gaps::random::uniform64(lbound, rbound); |
|
159 |
+ return AtomicProposal(mLabel, 'M', pos, -mass, newLocation, mass); |
|
160 |
+} |
|
161 |
+ |
|
162 |
+// exchange with adjacent atom to the right |
|
163 |
+// O(logN) |
|
164 |
+AtomicProposal AtomicSupport::proposeExchange() const |
|
165 |
+{ |
|
166 |
+ uint64_t pos1 = randomAtomPosition(); |
|
167 |
+ AtomNeighbors nbs = getNeighbors(pos1); |
|
168 |
+ float mass1 = at(pos1); |
|
169 |
+ |
|
170 |
+ // find right neighbor, wrap around to beginning if this atom is end |
|
171 |
+ uint64_t pos2 = nbs.right.pos != pos1 ? nbs.right.pos : mAtomPositions.begin()->first; |
|
172 |
+ float mass2 = at(pos2); |
|
173 |
+ |
|
174 |
+ // calculate new mass |
|
175 |
+ float pupper = gaps::random::p_gamma(mass1 + mass2, 2.f, 1.f / mLambda); |
|
176 |
+ float newMass = gaps::random::q_gamma(gaps::random::uniform(0.f, pupper), |
|
177 |
+ 2.f, 1.f / mLambda); |
|
178 |
+ |
|
179 |
+ // calculate mass changes |
|
180 |
+ float delta1 = mass1 > mass2 ? newMass - mass1 : mass2 - newMass; |
|
181 |
+ float delta2 = mass2 > mass1 ? newMass - mass2 : mass1 - newMass; |
|
182 |
+ |
|
183 |
+ // set first position to be postive change |
|
184 |
+ float delta1_cached = delta1; |
|
185 |
+ uint64_t pos1_cached = pos1; |
|
186 |
+ pos1 = delta1_cached > 0 ? pos1 : pos2; |
|
187 |
+ pos2 = delta1_cached > 0 ? pos2 : pos1_cached; |
|
188 |
+ delta1 = delta1_cached > 0 ? delta1 : delta2; |
|
189 |
+ delta2 = delta1_cached > 0 ? delta2 : delta1_cached; |
|
190 |
+ |
|
191 |
+ // preserve total mass |
|
192 |
+ return AtomicProposal(mLabel, 'E', pos1, delta1, pos2, delta2); |
|
193 |
+} |
|
194 |
+ |
|
195 |
+// O(logN) |
|
196 |
+float AtomicSupport::updateAtomMass(uint64_t pos, float delta) |
|
197 |
+{ |
|
198 |
+ if (mAtomPositions.count(pos)) // update atom if it exists |
|
199 |
+ { |
|
200 |
+ mAtoms[mAtomPositions.at(pos)].mass += delta; |
|
201 |
+ } |
|
202 |
+ else // create a new atom if it doesn't exist |
|
203 |
+ { |
|
204 |
+ addAtom(Atom(pos, delta)); |
|
205 |
+ } |
|
206 |
+ |
|
207 |
+ if (mAtoms[mAtomPositions.at(pos)].mass < EPSILON) |
|
208 |
+ { |
|
209 |
+ delta -= at(pos); |
|
210 |
+ mMaxAtoms--; // keep queue up to date |
|
211 |
+ removeAtom(pos); |
|
212 |
+ } |
|
213 |
+ return delta; |
|
214 |
+} |
|
215 |
+ |
|
216 |
+MatrixChange AtomicSupport::acceptProposal(const AtomicProposal &prop, |
|
217 |
+MatrixChange &ch) |
|
218 |
+{ |
|
219 |
+ ch.delta1 = updateAtomMass(prop.pos1, prop.delta1); |
|
220 |
+ ch.delta2 = (prop.nChanges > 1) ? updateAtomMass(prop.pos2, prop.delta2) |
|
221 |
+ : ch.delta2; |
|
222 |
+ return ch; |
|
223 |
+} |
|
224 |
+ |
|
225 |
+float AtomicSupport::deleteProb(unsigned nAtoms) const |
|
226 |
+{ |
|
227 |
+ float pDelete = 0.5; // default uniform prior for mAlpha == 0 |
|
228 |
+ float fnAtoms = (float)nAtoms; |
|
229 |
+ if (mAlpha > 0) // poisson prior |
|
230 |
+ { |
|
231 |
+ float maxTerm = ((float)mMaxNumAtoms - fnAtoms) / (float)mMaxNumAtoms; |
|
232 |
+ return fnAtoms / (fnAtoms + mAlpha * (float)mNumBins * maxTerm); |
|
233 |
+ } |
|
234 |
+ else if (mAlpha < 0) // geometric prior |
|
235 |
+ { |
|
236 |
+ float c = -mAlpha * mNumBins / (-mAlpha * mNumBins + 1.0); |
|
237 |
+ return fnAtoms / ((fnAtoms + 1.f) * c + fnAtoms); |
|
238 |
+ } |
|
239 |
+} |
|
240 |
+ |
|
241 |
+static isInVector(std::vector<unsigned> &vec, unsigned n) |
|
242 |
+{ |
|
243 |
+ return std::find(vec.begin(), vec.end(), n) != vec.end(); |
|
244 |
+} |
|
245 |
+ |
|
246 |
+bool AtomicSupport::addProposal() const |
|
247 |
+{ |
|
248 |
+ if (minAtoms == 0 && maxAtoms > 0 || minAtoms < 2 && maxAtoms >= 2) |
|
249 |
+ { |
|
250 |
+ return false; |
|
251 |
+ } |
|
252 |
+ |
|
253 |
+ AtomicProposal proposal; |
|
254 |
+ if (maxAtoms == 0) |
|
255 |
+ { |
|
256 |
+ proposal = proposeBirth(); |
|
257 |
+ minAtoms++; |
|
258 |
+ maxAtoms++; |
|
259 |
+ } |
|
260 |
+ else |
|
261 |
+ { |
|
262 |
+ float bdProb = maxAtoms < 2 ? 0.6667f : 0.5f; |
|
263 |
+ float u1 = gaps::random::uniform(); // cache these values if a failure |
|
264 |
+ float u2 = gaps::random::uniform(); // happens, needed to prevent bias |
|
265 |
+ float lowerBound = deleteProb(minAtoms); |
|
266 |
+ float upperBound = deleteProb(maxAtoms); |
|
267 |
+ if (u1 < bdProb && u2 > upperBound) |
|
268 |
+ { |
|
269 |
+ proposal = proposeBirth(); |
|
270 |
+ minAtoms++; |
|
271 |
+ maxAtoms++; |
|
272 |
+ } |
|
273 |
+ else if (u1 < bdProb && u2 < lowerBound) |
|
274 |
+ { |
|
275 |
+ proposal = proposeDeath(); |
|
276 |
+ minAtoms--; |
|
277 |
+ } |
|
278 |
+ else if (u1 >= bdProb && (maxAtoms < 2 || u1 < 0.75f)) |
|
279 |
+ { |
|
280 |
+ proposal = proposeMove(); |
|
281 |
+ } |
|
282 |
+ else if (u1 >= bdProb) |
|
283 |
+ { |
|
284 |
+ proposal = proposeExchange(); |
|
285 |
+ minAtoms--; |
|
286 |
+ } |
|
287 |
+ else |
|
288 |
+ { |
|
289 |
+ return false; |
|
290 |
+ } |
|
291 |
+ } |
|
292 |
+ |
|
293 |
+ // used rows should be an hash table so that find is O(1) |
|
294 |
+ // can allocate is one chunk of memory since nRows/nCols is known |
|
295 |
+ if (isInVector(usedRows, getRow(proposal.pos1))) |
|
296 |
+ { |
|
297 |
+ return false; |
|
298 |
+ } |
|
299 |
+ if (proposal.nChanges > 1) |
|
300 |
+ { |
|
301 |
+ if (isInVector(usedRows, getRow(proposal.pos2))) |
|
302 |
+ { |
|
303 |
+ return false; |
|
304 |
+ } |
|
305 |
+ usedRows.push_back(getRow(proposal.pos2)); |
|
306 |
+ } |
|
307 |
+ usedRows.push_back(getRow(proposal.pos1)); |
|
308 |
+ mProposalQueue.push(proposal); |
|
309 |
+ return true; |
|
310 |
+} |
|
311 |
+ |
|
312 |
+void AtomicSupport::populateQueue(unsigned limit) |
|
313 |
+{ |
|
314 |
+ unsigned nIter = 0; |
|
315 |
+ while (nIter++ < limit && addProposal()); |
|
316 |
+} |
|
317 |
+ |
|
318 |
+AtomicProposal AtomicSupport::popQueue() |
|
319 |
+{ |
|
320 |
+ AtomicProposal prop = mProposalQueue.front(); |
|
321 |
+ mProposalQueue.pop(); |
|
322 |
+ return prop; |
|
323 |
+} |
|
324 |
+ |
|
325 |
+MatrixChange AtomicSupport::acceptProposal(const AtomicProposal &prop, |
|
326 |
+MatrixChange ch) |
|
327 |
+{ |
|
328 |
+ ch.delta1 = updateAtomMass(prop.pos1, prop.delta1); |
|
329 |
+ ch.delta2 = (prop.nChanges > 1) ? updateAtomMass(prop.pos2, prop.delta2) |
|
330 |
+ : ch.delta2; |
|
331 |
+ return ch; |
|
332 |
+} |
|
333 |
+ |
|
334 |
+void AtomicSupport::rejectProposal(const AtomicProposal &prop) |
|
335 |
+{ |
|
336 |
+ if (prop.label == 'D' || prop.label == 'E') |
|
337 |
+ { |
|
338 |
+ mMinAtoms++; |
|
339 |
+ } |
|
340 |
+} |
|
341 |
+ |
|
342 |
+unsigned AtomicSupport::size() const |
|
343 |
+{ |
|
344 |
+ return mProposalQueue.size(); |
|
345 |
+} |
|
346 |
+ |
|
347 |
+bool AtomicSupport::empty() const |
|
348 |
+{ |
|
349 |
+ return mProposalQueue.empty(); |
|
350 |
+} |
|
351 |
+ |
|
352 |
+Archive& operator<<(Archive &ar, AtomicSupport &domain) |
|
353 |
+{ |
|
354 |
+ ar << domain.mLabel << domain.mNumAtoms << domain.mMaxNumAtoms |
|
355 |
+ << domain.mNumRows << domain.mNumCols |
|
356 |
+ << domain.mNumBins << domain.mBinSize << domain.mAlpha |
|
357 |
+ << domain.mLambda; |
|
358 |
+ |
|
359 |
+ for (unsigned i = 0; i < domain.mAtoms.size(); ++i) |
|
360 |
+ { |
|
361 |
+ ar << domain.mAtoms[i].pos << domain.mAtoms[i].mass; |
|
362 |
+ } |
|
363 |
+ return ar; |
|
364 |
+} |
|
365 |
+ |
|
366 |
+Archive& operator>>(Archive &ar, AtomicSupport &domain) |
|
367 |
+{ |
|
368 |
+ uint64_t nAtoms = 0; |
|
369 |
+ ar >> domain.mLabel >> nAtoms >> domain.mMaxNumAtoms |
|
370 |
+ >> domain.mNumRows >> domain.mNumCols >> domain.mNumBins |
|
371 |
+ >> domain.mBinSize >> domain.mAlpha >> domain.mLambda; |
|
372 |
+ |
|
373 |
+ uint64_t pos = 0; |
|
374 |
+ float mass = 0.0; |
|
375 |
+ for (unsigned i = 0; i < nAtoms; ++i) |
|
376 |
+ { |
|
377 |
+ ar >> pos >> mass; |
|
378 |
+ domain.addAtom(Atom(pos, mass)); // this will increment mNumAtoms |
|
379 |
+ } |
|
380 |
+ return ar; |
|
381 |
+} |
|
0 | 382 |
\ No newline at end of file |
1 | 383 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,113 @@ |
1 |
+#ifndef __GAPS_ATOMIC_QUEUE_H |
|
2 |
+#define __GAPS_ATOMIC_QUEUE_H |
|
3 |
+ |
|
4 |
+#include <map> |
|
5 |
+#include <vector> |
|
6 |
+ |
|
7 |
+struct Atom |
|
8 |
+{ |
|
9 |
+ uint64_t pos; |
|
10 |
+ float mass; |
|
11 |
+ |
|
12 |
+ Atom(uint64_t p, float m) : pos(p), mass(m) {} |
|
13 |
+}; |
|
14 |
+ |
|
15 |
+struct AtomNeighbors |
|
16 |
+{ |
|
17 |
+ Atom left; |
|
18 |
+ Atom right; |
|
19 |
+ |
|
20 |
+ AtomNeighbors(uint64_t lp, float lm, uint64_t rp, float rm) |
|
21 |
+ : left(Atom(lp, lm)), right(Atom(rp, rm)) |
|
22 |
+ {} |
|
23 |
+}; |
|
24 |
+ |
|
25 |
+struct AtomicProposal |
|
26 |
+{ |
|
27 |
+ char label; |
|
28 |
+ char type; |
|
29 |
+ unsigned nChanges; |
|
30 |
+ |
|
31 |
+ uint64_t pos1; |
|
32 |
+ float delta1; |
|
33 |
+ |
|
34 |
+ uint64_t pos2; |
|
35 |
+ float delta2; |
|
36 |
+ |
|
37 |
+ AtomicProposal(char l, char t, uint64_t p1, float m1) |
|
38 |
+ : label(l), type(t), nChanges(1), pos1(p1), delta1(m1), pos2(0), delta2(0.f) |
|
39 |
+ {} |
|
40 |
+ |
|
41 |
+ AtomicProposal(char l, char t, uint64_t p1, float m1, uint64_t p2, float m2) |
|
42 |
+ : label(l), type(t), nChanges(2), pos1(p1), delta1(m1), pos2(p2), delta2(m2) |
|
43 |
+ {} |
|
44 |
+}; |
|
45 |
+ |
|
46 |
+class AtomicQueue |
|
47 |
+{ |
|
48 |
+private: |
|
49 |
+ |
|
50 |
+ // parameters |
|
51 |
+ char mLabel; |
|
52 |
+ float mAlpha; |
|
53 |
+ float mLambda; |
|
54 |
+ |
|
55 |
+ // storage of the atomic domain |
|
56 |
+ std::vector<Atom> mAtoms; |
|
57 |
+ std::map<uint64_t, uint64_t> mAtomPositions; |
|
58 |
+ uint64_t mNumAtoms; |
|
59 |
+ uint64_t mAtomCapacity; |
|
60 |
+ |
|
61 |
+ // matrix information |
|
62 |
+ unsigned mNumRows; |
|
63 |
+ unsigned mNumCols; |
|
64 |
+ unsigned mNumBins; |
|
65 |
+ unsigned mBinSize; |
|
66 |
+ |
|
67 |
+ // storage of proposal queue |
|
68 |
+ std::queue<AtomicProposal> mProposalQueue; |
|
69 |
+ std::vector<unsigned> mUsedIndices; // rows of A, cols of P |
|
70 |
+ unsigned mMinAtoms; |
|
71 |
+ unsigned mMaxAtoms; |
|
72 |
+ |
|
73 |
+ // proposal functions |
|
74 |
+ AtomicProposal proposeBirth() const; |
|
75 |
+ AtomicProposal proposeDeath() const; |
|
76 |
+ AtomicProposal proposeMove() const; |
|
77 |
+ AtomicProposal proposeExchange() const; |
|
78 |
+ |
|
79 |
+ // update the mass of an atom, return the total amount changed |
|
80 |
+ std::pair<float, bool> updateAtomMass(uint64_t pos, float delta); |
|
81 |
+ |
|
82 |
+ // functions for dealing with atomic data structure |
|
83 |
+ void addAtom(Atom atom); // O(logN) |
|
84 |
+ void removeAtom(uint64_t pos); // O(logN) |
|
85 |
+ AtomNeighbors getNeighbors(uint64_t pos) const; // O(logN) |
|
86 |
+ |
|
87 |
+ // get atomic positions at random |
|
88 |
+ uint64_t randomFreePosition() const; // O(1) |
|
89 |
+ uint64_t randomAtomPosition() const; // O(1) |
|
90 |
+ |
|
91 |
+public: |
|
92 |
+ |
|
93 |
+ // constructor |
|
94 |
+ AtomicQueue(char label, unsigned nrow, unsigned ncol); |
|
95 |
+ |
|
96 |
+ // parameter setters |
|
97 |
+ void setAlpha(float alpha) {mAlpha = alpha;} |
|
98 |
+ void setLambda(float lambda) {mLambda = lambda;} |
|
99 |
+ |
|
100 |
+ // queue operations |
|
101 |
+ void populate(unsigned limit); |
|
102 |
+ AtomicProposal pop_front(); |
|
103 |
+ MatrixChange accept(const AtomicProposal &prop, MatrixChange &change); |
|
104 |
+ void reject(const AtomicProposal &prop); |
|
105 |
+ unsigned size() const; |
|
106 |
+ bool empty() const; |
|
107 |
+ |
|
108 |
+ // serialization |
|
109 |
+ friend Archive& operator<<(Archive &ar, AtomicQueue &sampler); |
|
110 |
+ friend Archive& operator>>(Archive &ar, AtomicQueue &sampler); |
|
111 |
+}; |
|
112 |
+ |
|
113 |
+#endif |
|
0 | 114 |
\ No newline at end of file |
... | ... |
@@ -200,60 +200,89 @@ float AtomicSupport::deleteProb(unsigned nAtoms) const |
200 | 200 |
} |
201 | 201 |
} |
202 | 202 |
|
203 |
-bool AtomicSupport::addProposal(unsigned &minAtoms, unsigned &maxAtoms) const |
|
203 |
+static isInVector(std::vector<unsigned> &vec, unsigned n) |
|
204 |
+{ |
|
205 |
+ return std::find(vec.begin(), vec.end(), n) != vec.end(); |
|
206 |
+} |
|
207 |
+ |
|
208 |
+bool AtomicSupport::addProposal(unsigned &minAtoms, unsigned &maxAtoms, |
|
209 |
+std::vector<unsigned> &usedRows, std::vector<unsigned> &usedCols) const |
|
204 | 210 |
{ |
205 | 211 |
if (minAtoms == 0 && maxAtoms > 0 || minAtoms < 2 && maxAtoms >= 2) |
206 | 212 |
{ |
207 | 213 |
return false; |
208 | 214 |
} |
209 | 215 |
|
216 |
+ AtomicProposal proposal; |
|
210 | 217 |
if (maxAtoms == 0) |
211 | 218 |
{ |
212 |
- mProposalQueue.push_back(proposeBirth()); |
|
219 |
+ proposal = proposeBirth(); |
|
213 | 220 |
minAtoms++; |
214 | 221 |
maxAtoms++; |
215 |
- return true; |
|
216 | 222 |
} |
217 |
- |
|
218 |
- float bdProb = maxAtoms < 2 ? 0.6667f : 0.5f; |
|
219 |
- float u1 = gaps::random::uniform(); |
|
220 |
- float u2 = gaps::random::uniform(); |
|
221 |
- float lowerBound = deleteProb(minAtoms); |
|
222 |
- float upperBound = deleteProb(maxAtoms); |
|
223 |
- if (u1 < bdProb && u2 > upperBound) |
|
224 |
- { |
|
225 |
- mProposalQueue.push_back(proposeBirth()); |
|
226 |
- minAtoms++; |
|
227 |
- maxAtoms++; |
|
228 |
- } |
|
229 |
- else if (u1 < bdProb && u2 < lowerBound) |
|
230 |
- { |
|
231 |
- mProposalQueue.push_back(proposeDeath()); |
|
232 |
- minAtoms--; |
|
233 |
- } |
|
234 |
- else if (u1 >= bdProb && (maxAtoms < 2 || u1 < 0.75f)) |
|
223 |
+ else |
|
235 | 224 |
{ |
236 |
- mProposalQueue.push_back(proposeMove()); |
|
225 |
+ float bdProb = maxAtoms < 2 ? 0.6667f : 0.5f; |
|
226 |
+ float u1 = gaps::random::uniform(); |
|
227 |
+ float u2 = gaps::random::uniform(); |
|
228 |
+ float lowerBound = deleteProb(minAtoms); |
|
229 |
+ float upperBound = deleteProb(maxAtoms); |
|
230 |
+ if (u1 < bdProb && u2 > upperBound) |
|
231 |
+ { |
|
232 |
+ proposal = proposeBirth(); |
|
233 |
+ minAtoms++; |
|
234 |
+ maxAtoms++; |
|
235 |
+ } |
|
236 |
+ else if (u1 < bdProb && u2 < lowerBound) |
|
237 |
+ { |
|
238 |
+ proposal = proposeDeath(); |
|
239 |
+ minAtoms--; |
|
240 |
+ } |
|
241 |
+ else if (u1 >= bdProb && (maxAtoms < 2 || u1 < 0.75f)) |
|
242 |
+ { |
|
243 |
+ proposal = proposeMove(); |
|
244 |
+ } |
|
245 |
+ else if (u1 >= bdProb) |
|
246 |
+ { |
|
247 |
+ proposal = proposeExchange(); |
|
248 |
+ minAtoms--; |
|
249 |
+ } |
|
250 |
+ else |
|
251 |
+ { |
|
252 |
+ return false; |
|
253 |
+ } |
|
237 | 254 |
} |
238 |
- else if (u1 >= bdProb) |
|
255 |
+ |
|
256 |
+ // used rows should be an hash table so that find is O(1) |
|
257 |
+ // can allocate is one chunk of memory since nRows/nCols is known |
|
258 |
+ if (isInVector(usedRows, getRow(proposal.pos1))) |
|
239 | 259 |
{ |
240 |
- mProposalQueue.push_back(proposeExchange()); |
|
241 |
- minAtoms--; |
|
260 |
+ return false; |
|
242 | 261 |
} |
243 |
- else |
|
262 |
+ if (proposal.nChanges > 1) |
|
244 | 263 |
{ |
245 |
- return false; |
|
264 |
+ if (isInVector(usedRows, getRow(proposal.pos2))) |
|
265 |
+ { |
|
266 |
+ return false; |
|
267 |
+ } |
|
268 |
+ usedRows.push_back(getRow(proposal.pos2)); |
|
246 | 269 |
} |
270 |
+ usedRows.push_back(getRow(proposal.pos1)); |
|
247 | 271 |
return true; |
248 | 272 |
} |
249 | 273 |
|
274 |
+// need to track min/max number of atoms |
|
275 |
+// birth is already known to be accepted |
|
276 |
+// when death/exchange remove an atom decrement maxAtoms |
|
277 |
+// if death/exchange do not remove an atom increment minAtoms |
|
250 | 278 |
void AtomicSupport::populateQueue(unsigned limit) |
251 | 279 |
{ |
252 | 280 |
unsigned minAtoms = mNumAtoms; |
253 | 281 |
unsigned maxAtoms = mNumAtoms; |
282 |
+ std::vector<unsigned> usedRows; |
|
254 | 283 |
unsigned nIter = 0; |
255 | 284 |
|
256 |
- while (nIter++ < limit && addProposal(minAtoms, maxAtoms)); |
|
285 |
+ while (nIter++ < limit && addProposal(minAtoms, maxAtoms, usedRows)); |
|
257 | 286 |
} |
258 | 287 |
|
259 | 288 |
bool AtomicSupport::isQueueEmpty() const |
... | ... |
@@ -261,6 +290,11 @@ bool AtomicSupport::isQueueEmpty() const |
261 | 290 |
return mProposalQueue.empty(); |
262 | 291 |
} |
263 | 292 |
|
293 |
+unsigned AtomicSupport::queueSize() const |
|
294 |
+{ |
|
295 |
+ return mProposalQueue.size(); |
|
296 |
+} |
|
297 |
+ |
|
264 | 298 |
AtomicProposal AtomicSupport::popQueue() |
265 | 299 |
{ |
266 | 300 |
AtomicProposal prop = mProposalQueue.back(); |
... | ... |
@@ -52,16 +52,8 @@ static void createCheckpoint(GapsInternalState &state) |
52 | 52 |
static void updateSampler(GapsInternalState &state) |
53 | 53 |
{ |
54 | 54 |
state.nUpdatesA += state.nIterA; |
55 |
- for (unsigned j = 0; j < state.nIterA; ++j) |
|
56 |
- { |
|
57 |
- state.sampler.update('A'); |
|
58 |
- } |
|
59 |
- |
|
60 | 55 |
state.nUpdatesP += state.nIterP; |
61 |
- for (unsigned j = 0; j < state.nIterP; ++j) |
|
62 |
- { |
|
63 |
- state.sampler.update('P'); |
|
64 |
- } |
|
56 |
+ state.runner.update(state.nIterA, state.nIterP); |
|
65 | 57 |
} |
66 | 58 |
|
67 | 59 |
static void makeCheckpointIfNeeded(GapsInternalState &state) |
68 | 60 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,81 @@ |
1 |
+#ifndef __GAPS_GAPS_RUNNER_H__ |
|
2 |
+#define __GAPS_GAPS_RUNNER_H__ |
|
3 |
+ |
|
4 |
+// holds the data and dispatches the top level jobs |
|
5 |
+class GapsRunner |
|
6 |
+{ |
|
7 |
+private: |
|
8 |
+ |
|
9 |
+ // Amplitude and Pattern matrices |
|
10 |
+ ColMatrix mAMatrix; |
|
11 |
+ RowMatrix mPMatrix; |
|
12 |
+ |
|
13 |
+ // used when updating A matrix |
|
14 |
+ RowMatrix mDMatrix; |
|
15 |
+ RowMatrix mSMatrix; |
|
16 |
+ RowMatrix mAPMatrix_A; |
|
17 |
+ |
|
18 |
+ // used when upating P matrix |
|
19 |
+ ColMatrix mDMatrix; |
|
20 |
+ ColMatrix mSMatrix; |
|
21 |
+ ColMatrix mAPMatrix_P; |
|
22 |
+ |
|
23 |
+ // gibbs sampler |
|
24 |
+ AmplitudeGibbsSampler mAGibbsSampler; |
|
25 |
+ PatternGibbsSampler mPGibbsSampler; |
|
26 |
+ |
|
27 |
+ // proposal queue |
|
28 |
+ ProposalQueue mAQueue; |
|
29 |
+ ProposalQueue mPQueue; |
|
30 |
+ |
|
31 |
+ // atomic domain |
|
32 |
+ AtomicDomain mADomain; |
|
33 |
+ AtomicDomina mPDomain; |
|
34 |
+ |
|
35 |
+ // number of cores available for jobs |
|
36 |
+ unsigned mNumCores; |
|
37 |
+ |
|
38 |
+public: |
|
39 |
+ |
|
40 |
+ GapsRunner() {} |
|
41 |
+ |
|
42 |
+ void run(unsigned nASteps, unsigned nPSteps) |
|
43 |
+ { |
|
44 |
+ update(mADomain, mAQueue, mAGibbsSampler, nASteps); |
|
45 |
+ mAPMatrix_P = mAPMatrix_A; |
|
46 |
+ |
|
47 |
+ update(mPDomain, mPQueue, mPGibbsSampler, nPSteps); |
|
48 |
+ mAPMatrix_A = mAPMatrix_P; |
|
49 |
+ } |
|
50 |
+ |
|
51 |
+ // Performance Metrics |
|
52 |
+ // 1) % of cores used in each iteration |
|
53 |
+ // 2) given fixed nCores, how does speed get better with matrix size, |
|
54 |
+ // i.e. when does the overhead of parallelization start paying off |
|
55 |
+ // 3) % of program spent in parallel portion, i.e. not in populate queue |
|
56 |
+ void update(AtomicDomain domain, ProposalQueue queue, GibbsSampler sampler, |
|
57 |
+ unsigned nUpdates) |
|
58 |
+ { |
|
59 |
+ unsigned n = 0; |
|
60 |
+ while (n < nSteps) |
|
61 |
+ { |
|
62 |
+ // want this to be as quick as possible - otherwise there would be |
|
63 |
+ // a large speed up to making this run concurrently along with the |
|
64 |
+ // processProposal jobs, but that is much, much more complicated |
|
65 |
+ // to implement |
|
66 |
+ assert(nSteps - (queue.size() + n) >= 0); |
|
67 |
+ queue.populate(domain, nSteps - (queue.size() + n)) |
|
68 |
+ |
|
69 |
+ unsigned nJobs = std::min(queue.size(), mNumCores); |
|
70 |
+ for (unsigned i = 0; i < nJobs; ++i) // can be run in parallel |
|
71 |
+ { |
|
72 |
+ sampler.processProposal(domain, queue[i]); |
|
73 |
+ } |
|
74 |
+ queue.clear(nJobs); |
|
75 |
+ n += nJobs; |
|
76 |
+ assert(n <= nSteps); |
|
77 |
+ } |
|
78 |
+ } |
|
79 |
+}; |
|
80 |
+ |
|
81 |
+#endif |
|
0 | 82 |
\ No newline at end of file |
... | ... |
@@ -82,7 +82,6 @@ float GibbsSampler::getGibbsMass(const MatrixChange &change) |
82 | 82 |
// note: is bounded below by zero so have to use inverse sampling! |
83 | 83 |
// based upon algorithm in DistScalarRmath.cc (scalarRandomSample) |
84 | 84 |
float plower = gaps::random::p_norm(0.f, mean, sd); |
85 |
- float u = gaps::random::uniform(plower, 1.f); |
|
86 | 85 |
|
87 | 86 |
// if the likelihood is flat and nonzero, sample strictly from the prior |
88 | 87 |
float newMass = 0.f; |
... | ... |
@@ -90,11 +89,10 @@ float GibbsSampler::getGibbsMass(const MatrixChange &change) |
90 | 89 |
{ |
91 | 90 |
newMass = death ? std::abs(change.delta1) : 0.f; |
92 | 91 |
} |
93 |
- else if (plower >= 0.99f) |
|
92 |
+ else if (plower >= 0.99f) // what is this? |
|
94 | 93 |
{ |
95 | 94 |
float tmp1 = gaps::random::d_norm(0.f, mean, sd); |
96 | 95 |
float tmp2 = gaps::random::d_norm(10.f * lambda, mean, sd); |
97 |
- |
|
98 | 96 |
if (tmp1 > EPSILON && std::abs(tmp1 - tmp2) < EPSILON) |
99 | 97 |
{ |
100 | 98 |
return death ? 0.0 : change.delta1; |
... | ... |
@@ -102,7 +100,7 @@ float GibbsSampler::getGibbsMass(const MatrixChange &change) |
102 | 100 |
} |
103 | 101 |
else |
104 | 102 |
{ |
105 |
- newMass = gaps::random::q_norm(u, mean, sd); |
|
103 |
+ newMass = gaps::random::inverseNormSample(plower, 1.f, mean, sd); |
|
106 | 104 |
} |
107 | 105 |
|
108 | 106 |
newMass = (change.label == 'A' ? std::min(newMass, mMaxGibbsMassA) |
... | ... |
@@ -117,31 +115,22 @@ float GibbsSampler::computeDeltaLL(const MatrixChange &change) |
117 | 115 |
mPMatrix, mAPMatrix); |
118 | 116 |
} |
119 | 117 |
|
120 |
-void GibbsSampler::update(char matrixLabel) |
|
118 |
+void GibbsSampler::update(char matrixLabel, unsigned nUpdates) |
|
121 | 119 |
{ |
122 | 120 |
AtomicSupport &domain(matrixLabel == 'A' ? mADomain : mPDomain); |
123 |
- AtomicProposal proposal = domain.makeProposal(); |
|
124 |
- |
|
125 |
- if (mFixedMat == 'A' || mFixedMat == 'P') |
|
121 |
+ for (unsigned i = 0; i < nUpdates; ++i) |
|
126 | 122 |
{ |
127 |
- //MatrixChange change = domain.getMatrixChange(proposal); |
|
128 |
- unsigned index1 = mFixedMat == 'A' ? domain.getCol(proposal.pos1) : domain.getRow(proposal.pos1); |
|
129 |
- unsigned index2 = mFixedMat == 'A' ? domain.getCol(proposal.pos2) : domain.getRow(proposal.pos2); |
|
130 |
- index2 = (proposal.nChanges == 1) ? mNumFixedPatterns : index2; |
|
131 |
- |
|
132 |
- if (index1 < mNumFixedPatterns || index2 < mNumFixedPatterns) |
|
123 |
+ assert(nUpdates - i - domain.size() >= 0); |
|
124 |
+ domain.populateQueue(nUpdates - i - domain.size()); |
|
125 |
+ AtomicProposal proposal = domain.popQueue(); |
|
126 |
+ switch (proposal.type) |
|
133 | 127 |
{ |
134 |
- return; |
|
128 |
+ case 'D': death(domain, proposal); break; |
|
129 |
+ case 'B': birth(domain, proposal); break; |
|
130 |
+ case 'M': move(domain, proposal); break; |
|
131 |
+ case 'E': exchange(domain, proposal); break; |
|
135 | 132 |
} |
136 | 133 |
} |
137 |
- |
|
138 |
- switch (proposal.type) // Update based on the proposal type |
|
139 |
- { |
|
140 |
- case 'D': death(domain, proposal); break; |
|
141 |
- case 'B': birth(domain, proposal); break; |
|
142 |
- case 'M': move(domain, proposal); break; |
|
143 |
- case 'E': exchange(domain, proposal); break; |
|
144 |
- } |
|
145 | 134 |
} |
146 | 135 |
|
147 | 136 |
uint64_t GibbsSampler::totalNumAtoms(char matrixLabel) const |
... | ... |
@@ -229,6 +218,7 @@ bool GibbsSampler::canUseGibbs(const MatrixChange &ch) |
229 | 218 |
} |
230 | 219 |
|
231 | 220 |
// accept automatically, try to rebirth |
221 |
+// TODO consolidate to single proposal |
|
232 | 222 |
void GibbsSampler::death(AtomicSupport &domain, AtomicProposal &prop) |
233 | 223 |
{ |
234 | 224 |
// automaticallly accept death |
... | ... |
@@ -236,7 +226,7 @@ void GibbsSampler::death(AtomicSupport &domain, AtomicProposal &prop) |
236 | 226 |
domain.getCol(prop.pos1), prop.delta1); |
237 | 227 |
evaluateChange(domain, prop, change, 0.f, true); |
238 | 228 |
|
239 |
- // rebirth |
|
229 |
+ // rebirth, label as birth |
|
240 | 230 |
float newMass = canUseGibbs(change) ? getGibbsMass(change) : 0.f; |
241 | 231 |
prop.delta1 = newMass < EPSILON ? -prop.delta1 : newMass; |
242 | 232 |
change.delta1 = prop.delta1; |
... | ... |
@@ -294,13 +284,12 @@ void GibbsSampler::exchange(AtomicSupport &domain, AtomicProposal &prop) |
294 | 284 |
{ |
295 | 285 |
float mean = alphaParam.su / alphaParam.s; |
296 | 286 |
float sd = 1.f / std::sqrt(alphaParam.s); |
297 |
- |
|
298 | 287 |
float plower = gaps::random::p_norm(-mass1, mean, sd); |
299 | 288 |
float pupper = gaps::random::p_norm(mass2, mean, sd); |
300 |
- float u = gaps::random::uniform(plower, pupper); |
|
301 | 289 |
|
302 | 290 |
if (!(plower > 0.95f || pupper < 0.05f)) |
303 | 291 |
{ |
292 |
+ float u = gaps::random::uniform(plower, pupper); |
|
304 | 293 |
prop.delta1 = gaps::random::q_norm(u, mean, sd); |
305 | 294 |
prop.delta1 = std::max(prop.delta1, -mass1); |
306 | 295 |
prop.delta1 = std::min(prop.delta1, mass2); |
307 | 296 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,29 @@ |
1 |
+#ifndef __GAPS_PROPOSAL_QUEUE_H__ |
|
2 |
+#define __GAPS_PROPOSAL_QUEUE_H__ |
|
3 |
+ |
|
4 |
+struct AtomicProposal |
|
5 |
+{ |
|
6 |
+ char type; |
|
7 |
+ uint64_t pos1; |
|
8 |
+ uint64_t pos2; |
|
9 |
+ |
|
10 |
+ AtomicProposal(char t, uint64_t p1, uint64_t p2) |
|
11 |
+ : type(t), pos1(p1), pos2(p2) |
|
12 |
+ {} |
|
13 |
+}; |
|
14 |
+ |
|
15 |
+// generate list of independent proposals |
|
16 |
+class ProposalQueue |
|
17 |
+{ |
|
18 |
+private: |
|
19 |
+ |
|
20 |
+public: |
|
21 |
+ |
|
22 |
+ ProposalQueue(); |
|
23 |
+ |
|
24 |
+ void populate(unsigned limit); // add up to limit proposals to the queue |
|
25 |
+ void clear(unsigned n); // delete first n proposals |
|
26 |
+ const AtomicProposal& operator[](unsigned n); // get nth proposal |
|
27 |
+}; |
|
28 |
+ |
|
29 |
+#endif |
|
0 | 30 |
\ No newline at end of file |
16 | 17 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,156 @@ |
1 |
+class GapsRunner |
|
2 |
+{ |
|
3 |
+private: |
|
4 |
+ |
|
5 |
+ // atomic domain |
|
6 |
+ // proposal queue |
|
7 |
+ // matrices |
|
8 |
+ // statistics |
|
9 |
+ |
|
10 |
+public: |
|
11 |
+ |
|
12 |
+ // restrict inner loop to number of cores, then re-populate before calling again |
|
13 |
+ // simple solution that might be hard to beat |
|
14 |
+ void update(unsigned nSteps) |
|
15 |
+ { |
|
16 |
+ queue.populate(); |
|
17 |
+ unsigned size = std::min(queue.size(), nCores); |
|
18 |
+ for (unsigned i = 0; i < size; ++i) |
|
19 |
+ { |
|
20 |
+ processProposal(queue[i]); // dispatch to correct function |
|
21 |
+ } |
|
22 |
+ } |
|
23 |
+ |
|
24 |
+}; |
|
25 |
+ |
|
26 |
+void processProposal(AtomicProposal proposal) |
|
27 |
+{ |
|
28 |
+ switch (proposal.type) |
|
29 |
+ { |
|
30 |
+ case 'D': death(proposal.pos1); break; |
|
31 |
+ case 'B': birth(proposal.pos1); break; |
|
32 |
+ case 'M': move(proposal.pos1, proposal.pos2); break; |
|
33 |
+ case 'E': exchange(proposal.pos1, proposal.pos2); break; |
|
34 |
+ } |
|
35 |
+} |
|
36 |
+ |
|
37 |
+// A matrix |
|
38 |
+ |
|
39 |
+void birth(uint64_t pos, float mass) |
|
40 |
+{ |
|
41 |
+ unsigned row = getRow(pos); |
|
42 |
+ unsigned col = getCol(pos); |
|
43 |
+ bool useGibbs = !gaps::algo::isRowZero(mPMatrix, row); |
|
44 |
+ float mass = useGibbs ? gibbsMass(row, col) : gaps::random::exponential(mLambda); |
|
45 |
+ domain.addAtom(pos, mass); |
|
46 |
+ matrix(row,col) += mass; |
|
47 |
+ updateAPMatrix(row, col, mass); |
|
48 |
+} |
|
49 |
+ |
|
50 |
+void death(uint64_t pos, float mass) |
|
51 |
+{ |
|
52 |
+ unsigned row = getRow(pos); |
|
53 |
+ unsigned col = getCol(pos); |
|
54 |
+ matrix(row,col) += -mass; |
|
55 |
+ updateAPMatrix(row, col, -mass); |
|
56 |
+ |
|
57 |
+ bool useGibbs = !gaps::algo::isRowZero(mPMatrix, row); |
|
58 |
+ |
|
59 |
+ float newMass = useGibbs ? gibbsMass(row, col) : mass; |
|
60 |
+ float deltaLL = computeDeltaLL(row, col, newMass); |
|
61 |
+ if (deltaLL * temp >= std::log(gaps::random::uniform())) |
|
62 |
+ { |
|
63 |
+ matrix(row,col) += newMass; |
|
64 |
+ updateAPMatrix(row, col, newMass); |
|
65 |
+ domain.updateAtomMass(pos, newMass - mass); |
|
66 |
+ queue.minAtoms++; |
|
67 |
+ } |
|
68 |
+ else |
|
69 |
+ { |
|
70 |
+ domain.deleteAtom(pos); |
|
71 |
+ queue.maxAtoms--; |
|
72 |
+ } |
|
73 |
+} |
|
74 |
+ |
|
75 |
+// automatically accept moves/exchanges in the same bin |
|
76 |
+ |
|
77 |
+void move(uint64_t src, uint64_t dest) |
|
78 |
+{ |
|
79 |
+ unsigned srcRow = getRow(src); |
|
80 |
+ unsigned srcCol = getCol(src); |
|
81 |
+ float mass = domain.at(src); |
|
82 |
+ unsigned destRow = getRow(dest); |
|
83 |
+ unsigned destCol = getCol(dest); |
|
84 |
+ float deltaLL = computeDeltaLL(srcRow, srcCol, -mass, destRow, destCol, mass); |
|
85 |
+ if (deltaLL * temp >= std::log(gaps::random::uniform())) |
|
86 |
+ { |
|
87 |
+ matrix(srcRow, srcCol) += -mass; |
|
88 |
+ matrix(destRow, destCol) += mass; |
|
89 |
+ updateAPMatrix(srcRow, srcCol, -mass); |
|
90 |
+ updateAPMatrix(destRow, destCol, mass); |
|
91 |
+ domain.deleteAtom(src); |
|
92 |
+ domain.addAtom(dest, mass); |
|
93 |
+ } |
|
94 |
+} |
|
95 |
+ |
|
96 |
+void exchange(uint64_t p1, uint64_t p2) |
|
97 |
+{ |
|
98 |
+ if (!gaps::algo::isRowZero(mPMatrix, getRow(p1)) && !gaps::algo::isRowZero(mPmatrix, getRow(p2))) |
|
99 |
+ { |
|
100 |
+ AlphaParameters alphaParam = gaps::algo::alphaParameters(); |
|
101 |
+ alphaParam.s *= mAnnealingTemp; |
|
102 |
+ alphaParam.su *= mAnnealingTemp; |
|
103 |
+ |
|
104 |
+ if (alphaParam.s > EPSILON) |
|
105 |
+ { |
|
106 |
+ float mean = alphaParam.su / alphaParam.s; |
|
107 |
+ float sd = 1.f / std::sqrt(alphaParam.s); |
|
108 |
+ float plower = gaps::random::p_norm(-mass1, mean, sd); |
|
109 |
+ float pupper = gaps::random::p_norm(mass2, mean, sd); |
|
110 |
+ |
|
111 |
+ if (!(plower > 0.95f || pupper < 0.05f)) |
|
112 |
+ { |
|
113 |
+ float u = gaps::random::uniform(plower, pupper); |
|
114 |
+ prop.delta1 = gaps::random::q_norm(u, mean, sd); |
|
115 |
+ |
|
116 |
+ float delta1 = gaps::random::q_norm(u, mean, sd); |
|
117 |
+ delta1 = std::max(-mass1, std::min(delta1, mass2)); |
|
118 |
+ float delta2 = -delta1; |
|
119 |
+ } |
|
120 |
+ } |
|
121 |
+ } |
|
122 |
+ |
|
123 |
+ float mass1 = domain.at(p1); |
|
124 |
+ float mass2 = domain.at(p2); |
|
125 |
+ float newMass1 = mass1 + delta1; |
|
126 |
+ float newMass2 = mass2 + delta2; |
|
127 |
+ |
|
128 |
+ float pnewMass = mass1 > mass2 ? newMass1 : newMass2; |
|
129 |
+ float poldMass = newMass1 > newMass2 ? mass1 : mass2; |
|
130 |
+ |
|
131 |
+ float pnew = gaps::random::d_gamma(pnewMass, 2.0, 1.f / domain.lambda()); |
|
132 |
+ float pold = gaps::random::d_gamma(poldMass, 2.0, 1.f / domain.lambda()); |
|
133 |
+ |
|
134 |
+ if (pold == 0.f && pnew != 0.f) |
|
135 |
+ { |
|
136 |
+ evaluateChange(domain, prop, change, 0.f, true); |
|
137 |
+ } |
|
138 |
+ else |
|
139 |
+ { |
|
140 |
+ float priorLL = (pold == 0.f) ? 0.f : log(pnew / pold); |
|
141 |
+ float rejectProb = std::log(gaps::random::uniform()) - priorLL; |
|
142 |
+ evaluateChange(domain, prop, change, rejectProb); |
|
143 |
+ } |
|
144 |
+ delta1 = domain.updateAtomMass(p1, delta1); |
|
145 |
+ delta2 = domain.updateAtomMass(p2, delta2); |
|
146 |
+ |
|
147 |
+ matrix(getRow(p1), getCol(p1)) += delta1; |
|
148 |
+ matrix(getRow(p2), getCol(p2)) += delta2; |
|
149 |
+ updateAPMatrix(getRow(p1), getCol(p1), delta1); |
|
150 |
+ updateAPMatrix(getRow(p2), getCol(p2), delta2); |
|
151 |
+ |
|
152 |
+ queue.maxAtoms = (domain.at(p1) == 0) ? queue.maxAtoms - 1 : queue.maxAtoms; |
|
153 |
+ queue.minAtoms = (domain.at(p1) == 0) ? queue.minAtoms : queue.minAtoms + 1; |
|
154 |
+ queue.maxAtoms = (domain.at(p2) == 0) ? queue.maxAtoms - 1 : queue.maxAtoms; |
|
155 |
+ queue.minAtoms = (domain.at(p2) == 0) ? queue.minAtoms : queue.minAtoms + 1; |
|
156 |
+} |
|
0 | 157 |
\ No newline at end of file |
1 | 158 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,223 @@ |
1 |
+// CRTP |
|
2 |
+template <class T, class M, class N> |
|
3 |
+class GibbsSampler |
|
4 |
+{ |
|
5 |
+private: |
|
6 |
+ |
|
7 |
+ N* mDMatrix; |
|
8 |
+ N* mSMatrix; |
|
9 |
+ M* mMatrix; |
|
10 |
+ N* mAPMatrix; |
|
11 |
+ |
|
12 |
+ AtomicQueue mQueue; |
|
13 |
+ AtomicDomain mDomain; |
|
14 |
+ float mLambda; |
|
15 |
+ |
|
16 |
+ unsigned mNumRows; |
|
17 |
+ unsigned mNumCols; |
|
18 |
+ unsigned mBinSize; |
|
19 |
+ |
|
20 |
+public: |
|
21 |
+ |
|
22 |
+ T* impl() |
|
23 |
+ { |
|
24 |
+ return static_cast<T*>(this); |
|
25 |
+ } |
|
26 |
+ |
|
27 |
+ void update(unsigned nSteps) |
|
28 |
+ { |
|
29 |
+ unsigned i = 0; |
|
30 |
+ while (i < nSteps) |
|
31 |
+ { |
|
32 |
+ // want this to be as quick as possible - otherwise there would be |
|
33 |
+ // a large speed up to making this run concurrently along with the |
|
34 |
+ // processProposal jobs, but that is much, much more complicated |
|
35 |
+ // to implement |
|
36 |
+ mQueue.populate(nSteps - mQueue.size() - i); |
|
37 |
+ |
|
38 |
+ unsigned nJobs = std::min(mQueue.size(), nCores); |
|
39 |
+ for (unsigned i = 0; i < nJobs; ++i) // can be run in parallel |
|
40 |
+ { |
|
41 |
+ processProposal(mQueue[i]); |
|
42 |
+ } |
|
43 |
+ mQueue.clear(nJobs); |
|
44 |
+ i += nJobs; |
|
45 |
+ } |
|
46 |
+ } |
|
47 |
+ |
|
48 |
+ void processProposal(const AtomicProposal &prop) |
|
49 |
+ { |
|
50 |
+ switch (prop.type) |
|
51 |
+ { |
|
52 |
+ case 'D': death(prop.pos1, prop.mass1); break; |
|
53 |
+ case 'B': birth(prop.pos1, prop.mass1); break; |
|
54 |
+ case 'M': move(prop.pos1, prop.mass1, prop.pos2); break; |
|
55 |
+ case 'E': exchange(prop.pos1, prop.mass1, prop.pos2, prop.mass2); break; |
|
56 |
+ } |
|
57 |
+ } |
|
58 |
+ |
|
59 |
+ void birth(uint64_t pos, float mass) |
|
60 |
+ { |
|
61 |
+ unsigned row = impl->getRow(pos); |
|
62 |
+ unsigned col = impl->getCol(pos); |
|
63 |
+ float mass = impl()->canUseGibbs(row, col) ? gibbsMass(row, col) |
|
64 |
+ : gaps::random::exponential(mLambda); |
|
65 |
+ mDomain.addAtom(pos, mass); |
|
66 |
+ mMatrix(row, col) += mass; |
|
67 |
+ impl()->updateAPMatrix(row, col, mass); |
|
68 |
+ } |
|
69 |
+ |
|
70 |
+ void death(uint64_t pos, unsigned row, unsigned col) |
|
71 |
+ { |
|
72 |
+ float mass = mDomain.at(pos); |
|
73 |
+ mMatrix(row, col) += -mass; |
|
74 |
+ impl()->updateAPMatrix(row, col, -mass); |
|
75 |
+ |
|
76 |
+ float newMass = impl()->canUseGibbs(row, col) ? gibbsMass(row, col) |
|
77 |
+ : mass; |
|
78 |
+ float deltaLL = impl()->computeDeltaLL(row, col, newMass); |
|
79 |
+ |
|
80 |
+ if (deltaLL * mAnnealingTemp >= std::log(gaps::random::uniform())) |
|
81 |
+ { |
|
82 |
+ newMass = mDomain.updateAtomMass(pos, newMass - mass); |
|
83 |
+ mMatrix(row, col) += newMass; |
|
84 |
+ impl()->updateAPMatrix(row, col, newMass); |
|
85 |
+ if (mDomain.at(pos)) |
|
86 |
+ { |
|
87 |
+ ++mQueue.minAtoms; |
|
88 |
+ } |
|
89 |
+ } |
|
90 |
+ else |
|
91 |
+ { |
|
92 |
+ mDomain.deleteAtom(pos); |
|
93 |
+ mQueue.maxAtoms--; |
|
94 |
+ } |
|
95 |
+ } |
|
96 |
+ |
|
97 |
+ void move(uint64_t p1, unsigned r1, unsigned c1, uint64_t p2, unsigned r2, unsigned c2) |
|
98 |
+ { |
|
99 |
+ float mass = mDomiain.at(p1); |
|
100 |
+ if (r1 == r2 && c1 == c2) |
|
101 |
+ { |
|
102 |
+ mDomain.deleteAtom(p1); |
|
103 |
+ mDomain.addAtom(p2, mass); |
|
104 |
+ } |
|
105 |
+ else |
|
106 |
+ { |
|
107 |
+ if (deltaLL * mAnnealingTemp >= std::log(gaps::random::uniform())) |
|
108 |
+ { |
|
109 |
+ mDomain.deleteAtom(p1); |
|
110 |
+ mDomain.addAtom(p2, mass); |
|
111 |
+ mMatrix(r1, c1) += -mass; |
|
112 |
+ mMatrix(r2, c2) += mass; |
|
113 |
+ impl()->updateAPMatrix(r1, c1, -mass); |
|
114 |
+ impl()->updateAPMatrix(r2, c2, mass); |
|
115 |
+ } |
|
116 |
+ } |
|
117 |
+ |
|
118 |
+ } |
|
119 |
+ |
|
120 |
+ void exchange(uint64_t p1, unsigned r1, unsigned c1, uint64_t p2, unsigned r2, unsigned c2) |
|
121 |
+ { |
|
122 |
+ float mass1 = mDomiain.at(p1); |
|
123 |
+ float mass2 = mDomiain.at(p2); |
|
124 |
+ float newMass = gaps::random::inverseGammaSample(0.f, mass1 + mass2, 2.f, 1.f / mLambda); |
|
125 |
+ float delta1 = mass1 > mass2 ? newMass - mass1 : mass2 - newMass; |
|
126 |
+ float delta2 = mass2 > mass1 ? newMass - mass2 : mass1 - newMass; |
|
127 |
+ if (r1 == r2 && c1 == c2) |
|
128 |
+ { |
|
129 |
+ mDomain.updateAtomMass(p1, delta1); |
|
130 |
+ mDomain.updateAtomMass(p2, delta2); |
|
131 |
+ } |
|
132 |
+ else |
|
133 |
+ { |
|
134 |
+ // all the exchange code |
|
135 |
+ } |
|
136 |
+ } |
|
137 |
+ |
|
138 |
+ float gibbsMass(unsigned row, unsigned col) |
|
139 |
+ { |
|
140 |
+ AlphaParameters alpha = impl()->alphaParameters(row, col); |
|
141 |
+ alpha.s *= mAnnealingTemp / 2.f; |
|
142 |
+ alpha.su *= mAnnealingTemp / 2.f; |
|
143 |
+ float mean = (2.f * alpha.su - mLambda) / (2.f * alpha.s); |
|
144 |
+ float sd = 1.f / std::sqrt(2.f * alpha.s); |
|
145 |
+ |
|
146 |
+ float plower = gaps::random::p_norm(0.f, mean, sd); |
|
147 |
+ |
|
148 |
+ float newMass = death ? mass : 0.f; |
|
149 |
+ if (plower < 1.f && alpha.s > 0.00001f) |
|
150 |
+ { |
|
151 |
+ newMass = gaps::random::inverseNormSample(plower, 1.f, mean, sd); |
|
152 |
+ } |
|
153 |
+ return std::max(0.f, std::min(newMax, mMaxGibbsMass)); |
|
154 |
+ } |
|
155 |
+}; |
|
156 |
+ |
|
157 |
+class AmplitudeGibbsSampler : public GibbsSampler<AmplitudeGibbsSampler, ColMatrix, RowMatrix> |
|
158 |
+{ |
|
159 |
+public: |
|
160 |
+ |
|
161 |
+ bool canUseGibbs(unsigned row, unsigned col) |
|
162 |
+ { |
|
163 |
+ return !gaps::algo::isRowZero(mOtherMatrix, col); |
|
164 |
+ } |
|
165 |
+ |
|
166 |
+ bool canUseGibbs(unsigned row1, unsigned col1, unsigned row2, unsigned col2) |
|
167 |
+ { |
|
168 |
+ return !gaps::algo::isRowZero(mOtherMatrix, col1) |
|
169 |
+ && !gaps::algo::isRowZero(mOtherMatrix, col2); |
|
170 |
+ } |
|
171 |
+ |
|
172 |
+ void updateAPMatrix(unsigned row, unsigned col, float delta) |
|
173 |
+ { |
|
174 |
+ for (unsigned j = 0; j < mAPMatrix.nCol(); ++j) |
|
175 |
+ { |
|
176 |
+ mAPMatrix(row,j) += delta * mOtherMatrix(j,col); |
|
177 |
+ } |
|
178 |
+ } |
|
179 |
+ |
|
180 |
+ unsigned getRow(uint64_t pos) const |
|
181 |
+ { |
|
182 |
+ return std::min(pos / (mBinSize * mNumCols), mNumRows); |
|
183 |
+ } |
|
184 |
+ |
|
185 |
+ unsigned getCol(uint64_t pos) const |
|
186 |
+ { |
|
187 |
+ return std::min((pos / mBinSize) % mNumCols, mNumCols); |
|
188 |
+ } |
|
189 |
+}; |
|
190 |
+ |
|
191 |
+class PatternGibbsSampler : public GibbsSampler<PatternGibbsSampler, RowMatrix, ColMatrix> |
|
192 |
+{ |
|
193 |
+public: |
|
194 |
+ |
|
195 |
+ bool canUseGibbs(unsigned row, unsigned col) |
|
196 |
+ { |
|
197 |
+ return !gaps::algo::isColZero(mOtherMatrix, row); |
|
198 |
+ } |
|
199 |
+ |
|
200 |
+ bool canUseGibbs(unsigned row1, unsigned col1, unsigned row2, unsigned col2) |
|
201 |
+ { |
|
202 |
+ return !gaps::algo::isColZero(mOtherMatrix, row1) |
|
203 |
+ && !gaps::algo::isColZero(mOtherMatrix, row2); |
|
204 |
+ } |
|
205 |
+ |
|
206 |
+ void updateAPMatrix(unsigned row, unsigned col, float delta) |
|