... | ... |
@@ -4,6 +4,7 @@ export(CoGAPS) |
4 | 4 |
export(GWCoGAPS) |
5 | 5 |
export(GWCoGapsFromCheckpoint) |
6 | 6 |
export(binaryA) |
7 |
+export(buildReport) |
|
7 | 8 |
export(calcCoGAPSStat) |
8 | 9 |
export(calcGeneGSStat) |
9 | 10 |
export(calcZ) |
... | ... |
@@ -11,7 +12,6 @@ export(cellMatchR) |
11 | 12 |
export(computeGeneGSProb) |
12 | 13 |
export(createGWCoGAPSSets) |
13 | 14 |
export(createscCoGAPSSets) |
14 |
-export(displayBuildReport) |
|
15 | 15 |
export(gapsMapRun) |
16 | 16 |
export(gapsRun) |
17 | 17 |
export(patternMarkers) |
144 | 144 |
deleted file mode 100644 |
... | ... |
@@ -1,107 +0,0 @@ |
1 |
-# save file time at beginning to ensure uniqueness |
|
2 |
-file_name <- paste("cogaps_benchmark_", format(Sys.time(), "%m_%d_%y_%H_%M_%OS"), |
|
3 |
- '.RData', sep='') |
|
4 |
- |
|
5 |
-# load packages |
|
6 |
-library(CoGAPS) |
|
7 |
- |
|
8 |
-# display package version |
|
9 |
-print(packageVersion('CoGAPS')) |
|
10 |
- |
|
11 |
-# benchmark dimensions |
|
12 |
- |
|
13 |
-M_dimensions <- c(50, 75, 100, 250, 500, 750, 1000) |
|
14 |
-N_dimensions <- M_dimensions |
|
15 |
-nFactor_dimensions <- c(5, 10, 15, 20, 30, 40, 50) |
|
16 |
-nIter_dimensions <- c(1000, 1250, 1500, 1750, 2000, 2500, 3000) |
|
17 |
- |
|
18 |
-M_default <- floor(median(M_dimensions)) |
|
19 |
-N_default <- floor(median(N_dimensions)) |
|
20 |
-nFactor_default <- floor(median(nFactor_dimensions)) |
|
21 |
-nIter_default <- floor(median(nIter_dimensions)) |
|
22 |
- |
|
23 |
-print(paste("Defaults, M=", M_default, " N=", N_default, " K=", |
|
24 |
- nFactor_default, " I=", nIter_default, sep='')) |
|
25 |
- |
|
26 |
-seed_default <- 12345 |
|
27 |
-reps_default <- 5 |
|
28 |
- |
|
29 |
-# benchmark function |
|
30 |
- |
|
31 |
-timeToSeconds <- function(time) |
|
32 |
-{ |
|
33 |
- time$hour * 3600 + time$min * 60 + time$sec |
|
34 |
-} |
|
35 |
- |
|
36 |
-data(GIST_TS_20084) |
|
37 |
-runBenchmark <- function(m, n, k, iter, seed, reps) |
|
38 |
-{ |
|
39 |
- set.seed(123) |
|
40 |
- test_mat_D <- matrix(sample(as.matrix(GIST.D), m * n, replace=T), |
|
41 |
- nrow = m, ncol = n) |
|
42 |
- |
|
43 |
- test_mat_S <- matrix(sample(as.matrix(GIST.S), m * n, replace=T), |
|
44 |
- nrow = m, ncol = n) |
|
45 |
- |
|
46 |
- times <- c() |
|
47 |
- for (r in 1:reps) |
|
48 |
- { |
|
49 |
- start_time <- as.POSIXlt(Sys.time()) |
|
50 |
- gapsRun(test_mat_D, test_mat_S, nEquil=iter, nSample=iter, |
|
51 |
- nFactor=k, seed=seed+r, messages=TRUE, nOutR = iter/2, |
|
52 |
-# sampleSnapshots=FALSE, numSnapshots=iter / 2) |
|
53 |
- numSnapshots=0) |
|
54 |
- times[r] <- timeToSeconds(as.POSIXlt(Sys.time())) - timeToSeconds(start_time) |
|
55 |
- print(round(times[r], 2)) |
|
56 |
- } |
|
57 |
- params <- c(m, n, k, iter, seed, reps) |
|
58 |
- secs <- c(min(times), mean(times), median(times), max(times)) |
|
59 |
- return(c(params, secs)) |
|
60 |
-} |
|
61 |
- |
|
62 |
-# store sample times and parameters |
|
63 |
-samples <- NULL |
|
64 |
- |
|
65 |
-# run M benchmark |
|
66 |
-for (n in M_dimensions) |
|
67 |
-{ |
|
68 |
- print(paste("benchmarking M =", n)) |
|
69 |
- bm <- runBenchmark(n, N_default, nFactor_default, nIter_default, seed_default, |
|
70 |
- reps_default) |
|
71 |
- samples <- rbind(samples, bm) |
|
72 |
-} |
|
73 |
- |
|
74 |
-# run N benchmark |
|
75 |
-for (n in N_dimensions) |
|
76 |
-{ |
|
77 |
- print(paste("benchmarking N =", n)) |
|
78 |
- bm <- runBenchmark(M_default, n, nFactor_default, nIter_default, seed_default, |
|
79 |
- reps_default) |
|
80 |
- samples <- rbind(samples, bm) |
|
81 |
-} |
|
82 |
- |
|
83 |
-# run nFactor benchmark |
|
84 |
-for (n in nFactor_dimensions) |
|
85 |
-{ |
|
86 |
- print(paste("benchmarking K =", n)) |
|
87 |
- bm <- runBenchmark(M_default, N_default, n, nIter_default, seed_default, |
|
88 |
- reps_default) |
|
89 |
- samples <- rbind(samples, bm) |
|
90 |
-} |
|
91 |
- |
|
92 |
-# run nIter benchmark |
|
93 |
-for (n in nIter_dimensions) |
|
94 |
-{ |
|
95 |
- print(paste("benchmarking nIter =", n)) |
|
96 |
- bm <- runBenchmark(M_default, N_default, nFactor_default, n, seed_default, |
|
97 |
- reps_default) |
|
98 |
- samples <- rbind(samples, bm) |
|
99 |
-} |
|
100 |
- |
|
101 |
-rownames(samples) <- NULL |
|
102 |
-colnames(samples) <- c('M', 'N', 'K', 'nIter', 'seed', 'reps', 'min', 'mean', |
|
103 |
- 'median', 'max') |
|
104 |
-cogaps_benchmark <- data.frame(samples) |
|
105 |
-cogaps_version <- packageVersion('CoGAPS') |
|
106 |
- |
|
107 |
-save(cogaps_benchmark, cogaps_version, file=file_name) |
108 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,381 +0,0 @@ |
1 |
-#include "AtomicQueue.h" |
|
2 |
- |
|
3 |
-// TODO have private random number generator for consistency with benchmark |
|
4 |
- |
|
5 |
-// consider having separate class that generates the proposal sequence |
|
6 |
-// and selects the atoms, but does not calculate the mass .... separate |
|
7 |
-// class with matrix access computes the mass - also allows for entire |
|
8 |
-// proposal specific code to be together (no more conditionals on nChanges) |
|
9 |
- |
|
10 |
-// acceptance of proposals needs it's own unit as well - considering |
|
11 |
-// how much time is spent updating matrices - the updates would be independent |
|
12 |
-// of any currently running calculations by design. Find a way to lock |
|
13 |
-// the individual vectors of the matrix without locking the entire matrix. |
|
14 |
- |
|
15 |
-// consider changing the data layout to have the row matrices (D,AP,S) near |
|
16 |
-// the P matrix and the col matrices (D,AP,S) near the A matrix |
|
17 |
- |
|
18 |
-// consider allocating a single continuous chunk for each matric instead |
|
19 |
-// of individual vectors - does this mess up locking? |
|
20 |
- |
|
21 |
-// look into delaying the A/P matrix update until after the full update step |
|
22 |
-// - only the AP updates are neccesary |
|
23 |
- |
|
24 |
-// no need to separate alphaParameters and deltaLL into separate workers - |
|
25 |
-// they are serial and touch much of the same memory |
|
26 |
- |
|
27 |
-// need to ensure proposals dont effect each other |
|
28 |
-// BIRTH - mark off locations of birth |
|
29 |
-// DEATH - can't free up death locations, neglible effect |
|
30 |
-// MOVE - must move within neighbor boundary, if a neighbor could be moved |
|
31 |
-// or deleted then this is held up - also know where birth locations are |
|
32 |
-// EXCHANGE - selects right most atom - uneffected by move, by death of this |
|
33 |
-// atom or birth of an atom in between can effect |
|
34 |
- |
|
35 |
-// Neighbors |
|
36 |
-// DEATH - merge neighbors |
|
37 |
-// MOVE/RESIZE - nothing changes |
|
38 |
-// EXCHANGE - nothing changes |
|
39 |
-// BIRTH - need to find left or right neighbor |
|
40 |
-// store neighbor information, then everything constant besides insertion |
|
41 |
-// store in map makes insertion logN |
|
42 |
- |
|
43 |
-AtomicQueue::AtomicQueue(char label, unsigned nrow, unsigned ncol) |
|
44 |
- : |
|
45 |
-mLabel(label), mNumAtoms(0), |
|
46 |
-mMaxNumAtoms(std::numeric_limits<uint64_t>::max()), |
|
47 |
-mNumRows(nrow), mNumCols(ncol), mNumBins(nrow * ncol), |
|
48 |
-mBinSize(std::numeric_limits<uint64_t>::max() / (nrow * ncol)), |
|
49 |
-mAlpha(alpha), mLambda(lambda) |
|
50 |
-{} |
|
51 |
- |
|
52 |
-// O(logN) |
|
53 |
-void AtomicQueue::addAtom(Atom atom) |
|
54 |
-{ |
|
55 |
- mAtomPositions.insert(std::pair<uint64_t, uint64_t>(atom.pos, mNumAtoms)); |
|
56 |
- mAtoms.push_back(atom); |
|
57 |
- mNumAtoms++; |
|
58 |
-} |
|
59 |
- |
|
60 |
-// O(logN) |
|
61 |
-void AtomicSupport::removeAtom(uint64_t pos) |
|
62 |
-{ |
|
63 |
- uint64_t index = mAtomPositions.at(pos); |
|
64 |
- mAtomPositions.erase(pos); |
|
65 |
- |
|
66 |
- // update key of object about to be moved (last one doesn't need to move) |
|
67 |
- if (index < mNumAtoms - 1) |
|
68 |
- { |
|
69 |
- mAtomPositions.erase(mAtoms.back().pos); |
|
70 |
- mAtomPositions.insert(std::pair<uint64_t, uint64_t>(mAtoms.back().pos, |
|
71 |
- index)); |
|
72 |
- } |
|
73 |
- |
|
74 |
- // delete atom from vector in O(1) |
|
75 |
- mAtoms[index] = mAtoms.back(); |
|
76 |
- mAtoms.pop_back(); |
|
77 |
- mNumAtoms--; |
|
78 |
-} |
|
79 |
- |
|
80 |
-// O(logN) |
|
81 |
-AtomNeighbors AtomicSupport::getNeighbors(uint64_t pos) const |
|
82 |
-{ |
|
83 |
- // get an iterator to this atom and it's neighbors |
|
84 |
- std::map<uint64_t, uint64_t>::const_iterator it, left, right; |
|
85 |
- it = mAtomPositions.find(pos); |
|
86 |
- left = it; right = it; |
|
87 |
- if (it != mAtomPositions.begin()) |
|
88 |
- { |
|
89 |
- --left; |
|
90 |
- } |
|
91 |
- if (++it != mAtomPositions.end()) |
|
92 |
- { |
|
93 |
- ++right; |
|
94 |
- } |
|
95 |
- return AtomNeighbors(left->first, mAtoms[left->second].mass, |
|
96 |
- right->first, mAtoms[right->second].mass); |
|
97 |
-} |
|
98 |
- |
|
99 |
-// O(1) |
|
100 |
-uint64_t AtomicSupport::getRow(uint64_t pos) const |
|
101 |
-{ |
|
102 |
- uint64_t binNum = std::min(pos / mBinSize, mNumBins - 1); |
|
103 |
- return mLabel == 'A' ? binNum / mNumCols : binNum % mNumRows; |
|
104 |
-} |
|
105 |
- |
|
106 |
-// O(1) |
|
107 |
-uint64_t AtomicSupport::getCol(uint64_t pos) const |
|
108 |
-{ |
|
109 |
- uint64_t binNum = std::min(pos / mBinSize, mNumBins - 1); |
|
110 |
- return mLabel == 'A' ? binNum % mNumCols : binNum / mNumRows; |
|
111 |
-} |
|
112 |
- |
|
113 |
-// O(logN) |
|
114 |
-uint64_t AtomicSupport::randomFreePosition() const |
|
115 |
-{ |
|
116 |
- uint64_t pos = 0; |
|
117 |
- do |
|
118 |
- { |
|
119 |
- pos = gaps::random::uniform64(); |
|
120 |
- } while (mAtomPositions.count(pos) > 0); |
|
121 |
- return pos; |
|
122 |
-} |
|
123 |
- |
|
124 |
-// O(1) |
|
125 |
-uint64_t AtomicSupport::randomAtomPosition() const |
|
126 |
-{ |
|
127 |
- uint64_t num = gaps::random::uniform64(0, mNumAtoms - 1); |
|
128 |
- return mAtoms[num].pos; |
|
129 |
-} |
|
130 |
- |
|
131 |
-// O(logN) |
|
132 |
-AtomicProposal AtomicSupport::proposeDeath() const |
|
133 |
-{ |
|
134 |
- uint64_t location = randomAtomPosition(); |
|
135 |
- float mass = mAtoms[mAtomPositions.at(location)].mass; |
|
136 |
- return AtomicProposal(mLabel, 'D', location, -mass); |
|
137 |
-} |
|
138 |
- |
|
139 |
-// O(logN) |
|
140 |
-AtomicProposal AtomicSupport::proposeBirth() const |
|
141 |
-{ |
|
142 |
- uint64_t location = randomFreePosition(); |
|
143 |
- float mass = gaps::random::exponential(mLambda); |
|
144 |
- return AtomicProposal(mLabel, 'B', location, mass); |
|
145 |
-} |
|
146 |
- |
|
147 |
-// move atom between adjacent atoms |
|
148 |
-// O(logN) |
|
149 |
-AtomicProposal AtomicSupport::proposeMove() const |
|
150 |
-{ |
|
151 |
- uint64_t pos = randomAtomPosition(); |
|
152 |
- AtomNeighbors nbs = getNeighbors(pos); |
|
153 |
- float mass = at(pos); |
|
154 |
- |
|
155 |
- uint64_t lbound = nbs.left.pos != pos ? nbs.left.pos : 0; |
|
156 |
- uint64_t rbound = nbs.right.pos != pos ? nbs.right.pos : mMaxNumAtoms - 1; |
|
157 |
- |
|
158 |
- uint64_t newLocation = gaps::random::uniform64(lbound, rbound); |
|
159 |
- return AtomicProposal(mLabel, 'M', pos, -mass, newLocation, mass); |
|
160 |
-} |
|
161 |
- |
|
162 |
-// exchange with adjacent atom to the right |
|
163 |
-// O(logN) |
|
164 |
-AtomicProposal AtomicSupport::proposeExchange() const |
|
165 |
-{ |
|
166 |
- uint64_t pos1 = randomAtomPosition(); |
|
167 |
- AtomNeighbors nbs = getNeighbors(pos1); |
|
168 |
- float mass1 = at(pos1); |
|
169 |
- |
|
170 |
- // find right neighbor, wrap around to beginning if this atom is end |
|
171 |
- uint64_t pos2 = nbs.right.pos != pos1 ? nbs.right.pos : mAtomPositions.begin()->first; |
|
172 |
- float mass2 = at(pos2); |
|
173 |
- |
|
174 |
- // calculate new mass |
|
175 |
- float pupper = gaps::random::p_gamma(mass1 + mass2, 2.f, 1.f / mLambda); |
|
176 |
- float newMass = gaps::random::q_gamma(gaps::random::uniform(0.f, pupper), |
|
177 |
- 2.f, 1.f / mLambda); |
|
178 |
- |
|
179 |
- // calculate mass changes |
|
180 |
- float delta1 = mass1 > mass2 ? newMass - mass1 : mass2 - newMass; |
|
181 |
- float delta2 = mass2 > mass1 ? newMass - mass2 : mass1 - newMass; |
|
182 |
- |
|
183 |
- // set first position to be postive change |
|
184 |
- float delta1_cached = delta1; |
|
185 |
- uint64_t pos1_cached = pos1; |
|
186 |
- pos1 = delta1_cached > 0 ? pos1 : pos2; |
|
187 |
- pos2 = delta1_cached > 0 ? pos2 : pos1_cached; |
|
188 |
- delta1 = delta1_cached > 0 ? delta1 : delta2; |
|
189 |
- delta2 = delta1_cached > 0 ? delta2 : delta1_cached; |
|
190 |
- |
|
191 |
- // preserve total mass |
|
192 |
- return AtomicProposal(mLabel, 'E', pos1, delta1, pos2, delta2); |
|
193 |
-} |
|
194 |
- |
|
195 |
-// O(logN) |
|
196 |
-float AtomicSupport::updateAtomMass(uint64_t pos, float delta) |
|
197 |
-{ |
|
198 |
- if (mAtomPositions.count(pos)) // update atom if it exists |
|
199 |
- { |
|
200 |
- mAtoms[mAtomPositions.at(pos)].mass += delta; |
|
201 |
- } |
|
202 |
- else // create a new atom if it doesn't exist |
|
203 |
- { |
|
204 |
- addAtom(Atom(pos, delta)); |
|
205 |
- } |
|
206 |
- |
|
207 |
- if (mAtoms[mAtomPositions.at(pos)].mass < EPSILON) |
|
208 |
- { |
|
209 |
- delta -= at(pos); |
|
210 |
- mMaxAtoms--; // keep queue up to date |
|
211 |
- removeAtom(pos); |
|
212 |
- } |
|
213 |
- return delta; |
|
214 |
-} |
|
215 |
- |
|
216 |
-MatrixChange AtomicSupport::acceptProposal(const AtomicProposal &prop, |
|
217 |
-MatrixChange &ch) |
|
218 |
-{ |
|
219 |
- ch.delta1 = updateAtomMass(prop.pos1, prop.delta1); |
|
220 |
- ch.delta2 = (prop.nChanges > 1) ? updateAtomMass(prop.pos2, prop.delta2) |
|
221 |
- : ch.delta2; |
|
222 |
- return ch; |
|
223 |
-} |
|
224 |
- |
|
225 |
-float AtomicSupport::deleteProb(unsigned nAtoms) const |
|
226 |
-{ |
|
227 |
- float pDelete = 0.5; // default uniform prior for mAlpha == 0 |
|
228 |
- float fnAtoms = (float)nAtoms; |
|
229 |
- if (mAlpha > 0) // poisson prior |
|
230 |
- { |
|
231 |
- float maxTerm = ((float)mMaxNumAtoms - fnAtoms) / (float)mMaxNumAtoms; |
|
232 |
- return fnAtoms / (fnAtoms + mAlpha * (float)mNumBins * maxTerm); |
|
233 |
- } |
|
234 |
- else if (mAlpha < 0) // geometric prior |
|
235 |
- { |
|
236 |
- float c = -mAlpha * mNumBins / (-mAlpha * mNumBins + 1.0); |
|
237 |
- return fnAtoms / ((fnAtoms + 1.f) * c + fnAtoms); |
|
238 |
- } |
|
239 |
-} |
|
240 |
- |
|
241 |
-static isInVector(std::vector<unsigned> &vec, unsigned n) |
|
242 |
-{ |
|
243 |
- return std::find(vec.begin(), vec.end(), n) != vec.end(); |
|
244 |
-} |
|
245 |
- |
|
246 |
-bool AtomicSupport::addProposal() const |
|
247 |
-{ |
|
248 |
- if (minAtoms == 0 && maxAtoms > 0 || minAtoms < 2 && maxAtoms >= 2) |
|
249 |
- { |
|
250 |
- return false; |
|
251 |
- } |
|
252 |
- |
|
253 |
- AtomicProposal proposal; |
|
254 |
- if (maxAtoms == 0) |
|
255 |
- { |
|
256 |
- proposal = proposeBirth(); |
|
257 |
- minAtoms++; |
|
258 |
- maxAtoms++; |
|
259 |
- } |
|
260 |
- else |
|
261 |
- { |
|
262 |
- float bdProb = maxAtoms < 2 ? 0.6667f : 0.5f; |
|
263 |
- float u1 = gaps::random::uniform(); // cache these values if a failure |
|
264 |
- float u2 = gaps::random::uniform(); // happens, needed to prevent bias |
|
265 |
- float lowerBound = deleteProb(minAtoms); |
|
266 |
- float upperBound = deleteProb(maxAtoms); |
|
267 |
- if (u1 < bdProb && u2 > upperBound) |
|
268 |
- { |
|
269 |
- proposal = proposeBirth(); |
|
270 |
- minAtoms++; |
|
271 |
- maxAtoms++; |
|
272 |
- } |
|
273 |
- else if (u1 < bdProb && u2 < lowerBound) |
|
274 |
- { |
|
275 |
- proposal = proposeDeath(); |
|
276 |
- minAtoms--; |
|
277 |
- } |
|
278 |
- else if (u1 >= bdProb && (maxAtoms < 2 || u1 < 0.75f)) |
|
279 |
- { |
|
280 |
- proposal = proposeMove(); |
|
281 |
- } |
|
282 |
- else if (u1 >= bdProb) |
|
283 |
- { |
|
284 |
- proposal = proposeExchange(); |
|
285 |
- minAtoms--; |
|
286 |
- } |
|
287 |
- else |
|
288 |
- { |
|
289 |
- return false; |
|
290 |
- } |
|
291 |
- } |
|
292 |
- |
|
293 |
- // used rows should be an hash table so that find is O(1) |
|
294 |
- // can allocate is one chunk of memory since nRows/nCols is known |
|
295 |
- if (isInVector(usedRows, getRow(proposal.pos1))) |
|
296 |
- { |
|
297 |
- return false; |
|
298 |
- } |
|
299 |
- if (proposal.nChanges > 1) |
|
300 |
- { |
|
301 |
- if (isInVector(usedRows, getRow(proposal.pos2))) |
|
302 |
- { |
|
303 |
- return false; |
|
304 |
- } |
|
305 |
- usedRows.push_back(getRow(proposal.pos2)); |
|
306 |
- } |
|
307 |
- usedRows.push_back(getRow(proposal.pos1)); |
|
308 |
- mProposalQueue.push(proposal); |
|
309 |
- return true; |
|
310 |
-} |
|
311 |
- |
|
312 |
-void AtomicSupport::populateQueue(unsigned limit) |
|
313 |
-{ |
|
314 |
- unsigned nIter = 0; |
|
315 |
- while (nIter++ < limit && addProposal()); |
|
316 |
-} |
|
317 |
- |
|
318 |
-AtomicProposal AtomicSupport::popQueue() |
|
319 |
-{ |
|
320 |
- AtomicProposal prop = mProposalQueue.front(); |
|
321 |
- mProposalQueue.pop(); |
|
322 |
- return prop; |
|
323 |
-} |
|
324 |
- |
|
325 |
-MatrixChange AtomicSupport::acceptProposal(const AtomicProposal &prop, |
|
326 |
-MatrixChange ch) |
|
327 |
-{ |
|
328 |
- ch.delta1 = updateAtomMass(prop.pos1, prop.delta1); |
|
329 |
- ch.delta2 = (prop.nChanges > 1) ? updateAtomMass(prop.pos2, prop.delta2) |
|
330 |
- : ch.delta2; |
|
331 |
- return ch; |
|
332 |
-} |
|
333 |
- |
|
334 |
-void AtomicSupport::rejectProposal(const AtomicProposal &prop) |
|
335 |
-{ |
|
336 |
- if (prop.label == 'D' || prop.label == 'E') |
|
337 |
- { |
|
338 |
- mMinAtoms++; |
|
339 |
- } |
|
340 |
-} |
|
341 |
- |
|
342 |
-unsigned AtomicSupport::size() const |
|
343 |
-{ |
|
344 |
- return mProposalQueue.size(); |
|
345 |
-} |
|
346 |
- |
|
347 |
-bool AtomicSupport::empty() const |
|
348 |
-{ |
|
349 |
- return mProposalQueue.empty(); |
|
350 |
-} |
|
351 |
- |
|
352 |
-Archive& operator<<(Archive &ar, AtomicSupport &domain) |
|
353 |
-{ |
|
354 |
- ar << domain.mLabel << domain.mNumAtoms << domain.mMaxNumAtoms |
|
355 |
- << domain.mNumRows << domain.mNumCols |
|
356 |
- << domain.mNumBins << domain.mBinSize << domain.mAlpha |
|
357 |
- << domain.mLambda; |
|
358 |
- |
|
359 |
- for (unsigned i = 0; i < domain.mAtoms.size(); ++i) |
|
360 |
- { |
|
361 |
- ar << domain.mAtoms[i].pos << domain.mAtoms[i].mass; |
|
362 |
- } |
|
363 |
- return ar; |
|
364 |
-} |
|
365 |
- |
|
366 |
-Archive& operator>>(Archive &ar, AtomicSupport &domain) |
|
367 |
-{ |
|
368 |
- uint64_t nAtoms = 0; |
|
369 |
- ar >> domain.mLabel >> nAtoms >> domain.mMaxNumAtoms |
|
370 |
- >> domain.mNumRows >> domain.mNumCols >> domain.mNumBins |
|
371 |
- >> domain.mBinSize >> domain.mAlpha >> domain.mLambda; |
|
372 |
- |
|
373 |
- uint64_t pos = 0; |
|
374 |
- float mass = 0.0; |
|
375 |
- for (unsigned i = 0; i < nAtoms; ++i) |
|
376 |
- { |
|
377 |
- ar >> pos >> mass; |
|
378 |
- domain.addAtom(Atom(pos, mass)); // this will increment mNumAtoms |
|
379 |
- } |
|
380 |
- return ar; |
|
381 |
-} |
|
382 | 0 |
\ No newline at end of file |
383 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,113 +0,0 @@ |
1 |
-#ifndef __GAPS_ATOMIC_QUEUE_H |
|
2 |
-#define __GAPS_ATOMIC_QUEUE_H |
|
3 |
- |
|
4 |
-#include <map> |
|
5 |
-#include <vector> |
|
6 |
- |
|
7 |
-struct Atom |
|
8 |
-{ |
|
9 |
- uint64_t pos; |
|
10 |
- float mass; |
|
11 |
- |
|
12 |
- Atom(uint64_t p, float m) : pos(p), mass(m) {} |
|
13 |
-}; |
|
14 |
- |
|
15 |
-struct AtomNeighbors |
|
16 |
-{ |
|
17 |
- Atom left; |
|
18 |
- Atom right; |
|
19 |
- |
|
20 |
- AtomNeighbors(uint64_t lp, float lm, uint64_t rp, float rm) |
|
21 |
- : left(Atom(lp, lm)), right(Atom(rp, rm)) |
|
22 |
- {} |
|
23 |
-}; |
|
24 |
- |
|
25 |
-struct AtomicProposal |
|
26 |
-{ |
|
27 |
- char label; |
|
28 |
- char type; |
|
29 |
- unsigned nChanges; |
|
30 |
- |
|
31 |
- uint64_t pos1; |
|
32 |
- float delta1; |
|
33 |
- |
|
34 |
- uint64_t pos2; |
|
35 |
- float delta2; |
|
36 |
- |
|
37 |
- AtomicProposal(char l, char t, uint64_t p1, float m1) |
|
38 |
- : label(l), type(t), nChanges(1), pos1(p1), delta1(m1), pos2(0), delta2(0.f) |
|
39 |
- {} |
|
40 |
- |
|
41 |
- AtomicProposal(char l, char t, uint64_t p1, float m1, uint64_t p2, float m2) |
|
42 |
- : label(l), type(t), nChanges(2), pos1(p1), delta1(m1), pos2(p2), delta2(m2) |
|
43 |
- {} |
|
44 |
-}; |
|
45 |
- |
|
46 |
-class AtomicQueue |
|
47 |
-{ |
|
48 |
-private: |
|
49 |
- |
|
50 |
- // parameters |
|
51 |
- char mLabel; |
|
52 |
- float mAlpha; |
|
53 |
- float mLambda; |
|
54 |
- |
|
55 |
- // storage of the atomic domain |
|
56 |
- std::vector<Atom> mAtoms; |
|
57 |
- std::map<uint64_t, uint64_t> mAtomPositions; |
|
58 |
- uint64_t mNumAtoms; |
|
59 |
- uint64_t mAtomCapacity; |
|
60 |
- |
|
61 |
- // matrix information |
|
62 |
- unsigned mNumRows; |
|
63 |
- unsigned mNumCols; |
|
64 |
- unsigned mNumBins; |
|
65 |
- unsigned mBinSize; |
|
66 |
- |
|
67 |
- // storage of proposal queue |
|
68 |
- std::queue<AtomicProposal> mProposalQueue; |
|
69 |
- std::vector<unsigned> mUsedIndices; // rows of A, cols of P |
|
70 |
- unsigned mMinAtoms; |
|
71 |
- unsigned mMaxAtoms; |
|
72 |
- |
|
73 |
- // proposal functions |
|
74 |
- AtomicProposal proposeBirth() const; |
|
75 |
- AtomicProposal proposeDeath() const; |
|
76 |
- AtomicProposal proposeMove() const; |
|
77 |
- AtomicProposal proposeExchange() const; |
|
78 |
- |
|
79 |
- // update the mass of an atom, return the total amount changed |
|
80 |
- std::pair<float, bool> updateAtomMass(uint64_t pos, float delta); |
|
81 |
- |
|
82 |
- // functions for dealing with atomic data structure |
|
83 |
- void addAtom(Atom atom); // O(logN) |
|
84 |
- void removeAtom(uint64_t pos); // O(logN) |
|
85 |
- AtomNeighbors getNeighbors(uint64_t pos) const; // O(logN) |
|
86 |
- |
|
87 |
- // get atomic positions at random |
|
88 |
- uint64_t randomFreePosition() const; // O(1) |
|
89 |
- uint64_t randomAtomPosition() const; // O(1) |
|
90 |
- |
|
91 |
-public: |
|
92 |
- |
|
93 |
- // constructor |
|
94 |
- AtomicQueue(char label, unsigned nrow, unsigned ncol); |
|
95 |
- |
|
96 |
- // parameter setters |
|
97 |
- void setAlpha(float alpha) {mAlpha = alpha;} |
|
98 |
- void setLambda(float lambda) {mLambda = lambda;} |
|
99 |
- |
|
100 |
- // queue operations |
|
101 |
- void populate(unsigned limit); |
|
102 |
- AtomicProposal pop_front(); |
|
103 |
- MatrixChange accept(const AtomicProposal &prop, MatrixChange &change); |
|
104 |
- void reject(const AtomicProposal &prop); |
|
105 |
- unsigned size() const; |
|
106 |
- bool empty() const; |
|
107 |
- |
|
108 |
- // serialization |
|
109 |
- friend Archive& operator<<(Archive &ar, AtomicQueue &sampler); |
|
110 |
- friend Archive& operator>>(Archive &ar, AtomicQueue &sampler); |
|
111 |
-}; |
|
112 |
- |
|
113 |
-#endif |
|
114 | 0 |
\ No newline at end of file |
115 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,332 +0,0 @@ |
1 |
-#include "AtomicSupport.h" |
|
2 |
- |
|
3 |
-static const float EPSILON = 1.e-10; |
|
4 |
- |
|
5 |
-AtomicSupport::AtomicSupport(char label, uint64_t nrow, uint64_t ncol, |
|
6 |
-float alpha, float lambda) |
|
7 |
- : |
|
8 |
-mLabel(label), mNumAtoms(0), |
|
9 |
-mMaxNumAtoms(std::numeric_limits<uint64_t>::max()), |
|
10 |
-mNumRows(nrow), mNumCols(ncol), mNumBins(nrow * ncol), |
|
11 |
-mBinSize(std::numeric_limits<uint64_t>::max() / (nrow * ncol)), |
|
12 |
-mAlpha(alpha), mLambda(lambda) |
|
13 |
-{} |
|
14 |
- |
|
15 |
-// O(logN) |
|
16 |
-void AtomicSupport::addAtom(Atom atom) |
|
17 |
-{ |
|
18 |
- mAtomPositions.insert(std::pair<uint64_t, uint64_t>(atom.pos, mNumAtoms)); |
|
19 |
- mAtoms.push_back(atom); |
|
20 |
- mNumAtoms++; |
|
21 |
-} |
|
22 |
- |
|
23 |
-// O(logN) |
|
24 |
-void AtomicSupport::removeAtom(uint64_t pos) |
|
25 |
-{ |
|
26 |
- uint64_t index = mAtomPositions.at(pos); |
|
27 |
- mAtomPositions.erase(pos); |
|
28 |
- |
|
29 |
- // update key of object about to be moved (last one doesn't need to move) |
|
30 |
- if (index < mNumAtoms - 1) |
|
31 |
- { |
|
32 |
- mAtomPositions.erase(mAtoms.back().pos); |
|
33 |
- mAtomPositions.insert(std::pair<uint64_t, uint64_t>(mAtoms.back().pos, |
|
34 |
- index)); |
|
35 |
- } |
|
36 |
- |
|
37 |
- // delete atom from vector in O(1) |
|
38 |
- mAtoms[index] = mAtoms.back(); |
|
39 |
- mAtoms.pop_back(); |
|
40 |
- mNumAtoms--; |
|
41 |
-} |
|
42 |
- |
|
43 |
-// O(logN) |
|
44 |
-AtomNeighbors AtomicSupport::getNeighbors(uint64_t pos) const |
|
45 |
-{ |
|
46 |
- // get an iterator to this atom and it's neighbors |
|
47 |
- std::map<uint64_t, uint64_t>::const_iterator it, left, right; |
|
48 |
- it = mAtomPositions.find(pos); |
|
49 |
- left = it; right = it; |
|
50 |
- if (it != mAtomPositions.begin()) |
|
51 |
- { |
|
52 |
- --left; |
|
53 |
- } |
|
54 |
- if (++it != mAtomPositions.end()) |
|
55 |
- { |
|
56 |
- ++right; |
|
57 |
- } |
|
58 |
- return AtomNeighbors(left->first, mAtoms[left->second].mass, |
|
59 |
- right->first, mAtoms[right->second].mass); |
|
60 |
-} |
|
61 |
- |
|
62 |
-// O(1) |
|
63 |
-uint64_t AtomicSupport::getRow(uint64_t pos) const |
|
64 |
-{ |
|
65 |
- uint64_t binNum = std::min(pos / mBinSize, mNumBins - 1); |
|
66 |
- return mLabel == 'A' ? binNum / mNumCols : binNum % mNumRows; |
|
67 |
-} |
|
68 |
- |
|
69 |
-// O(1) |
|
70 |
-uint64_t AtomicSupport::getCol(uint64_t pos) const |
|
71 |
-{ |
|
72 |
- uint64_t binNum = std::min(pos / mBinSize, mNumBins - 1); |
|
73 |
- return mLabel == 'A' ? binNum % mNumCols : binNum / mNumRows; |
|
74 |
-} |
|
75 |
- |
|
76 |
-// O(logN) |
|
77 |
-uint64_t AtomicSupport::randomFreePosition() const |
|
78 |
-{ |
|
79 |
- uint64_t pos = 0; |
|
80 |
- do |
|
81 |
- { |
|
82 |
- pos = gaps::random::uniform64(); |
|
83 |
- } while (mAtomPositions.count(pos) > 0); |
|
84 |
- return pos; |
|
85 |
-} |
|
86 |
- |
|
87 |
-// O(1) |
|
88 |
-uint64_t AtomicSupport::randomAtomPosition() const |
|
89 |
-{ |
|
90 |
- uint64_t num = gaps::random::uniform64(0, mNumAtoms - 1); |
|
91 |
- return mAtoms[num].pos; |
|
92 |
-} |
|
93 |
- |
|
94 |
-// O(logN) |
|
95 |
-AtomicProposal AtomicSupport::proposeDeath() const |
|
96 |
-{ |
|
97 |
- uint64_t location = randomAtomPosition(); |
|
98 |
- float mass = mAtoms[mAtomPositions.at(location)].mass; |
|
99 |
- return AtomicProposal(mLabel, 'D', location, -mass); |
|
100 |
-} |
|
101 |
- |
|
102 |
-// O(logN) |
|
103 |
-AtomicProposal AtomicSupport::proposeBirth() const |
|
104 |
-{ |
|
105 |
- uint64_t location = randomFreePosition(); |
|
106 |
- float mass = gaps::random::exponential(mLambda); |
|
107 |
- return AtomicProposal(mLabel, 'B', location, mass); |
|
108 |
-} |
|
109 |
- |
|
110 |
-// move atom between adjacent atoms |
|
111 |
-// O(logN) |
|
112 |
-AtomicProposal AtomicSupport::proposeMove() const |
|
113 |
-{ |
|
114 |
- uint64_t pos = randomAtomPosition(); |
|
115 |
- AtomNeighbors nbs = getNeighbors(pos); |
|
116 |
- float mass = at(pos); |
|
117 |
- |
|
118 |
- uint64_t lbound = nbs.left.pos != pos ? nbs.left.pos : 0; |
|
119 |
- uint64_t rbound = nbs.right.pos != pos ? nbs.right.pos : mMaxNumAtoms - 1; |
|
120 |
- |
|
121 |
- uint64_t newLocation = gaps::random::uniform64(lbound, rbound); |
|
122 |
- return AtomicProposal(mLabel, 'M', pos, -mass, newLocation, mass); |
|
123 |
-} |
|
124 |
- |
|
125 |
-// exchange with adjacent atom to the right |
|
126 |
-// O(logN) |
|
127 |
-AtomicProposal AtomicSupport::proposeExchange() const |
|
128 |
-{ |
|
129 |
- uint64_t pos1 = randomAtomPosition(); |
|
130 |
- AtomNeighbors nbs = getNeighbors(pos1); |
|
131 |
- float mass1 = at(pos1); |
|
132 |
- |
|
133 |
- // find right neighbor, wrap around to beginning if this atom is end |
|
134 |
- uint64_t pos2 = nbs.right.pos != pos1 ? nbs.right.pos : mAtomPositions.begin()->first; |
|
135 |
- float mass2 = at(pos2); |
|
136 |
- |
|
137 |
- // calculate new mass |
|
138 |
- float pupper = gaps::random::p_gamma(mass1 + mass2, 2.f, 1.f / mLambda); |
|
139 |
- float newMass = gaps::random::inverseGammaSample(0.f, pupper, 2.f, 1.f / mLambda); |
|
140 |
- |
|
141 |
- // calculate mass changes |
|
142 |
- float delta1 = mass1 > mass2 ? newMass - mass1 : mass2 - newMass; |
|
143 |
- float delta2 = mass2 > mass1 ? newMass - mass2 : mass1 - newMass; |
|
144 |
- |
|
145 |
- // set first position to be postive change |
|
146 |
- float delta1_cached = delta1; |
|
147 |
- uint64_t pos1_cached = pos1; |
|
148 |
- pos1 = delta1_cached > 0 ? pos1 : pos2; |
|
149 |
- pos2 = delta1_cached > 0 ? pos2 : pos1_cached; |
|
150 |
- delta1 = delta1_cached > 0 ? delta1 : delta2; |
|
151 |
- delta2 = delta1_cached > 0 ? delta2 : delta1_cached; |
|
152 |
- |
|
153 |
- // preserve total mass |
|
154 |
- return AtomicProposal(mLabel, 'E', pos1, delta1, pos2, delta2); |
|
155 |
-} |
|
156 |
- |
|
157 |
-// O(logN) |
|
158 |
-float AtomicSupport::updateAtomMass(uint64_t pos, float delta) |
|
159 |
-{ |
|
160 |
- if (mAtomPositions.count(pos)) // update atom if it exists |
|
161 |
- { |
|
162 |
- mAtoms[mAtomPositions.at(pos)].mass += delta; |
|
163 |
- } |
|
164 |
- else // create a new atom if it doesn't exist |
|
165 |
- { |
|
166 |
- addAtom(Atom(pos, delta)); |
|
167 |
- } |
|
168 |
- |
|
169 |
- if (mAtoms[mAtomPositions.at(pos)].mass < EPSILON) |
|
170 |
- { |
|
171 |
- delta -= at(pos); |
|
172 |
- removeAtom(pos); |
|
173 |
- } |
|
174 |
- return delta; |
|
175 |
-} |
|
176 |
- |
|
177 |
-MatrixChange AtomicSupport::acceptProposal(const AtomicProposal &prop, |
|
178 |
-MatrixChange &ch) |
|
179 |
-{ |
|
180 |
- ch.delta1 = updateAtomMass(prop.pos1, prop.delta1); |
|
181 |
- ch.delta2 = (prop.nChanges > 1) ? updateAtomMass(prop.pos2, prop.delta2) |
|
182 |
- : ch.delta2; |
|
183 |
- return ch; |
|
184 |
-} |
|
185 |
- |
|
186 |
-float AtomicSupport::deleteProb(unsigned nAtoms) const |
|
187 |
-{ |
|
188 |
- float pDelete = 0.5; // default uniform prior for mAlpha == 0 |
|
189 |
- float fnAtoms = (float)nAtoms; |
|
190 |
- if (mAlpha > 0) // poisson prior |
|
191 |
- { |
|
192 |
- float maxTerm = ((float)mMaxNumAtoms - fnAtoms) / (float)mMaxNumAtoms; |
|
193 |
- return fnAtoms / (fnAtoms + mAlpha * (float)mNumBins * maxTerm); |
|
194 |
- } |
|
195 |
- else if (mAlpha < 0) // geometric prior |
|
196 |
- { |
|
197 |
- float c = -mAlpha * mNumBins / (-mAlpha * mNumBins + 1.0); |
|
198 |
- return fnAtoms / ((fnAtoms + 1.f) * c + fnAtoms); |
|
199 |
- } |
|
200 |
-} |
|
201 |
- |
|
202 |
-static isInVector(std::vector<unsigned> &vec, unsigned n) |
|
203 |
-{ |
|
204 |
- return std::find(vec.begin(), vec.end(), n) != vec.end(); |
|
205 |
-} |
|
206 |
- |
|
207 |
-bool AtomicSupport::addProposal(unsigned &minAtoms, unsigned &maxAtoms, |
|
208 |
-std::vector<unsigned> &usedRows, std::vector<unsigned> &usedCols) const |
|
209 |
-{ |
|
210 |
- if (minAtoms == 0 && maxAtoms > 0 || minAtoms < 2 && maxAtoms >= 2) |
|
211 |
- { |
|
212 |
- return false; |
|
213 |
- } |
|
214 |
- |
|
215 |
- AtomicProposal proposal; |
|
216 |
- if (maxAtoms == 0) |
|
217 |
- { |
|
218 |
- proposal = proposeBirth(); |
|
219 |
- minAtoms++; |
|
220 |
- maxAtoms++; |
|
221 |
- } |
|
222 |
- else |
|
223 |
- { |
|
224 |
- float bdProb = maxAtoms < 2 ? 0.6667f : 0.5f; |
|
225 |
- float u1 = gaps::random::uniform(); |
|
226 |
- float u2 = gaps::random::uniform(); |
|
227 |
- float lowerBound = deleteProb(minAtoms); |
|
228 |
- float upperBound = deleteProb(maxAtoms); |
|
229 |
- if (u1 < bdProb && u2 > upperBound) |
|
230 |
- { |
|
231 |
- proposal = proposeBirth(); |
|
232 |
- minAtoms++; |
|
233 |
- maxAtoms++; |
|
234 |
- } |
|
235 |
- else if (u1 < bdProb && u2 < lowerBound) |
|
236 |
- { |
|
237 |
- proposal = proposeDeath(); |
|
238 |
- minAtoms--; |
|
239 |
- } |
|
240 |
- else if (u1 >= bdProb && (maxAtoms < 2 || u1 < 0.75f)) |
|
241 |
- { |
|
242 |
- proposal = proposeMove(); |
|
243 |
- } |
|
244 |
- else if (u1 >= bdProb) |
|
245 |
- { |
|
246 |
- proposal = proposeExchange(); |
|
247 |
- minAtoms--; |
|
248 |
- } |
|
249 |
- else |
|
250 |
- { |
|
251 |
- return false; |
|
252 |
- } |
|
253 |
- } |
|
254 |
- |
|
255 |
- // used rows should be an hash table so that find is O(1) |
|
256 |
- // can allocate is one chunk of memory since nRows/nCols is known |
|
257 |
- if (isInVector(usedRows, getRow(proposal.pos1))) |
|
258 |
- { |
|
259 |
- return false; |
|
260 |
- } |
|
261 |
- if (proposal.nChanges > 1) |
|
262 |
- { |
|
263 |
- if (isInVector(usedRows, getRow(proposal.pos2))) |
|
264 |
- { |
|
265 |
- return false; |
|
266 |
- } |
|
267 |
- usedRows.push_back(getRow(proposal.pos2)); |
|
268 |
- } |
|
269 |
- usedRows.push_back(getRow(proposal.pos1)); |
|
270 |
- return true; |
|
271 |
-} |
|
272 |
- |
|
273 |
-// need to track min/max number of atoms |
|
274 |
-// birth is already known to be accepted |
|
275 |
-// when death/exchange remove an atom decrement maxAtoms |
|
276 |
-// if death/exchange do not remove an atom increment minAtoms |
|
277 |
-void AtomicSupport::populateQueue(unsigned limit) |
|
278 |
-{ |
|
279 |
- unsigned minAtoms = mNumAtoms; |
|
280 |
- unsigned maxAtoms = mNumAtoms; |
|
281 |
- std::vector<unsigned> usedRows; |
|
282 |
- unsigned nIter = 0; |
|
283 |
- |
|
284 |
- while (nIter++ < limit && addProposal(minAtoms, maxAtoms, usedRows)); |
|
285 |
-} |
|
286 |
- |
|
287 |
-bool AtomicSupport::isQueueEmpty() const |
|
288 |
-{ |
|
289 |
- return mProposalQueue.empty(); |
|
290 |
-} |
|
291 |
- |
|
292 |
-unsigned AtomicSupport::queueSize() const |
|
293 |
-{ |
|
294 |
- return mProposalQueue.size(); |
|
295 |
-} |
|
296 |
- |
|
297 |
-AtomicProposal AtomicSupport::popQueue() |
|
298 |
-{ |
|
299 |
- AtomicProposal prop = mProposalQueue.back(); |
|
300 |
- mProposalQueue.pop_back(); |
|
301 |
-} |
|
302 |
- |
|
303 |
-Archive& operator<<(Archive &ar, AtomicSupport &domain) |
|
304 |
-{ |
|
305 |
- ar << domain.mLabel << domain.mNumAtoms << domain.mMaxNumAtoms |
|
306 |
- << domain.mNumRows << domain.mNumCols |
|
307 |
- << domain.mNumBins << domain.mBinSize << domain.mAlpha |
|
308 |
- << domain.mLambda; |
|
309 |
- |
|
310 |
- for (unsigned i = 0; i < domain.mAtoms.size(); ++i) |
|
311 |
- { |
|
312 |
- ar << domain.mAtoms[i].pos << domain.mAtoms[i].mass; |
|
313 |
- } |
|
314 |
- return ar; |
|
315 |
-} |
|
316 |
- |
|
317 |
-Archive& operator>>(Archive &ar, AtomicSupport &domain) |
|
318 |
-{ |
|
319 |
- uint64_t nAtoms = 0; |
|
320 |
- ar >> domain.mLabel >> nAtoms >> domain.mMaxNumAtoms |
|
321 |
- >> domain.mNumRows >> domain.mNumCols >> domain.mNumBins |
|
322 |
- >> domain.mBinSize >> domain.mAlpha >> domain.mLambda; |
|
323 |
- |
|
324 |
- uint64_t pos = 0; |
|
325 |
- float mass = 0.0; |
|
326 |
- for (unsigned i = 0; i < nAtoms; ++i) |
|
327 |
- { |
|
328 |
- ar >> pos >> mass; |
|
329 |
- domain.addAtom(Atom(pos, mass)); // this will increment mNumAtoms |
|
330 |
- } |
|
331 |
- return ar; |
|
332 |
-} |
|
333 | 0 |
\ No newline at end of file |
334 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,133 +0,0 @@ |
1 |
-#ifndef __COGAPS_ATOMIC_SUPPORT_H__ |
|
2 |
-#define __COGAPS_ATOMIC_SUPPORT_H__ |
|
3 |
- |
|
4 |
-#include "Random.h" |
|
5 |
-#include "Matrix.h" |
|
6 |
- |
|
7 |
-#include <map> |
|
8 |
-#include <fstream> |
|
9 |
-#include <vector> |
|
10 |
-#include <stdint.h> |
|
11 |
- |
|
12 |
-struct AtomicProposal |
|
13 |
-{ |
|
14 |
- char label; |
|
15 |
- char type; |
|
16 |
- unsigned nChanges; |
|
17 |
- |
|
18 |
- uint64_t pos1; |
|
19 |
- float delta1; |
|
20 |
- |
|
21 |
- uint64_t pos2; |
|
22 |
- float delta2; |
|
23 |
- |
|
24 |
- AtomicProposal(char l, char t, uint64_t p1, float m1) |
|
25 |
- : label(l), type(t), nChanges(1), pos1(p1), delta1(m1), pos2(0), delta2(0.f) |
|
26 |
- {} |
|
27 |
- |
|
28 |
- AtomicProposal(char l, char t, uint64_t p1, float m1, uint64_t p2, float m2) |
|
29 |
- : label(l), type(t), nChanges(2), pos1(p1), delta1(m1), pos2(p2), delta2(m2) |
|
30 |
- {} |
|
31 |
-}; |
|
32 |
- |
|
33 |
-struct Atom |
|
34 |
-{ |
|
35 |
- uint64_t pos; |
|
36 |
- float mass; |
|
37 |
- |
|
38 |
- Atom(uint64_t p, float m) : pos(p), mass(m) {} |
|
39 |
-}; |
|
40 |
- |
|
41 |
-struct AtomNeighbors |
|
42 |
-{ |
|
43 |
- Atom left; |
|
44 |
- Atom right; |
|
45 |
- |
|
46 |
- AtomNeighbors(uint64_t lp, float lm, uint64_t rp, float rm) |
|
47 |
- : left(Atom(lp, lm)), right(Atom(rp, rm)) |
|
48 |
- {} |
|
49 |
-}; |
|
50 |
- |
|
51 |
-class AtomicSupport |
|
52 |
-{ |
|
53 |
-private: |
|
54 |
- |
|
55 |
-#ifdef GAPS_INTERNAL_TESTS |
|
56 |
-public: |
|
57 |
-#endif |
|
58 |
- |
|
59 |
- // label of this atomic domain (A/P) |
|
60 |
- char mLabel; |
|
61 |
- |
|
62 |
- // storage of the atomic domain |
|
63 |
- std::vector<Atom> mAtoms; |
|
64 |
- std::map<uint64_t, uint64_t> mAtomPositions; |
|
65 |
- uint64_t mNumAtoms; |
|
66 |
- uint64_t mMaxNumAtoms; |
|
67 |
- |
|
68 |
- // proposal queue |
|
69 |
- std::vector<AtomicProposal> mProposalQueue; |
|
70 |
- |
|
71 |
- // matrix information |
|
72 |
- uint64_t mNumRows; |
|
73 |
- uint64_t mNumCols; |
|
74 |
- uint64_t mNumBins; |
|
75 |
- uint64_t mBinSize; |
|
76 |
- |
|
77 |
- // average number of atoms per bin (must be > 0) |
|
78 |
- float mAlpha; |
|
79 |
- |
|
80 |
- // expected magnitude of each atom (must be > 0) |
|
81 |
- float mLambda; |
|
82 |
- |
|
83 |
- // functions for dealing with atomic data structure |
|
84 |
- void addAtom(Atom atom); // O(logN) |
|
85 |
- void removeAtom(uint64_t pos); // O(logN) |
|
86 |
- AtomNeighbors getNeighbors(uint64_t pos) const; // O(logN) |
|
87 |
- |
|
88 |
- // get atomic positions at random |
|
89 |
- uint64_t randomFreePosition() const; |
|
90 |
- uint64_t randomAtomPosition() const; |
|
91 |
- |
|
92 |
- // proposal helper functions |
|
93 |
- AtomicProposal proposeBirth() const; |
|
94 |
- AtomicProposal proposeDeath() const; |
|
95 |
- AtomicProposal proposeMove() const; |
|
96 |
- AtomicProposal proposeExchange() const; |
|
97 |
- |
|
98 |
- // update the mass of an atom, return the total amount changed |
|
99 |
- float updateAtomMass(uint64_t pos, float delta); |
|
100 |
- |
|
101 |
- AtomicProposal makeProposal() const; |
|
102 |
- |
|
103 |
-public: |
|
104 |
- |
|
105 |
- // constructor |
|
106 |
- AtomicSupport(char label, uint64_t nrow, uint64_t ncol); |
|
107 |
- |
|
108 |
- // proposals |
|
109 |
- void populateQueue(); |
|
110 |
- bool isQueueEmpty() const; |
|
111 |
- AtomicProposal popQueue(); |
|
112 |
- MatrixChange acceptProposal(const AtomicProposal &prop, MatrixChange &ch); |
|
113 |
- |
|
114 |
- // setters |
|
115 |
- void setAlpha(float alpha) {mAlpha = alpha;} |
|
116 |
- void setLambda(float lambda) {mLambda = lambda;} |
|
117 |
- |
|
118 |
- // getters |
|
119 |
- float alpha() const {return mAlpha;} |
|
120 |
- float lambda() const {return mLambda;} |
|
121 |
- uint64_t numAtoms() const {return mNumAtoms;} |
|
122 |
- float at(uint64_t pos) const {return mAtoms[mAtomPositions.at(pos)].mass;} |
|
123 |
- |
|
124 |
- // convert atomic position to row/col of the matrix |
|
125 |
- uint64_t getRow(uint64_t pos) const; |
|
126 |
- uint64_t getCol(uint64_t pos) const; |
|
127 |
- |
|
128 |
- // serialization |
|
129 |
- friend Archive& operator<<(Archive &ar, AtomicSupport &sampler); |
|
130 |
- friend Archive& operator>>(Archive &ar, AtomicSupport &sampler); |
|
131 |
-}; |
|
132 |
- |
|
133 |
-#endif |
134 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,529 +0,0 @@ |
1 |
-#include "GibbsSampler.h" |
|
2 |
-#include "Algorithms.h" |
|
3 |
- |
|
4 |
-#include <Rcpp.h> |
|
5 |
- |
|
6 |
-static const float EPSILON = 1.e-10; |
|
7 |
- |
|
8 |
-GibbsSampler::GibbsSampler(const Rcpp::NumericMatrix &D, |
|
9 |
-const Rcpp::NumericMatrix &S, unsigned nFactor) |
|
10 |
- : |
|
11 |
-mDMatrix(D), mSMatrix(S), mAPMatrix(D.nrow(), D.ncol()), |
|
12 |
-mAMatrix(D.nrow(), nFactor), mPMatrix(nFactor, D.ncol()), |
|
13 |
-mADomain('A', D.nrow(), nFactor), mPDomain('P', nFactor, D.ncol()), |
|
14 |
-mAMeanMatrix(D.nrow(), nFactor), mAStdMatrix(D.nrow(), nFactor), |
|
15 |
-mPMeanMatrix(nFactor, D.ncol()), mPStdMatrix(nFactor, D.ncol()), |
|
16 |
-mPumpMatrix(D.nrow(), nFactor) |
|
17 |
-{} |
|
18 |
- |
|
19 |
-GibbsSampler::GibbsSampler(const Rcpp::NumericMatrix &D, |
|
20 |
-const Rcpp::NumericMatrix &S, unsigned nFactor, float alphaA, float alphaP, |
|
21 |
-float maxGibbmassA, float maxGibbmassP, bool singleCellRNASeq, |
|
22 |
-char whichMatrixFixed, const Rcpp::NumericMatrix &FP, PumpThreshold pumpThreshold) |
|
23 |
- : |
|
24 |
-mDMatrix(D), mSMatrix(S), mAPMatrix(D.nrow(), D.ncol()), |
|
25 |
-mAMatrix(D.nrow(), nFactor), mPMatrix(nFactor, D.ncol()), |
|
26 |
-mADomain('A', D.nrow(), nFactor), mPDomain('P', nFactor, D.ncol()), |
|
27 |
-mAMeanMatrix(D.nrow(), nFactor), mAStdMatrix(D.nrow(), nFactor), |
|
28 |
-mPMeanMatrix(nFactor, D.ncol()), mPStdMatrix(nFactor, D.ncol()), |
|
29 |
-mPumpMatrix(D.nrow(), nFactor), mPumpThreshold(pumpThreshold), mStatUpdates(0), |
|
30 |
-mPumpStatUpdates(0), mMaxGibbsMassA(maxGibbmassA), mMaxGibbsMassP(maxGibbmassP), |
|
31 |
-mAnnealingTemp(1.0), mSingleCellRNASeq(singleCellRNASeq), mNumFixedPatterns(0), |
|
32 |
-mFixedMat(whichMatrixFixed) |
|
33 |
-{ |
|
34 |
- float meanD = mSingleCellRNASeq ? gaps::algo::nonZeroMean(mDMatrix) |
|
35 |
- : gaps::algo::mean(mDMatrix); |
|
36 |
- |
|
37 |
- mADomain.setAlpha(alphaA); |
|
38 |
- mADomain.setLambda(alphaA * std::sqrt(nFactor / meanD)); |
|
39 |
- mPDomain.setAlpha(alphaP); |
|
40 |
- mPDomain.setLambda(alphaP * std::sqrt(nFactor / meanD)); |
|
41 |
- |
|
42 |
- mMaxGibbsMassA /= mADomain.lambda(); |
|
43 |
- mMaxGibbsMassP /= mPDomain.lambda(); |
|
44 |
- |
|
45 |
- if (mFixedMat == 'A') |
|
46 |
- { |
|
47 |
- mNumFixedPatterns = FP.ncol(); |
|
48 |
- ColMatrix temp(FP); |
|
49 |
- for (unsigned j = 0; j < mNumFixedPatterns; ++j) |
|
50 |
- { |
|
51 |
- mAMatrix.getCol(j) = temp.getCol(j) / gaps::algo::sum(temp.getCol(j)); |
|
52 |
- } |
|
53 |
- } |
|
54 |
- else if (mFixedMat == 'P') |
|
55 |
- { |
|
56 |
- mNumFixedPatterns = FP.nrow(); |
|
57 |
- RowMatrix temp(FP); |
|
58 |
- for (unsigned i = 0; i < mNumFixedPatterns; ++i) |
|
59 |
- { |
|
60 |
- mPMatrix.getRow(i) = temp.getRow(i) / gaps::algo::sum(temp.getRow(i)); |
|
61 |
- } |
|
62 |
- } |
|
63 |
- gaps::algo::matrixMultiplication(mAPMatrix, mAMatrix, mPMatrix); |
|
64 |
-} |
|
65 |
- |
|
66 |
-float GibbsSampler::getGibbsMass(const MatrixChange &change) |
|
67 |
-{ |
|
68 |
- // check if this change is death (only called in birth/death) |
|
69 |
- bool death = change.delta1 < 0; |
|
70 |
- |
|
71 |
- // get s and su |
|
72 |
- AlphaParameters alphaParam = gaps::algo::alphaParameters(change, mDMatrix, |
|
73 |
- mSMatrix, mAMatrix, mPMatrix, mAPMatrix); |
|
74 |
- |
|
75 |
- // calculate mean and standard deviation |
|
76 |
- alphaParam.s *= mAnnealingTemp / 2.0; |
|
77 |
- alphaParam.su *= mAnnealingTemp / 2.0; |
|
78 |
- float lambda = change.label == 'A' ? mADomain.lambda() : mPDomain.lambda(); |
|
79 |
- float mean = (2.0 * alphaParam.su - lambda) / (2.0 * alphaParam.s); |
|
80 |
- float sd = 1.0 / std::sqrt(2.0 * alphaParam.s); |
|
81 |
- |
|
82 |
- // note: is bounded below by zero so have to use inverse sampling! |
|
83 |
- // based upon algorithm in DistScalarRmath.cc (scalarRandomSample) |
|
84 |
- float plower = gaps::random::p_norm(0.f, mean, sd); |
|
85 |
- |
|
86 |
- // if the likelihood is flat and nonzero, sample strictly from the prior |
|
87 |
- float newMass = 0.f; |
|
88 |
- if (plower == 1.f || alphaParam.s < 0.00001f) |
|
89 |
- { |
|
90 |
- newMass = death ? std::abs(change.delta1) : 0.f; |
|
91 |
- } |
|
92 |
- else if (plower >= 0.99f) // what is this? |
|
93 |
- { |
|
94 |
- float tmp1 = gaps::random::d_norm(0.f, mean, sd); |
|
95 |
- float tmp2 = gaps::random::d_norm(10.f * lambda, mean, sd); |
|
96 |
- if (tmp1 > EPSILON && std::abs(tmp1 - tmp2) < EPSILON) |
|
97 |
- { |
|
98 |
- return death ? 0.0 : change.delta1; |
|
99 |
- } |
|
100 |
- } |
|
101 |
- else |
|
102 |
- { |
|
103 |
- newMass = gaps::random::inverseNormSample(plower, 1.f, mean, sd); |
|
104 |
- } |
|
105 |
- |
|
106 |
- newMass = (change.label == 'A' ? std::min(newMass, mMaxGibbsMassA) |
|
107 |
- : std::min(newMass, mMaxGibbsMassP)); |
|
108 |
- |
|
109 |
- return std::max(newMass, 0.f); |
|
110 |
-} |
|
111 |
- |
|
112 |
-float GibbsSampler::computeDeltaLL(const MatrixChange &change) |
|
113 |
-{ |
|
114 |
- return gaps::algo::deltaLL(change, mDMatrix, mSMatrix, mAMatrix, |
|
115 |
- mPMatrix, mAPMatrix); |
|
116 |
-} |
|
117 |
- |
|
118 |
-void GibbsSampler::update(char matrixLabel, unsigned nUpdates) |
|
119 |
-{ |
|
120 |
- AtomicSupport &domain(matrixLabel == 'A' ? mADomain : mPDomain); |
|
121 |
- for (unsigned i = 0; i < nUpdates; ++i) |
|
122 |
- { |
|
123 |
- assert(nUpdates - i - domain.size() >= 0); |
|
124 |
- domain.populateQueue(nUpdates - i - domain.size()); |
|
125 |
- AtomicProposal proposal = domain.popQueue(); |
|
126 |
- switch (proposal.type) |
|
127 |
- { |
|
128 |
- case 'D': death(domain, proposal); break; |
|
129 |
- case 'B': birth(domain, proposal); break; |
|
130 |
- case 'M': move(domain, proposal); break; |
|
131 |
- case 'E': exchange(domain, proposal); break; |
|
132 |
- } |
|
133 |
- } |
|
134 |
-} |
|
135 |
- |
|
136 |
-uint64_t GibbsSampler::totalNumAtoms(char matrixLabel) const |
|
137 |
-{ |
|
138 |
- return matrixLabel == 'A' ? mADomain.numAtoms() : mPDomain.numAtoms(); |
|
139 |
-} |
|
140 |
- |
|
141 |
-float GibbsSampler::chi2() const |
|
142 |
-{ |
|
143 |
- return 2.f * gaps::algo::loglikelihood(mDMatrix, mSMatrix, mAPMatrix); |
|
144 |
-} |
|
145 |
- |
|
146 |
-void GibbsSampler::setAnnealingTemp(float temp) |
|
147 |
-{ |
|
148 |
- mAnnealingTemp = temp; |
|
149 |
-} |
|
150 |
- |
|
151 |
-void GibbsSampler::evaluateChange(AtomicSupport &domain, |
|
152 |
-const AtomicProposal &proposal, MatrixChange &change, float threshold, |
|
153 |
-bool accept) |
|
154 |
-{ |
|
155 |
- float delLL = accept ? 0.f : computeDeltaLL(change); |
|
156 |
- if (accept || delLL * mAnnealingTemp >= threshold) |
|
157 |
- { |
|
158 |
- change = domain.acceptProposal(proposal, change); |
|
159 |
- change.label == 'A' ? mAMatrix.update(change) : mPMatrix.update(change); |
|
160 |
- updateAPMatrix(change); |
|
161 |
- } |
|
162 |
-} |
|
163 |
- |
|
164 |
-// simd? |
|
165 |
-void GibbsSampler::updateAPMatrix_A(unsigned row, unsigned col, float delta) |
|
166 |
-{ |
|
167 |
- const Vector &APvec = mAPMatrix.getRow(row); |
|
168 |
- const Vector &Pvec = mPMatrix.getRow(col); |
|
169 |
- for (unsigned j = 0; j < mAPMatrix.nCol(); ++j) |
|
170 |
- { |
|
171 |
- mAPMatrix.set(row, j, APvec[j] + delta * Pvec[j]); |
|
172 |
- } |
|
173 |
-} |
|
174 |
- |
|
175 |
-// simd? |
|
176 |
-void GibbsSampler::updateAPMatrix_P(unsigned row, unsigned col, float delta) |
|
177 |
-{ |
|
178 |
- const Vector &APvec = mAPMatrix.getCol(col); |
|
179 |
- const Vector &Avec = mAMatrix.getCol(row); |
|
180 |
- for (unsigned i = 0; i < mAPMatrix.nRow(); ++i) |
|
181 |
- { |
|
182 |
- mAPMatrix.set(i, col, APvec[i] + delta * Avec[i]); |
|
183 |
- } |
|
184 |
-} |
|
185 |
- |
|
186 |
-void GibbsSampler::updateAPMatrix(const MatrixChange &change) |
|
187 |
-{ |
|
188 |
- if (change.label == 'A') |
|
189 |
- { |
|
190 |
- updateAPMatrix_A(change.row1, change.col1, change.delta1); |
|
191 |
- if (change.nChanges > 1) |
|
192 |
- { |
|
193 |
- updateAPMatrix_A(change.row2, change.col2, change.delta2); |
|
194 |
- } |
|
195 |
- } |
|
196 |
- else |
|
197 |
- { |
|
198 |
- updateAPMatrix_P(change.row1, change.col1, change.delta1); |
|
199 |
- if (change.nChanges > 1) |
|
200 |
- { |
|
201 |
- updateAPMatrix_P(change.row2, change.col2, change.delta2); |
|
202 |
- } |
|
203 |
- } |
|
204 |
-} |
|
205 |
- |
|
206 |
-bool GibbsSampler::canUseGibbs(const MatrixChange &ch) |
|
207 |
-{ |
|
208 |
- bool check1 = (ch.label == 'A' && gaps::algo::isRowZero(mPMatrix, ch.col1)) |
|
209 |
- || (ch.label == 'P' && gaps::algo::isColZero(mAMatrix, ch.row1)); |
|
210 |
- |
|
211 |
- if (ch.nChanges > 1) |
|
212 |
- { |
|
213 |
- bool check2 = (ch.label == 'A' && gaps::algo::isRowZero(mPMatrix, ch.col2)) |
|
214 |
- || (ch.label == 'P' && gaps::algo::isColZero(mAMatrix, ch.row2)); |
|
215 |
- return !(check1 && check2); |
|
216 |
- } |
|
217 |
- return !check1; |
|
218 |
-} |
|
219 |
- |
|
220 |
-// accept automatically, try to rebirth |
|
221 |
-// TODO consolidate to single proposal |
|
222 |
-void GibbsSampler::death(AtomicSupport &domain, AtomicProposal &prop) |
|
223 |
-{ |
|
224 |
- // automaticallly accept death |
|
225 |
- MatrixChange change(prop.label, domain.getRow(prop.pos1), |
|
226 |
- domain.getCol(prop.pos1), prop.delta1); |
|
227 |
- evaluateChange(domain, prop, change, 0.f, true); |
|
228 |
- |
|
229 |
- // rebirth, label as birth |
|
230 |
- float newMass = canUseGibbs(change) ? getGibbsMass(change) : 0.f; |
|
231 |
- prop.delta1 = newMass < EPSILON ? -prop.delta1 : newMass; |
|
232 |
- change.delta1 = prop.delta1; |
|
233 |
- |
|
234 |
- // attempt to accept rebirth |
|
235 |
- evaluateChange(domain, prop, change, std::log(gaps::random::uniform())); |
|
236 |
-} |
|
237 |
- |
|
238 |
-void GibbsSampler::birth(AtomicSupport &domain, AtomicProposal &prop) |
|
239 |
-{ |
|
240 |
- // attempt gibbs |
|
241 |
- MatrixChange change(prop.label, domain.getRow(prop.pos1), |
|
242 |
- domain.getCol(prop.pos1), prop.delta1); |
|
243 |
- prop.delta1 = canUseGibbs(change) ? getGibbsMass(change) : prop.delta1; |
|
244 |
- change.delta1 = prop.delta1; |
|
245 |
- |
|
246 |
- // accept birth |
|
247 |
- evaluateChange(domain, prop, change, 0.f, true); |
|
248 |
-} |
|
249 |
- |
|
250 |
-void GibbsSampler::move(AtomicSupport &domain, AtomicProposal &prop) |
|
251 |
-{ |
|
252 |
- MatrixChange change(prop.label, domain.getRow(prop.pos1), |
|
253 |
- domain.getCol(prop.pos1), prop.delta1, domain.getRow(prop.pos2), |
|
254 |
- domain.getCol(prop.pos2), prop.delta2); |
|
255 |
- if (change.row1 != change.row2 || change.col1 != change.col2) |
|
256 |
- { |
|
257 |
- evaluateChange(domain, prop, change, std::log(gaps::random::uniform())); |
|
258 |
- } |
|
259 |
-} |
|
260 |
- |
|
261 |
-void GibbsSampler::exchange(AtomicSupport &domain, AtomicProposal &prop) |
|
262 |
-{ |
|
263 |
- MatrixChange change(prop.label, domain.getRow(prop.pos1), |
|
264 |
- domain.getCol(prop.pos1), prop.delta1, domain.getRow(prop.pos2), |
|
265 |
- domain.getCol(prop.pos2), prop.delta2); |
|
266 |
- if (change.row1 == change.row2 && change.col1 == change.col2) |
|
267 |
- { |
|
268 |
- return; |
|
269 |
- } |
|
270 |
- |
|
271 |
- float mass1 = domain.at(prop.pos1); |
|
272 |
- float mass2 = domain.at(prop.pos2); |
|
273 |
- float newMass1 = mass1 + prop.delta1; |
|
274 |
- float newMass2 = mass2 + prop.delta2; |
|
275 |
- |
|
276 |
- if (canUseGibbs(change)) |
|
277 |
- { |
|
278 |
- AlphaParameters alphaParam = gaps::algo::alphaParameters(change, |
|
279 |
- mDMatrix, mSMatrix, mAMatrix, mPMatrix, mAPMatrix); |
|
280 |
- alphaParam.s *= mAnnealingTemp; |
|
281 |
- alphaParam.su *= mAnnealingTemp; |
|
282 |
- |
|
283 |
- if (alphaParam.s > EPSILON) |
|
284 |
- { |
|
285 |
- float mean = alphaParam.su / alphaParam.s; |
|
286 |
- float sd = 1.f / std::sqrt(alphaParam.s); |
|
287 |
- float plower = gaps::random::p_norm(-mass1, mean, sd); |
|
288 |
- float pupper = gaps::random::p_norm(mass2, mean, sd); |
|
289 |
- |
|
290 |
- if (!(plower > 0.95f || pupper < 0.05f)) |
|
291 |
- { |
|
292 |
- float u = gaps::random::uniform(plower, pupper); |
|
293 |
- prop.delta1 = gaps::random::q_norm(u, mean, sd); |
|
294 |
- prop.delta1 = std::max(prop.delta1, -mass1); |
|
295 |
- prop.delta1 = std::min(prop.delta1, mass2); |
|
296 |
- prop.delta2 = -prop.delta1; |
|
297 |