... | ... |
@@ -82,7 +82,6 @@ uint32_t GapsRng::uniform32(uint32_t a, uint32_t b) |
82 | 82 |
{ |
83 | 83 |
return a; |
84 | 84 |
} |
85 |
- |
|
86 | 85 |
uint32_t range = b + 1 - a; |
87 | 86 |
uint32_t x = uniform32(); |
88 | 87 |
uint32_t iPart = std::numeric_limits<uint32_t>::max() / range; |
... | ... |
@@ -90,7 +89,10 @@ uint32_t GapsRng::uniform32(uint32_t a, uint32_t b) |
90 | 89 |
{ |
91 | 90 |
x = uniform32(); |
92 | 91 |
} |
93 |
- return x / iPart + a; |
|
92 |
+ uint32_t unif = x / iPart + a; |
|
93 |
+ GAPS_ASSERT(unif >= a); |
|
94 |
+ GAPS_ASSERT(unif <= b); |
|
95 |
+ return unif; |
|
94 | 96 |
} |
95 | 97 |
|
96 | 98 |
uint64_t GapsRng::uniform64() |
... | ... |
@@ -107,7 +109,6 @@ uint64_t GapsRng::uniform64(uint64_t a, uint64_t b) |
107 | 109 |
{ |
108 | 110 |
return a; |
109 | 111 |
} |
110 |
- |
|
111 | 112 |
uint64_t range = b + 1 - a; |
112 | 113 |
uint64_t x = uniform64(); |
113 | 114 |
uint64_t iPart = std::numeric_limits<uint64_t>::max() / range; |
... | ... |
@@ -115,7 +116,10 @@ uint64_t GapsRng::uniform64(uint64_t a, uint64_t b) |
115 | 116 |
{ |
116 | 117 |
x = uniform64(); |
117 | 118 |
} |
118 |
- return x / iPart + a; |
|
119 |
+ uint64_t unif = x / iPart + a; |
|
120 |
+ GAPS_ASSERT(unif >= a); |
|
121 |
+ GAPS_ASSERT(unif <= b); |
|
122 |
+ return unif; |
|
119 | 123 |
} |
120 | 124 |
|
121 | 125 |
int GapsRng::poisson(double lambda) |
... | ... |
@@ -144,7 +148,6 @@ int GapsRng::poissonLarge(double lambda) |
144 | 148 |
double beta = gaps::pi_double / sqrt(3.0 * lambda); |
145 | 149 |
double alpha = beta * lambda; |
146 | 150 |
double k = std::log(c) - lambda - std::log(beta); |
147 |
- |
|
148 | 151 |
while(true) |
149 | 152 |
{ |
150 | 153 |
double u = uniformd(); |
... | ... |
@@ -154,7 +157,6 @@ int GapsRng::poissonLarge(double lambda) |
154 | 157 |
{ |
155 | 158 |
continue; |
156 | 159 |
} |
157 |
- |
|
158 | 160 |
double v = uniformd(); |
159 | 161 |
double y = alpha - beta * x; |
160 | 162 |
double w = 1.0 + std::exp(y); |
... | ... |
@@ -1,11 +1,12 @@ |
1 | 1 |
#include "Random.h" |
2 | 2 |
#include "Math.h" |
3 |
+#include "../utils/Archive.h" |
|
3 | 4 |
#include "../utils/GapsAssert.h" |
4 | 5 |
|
5 | 6 |
#include <algorithm> |
6 | 7 |
#include <stdint.h> |
7 |
-#include <limits> |
|
8 | 8 |
#include <cmath> |
9 |
+#include <limits> |
|
9 | 10 |
|
10 | 11 |
const float maxU32AsFloat = static_cast<float>(std::numeric_limits<uint32_t>::max()); |
11 | 12 |
const double maxU32AsDouble = static_cast<double>(std::numeric_limits<uint32_t>::max()); |
... | ... |
@@ -180,7 +181,6 @@ OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
180 | 181 |
{ |
181 | 182 |
GAPS_ASSERT(pLower > 0.f); |
182 | 183 |
GAPS_ASSERT(pUpper < 1.f); |
183 |
- |
|
184 | 184 |
float z = mRandState->q_norm_fast(uniform(pLower, pUpper), mean, sd); |
185 | 185 |
z = gaps::max(a, gaps::min(z, b)); |
186 | 186 |
return z; |
... | ... |
@@ -192,7 +192,8 @@ OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
192 | 192 |
float GapsRng::truncGammaUpper(float b, float scale) |
193 | 193 |
{ |
194 | 194 |
float upper = 1.f - std::exp(-b / scale) * (1.f + b / scale); |
195 |
- unsigned ndx = static_cast<unsigned>(uniform(0.f, upper * 5000.f)); |
|
195 |
+ const unsigned ndx = static_cast<unsigned>(uniform(0.f, upper * 5000.f)); |
|
196 |
+ GAPS_ASSERT(ndx < Q_GAMMA_LOOKUP_TABLE_SIZE); |
|
196 | 197 |
return mRandState->mQgammaLookupTable[ndx] * scale; |
197 | 198 |
} |
198 | 199 |
|
... | ... |
@@ -219,9 +220,7 @@ Xoroshiro128plus::Xoroshiro128plus(uint64_t seed) |
219 | 220 |
{ |
220 | 221 |
mState[0] = seed|1; |
221 | 222 |
mState[1] = seed|1; |
222 |
- |
|
223 |
- // warmup |
|
224 |
- for (unsigned i = 0; i < 5000; ++i) |
|
223 |
+ for (unsigned i = 0; i < 5000; ++i) // warmup |
|
225 | 224 |
{ |
226 | 225 |
next(); |
227 | 226 |
} |
... | ... |
@@ -231,7 +230,6 @@ uint64_t Xoroshiro128plus::next() |
231 | 230 |
{ |
232 | 231 |
mPreviousState[0] = mState[0]; |
233 | 232 |
mPreviousState[1] = mState[1]; |
234 |
- |
|
235 | 233 |
const uint64_t s0 = mState[0]; |
236 | 234 |
uint64_t s1 = mState[1]; |
237 | 235 |
uint64_t result = s0 + s1; |
... | ... |
@@ -269,29 +267,29 @@ GapsRandomState::GapsRandomState(unsigned seed) : mSeeder(seed) |
269 | 267 |
void GapsRandomState::initLookupTables() |
270 | 268 |
{ |
271 | 269 |
// erf |
272 |
- for (unsigned i = 0; i < 3001; ++i) |
|
270 |
+ for (unsigned i = 0; i < ERF_LOOKUP_TABLE_SIZE; ++i) |
|
273 | 271 |
{ |
274 | 272 |
float x = static_cast<float>(i) / 1000.f; |
275 | 273 |
mErfLookupTable[i] = 2.f * gaps::p_norm(x * gaps::sqrt2, 0.f, 1.f) - 1.f; |
276 | 274 |
} |
277 |
- GAPS_ASSERT(mErfLookupTable[3000] < 1.f); |
|
275 |
+ GAPS_ASSERT(mErfLookupTable[ERF_LOOKUP_TABLE_SIZE - 1] < 1.f); |
|
278 | 276 |
|
279 | 277 |
// erfinv |
280 |
- for (unsigned i = 0; i < 5000; ++i) |
|
278 |
+ for (unsigned i = 0; i < ERF_INV_LOOKUP_TABLE_SIZE - 1; ++i) |
|
281 | 279 |
{ |
282 |
- float x = static_cast<float>(i) / 5000.f; |
|
280 |
+ float x = static_cast<float>(i) / static_cast<float>(ERF_INV_LOOKUP_TABLE_SIZE - 1); |
|
283 | 281 |
mErfinvLookupTable[i] = gaps::q_norm((1.f + x) / 2.f, 0.f, 1.f) / gaps::sqrt2; |
284 | 282 |
} |
285 |
- mErfinvLookupTable[5000] = gaps::q_norm(1.9998f / 2.f, 0.f, 1.f) / gaps::sqrt2; |
|
283 |
+ mErfinvLookupTable[ERF_INV_LOOKUP_TABLE_SIZE - 1] = gaps::q_norm(1.9998f / 2.f, 0.f, 1.f) / gaps::sqrt2; |
|
286 | 284 |
|
287 | 285 |
// qgamma |
288 | 286 |
mQgammaLookupTable[0] = 0.f; |
289 |
- for (unsigned i = 1; i < 5000; ++i) |
|
287 |
+ for (unsigned i = 1; i < Q_GAMMA_LOOKUP_TABLE_SIZE - 1; ++i) |
|
290 | 288 |
{ |
291 |
- float x = static_cast<float>(i) / 5000.f; |
|
289 |
+ float x = static_cast<float>(i) / static_cast<float>(Q_GAMMA_LOOKUP_TABLE_SIZE - 1); |
|
292 | 290 |
mQgammaLookupTable[i] = gaps::q_gamma(x, 2.f, 1.f); |
293 | 291 |
} |
294 |
- mQgammaLookupTable[5000] = gaps::q_gamma(0.9998f, 2.f, 1.f); |
|
292 |
+ mQgammaLookupTable[Q_GAMMA_LOOKUP_TABLE_SIZE - 1] = gaps::q_gamma(0.9998f, 2.f, 1.f); |
|
295 | 293 |
} |
296 | 294 |
|
297 | 295 |
uint64_t GapsRandomState::nextSeed() |
... | ... |
@@ -311,12 +309,16 @@ float GapsRandomState::p_norm_fast(float p, float mean, float sd) const |
311 | 309 |
if (term < 0.f) |
312 | 310 |
{ |
313 | 311 |
term = gaps::max(term, -3.f); |
314 |
- erf = -mErfLookupTable[static_cast<unsigned>(-term * 1000.f)]; |
|
312 |
+ const unsigned ndx = static_cast<unsigned>(-term * 1000.f); |
|
313 |
+ GAPS_ASSERT(ndx < ERF_LOOKUP_TABLE_SIZE); |
|
314 |
+ erf = -mErfLookupTable[ndx]; |
|
315 | 315 |
} |
316 | 316 |
else |
317 | 317 |
{ |
318 | 318 |
term = gaps::min(term, 3.f); |
319 |
- erf = mErfLookupTable[static_cast<unsigned>(term * 1000.f)]; |
|
319 |
+ const unsigned ndx = static_cast<unsigned>(term * 1000.f); |
|
320 |
+ GAPS_ASSERT(ndx < ERF_LOOKUP_TABLE_SIZE); |
|
321 |
+ erf = mErfLookupTable[ndx]; |
|
320 | 322 |
} |
321 | 323 |
return 0.5f * (1.f + erf); |
322 | 324 |
} |
... | ... |
@@ -327,11 +329,15 @@ float GapsRandomState::q_norm_fast(float q, float mean, float sd) const |
327 | 329 |
float erfinv = 0.f; |
328 | 330 |
if (term < 0.f) |
329 | 331 |
{ |
330 |
- erfinv = -mErfinvLookupTable[static_cast<unsigned>(-term * 5000.f)]; |
|
332 |
+ const unsigned ndx = static_cast<unsigned>(-term * (ERF_INV_LOOKUP_TABLE_SIZE - 1)); |
|
333 |
+ GAPS_ASSERT(ndx < ERF_INV_LOOKUP_TABLE_SIZE); |
|
334 |
+ erfinv = -mErfinvLookupTable[ndx]; |
|
331 | 335 |
} |
332 | 336 |
else |
333 | 337 |
{ |
334 |
- erfinv = mErfinvLookupTable[static_cast<unsigned>(term * 5000.f)]; |
|
338 |
+ const unsigned ndx = static_cast<unsigned>(term * (ERF_INV_LOOKUP_TABLE_SIZE - 1)); |
|
339 |
+ GAPS_ASSERT(ndx < ERF_INV_LOOKUP_TABLE_SIZE); |
|
340 |
+ erfinv = mErfinvLookupTable[ndx]; |
|
335 | 341 |
} |
336 | 342 |
return mean + sd * gaps::sqrt2 * erfinv; |
337 | 343 |
} |
... | ... |
@@ -4,6 +4,8 @@ |
4 | 4 |
|
5 | 5 |
#include <algorithm> |
6 | 6 |
#include <stdint.h> |
7 |
+#include <limits> |
|
8 |
+#include <cmath> |
|
7 | 9 |
|
8 | 10 |
const float maxU32AsFloat = static_cast<float>(std::numeric_limits<uint32_t>::max()); |
9 | 11 |
const double maxU32AsDouble = static_cast<double>(std::numeric_limits<uint32_t>::max()); |
... | ... |
@@ -1,61 +1,13 @@ |
1 |
-// [[Rcpp::depends(BH)]] |
|
2 |
- |
|
3 | 1 |
#include "Random.h" |
4 |
- |
|
5 | 2 |
#include "Math.h" |
6 | 3 |
#include "../utils/GapsAssert.h" |
7 | 4 |
|
8 |
-#include <boost/math/distributions/exponential.hpp> |
|
9 |
-#include <boost/math/distributions/gamma.hpp> |
|
10 |
-#include <boost/math/distributions/normal.hpp> |
|
11 |
- |
|
12 | 5 |
#include <algorithm> |
13 | 6 |
#include <stdint.h> |
14 | 7 |
|
15 |
-#define Q_GAMMA_THRESHOLD 0.000001f |
|
16 |
-#define Q_GAMMA_MIN_VALUE 0.f |
|
17 |
- |
|
18 | 8 |
const float maxU32AsFloat = static_cast<float>(std::numeric_limits<uint32_t>::max()); |
19 | 9 |
const double maxU32AsDouble = static_cast<double>(std::numeric_limits<uint32_t>::max()); |
20 | 10 |
|
21 |
-static Xoroshiro128plus seeder; |
|
22 |
- |
|
23 |
-////////////////////////////// Lookup Tables /////////////////////////////////// |
|
24 |
- |
|
25 |
-static bool tables_are_initialized = false; |
|
26 |
-static float erf_lookup_table[3001]; |
|
27 |
-static float erfinv_lookup_table[5001]; |
|
28 |
-static float qgamma_lookup_table[5001]; |
|
29 |
- |
|
30 |
-static void initLookupTables() |
|
31 |
-{ |
|
32 |
- // erf |
|
33 |
- for (unsigned i = 0; i < 3001; ++i) |
|
34 |
- { |
|
35 |
- float x = static_cast<float>(i) / 1000.f; |
|
36 |
- erf_lookup_table[i] = 2.f * gaps::p_norm(x * gaps::sqrt2, 0.f, 1.f) - 1.f; |
|
37 |
- } |
|
38 |
- |
|
39 |
- // erfinv |
|
40 |
- for (unsigned i = 0; i < 5000; ++i) |
|
41 |
- { |
|
42 |
- float x = static_cast<float>(i) / 5000.f; |
|
43 |
- erfinv_lookup_table[i] = gaps::q_norm((1.f + x) / 2.f, 0.f, 1.f) / gaps::sqrt2; |
|
44 |
- } |
|
45 |
- erfinv_lookup_table[5000] = gaps::q_norm(1.9998f / 2.f, 0.f, 1.f) / gaps::sqrt2; |
|
46 |
- |
|
47 |
- // qgamma |
|
48 |
- qgamma_lookup_table[0] = 0.f; |
|
49 |
- for (unsigned i = 1; i < 5000; ++i) |
|
50 |
- { |
|
51 |
- float x = static_cast<float>(i) / 5000.f; |
|
52 |
- qgamma_lookup_table[i] = gaps::q_gamma(x, 2.f, 1.f); |
|
53 |
- } |
|
54 |
- qgamma_lookup_table[5000] = gaps::q_gamma(0.9998f, 2.f, 1.f); |
|
55 |
- |
|
56 |
- GAPS_ASSERT(erf_lookup_table[3000] < 1.f); |
|
57 |
-} |
|
58 |
- |
|
59 | 11 |
/////////////////////////////// OptionalFloat ////////////////////////////////// |
60 | 12 |
|
61 | 13 |
OptionalFloat::OptionalFloat() : mValue(0.f), mHasValue(false) {} |
... | ... |
@@ -72,92 +24,14 @@ bool OptionalFloat::hasValue() const |
72 | 24 |
return mHasValue; |
73 | 25 |
} |
74 | 26 |
|
75 |
-///////////////////////////// Xoroshiro128plus ///////////////////////////////// |
|
76 |
- |
|
77 |
-static uint64_t rotl(const uint64_t x, int k) |
|
78 |
-{ |
|
79 |
- return (x << k) | (x >> (64 - k)); |
|
80 |
-} |
|
81 |
- |
|
82 |
-void Xoroshiro128plus::seed(uint64_t seed) |
|
83 |
-{ |
|
84 |
- mState[0] = seed|1; |
|
85 |
- mState[1] = seed|1; |
|
86 |
- warmup(); |
|
87 |
-} |
|
88 |
- |
|
89 |
-uint64_t Xoroshiro128plus::next() |
|
90 |
-{ |
|
91 |
- mPreviousState[0] = mState[0]; |
|
92 |
- mPreviousState[1] = mState[1]; |
|
93 |
- |
|
94 |
- const uint64_t s0 = mState[0]; |
|
95 |
- uint64_t s1 = mState[1]; |
|
96 |
- uint64_t result = s0 + s1; |
|
97 |
- s1 ^= s0; |
|
98 |
- mState[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b |
|
99 |
- mState[1] = rotl(s1, 37); // c |
|
100 |
- return result; |
|
101 |
-} |
|
102 |
- |
|
103 |
-void Xoroshiro128plus::rollBackOnce() |
|
104 |
-{ |
|
105 |
- mState[0] = mPreviousState[0]; |
|
106 |
- mState[1] = mPreviousState[1]; |
|
107 |
-} |
|
108 |
- |
|
109 |
-void Xoroshiro128plus::warmup() |
|
110 |
-{ |
|
111 |
- for (unsigned i = 0; i < 5000; ++i) |
|
112 |
- { |
|
113 |
- next(); |
|
114 |
- } |
|
115 |
-} |
|
116 |
- |
|
117 |
-Archive& operator<<(Archive &ar, Xoroshiro128plus &gen) |
|
118 |
-{ |
|
119 |
- ar << gen.mState[0] << gen.mState[1]; |
|
120 |
- return ar; |
|
121 |
-} |
|
122 |
- |
|
123 |
-Archive& operator>>(Archive &ar, Xoroshiro128plus &gen) |
|
124 |
-{ |
|
125 |
- ar >> gen.mState[0] >> gen.mState[1]; |
|
126 |
- return ar; |
|
127 |
-} |
|
128 |
- |
|
129 |
-////////////////////////////////// GapsRng ///////////////////////////////////// |
|
130 |
- |
|
131 |
-void GapsRng::setSeed(uint32_t sd) |
|
132 |
-{ |
|
133 |
- GAPS_ASSERT(!tables_are_initialized); |
|
134 |
- initLookupTables(); |
|
135 |
- tables_are_initialized = true; |
|
136 |
- seeder.seed(sd); |
|
137 |
-} |
|
138 |
- |
|
139 |
-Archive& GapsRng::save(Archive &ar) |
|
140 |
-{ |
|
141 |
- ar << seeder; |
|
142 |
- return ar; |
|
143 |
-} |
|
27 |
+//////////////////////////////// GapsRng /////////////////////////////////////// |
|
144 | 28 |
|
145 |
-Archive& GapsRng::load(Archive &ar) |
|
29 |
+GapsRng::GapsRng(GapsRandomState *randState) |
|
30 |
+: |
|
31 |
+mState(randState->nextSeed()), |
|
32 |
+mRandState(randState) |
|
146 | 33 |
{ |
147 |
- initLookupTables(); |
|
148 |
- ar >> seeder; |
|
149 |
- return ar; |
|
150 |
-} |
|
151 |
- |
|
152 |
-void GapsRng::rollBackOnce() |
|
153 |
-{ |
|
154 |
- seeder.rollBackOnce(); |
|
155 |
-} |
|
156 |
- |
|
157 |
-GapsRng::GapsRng() |
|
158 |
- : mState(seeder.next()) |
|
159 |
-{ |
|
160 |
- next(); |
|
34 |
+ advance(); |
|
161 | 35 |
} |
162 | 36 |
|
163 | 37 |
uint32_t GapsRng::next() |
... | ... |
@@ -299,14 +173,14 @@ float GapsRng::exponential(float lambda) |
299 | 173 |
// fails if too far in tail |
300 | 174 |
OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
301 | 175 |
{ |
302 |
- float pLower = gaps::p_norm_fast(a, mean, sd); |
|
303 |
- float pUpper = gaps::p_norm_fast(b, mean, sd); |
|
176 |
+ float pLower = mRandState->p_norm_fast(a, mean, sd); |
|
177 |
+ float pUpper = mRandState->p_norm_fast(b, mean, sd); |
|
304 | 178 |
if (!(pLower > 0.95f || pUpper < 0.05f)) // too far in tail |
305 | 179 |
{ |
306 | 180 |
GAPS_ASSERT(pLower > 0.f); |
307 | 181 |
GAPS_ASSERT(pUpper < 1.f); |
308 | 182 |
|
309 |
- float z = gaps::q_norm_fast(uniform(pLower, pUpper), mean, sd); |
|
183 |
+ float z = mRandState->q_norm_fast(uniform(pLower, pUpper), mean, sd); |
|
310 | 184 |
z = gaps::max(a, gaps::min(z, b)); |
311 | 185 |
return z; |
312 | 186 |
} |
... | ... |
@@ -318,10 +192,10 @@ float GapsRng::truncGammaUpper(float b, float scale) |
318 | 192 |
{ |
319 | 193 |
float upper = 1.f - std::exp(-b / scale) * (1.f + b / scale); |
320 | 194 |
unsigned ndx = static_cast<unsigned>(uniform(0.f, upper * 5000.f)); |
321 |
- return qgamma_lookup_table[ndx] * scale; |
|
195 |
+ return mRandState->mQgammaLookupTable[ndx] * scale; |
|
322 | 196 |
} |
323 | 197 |
|
324 |
-Archive& operator<<(Archive &ar, GapsRng &gen) |
|
198 |
+Archive& operator<<(Archive &ar, const GapsRng &gen) |
|
325 | 199 |
{ |
326 | 200 |
ar << gen.mState; |
327 | 201 |
return ar; |
... | ... |
@@ -333,81 +207,142 @@ Archive& operator>>(Archive &ar, GapsRng &gen) |
333 | 207 |
return ar; |
334 | 208 |
} |
335 | 209 |
|
336 |
-////////////////////////// Distribution Calculations /////////////////////////// |
|
210 |
+///////////////////////////// Xoroshiro128plus ///////////////////////////////// |
|
337 | 211 |
|
338 |
-float gaps::d_gamma(float d, float shape, float scale) |
|
212 |
+static uint64_t rotl(const uint64_t x, int k) |
|
339 | 213 |
{ |
340 |
- boost::math::gamma_distribution<> gam(shape, scale); |
|
341 |
- return pdf(gam, d); |
|
214 |
+ return (x << k) | (x >> (64 - k)); |
|
342 | 215 |
} |
343 | 216 |
|
344 |
-float gaps::p_gamma(float p, float shape, float scale) |
|
217 |
+Xoroshiro128plus::Xoroshiro128plus(uint64_t seed) |
|
345 | 218 |
{ |
346 |
- boost::math::gamma_distribution<> gam(shape, scale); |
|
347 |
- return cdf(gam, p); |
|
348 |
-} |
|
219 |
+ mState[0] = seed|1; |
|
220 |
+ mState[1] = seed|1; |
|
349 | 221 |
|
350 |
-float gaps::q_gamma(float q, float shape, float scale) |
|
351 |
-{ |
|
352 |
- if (q < Q_GAMMA_THRESHOLD) |
|
222 |
+ // warmup |
|
223 |
+ for (unsigned i = 0; i < 5000; ++i) |
|
353 | 224 |
{ |
354 |
- return Q_GAMMA_MIN_VALUE; |
|
225 |
+ next(); |
|
355 | 226 |
} |
356 |
- boost::math::gamma_distribution<> gam(shape, scale); |
|
357 |
- return quantile(gam, q); |
|
358 | 227 |
} |
359 | 228 |
|
360 |
-float gaps::d_norm(float d, float mean, float sd) |
|
229 |
+uint64_t Xoroshiro128plus::next() |
|
361 | 230 |
{ |
362 |
- boost::math::normal_distribution<> norm(mean, sd); |
|
363 |
- return pdf(norm, d); |
|
231 |
+ mPreviousState[0] = mState[0]; |
|
232 |
+ mPreviousState[1] = mState[1]; |
|
233 |
+ |
|
234 |
+ const uint64_t s0 = mState[0]; |
|
235 |
+ uint64_t s1 = mState[1]; |
|
236 |
+ uint64_t result = s0 + s1; |
|
237 |
+ s1 ^= s0; |
|
238 |
+ mState[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b |
|
239 |
+ mState[1] = rotl(s1, 37); // c |
|
240 |
+ return result; |
|
364 | 241 |
} |
365 | 242 |
|
366 |
-float gaps::q_norm(float q, float mean, float sd) |
|
243 |
+void Xoroshiro128plus::rollBackOnce() |
|
367 | 244 |
{ |
368 |
- boost::math::normal_distribution<> norm(mean, sd); |
|
369 |
- return quantile(norm, q); |
|
245 |
+ mState[0] = mPreviousState[0]; |
|
246 |
+ mState[1] = mPreviousState[1]; |
|
370 | 247 |
} |
371 | 248 |
|
372 |
-float gaps::p_norm(float p, float mean, float sd) |
|
249 |
+Archive& operator<<(Archive &ar, const Xoroshiro128plus &gen) |
|
373 | 250 |
{ |
374 |
- boost::math::normal_distribution<> norm(mean, sd); |
|
375 |
- return cdf(norm, p); |
|
251 |
+ ar << gen.mState[0] << gen.mState[1]; |
|
252 |
+ return ar; |
|
376 | 253 |
} |
377 | 254 |
|
378 |
-float gaps::p_norm_fast(float p, float mean, float sd) |
|
255 |
+Archive& operator>>(Archive &ar, Xoroshiro128plus &gen) |
|
256 |
+{ |
|
257 |
+ ar >> gen.mState[0] >> gen.mState[1]; |
|
258 |
+ return ar; |
|
259 |
+} |
|
260 |
+ |
|
261 |
+///////////////////////////// GapsRandomState ////////////////////////////////// |
|
262 |
+ |
|
263 |
+GapsRandomState::GapsRandomState(unsigned seed) : mSeeder(seed) |
|
264 |
+{ |
|
265 |
+ initLookupTables(); |
|
266 |
+} |
|
267 |
+ |
|
268 |
+void GapsRandomState::initLookupTables() |
|
269 |
+{ |
|
270 |
+ // erf |
|
271 |
+ for (unsigned i = 0; i < 3001; ++i) |
|
272 |
+ { |
|
273 |
+ float x = static_cast<float>(i) / 1000.f; |
|
274 |
+ mErfLookupTable[i] = 2.f * gaps::p_norm(x * gaps::sqrt2, 0.f, 1.f) - 1.f; |
|
275 |
+ } |
|
276 |
+ GAPS_ASSERT(mErfLookupTable[3000] < 1.f); |
|
277 |
+ |
|
278 |
+ // erfinv |
|
279 |
+ for (unsigned i = 0; i < 5000; ++i) |
|
280 |
+ { |
|
281 |
+ float x = static_cast<float>(i) / 5000.f; |
|
282 |
+ mErfinvLookupTable[i] = gaps::q_norm((1.f + x) / 2.f, 0.f, 1.f) / gaps::sqrt2; |
|
283 |
+ } |
|
284 |
+ mErfinvLookupTable[5000] = gaps::q_norm(1.9998f / 2.f, 0.f, 1.f) / gaps::sqrt2; |
|
285 |
+ |
|
286 |
+ // qgamma |
|
287 |
+ mQgammaLookupTable[0] = 0.f; |
|
288 |
+ for (unsigned i = 1; i < 5000; ++i) |
|
289 |
+ { |
|
290 |
+ float x = static_cast<float>(i) / 5000.f; |
|
291 |
+ mQgammaLookupTable[i] = gaps::q_gamma(x, 2.f, 1.f); |
|
292 |
+ } |
|
293 |
+ mQgammaLookupTable[5000] = gaps::q_gamma(0.9998f, 2.f, 1.f); |
|
294 |
+} |
|
295 |
+ |
|
296 |
+uint64_t GapsRandomState::nextSeed() |
|
297 |
+{ |
|
298 |
+ return mSeeder.next(); |
|
299 |
+} |
|
300 |
+ |
|
301 |
+void GapsRandomState::rollBackOnce() |
|
302 |
+{ |
|
303 |
+ mSeeder.rollBackOnce(); |
|
304 |
+} |
|
305 |
+ |
|
306 |
+float GapsRandomState::p_norm_fast(float p, float mean, float sd) const |
|
379 | 307 |
{ |
380 | 308 |
float term = (p - mean) / (sd * gaps::sqrt2); |
381 | 309 |
float erf = 0.f; |
382 | 310 |
if (term < 0.f) |
383 | 311 |
{ |
384 | 312 |
term = gaps::max(term, -3.f); |
385 |
- erf = -erf_lookup_table[static_cast<unsigned>(-term * 1000.f)]; |
|
313 |
+ erf = -mErfLookupTable[static_cast<unsigned>(-term * 1000.f)]; |
|
386 | 314 |
} |
387 | 315 |
else |
388 | 316 |
{ |
389 | 317 |
term = gaps::min(term, 3.f); |
390 |
- erf = erf_lookup_table[static_cast<unsigned>(term * 1000.f)]; |
|
318 |
+ erf = mErfLookupTable[static_cast<unsigned>(term * 1000.f)]; |
|
391 | 319 |
} |
392 | 320 |
return 0.5f * (1.f + erf); |
393 | 321 |
} |
394 | 322 |
|
395 |
-float gaps::q_norm_fast(float q, float mean, float sd) |
|
323 |
+float GapsRandomState::q_norm_fast(float q, float mean, float sd) const |
|
396 | 324 |
{ |
397 | 325 |
float term = 2.f * q - 1.f; |
398 | 326 |
float erfinv = 0.f; |
399 | 327 |
if (term < 0.f) |
400 | 328 |
{ |
401 |
- erfinv = -erfinv_lookup_table[static_cast<unsigned>(-term * 5000.f)]; |
|
329 |
+ erfinv = -mErfinvLookupTable[static_cast<unsigned>(-term * 5000.f)]; |
|
402 | 330 |
} |
403 | 331 |
else |
404 | 332 |
{ |
405 |
- erfinv = erfinv_lookup_table[static_cast<unsigned>(term * 5000.f)]; |
|
333 |
+ erfinv = mErfinvLookupTable[static_cast<unsigned>(term * 5000.f)]; |
|
406 | 334 |
} |
407 | 335 |
return mean + sd * gaps::sqrt2 * erfinv; |
408 | 336 |
} |
409 | 337 |
|
410 |
-double gaps::lgamma(double x) |
|
338 |
+Archive& operator<<(Archive &ar, const GapsRandomState &s) |
|
411 | 339 |
{ |
412 |
- return boost::math::lgamma(x); |
|
340 |
+ ar << s.mSeeder; |
|
341 |
+ return ar; |
|
342 |
+} |
|
343 |
+ |
|
344 |
+Archive& operator>>(Archive &ar, GapsRandomState &s) |
|
345 |
+{ |
|
346 |
+ ar >> s.mSeeder; |
|
347 |
+ return ar; |
|
413 | 348 |
} |
414 | 349 |
\ No newline at end of file |
... | ... |
@@ -22,6 +22,7 @@ static Xoroshiro128plus seeder; |
22 | 22 |
|
23 | 23 |
////////////////////////////// Lookup Tables /////////////////////////////////// |
24 | 24 |
|
25 |
+static bool tables_are_initialized = false; |
|
25 | 26 |
static float erf_lookup_table[3001]; |
26 | 27 |
static float erfinv_lookup_table[5001]; |
27 | 28 |
static float qgamma_lookup_table[5001]; |
... | ... |
@@ -129,7 +130,9 @@ Archive& operator>>(Archive &ar, Xoroshiro128plus &gen) |
129 | 130 |
|
130 | 131 |
void GapsRng::setSeed(uint32_t sd) |
131 | 132 |
{ |
133 |
+ GAPS_ASSERT(!tables_are_initialized); |
|
132 | 134 |
initLookupTables(); |
135 |
+ tables_are_initialized = true; |
|
133 | 136 |
seeder.seed(sd); |
134 | 137 |
} |
135 | 138 |
|
... | ... |
@@ -20,6 +20,41 @@ const double maxU32AsDouble = static_cast<double>(std::numeric_limits<uint32_t>: |
20 | 20 |
|
21 | 21 |
static Xoroshiro128plus seeder; |
22 | 22 |
|
23 |
+////////////////////////////// Lookup Tables /////////////////////////////////// |
|
24 |
+ |
|
25 |
+static float erf_lookup_table[3001]; |
|
26 |
+static float erfinv_lookup_table[5001]; |
|
27 |
+static float qgamma_lookup_table[5001]; |
|
28 |
+ |
|
29 |
+static void initLookupTables() |
|
30 |
+{ |
|
31 |
+ // erf |
|
32 |
+ for (unsigned i = 0; i < 3001; ++i) |
|
33 |
+ { |
|
34 |
+ float x = static_cast<float>(i) / 1000.f; |
|
35 |
+ erf_lookup_table[i] = 2.f * gaps::p_norm(x * gaps::sqrt2, 0.f, 1.f) - 1.f; |
|
36 |
+ } |
|
37 |
+ |
|
38 |
+ // erfinv |
|
39 |
+ for (unsigned i = 0; i < 5000; ++i) |
|
40 |
+ { |
|
41 |
+ float x = static_cast<float>(i) / 5000.f; |
|
42 |
+ erfinv_lookup_table[i] = gaps::q_norm((1.f + x) / 2.f, 0.f, 1.f) / gaps::sqrt2; |
|
43 |
+ } |
|
44 |
+ erfinv_lookup_table[5000] = gaps::q_norm(1.9998f / 2.f, 0.f, 1.f) / gaps::sqrt2; |
|
45 |
+ |
|
46 |
+ // qgamma |
|
47 |
+ qgamma_lookup_table[0] = 0.f; |
|
48 |
+ for (unsigned i = 1; i < 5000; ++i) |
|
49 |
+ { |
|
50 |
+ float x = static_cast<float>(i) / 5000.f; |
|
51 |
+ qgamma_lookup_table[i] = gaps::q_gamma(x, 2.f, 1.f); |
|
52 |
+ } |
|
53 |
+ qgamma_lookup_table[5000] = gaps::q_gamma(0.9998f, 2.f, 1.f); |
|
54 |
+ |
|
55 |
+ GAPS_ASSERT(erf_lookup_table[3000] < 1.f); |
|
56 |
+} |
|
57 |
+ |
|
23 | 58 |
/////////////////////////////// OptionalFloat ////////////////////////////////// |
24 | 59 |
|
25 | 60 |
OptionalFloat::OptionalFloat() : mValue(0.f), mHasValue(false) {} |
... | ... |
@@ -94,6 +129,7 @@ Archive& operator>>(Archive &ar, Xoroshiro128plus &gen) |
94 | 129 |
|
95 | 130 |
void GapsRng::setSeed(uint32_t sd) |
96 | 131 |
{ |
132 |
+ initLookupTables(); |
|
97 | 133 |
seeder.seed(sd); |
98 | 134 |
} |
99 | 135 |
|
... | ... |
@@ -105,6 +141,7 @@ Archive& GapsRng::save(Archive &ar) |
105 | 141 |
|
106 | 142 |
Archive& GapsRng::load(Archive &ar) |
107 | 143 |
{ |
144 |
+ initLookupTables(); |
|
108 | 145 |
ar >> seeder; |
109 | 146 |
return ar; |
110 | 147 |
} |
... | ... |
@@ -204,14 +241,7 @@ uint64_t GapsRng::uniform64(uint64_t a, uint64_t b) |
204 | 241 |
|
205 | 242 |
int GapsRng::poisson(double lambda) |
206 | 243 |
{ |
207 |
- if (lambda <= 5.0) |
|
208 |
- { |
|
209 |
- return poissonSmall(lambda); |
|
210 |
- } |
|
211 |
- else |
|
212 |
- { |
|
213 |
- return poissonLarge(lambda); |
|
214 |
- } |
|
244 |
+ return lambda <= 5.0 ? poissonSmall(lambda) : poissonLarge(lambda); |
|
215 | 245 |
} |
216 | 246 |
|
217 | 247 |
// lambda <= 5 |
... | ... |
@@ -263,33 +293,29 @@ float GapsRng::exponential(float lambda) |
263 | 293 |
return -1.f * std::log(uniform()) / lambda; |
264 | 294 |
} |
265 | 295 |
|
296 |
+// fails if too far in tail |
|
266 | 297 |
OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
267 | 298 |
{ |
268 |
- float pLower = gaps::p_norm(a, mean, sd); |
|
269 |
- float pUpper = gaps::p_norm(b, mean, sd); |
|
299 |
+ float pLower = gaps::p_norm_fast(a, mean, sd); |
|
300 |
+ float pUpper = gaps::p_norm_fast(b, mean, sd); |
|
270 | 301 |
if (!(pLower > 0.95f || pUpper < 0.05f)) // too far in tail |
271 | 302 |
{ |
272 |
- float u = uniform(pLower, pUpper); |
|
273 |
- while (u == 0.f || u == 1.f) |
|
274 |
- { |
|
275 |
- u = uniform(pLower, pUpper); |
|
276 |
- } |
|
277 |
- float m = gaps::q_norm(u, mean, sd); |
|
278 |
- m = gaps::max(a, gaps::min(m, b)); |
|
279 |
- return m; |
|
303 |
+ GAPS_ASSERT(pLower > 0.f); |
|
304 |
+ GAPS_ASSERT(pUpper < 1.f); |
|
305 |
+ |
|
306 |
+ float z = gaps::q_norm_fast(uniform(pLower, pUpper), mean, sd); |
|
307 |
+ z = gaps::max(a, gaps::min(z, b)); |
|
308 |
+ return z; |
|
280 | 309 |
} |
281 | 310 |
return OptionalFloat(); |
282 | 311 |
} |
283 | 312 |
|
284 |
-float GapsRng::truncGammaUpper(float b, float shape, float scale) |
|
313 |
+// shape is hardcoded to 2 since it never changes |
|
314 |
+float GapsRng::truncGammaUpper(float b, float scale) |
|
285 | 315 |
{ |
286 |
- float upper = gaps::p_gamma(b, shape, scale); |
|
287 |
- float u = uniform(0.f, upper); |
|
288 |
- while (u == 0.f || u == 1.f) |
|
289 |
- { |
|
290 |
- u = uniform(0.f, upper); |
|
291 |
- } |
|
292 |
- return gaps::q_gamma(u, shape, scale); |
|
316 |
+ float upper = 1.f - std::exp(-b / scale) * (1.f + b / scale); |
|
317 |
+ unsigned ndx = static_cast<unsigned>(uniform(0.f, upper * 5000.f)); |
|
318 |
+ return qgamma_lookup_table[ndx] * scale; |
|
293 | 319 |
} |
294 | 320 |
|
295 | 321 |
Archive& operator<<(Archive &ar, GapsRng &gen) |
... | ... |
@@ -346,13 +372,39 @@ float gaps::p_norm(float p, float mean, float sd) |
346 | 372 |
return cdf(norm, p); |
347 | 373 |
} |
348 | 374 |
|
349 |
-double gaps::lgamma(double x) |
|
375 |
+float gaps::p_norm_fast(float p, float mean, float sd) |
|
350 | 376 |
{ |
351 |
- return boost::math::lgamma(x); |
|
377 |
+ float term = (p - mean) / (sd * gaps::sqrt2); |
|
378 |
+ float erf = 0.f; |
|
379 |
+ if (term < 0.f) |
|
380 |
+ { |
|
381 |
+ term = gaps::max(term, -3.f); |
|
382 |
+ erf = -erf_lookup_table[static_cast<unsigned>(-term * 1000.f)]; |
|
383 |
+ } |
|
384 |
+ else |
|
385 |
+ { |
|
386 |
+ term = gaps::min(term, 3.f); |
|
387 |
+ erf = erf_lookup_table[static_cast<unsigned>(term * 1000.f)]; |
|
388 |
+ } |
|
389 |
+ return 0.5f * (1.f + erf); |
|
352 | 390 |
} |
353 | 391 |
|
354 |
-float gaps::d_norm_fast(float d, float mean, float sd) |
|
392 |
+float gaps::q_norm_fast(float q, float mean, float sd) |
|
355 | 393 |
{ |
356 |
- return std::exp((d - mean) * (d - mean) / (-2.f * sd * sd)) |
|
357 |
- / std::sqrt(2.f * gaps::pi * sd * sd); |
|
394 |
+ float term = 2.f * q - 1.f; |
|
395 |
+ float erfinv = 0.f; |
|
396 |
+ if (term < 0.f) |
|
397 |
+ { |
|
398 |
+ erfinv = -erfinv_lookup_table[static_cast<unsigned>(-term * 5000.f)]; |
|
399 |
+ } |
|
400 |
+ else |
|
401 |
+ { |
|
402 |
+ erfinv = erfinv_lookup_table[static_cast<unsigned>(term * 5000.f)]; |
|
403 |
+ } |
|
404 |
+ return mean + sd * gaps::sqrt2 * erfinv; |
|
405 |
+} |
|
406 |
+ |
|
407 |
+double gaps::lgamma(double x) |
|
408 |
+{ |
|
409 |
+ return boost::math::lgamma(x); |
|
358 | 410 |
} |
359 | 411 |
\ No newline at end of file |
... | ... |
@@ -263,14 +263,22 @@ float GapsRng::exponential(float lambda) |
263 | 263 |
return -1.f * std::log(uniform()) / lambda; |
264 | 264 |
} |
265 | 265 |
|
266 |
-float GapsRng::inverseNormSample(float a, float b, float mean, float sd) |
|
266 |
+OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
|
267 | 267 |
{ |
268 |
- float u = uniform(a, b); |
|
269 |
- while (u == 0.f || u == 1.f) |
|
268 |
+ float pLower = gaps::p_norm(a, mean, sd); |
|
269 |
+ float pUpper = gaps::p_norm(b, mean, sd); |
|
270 |
+ if (!(pLower > 0.95f || pUpper < 0.05f)) // too far in tail |
|
270 | 271 |
{ |
271 |
- u = uniform(a, b); |
|
272 |
+ float u = uniform(pLower, pUpper); |
|
273 |
+ while (u == 0.f || u == 1.f) |
|
274 |
+ { |
|
275 |
+ u = uniform(pLower, pUpper); |
|
276 |
+ } |
|
277 |
+ float m = gaps::q_norm(u, mean, sd); |
|
278 |
+ m = gaps::max(a, gaps::min(m, b)); |
|
279 |
+ return m; |
|
272 | 280 |
} |
273 |
- return gaps::q_norm(u, mean, sd); |
|
281 |
+ return OptionalFloat(); |
|
274 | 282 |
} |
275 | 283 |
|
276 | 284 |
float GapsRng::truncGammaUpper(float b, float shape, float scale) |
... | ... |
@@ -22,9 +22,9 @@ static Xoroshiro128plus seeder; |
22 | 22 |
|
23 | 23 |
/////////////////////////////// OptionalFloat ////////////////////////////////// |
24 | 24 |
|
25 |
-OptionalFloat::OptionalFloat() : mHasValue(false), mValue(0.f) {} |
|
25 |
+OptionalFloat::OptionalFloat() : mValue(0.f), mHasValue(false) {} |
|
26 | 26 |
|
27 |
-OptionalFloat::OptionalFloat(float f) : mHasValue(true), mValue(f) {} |
|
27 |
+OptionalFloat::OptionalFloat(float f) : mValue(f), mHasValue(true) {} |
|
28 | 28 |
|
29 | 29 |
float OptionalFloat::value() |
30 | 30 |
{ |
... | ... |
@@ -52,6 +52,9 @@ void Xoroshiro128plus::seed(uint64_t seed) |
52 | 52 |
|
53 | 53 |
uint64_t Xoroshiro128plus::next() |
54 | 54 |
{ |
55 |
+ mPreviousState[0] = mState[0]; |
|
56 |
+ mPreviousState[1] = mState[1]; |
|
57 |
+ |
|
55 | 58 |
const uint64_t s0 = mState[0]; |
56 | 59 |
uint64_t s1 = mState[1]; |
57 | 60 |
uint64_t result = s0 + s1; |
... | ... |
@@ -61,6 +64,12 @@ uint64_t Xoroshiro128plus::next() |
61 | 64 |
return result; |
62 | 65 |
} |
63 | 66 |
|
67 |
+void Xoroshiro128plus::rollBackOnce() |
|
68 |
+{ |
|
69 |
+ mState[0] = mPreviousState[0]; |
|
70 |
+ mState[1] = mPreviousState[1]; |
|
71 |
+} |
|
72 |
+ |
|
64 | 73 |
void Xoroshiro128plus::warmup() |
65 | 74 |
{ |
66 | 75 |
for (unsigned i = 0; i < 5000; ++i) |
... | ... |
@@ -100,15 +109,15 @@ Archive& GapsRng::load(Archive &ar) |
100 | 109 |
return ar; |
101 | 110 |
} |
102 | 111 |
|
103 |
-GapsRng::GapsRng() |
|
104 |
- : mState(0) |
|
105 |
-{} |
|
106 |
- |
|
107 |
-void GapsRng::init() |
|
112 |
+void GapsRng::rollBackOnce() |
|
108 | 113 |
{ |
109 |
- mState = seeder.next(); |
|
114 |
+ seeder.rollBackOnce(); |
|
110 | 115 |
} |
111 | 116 |
|
117 |
+GapsRng::GapsRng() |
|
118 |
+ : mState(seeder.next()) |
|
119 |
+{} |
|
120 |
+ |
|
112 | 121 |
uint32_t GapsRng::next() |
113 | 122 |
{ |
114 | 123 |
advance(); |
... | ... |
@@ -117,6 +126,7 @@ uint32_t GapsRng::next() |
117 | 126 |
|
118 | 127 |
void GapsRng::advance() |
119 | 128 |
{ |
129 |
+ mPreviousState = mState; |
|
120 | 130 |
mState = mState * 6364136223846793005ull + (54u|1); |
121 | 131 |
} |
122 | 132 |
|
... | ... |
@@ -274,13 +284,13 @@ float GapsRng::truncGammaUpper(float b, float shape, float scale) |
274 | 284 |
|
275 | 285 |
Archive& operator<<(Archive &ar, GapsRng &gen) |
276 | 286 |
{ |
277 |
- ar << gen.mState << gen.mStream; |
|
287 |
+ ar << gen.mState; |
|
278 | 288 |
return ar; |
279 | 289 |
} |
280 | 290 |
|
281 | 291 |
Archive& operator>>(Archive &ar, GapsRng &gen) |
282 | 292 |
{ |
283 |
- ar >> gen.mState >> gen.mStream; |
|
293 |
+ ar >> gen.mState; |
|
284 | 294 |
return ar; |
285 | 295 |
} |
286 | 296 |
|
... | ... |
@@ -101,9 +101,14 @@ Archive& GapsRng::load(Archive &ar) |
101 | 101 |
} |
102 | 102 |
|
103 | 103 |
GapsRng::GapsRng() |
104 |
- : mState(seeder.next()) |
|
104 |
+ : mState(0) |
|
105 | 105 |
{} |
106 | 106 |
|
107 |
+void GapsRng::init() |
|
108 |
+{ |
|
109 |
+ mState = seeder.next(); |
|
110 |
+} |
|
111 |
+ |
|
107 | 112 |
uint32_t GapsRng::next() |
108 | 113 |
{ |
109 | 114 |
advance(); |
... | ... |
@@ -18,6 +18,8 @@ |
18 | 18 |
const float maxU32AsFloat = static_cast<float>(std::numeric_limits<uint32_t>::max()); |
19 | 19 |
const double maxU32AsDouble = static_cast<double>(std::numeric_limits<uint32_t>::max()); |
20 | 20 |
|
21 |
+static Xoroshiro128plus seeder; |
|
22 |
+ |
|
21 | 23 |
/////////////////////////////// OptionalFloat ////////////////////////////////// |
22 | 24 |
|
23 | 25 |
OptionalFloat::OptionalFloat() : mHasValue(false), mValue(0.f) {} |
... | ... |
@@ -50,16 +52,12 @@ void Xoroshiro128plus::seed(uint64_t seed) |
50 | 52 |
|
51 | 53 |
uint64_t Xoroshiro128plus::next() |
52 | 54 |
{ |
53 |
- uint64_t result = 0; |
|
54 |
- #pragma omp critical(RngCreation) |
|
55 |
- { |
|
56 |
- const uint64_t s0 = mState[0]; |
|
57 |
- uint64_t s1 = mState[1]; |
|
58 |
- result = s0 + s1; |
|
59 |
- s1 ^= s0; |
|
60 |
- mState[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b |
|
61 |
- mState[1] = rotl(s1, 37); // c |
|
62 |
- } |
|
55 |
+ const uint64_t s0 = mState[0]; |
|
56 |
+ uint64_t s1 = mState[1]; |
|
57 |
+ uint64_t result = s0 + s1; |
|
58 |
+ s1 ^= s0; |
|
59 |
+ mState[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b |
|
60 |
+ mState[1] = rotl(s1, 37); // c |
|
63 | 61 |
return result; |
64 | 62 |
} |
65 | 63 |
|
... | ... |
@@ -85,8 +83,25 @@ Archive& operator>>(Archive &ar, Xoroshiro128plus &gen) |
85 | 83 |
|
86 | 84 |
////////////////////////////////// GapsRng ///////////////////////////////////// |
87 | 85 |
|
88 |
-GapsRng::GapsRng(uint64_t seed, uint64_t stream) |
|
89 |
- : mState(seed), mStream(54u) |
|
86 |
+void GapsRng::setSeed(uint32_t sd) |
|
87 |
+{ |
|
88 |
+ seeder.seed(sd); |
|
89 |
+} |
|
90 |
+ |
|
91 |
+Archive& GapsRng::save(Archive &ar) |
|
92 |
+{ |
|
93 |
+ ar << seeder; |
|
94 |
+ return ar; |
|
95 |
+} |
|
96 |
+ |
|
97 |
+Archive& GapsRng::load(Archive &ar) |
|
98 |
+{ |
|
99 |
+ ar >> seeder; |
|
100 |
+ return ar; |
|
101 |
+} |
|
102 |
+ |
|
103 |
+GapsRng::GapsRng() |
|
104 |
+ : mState(seeder.next()) |
|
90 | 105 |
{} |
91 | 106 |
|
92 | 107 |
uint32_t GapsRng::next() |
... | ... |
@@ -97,7 +112,7 @@ uint32_t GapsRng::next() |
97 | 112 |
|
98 | 113 |
void GapsRng::advance() |
99 | 114 |
{ |
100 |
- mState = mState * 6364136223846793005ull + (mStream|1); |
|
115 |
+ mState = mState * 6364136223846793005ull + (54u|1); |
|
101 | 116 |
} |
102 | 117 |
|
103 | 118 |
uint32_t GapsRng::get() const |
... | ... |
@@ -86,13 +86,8 @@ Archive& operator>>(Archive &ar, Xoroshiro128plus &gen) |
86 | 86 |
////////////////////////////////// GapsRng ///////////////////////////////////// |
87 | 87 |
|
88 | 88 |
GapsRng::GapsRng(uint64_t seed, uint64_t stream) |
89 |
- : |
|
90 |
-mState(0u), mStream(stream << 1u | 1u) |
|
91 |
-{ |
|
92 |
- next(); |
|
93 |
- mState += seed; |
|
94 |
- next(); |
|
95 |
-} |
|
89 |
+ : mState(seed), mStream(54u) |
|
90 |
+{} |
|
96 | 91 |
|
97 | 92 |
uint32_t GapsRng::next() |
98 | 93 |
{ |
... | ... |
@@ -1,10 +1,9 @@ |
1 | 1 |
// [[Rcpp::depends(BH)]] |
2 | 2 |
|
3 |
-#include "Math.h" |
|
4 | 3 |
#include "Random.h" |
5 |
-#include "../utils/GapsAssert.h" |
|
6 | 4 |
|
7 |
-// TODO remove boost dependency |
|
5 |
+#include "Algorithms.h" |
|
6 |
+#include "../utils/GapsAssert.h" |
|
8 | 7 |
|
9 | 8 |
#include <boost/math/distributions/exponential.hpp> |
10 | 9 |
#include <boost/math/distributions/gamma.hpp> |
... | ... |
@@ -14,28 +13,29 @@ |
14 | 13 |
#include <stdint.h> |
15 | 14 |
|
16 | 15 |
#define Q_GAMMA_THRESHOLD 0.000001f |
17 |
-#define Q_GAMMA_MIN_VALUE 0.0 |
|
16 |
+#define Q_GAMMA_MIN_VALUE 0.f |
|
18 | 17 |
|
19 | 18 |
const float maxU32AsFloat = static_cast<float>(std::numeric_limits<uint32_t>::max()); |
20 | 19 |
const double maxU32AsDouble = static_cast<double>(std::numeric_limits<uint32_t>::max()); |
21 | 20 |
|
22 |
-static Xoroshiro128plus seeder; |
|
21 |
+/////////////////////////////// OptionalFloat ////////////////////////////////// |
|
23 | 22 |
|
24 |
-void GapsRng::save(Archive &ar) |
|
25 |
-{ |
|
26 |
- ar << seeder; |
|
27 |
-} |
|
23 |
+OptionalFloat::OptionalFloat() : mHasValue(false), mValue(0.f) {} |
|
24 |
+ |
|
25 |
+OptionalFloat::OptionalFloat(float f) : mHasValue(true), mValue(f) {} |
|
28 | 26 |
|
29 |
-void GapsRng::load(Archive &ar) |
|
27 |
+float OptionalFloat::value() |
|
30 | 28 |
{ |
31 |
- ar >> seeder; |
|
29 |
+ return mValue; |
|
32 | 30 |
} |
33 | 31 |
|
34 |
-void GapsRng::setSeed(uint64_t seed) |
|
32 |
+bool OptionalFloat::hasValue() const |
|
35 | 33 |
{ |
36 |
- seeder.seed(seed); |
|
34 |
+ return mHasValue; |
|
37 | 35 |
} |
38 | 36 |
|
37 |
+///////////////////////////// Xoroshiro128plus ///////////////////////////////// |
|
38 |
+ |
|
39 | 39 |
static uint64_t rotl(const uint64_t x, int k) |
40 | 40 |
{ |
41 | 41 |
return (x << k) | (x >> (64 - k)); |
... | ... |
@@ -83,7 +83,16 @@ Archive& operator>>(Archive &ar, Xoroshiro128plus &gen) |
83 | 83 |
return ar; |
84 | 84 |
} |
85 | 85 |
|
86 |
-GapsRng::GapsRng() : mState(seeder.next()) {} |
|
86 |
+////////////////////////////////// GapsRng ///////////////////////////////////// |
|
87 |
+ |
|
88 |
+GapsRng::GapsRng(uint64_t seed, uint64_t stream) |
|
89 |
+ : |
|
90 |
+mState(0u), mStream(stream << 1u | 1u) |
|
91 |
+{ |
|
92 |
+ next(); |
|
93 |
+ mState += seed; |
|
94 |
+ next(); |
|
95 |
+} |
|
87 | 96 |
|
88 | 97 |
uint32_t GapsRng::next() |
89 | 98 |
{ |
... | ... |
@@ -93,7 +102,7 @@ uint32_t GapsRng::next() |
93 | 102 |
|
94 | 103 |
void GapsRng::advance() |
95 | 104 |
{ |
96 |
- mState = mState * 6364136223846793005ull + (54u|1); |
|
105 |
+ mState = mState * 6364136223846793005ull + (mStream|1); |
|
97 | 106 |
} |
98 | 107 |
|
99 | 108 |
uint32_t GapsRng::get() const |
... | ... |
@@ -240,7 +249,6 @@ float GapsRng::inverseNormSample(float a, float b, float mean, float sd) |
240 | 249 |
float GapsRng::truncGammaUpper(float b, float shape, float scale) |
241 | 250 |
{ |
242 | 251 |
float upper = gaps::p_gamma(b, shape, scale); |
243 |
- |
|
244 | 252 |
float u = uniform(0.f, upper); |
245 | 253 |
while (u == 0.f || u == 1.f) |
246 | 254 |
{ |
... | ... |
@@ -251,16 +259,18 @@ float GapsRng::truncGammaUpper(float b, float shape, float scale) |
251 | 259 |
|
252 | 260 |
Archive& operator<<(Archive &ar, GapsRng &gen) |
253 | 261 |
{ |
254 |
- ar << gen.mState; |
|
262 |
+ ar << gen.mState << gen.mStream; |
|
255 | 263 |
return ar; |
256 | 264 |
} |
257 | 265 |
|
258 | 266 |
Archive& operator>>(Archive &ar, GapsRng &gen) |
259 | 267 |
{ |
260 |
- ar >> gen.mState; |
|
268 |
+ ar >> gen.mState >> gen.mStream; |
|
261 | 269 |
return ar; |
262 | 270 |
} |
263 | 271 |
|
272 |
+////////////////////////// Distribution Calculations /////////////////////////// |
|
273 |
+ |
|
264 | 274 |
float gaps::d_gamma(float d, float shape, float scale) |
265 | 275 |
{ |
266 | 276 |
boost::math::gamma_distribution<> gam(shape, scale); |
... | ... |
@@ -227,23 +227,14 @@ float GapsRng::exponential(float lambda) |
227 | 227 |
return -1.f * std::log(uniform()) / lambda; |
228 | 228 |
} |
229 | 229 |
|
230 |
-OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
|
230 |
+float GapsRng::inverseNormSample(float a, float b, float mean, float sd) |
|
231 | 231 |
{ |
232 |
- float pLower = gaps::p_norm(a, mean, sd); |
|
233 |
- float pUpper = gaps::p_norm(b, mean, sd); |
|
234 |
- |
|
235 |
- if (!(pLower > 0.95f || pUpper < 0.05f)) |
|
232 |
+ float u = uniform(a, b); |
|
233 |
+ while (u == 0.f || u == 1.f) |
|
236 | 234 |
{ |
237 |
- float u = uniform(pLower, pUpper); |
|
238 |
- while (u == 0.f || u == 1.f) |
|
239 |
- { |
|
240 |
- u = uniform(pLower, pUpper); |
|
241 |
- } |
|
242 |
- float ret = gaps::q_norm(u, mean, sd); |
|
243 |
- ret = gaps::min(gaps::max(a, ret), b); // need this for truncation error |
|
244 |
- return OptionalFloat(ret); |
|
235 |
+ u = uniform(a, b); |
|
245 | 236 |
} |
246 |
- OptionalFloat(); |
|
237 |
+ return gaps::q_norm(u, mean, sd); |
|
247 | 238 |
} |
248 | 239 |
|
249 | 240 |
float GapsRng::truncGammaUpper(float b, float shape, float scale) |
... | ... |
@@ -319,4 +310,4 @@ float gaps::d_norm_fast(float d, float mean, float sd) |
319 | 310 |
{ |
320 | 311 |
return std::exp((d - mean) * (d - mean) / (-2.f * sd * sd)) |
321 | 312 |
/ std::sqrt(2.f * gaps::pi * sd * sd); |
322 |
-} |
|
313 |
+} |
|
323 | 314 |
\ No newline at end of file |
... | ... |
@@ -229,8 +229,8 @@ float GapsRng::exponential(float lambda) |
229 | 229 |
|
230 | 230 |
OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
231 | 231 |
{ |
232 |
- float pLower = gaps::p_norm(a + gaps::epsilon, mean, sd); |
|
233 |
- float pUpper = gaps::p_norm(b + gaps::epsilon, mean, sd); |
|
232 |
+ float pLower = gaps::p_norm(a, mean, sd); |
|
233 |
+ float pUpper = gaps::p_norm(b, mean, sd); |
|
234 | 234 |
|
235 | 235 |
if (!(pLower > 0.95f || pUpper < 0.05f)) |
236 | 236 |
{ |
... | ... |
@@ -229,8 +229,8 @@ float GapsRng::exponential(float lambda) |
229 | 229 |
|
230 | 230 |
OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
231 | 231 |
{ |
232 |
- float pLower = gaps::p_norm(a, mean, sd); |
|
233 |
- float pUpper = gaps::p_norm(b, mean, sd); |
|
232 |
+ float pLower = gaps::p_norm(a + gaps::epsilon, mean, sd); |
|
233 |
+ float pUpper = gaps::p_norm(b + gaps::epsilon, mean, sd); |
|
234 | 234 |
|
235 | 235 |
if (!(pLower > 0.95f || pUpper < 0.05f)) |
236 | 236 |
{ |
... | ... |
@@ -240,8 +240,7 @@ OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
240 | 240 |
u = uniform(pLower, pUpper); |
241 | 241 |
} |
242 | 242 |
float ret = gaps::q_norm(u, mean, sd); |
243 |
- GAPS_ASSERT(ret >= a); |
|
244 |
- GAPS_ASSERT(ret <= b); |
|
243 |
+ ret = gaps::min(gaps::max(a, ret), b); // need this for truncation error |
|
245 | 244 |
return OptionalFloat(ret); |
246 | 245 |
} |
247 | 246 |
OptionalFloat(); |
... | ... |
@@ -227,14 +227,24 @@ float GapsRng::exponential(float lambda) |
227 | 227 |
return -1.f * std::log(uniform()) / lambda; |
228 | 228 |
} |
229 | 229 |
|
230 |
-float GapsRng::inverseNormSample(float a, float b, float mean, float sd) |
|
230 |
+OptionalFloat GapsRng::truncNormal(float a, float b, float mean, float sd) |
|
231 | 231 |
{ |
232 |
- float u = uniform(a, b); |
|
233 |
- while (u == 0.f || u == 1.f) |
|
232 |
+ float pLower = gaps::p_norm(a, mean, sd); |
|
233 |
+ float pUpper = gaps::p_norm(b, mean, sd); |
|
234 |
+ |
|
235 |
+ if (!(pLower > 0.95f || pUpper < 0.05f)) |
|
234 | 236 |
{ |
235 |
- u = uniform(a, b); |
|
237 |
+ float u = uniform(pLower, pUpper); |
|
238 |
+ while (u == 0.f || u == 1.f) |
|
239 |
+ { |
|
240 |
+ u = uniform(pLower, pUpper); |
|
241 |
+ } |
|
242 |
+ float ret = gaps::q_norm(u, mean, sd); |
|
243 |
+ GAPS_ASSERT(ret >= a); |
|
244 |
+ GAPS_ASSERT(ret <= b); |
|
245 |
+ return OptionalFloat(ret); |
|
236 | 246 |
} |
237 |
- return gaps::q_norm(u, mean, sd); |
|
247 |
+ OptionalFloat(); |
|
238 | 248 |
} |
239 | 249 |
|
240 | 250 |
float GapsRng::truncGammaUpper(float b, float shape, float scale) |
... | ... |
@@ -237,14 +237,16 @@ float GapsRng::inverseNormSample(float a, float b, float mean, float sd) |
237 | 237 |
return gaps::q_norm(u, mean, sd); |
238 | 238 |
} |
239 | 239 |
|
240 |
-float GapsRng::inverseGammaSample(float a, float b, float mean, float sd) |
|
240 |
+float GapsRng::truncGammaUpper(float b, float shape, float scale) |
|
241 | 241 |
{ |
242 |
- float u = uniform(a, b); |
|
242 |
+ float upper = gaps::p_gamma(b, shape, scale); |
|
243 |
+ |
|
244 |
+ float u = uniform(0.f, upper); |
|
243 | 245 |
while (u == 0.f || u == 1.f) |
244 | 246 |
{ |
245 |
- u = uniform(a, b); |
|
247 |
+ u = uniform(0.f, upper); |
|
246 | 248 |
} |
247 |
- return gaps::q_gamma(u, mean, sd); |
|
249 |
+ return gaps::q_gamma(u, shape, scale); |
|
248 | 250 |
} |
249 | 251 |
|
250 | 252 |
Archive& operator<<(Archive &ar, GapsRng &gen) |
... | ... |
@@ -303,3 +305,9 @@ double gaps::lgamma(double x) |
303 | 305 |
{ |
304 | 306 |
return boost::math::lgamma(x); |
305 | 307 |
} |
308 |
+ |
|
309 |
+float gaps::d_norm_fast(float d, float mean, float sd) |
|
310 |
+{ |
|
311 |
+ return std::exp((d - mean) * (d - mean) / (-2.f * sd * sd)) |
|
312 |
+ / std::sqrt(2.f * gaps::pi * sd * sd); |
|
313 |
+} |
... | ... |
@@ -123,14 +123,15 @@ uint32_t GapsRng::uniform32() |
123 | 123 |
return next(); |
124 | 124 |
} |
125 | 125 |
|
126 |
+// inclusive of a and b |
|
126 | 127 |
uint32_t GapsRng::uniform32(uint32_t a, uint32_t b) |
127 | 128 |
{ |
128 |
- uint32_t range = b - a; |
|
129 |
- if (range == 0) |
|
129 |
+ if (b == a) |
|
130 | 130 |
{ |
131 | 131 |
return a; |
132 | 132 |
} |
133 | 133 |
|
134 |
+ uint32_t range = b + 1 - a; |
|
134 | 135 |
uint32_t x = uniform32(); |
135 | 136 |
uint32_t iPart = std::numeric_limits<uint32_t>::max() / range; |
136 | 137 |
while (x >= range * iPart) |
... | ... |
@@ -147,14 +148,15 @@ uint64_t GapsRng::uniform64() |
147 | 148 |
return high | low; |
148 | 149 |
} |
149 | 150 |
|
151 |
+// inclusive of a and b |
|
150 | 152 |
uint64_t GapsRng::uniform64(uint64_t a, uint64_t b) |
151 | 153 |
{ |
152 |
- uint64_t range = b - a; |
|
153 |
- if (range == 0) |
|
154 |
+ if (b == a) |
|
154 | 155 |
{ |
155 | 156 |
return a; |
156 | 157 |
} |
157 | 158 |
|
159 |
+ uint64_t range = b + 1 - a; |
|
158 | 160 |
uint64_t x = uniform64(); |
159 | 161 |
uint64_t iPart = std::numeric_limits<uint64_t>::max() / range; |
160 | 162 |
while (x >= range * iPart) |
... | ... |
@@ -10,13 +10,6 @@ |
10 | 10 |
#include <boost/math/distributions/gamma.hpp> |
11 | 11 |
#include <boost/math/distributions/normal.hpp> |
12 | 12 |
|
13 |
-#include <boost/random/exponential_distribution.hpp> |
|
14 |
-#include <boost/random/mersenne_twister.hpp> |
|
15 |
-#include <boost/random/normal_distribution.hpp> |
|
16 |
-#include <boost/random/poisson_distribution.hpp> |
|
17 |
-#include <boost/random/uniform_01.hpp> |
|
18 |
-#include <boost/random/uniform_real_distribution.hpp> |
|
19 |
- |
|
20 | 13 |
#include <algorithm> |
21 | 14 |
#include <stdint.h> |
22 | 15 |
|
... | ... |
@@ -252,6 +252,18 @@ float GapsRng::inverseGammaSample(float a, float b, float mean, float sd) |
252 | 252 |
return gaps::q_gamma(u, mean, sd); |
253 | 253 |
} |
254 | 254 |
|
255 |
+Archive& operator<<(Archive &ar, GapsRng &gen) |
|
256 |
+{ |
|
257 |
+ ar << gen.mState; |
|
258 |
+ return ar; |
|
259 |
+} |
|
260 |
+ |
|
261 |
+Archive& operator>>(Archive &ar, GapsRng &gen) |
|
262 |
+{ |
|
263 |
+ ar >> gen.mState; |
|
264 |
+ return ar; |
|
265 |
+} |
|
266 |
+ |
|
255 | 267 |
float gaps::d_gamma(float d, float shape, float scale) |
256 | 268 |
{ |
257 | 269 |
boost::math::gamma_distribution<> gam(shape, scale); |
... | ... |
@@ -20,149 +20,29 @@ |
20 | 20 |
#include <algorithm> |
21 | 21 |
#include <stdint.h> |
22 | 22 |
|
23 |
-#ifdef __GAPS_OPENMP__ |
|
24 |
- #include <omp.h> |
|
25 |
-#else |
|
26 |
- typedef int omp_int_t; |
|
27 |
- inline omp_int_t omp_get_thread_num() { return 0; } |
|
28 |
- inline omp_int_t omp_get_max_threads() { return 1; } |
|
29 |
-#endif |
|
30 |
- |
|
31 | 23 |
#define Q_GAMMA_THRESHOLD 0.000001f |
32 | 24 |
#define Q_GAMMA_MIN_VALUE 0.0 |
33 | 25 |
|
34 |
-//typedef boost::random::mt19937 RNGType; |
|
35 |
-//typedef boost::random::mt11213b RNGType; // should be faster |
|
26 |
+const float maxU32AsFloat = static_cast<float>(std::numeric_limits<uint32_t>::max()); |
|
27 |
+const double maxU32AsDouble = static_cast<double>(std::numeric_limits<uint32_t>::max()); |
|
36 | 28 |
|
37 | 29 |
static Xoroshiro128plus seeder; |
38 | 30 |
|
39 |
-static std::vector<GapsRng>& rng() |
|
31 |
+void GapsRng::save(Archive &ar) |
|
40 | 32 |
{ |
41 |
- static std::vector<GapsRng> allRngs(omp_get_max_threads()); |
|
42 |
- return allRngs; |
|
33 |
+ ar << seeder; |
|
43 | 34 |
} |
44 | 35 |
|
45 |
-void gaps::random::save(Archive &ar) |
|
36 |
+void GapsRng::load(Archive &ar) |
|
46 | 37 |
{ |
47 |
- for (unsigned i = 0; i < rng().size(); ++i) |
|
48 |
- { |
|
49 |
- ar << seeder; |
|
50 |
- } |
|
38 |
+ ar >> seeder; |
|
51 | 39 |
} |
52 | 40 |
|
53 |
-void gaps::random::load(Archive &ar) |
|
54 |
-{ |
|
55 |
- for (unsigned i = 0; i < rng().size(); ++i) |
|
56 |
- { |
|
57 |
- ar >> seeder; |
|
58 |
- } |
|
59 |
-} |
|
60 |
- |
|
61 |
-void gaps::random::setSeed(uint32_t seed) |
|
41 |
+void GapsRng::setSeed(uint64_t seed) |
|
62 | 42 |
{ |
63 | 43 |
seeder.seed(seed); |
64 | 44 |
} |
65 | 45 |
|
66 |
-int gaps::random::poisson(float lambda) |
|
67 |
-{ |
|
68 |
- return rng().at(omp_get_thread_num()).poisson(lambda); |
|
69 |
-} |
|
70 |
- |
|
71 |
-float gaps::random::exponential(float lambda) |
|
72 |
-{ |
|
73 |
- return rng().at(omp_get_thread_num()).exponential(lambda); |
|
74 |
-} |
|
75 |
- |
|
76 |
-float gaps::random::uniform() |
|
77 |
-{ |
|
78 |
- return rng().at(omp_get_thread_num()).uniform(); |
|
79 |
-} |
|
80 |
- |
|
81 |
-float gaps::random::uniform(float a, float b) |
|
82 |
-{ |
|
83 |
- return rng().at(omp_get_thread_num()).uniform(a,b); |
|
84 |
-} |
|
85 |
- |
|
86 |
-uint64_t gaps::random::uniform64() |
|
87 |
-{ |
|
88 |
- return rng().at(omp_get_thread_num()).uniform64(); |
|
89 |
-} |
|
90 |
- |
|
91 |
-uint64_t gaps::random::uniform64(uint64_t a, uint64_t b) |
|
92 |
-{ |
|
93 |
- return rng().at(omp_get_thread_num()).uniform64(a,b); |
|
94 |