-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Faster normal draws with the ziggurat algorithm #326
Changes from 15 commits
5d89e39
dc52a9b
3320340
e649dbc
990545b
9df555c
32c7d85
fbcd325
338a9a3
5c6d3f2
7c19f48
ec7a72b
3e8bfab
f2fa303
2d6eae7
8f00ea2
57a17bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
Package: dust | ||
Title: Iterate Multiple Realisations of Stochastic Models | ||
Version: 0.11.5 | ||
Version: 0.11.6 | ||
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), | ||
email = "[email protected]"), | ||
person("John", "Lees", role = "aut"), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#ifndef DUST_RANDOM_NORMAL_BOX_MULLER_HPP | ||
#define DUST_RANDOM_NORMAL_BOX_MULLER_HPP | ||
|
||
#include <cmath> | ||
|
||
#include "dust/random/generator.hpp" | ||
|
||
namespace dust { | ||
namespace random { | ||
|
||
__nv_exec_check_disable__ | ||
template <typename real_type, typename rng_state_type> | ||
__host__ __device__ | ||
real_type random_normal_box_muller(rng_state_type& rng_state) { | ||
// This function implements the Box-Muller transform: | ||
// https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form | ||
// Do not send a really small number to log(). | ||
const real_type epsilon = utils::epsilon<real_type>(); | ||
const real_type two_pi = 2 * M_PI; | ||
|
||
real_type u1, u2; | ||
do { | ||
u1 = random_real<real_type>(rng_state); | ||
u2 = random_real<real_type>(rng_state); | ||
} while (u1 <= epsilon); | ||
|
||
SYNCWARP | ||
return std::sqrt(-2 * std::log(u1)) * std::cos(two_pi * u2); | ||
} | ||
|
||
|
||
} | ||
} | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#ifndef DUST_RANDOM_NORMAL_ZIGGURAT_HPP | ||
#define DUST_RANDOM_NORMAL_ZIGGURAT_HPP | ||
|
||
#include <cmath> | ||
|
||
#include "dust/random/generator.hpp" | ||
#include "dust/random/normal_ziggurat_tables.hpp" | ||
|
||
namespace dust { | ||
namespace random { | ||
|
||
__nv_exec_check_disable__ | ||
namespace { | ||
template <typename real_type, typename rng_state_type> | ||
__host__ __device__ | ||
real_type normal_ziggurat_tail(rng_state_type& rng_state, real_type x1, | ||
bool negative) { | ||
real_type ret; | ||
do { | ||
const auto u1 = random_real<real_type>(rng_state); | ||
const auto u2 = random_real<real_type>(rng_state); | ||
const auto x = std::log(u1) / x1; | ||
const auto y = std::log(u2); | ||
if (- 2 * y > x * x) { | ||
ret = negative ? x - x1 : x1 - x; | ||
break; | ||
} | ||
} while (true); | ||
return ret; | ||
} | ||
} | ||
|
||
// TODO: this will not work efficiently for float types because we | ||
// don't have float tables for 'x' and 'y'; getting them is not easy | ||
// without requiring c++14 either. The lower loop using 'x' could | ||
Comment on lines
+33
to
+35
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How come we are able to manage this with binomial draws? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The binomial algorithm doesn't need to use the random numbers; the tables are only used in There are some solutions, but I'd like to try and implement these separately and compare timings to make sure that we don't end up paying too much on the CPU case |
||
// rewritten easily as a function though taking 'u1' and so allowing | ||
// full template specialisation. However by most accounts this | ||
// performs poorly onn a GPU to latency so it might be ok. | ||
__nv_exec_check_disable__ | ||
template <typename real_type, typename rng_state_type> | ||
__host__ __device__ | ||
real_type random_normal_ziggurat(rng_state_type& rng_state) { | ||
using ziggurat::x; | ||
using ziggurat::y; | ||
constexpr size_t n = 256; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment to explain this choice and whether it is tuneable? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My first shot at this was fully tunable using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added a note indicating where this comes from and why now |
||
const real_type r = x[1]; | ||
|
||
real_type ret; | ||
do { | ||
const auto i = (next(rng_state) >> 16) % n; | ||
const auto u0 = 2 * random_real<real_type>(rng_state) - 1; | ||
if (std::abs(u0) < y[i]) { | ||
ret = u0 * x[i]; | ||
break; | ||
} | ||
if (i == 0) { | ||
ret = normal_ziggurat_tail<real_type>(rng_state, r, u0 < 0); | ||
break; | ||
} | ||
const auto z = u0 * x[i]; | ||
const auto f0 = std::exp(-0.5 * (x[i] * x[i] - z * z)); | ||
const auto f1 = std::exp(-0.5 * (x[i + 1] * x[i + 1] - z * z)); | ||
const auto u1 = random_real<real_type>(rng_state); | ||
if (f1 + u1 * (f0 - f1) < 1.0) { | ||
ret = z; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ugh, yes |
||
} | ||
} while (true); | ||
SYNCWARP | ||
return ret; | ||
} | ||
|
||
} | ||
} | ||
|
||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the algorithm chosen at run time or when compiling the object? Wondering whether we could just template rather than using this function
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that is a template - am I missing something?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After discussion: it feels like one could overload this as we know the algorithm is known at compile time but C++ doesn't let us do that