Faster normal draws with the ziggurat algorithm #326

Merged — 17 commits, Nov 8, 2021
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: dust
Title: Iterate Multiple Realisations of Stochastic Models
Version: 0.11.5
Version: 0.11.6
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "[email protected]"),
person("John", "Lees", role = "aut"),
4 changes: 4 additions & 0 deletions NEWS.md
@@ -1,3 +1,7 @@
# dust 0.11.6

* Added support for drawing normally distributed random numbers using the ziggurat method. Currently this is only efficient on a CPU, though supported on a GPU (#308)

# dust 0.11.5

* Major header reorganisation, the `dust::cuda` namespace is now `dust::gpu`, most user-facing uses of `cuda` and `device` now replaced with `gpu` (#298, #317)
8 changes: 6 additions & 2 deletions R/cpp11.R
@@ -40,6 +40,10 @@ dust_rng_random_real <- function(ptr, n, n_threads, is_float) {
.Call(`_dust_dust_rng_random_real`, ptr, n, n_threads, is_float)
}

dust_rng_random_normal <- function(ptr, n, n_threads, algorithm, is_float) {
.Call(`_dust_dust_rng_random_normal`, ptr, n, n_threads, algorithm, is_float)
}

dust_rng_uniform <- function(ptr, n, r_min, r_max, n_threads, is_float) {
.Call(`_dust_dust_rng_uniform`, ptr, n, r_min, r_max, n_threads, is_float)
}
@@ -48,8 +52,8 @@ dust_rng_exponential <- function(ptr, n, r_rate, n_threads, is_float) {
.Call(`_dust_dust_rng_exponential`, ptr, n, r_rate, n_threads, is_float)
}

dust_rng_normal <- function(ptr, n, r_mean, r_sd, n_threads, is_float) {
.Call(`_dust_dust_rng_normal`, ptr, n, r_mean, r_sd, n_threads, is_float)
dust_rng_normal <- function(ptr, n, r_mean, r_sd, n_threads, algorithm, is_float) {
.Call(`_dust_dust_rng_normal`, ptr, n, r_mean, r_sd, n_threads, algorithm, is_float)
}

dust_rng_binomial <- function(ptr, n, r_size, r_prob, n_threads, is_float) {
26 changes: 24 additions & 2 deletions R/rng.R
@@ -84,6 +84,9 @@
##' # Shorthand for Uniform(0, 1)
##' rng$random_real(5)
##'
##' # Shorthand for Normal(0, 1)
##' rng$random_normal(5)
##'
##' # Uniform random numbers between min and max
##' rng$uniform(5, -2, 6)
##'
@@ -192,6 +195,20 @@ dust_rng <- R6::R6Class(
dust_rng_random_real(private$ptr, n, n_threads, private$float)
},

##' @description Generate `n` numbers from a standard normal distribution
##'
##' @param n Number of samples to draw (per generator)
##'
##' @param n_threads Number of threads to use; see Details
##'
##' @param algorithm Name of the algorithm to use; currently `box_muller`
##' and `ziggurat` are supported, with the latter being considerably
##' faster.
random_normal = function(n, n_threads = 1L, algorithm = "box_muller") {
dust_rng_random_normal(private$ptr, n, n_threads, algorithm,
private$float)
},

##' @description Generate `n` numbers from a uniform distribution
##'
##' @param n Number of samples to draw (per generator)
@@ -214,8 +231,13 @@
##' @param sd The standard deviation of the distribution (length 1 or n)
##'
##' @param n_threads Number of threads to use; see Details
normal = function(n, mean, sd, n_threads = 1L) {
dust_rng_normal(private$ptr, n, mean, sd, n_threads, private$float)
##'
##' @param algorithm Name of the algorithm to use; currently `box_muller`
##' and `ziggurat` are supported, with the latter being considerably
##' faster.
normal = function(n, mean, sd, n_threads = 1L, algorithm = "box_muller") {
dust_rng_normal(private$ptr, n, mean, sd, n_threads, algorithm,
private$float)
},

##' @description Generate `n` numbers from a binomial distribution
2 changes: 1 addition & 1 deletion inst/include/dust/random/binomial.hpp
@@ -3,7 +3,7 @@

#include <cmath>

#include "dust/random/gamma_table.hpp"
#include "dust/random/binomial_gamma_tables.hpp"
#include "dust/random/generator.hpp"

namespace dust {
@@ -1,6 +1,6 @@
// Generated by scripts/update_gamma_table - do not edit
#ifndef DUST_RANDOM_GAMMA_TABLE_HPP
#define DUST_RANDOM_GAMMA_TABLE_HPP
// Generated by scripts/update_gamma_tables - do not edit
#ifndef DUST_RANDOM_BINOMIAL_GAMMA_TABLES_HPP
#define DUST_RANDOM_BINOMIAL_GAMMA_TABLES_HPP

#include "dust/random/cuda_compatibility.hpp"

46 changes: 24 additions & 22 deletions inst/include/dust/random/normal.hpp
@@ -5,44 +5,46 @@

#include "dust/random/generator.hpp"
#include "dust/random/numeric.hpp"
#include "dust/random/normal_box_muller.hpp"
#include "dust/random/normal_ziggurat.hpp"

namespace dust {
namespace random {

namespace {
namespace algorithm {
enum class normal {box_muller, ziggurat};
}

__nv_exec_check_disable__
template <typename real_type, typename rng_state_type>
template <typename real_type,
algorithm::normal algorithm = algorithm::normal::box_muller,
typename rng_state_type>
__host__ __device__
real_type box_muller(rng_state_type& rng_state) {
// This function implements the Box-Muller transform:
// https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
// Do not send a really small number to log().
const real_type epsilon = utils::epsilon<real_type>();
const real_type two_pi = 2 * M_PI;

real_type u1, u2;
do {
u1 = random_real<real_type>(rng_state);
u2 = random_real<real_type>(rng_state);
} while (u1 <= epsilon);

SYNCWARP
return std::sqrt(-2 * std::log(u1)) * std::cos(two_pi * u2);
}

real_type random_normal(rng_state_type& rng_state) {
Member: Is the algorithm chosen at run time or when compiling the object? Wondering whether we could just template rather than using this function.

Author: that is a template - am I missing something?

Author: After discussion: it feels like one could overload this, since the algorithm is known at compile time, but C++ doesn't let us do that.

static_assert(std::is_floating_point<real_type>::value,
"Only valid for floating-point types; use normal<real_type>()");
switch(algorithm) {
case algorithm::normal::box_muller:
return random_normal_box_muller<real_type>(rng_state);
case algorithm::normal::ziggurat:
default: // keeps compiler happy
return random_normal_ziggurat<real_type>(rng_state);
}
}

__nv_exec_check_disable__
template <typename real_type, typename rng_state_type>
template <typename real_type,
algorithm::normal algorithm = algorithm::normal::box_muller,
typename rng_state_type>
__host__ __device__
real_type normal(rng_state_type& rng_state, real_type mean, real_type sd) {
static_assert(std::is_floating_point<real_type>::value,
"Only valid for floating-point types; use normal<real_type>()");
#ifdef __CUDA_ARCH__
real_type z = box_muller<real_type>(rng_state);
real_type z = random_normal<real_type, algorithm>(rng_state);
#else
real_type z = rng_state.deterministic ? 0 : box_muller<real_type>(rng_state);
real_type z = rng_state.deterministic ?
0 : random_normal<real_type, algorithm>(rng_state);
#endif
return z * sd + mean;
}
35 changes: 35 additions & 0 deletions inst/include/dust/random/normal_box_muller.hpp
@@ -0,0 +1,35 @@
#ifndef DUST_RANDOM_NORMAL_BOX_MULLER_HPP
#define DUST_RANDOM_NORMAL_BOX_MULLER_HPP

#include <cmath>

#include "dust/random/generator.hpp"

namespace dust {
namespace random {

__nv_exec_check_disable__
template <typename real_type, typename rng_state_type>
__host__ __device__
real_type random_normal_box_muller(rng_state_type& rng_state) {
// This function implements the Box-Muller transform:
// https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
// Do not send a really small number to log().
const real_type epsilon = utils::epsilon<real_type>();
const real_type two_pi = 2 * M_PI;

real_type u1, u2;
do {
u1 = random_real<real_type>(rng_state);
u2 = random_real<real_type>(rng_state);
} while (u1 <= epsilon);

SYNCWARP
return std::sqrt(-2 * std::log(u1)) * std::cos(two_pi * u2);
}


}
}

#endif
75 changes: 75 additions & 0 deletions inst/include/dust/random/normal_ziggurat.hpp
@@ -0,0 +1,75 @@
#ifndef DUST_RANDOM_NORMAL_ZIGGURAT_HPP
#define DUST_RANDOM_NORMAL_ZIGGURAT_HPP

#include <cmath>

#include "dust/random/generator.hpp"
#include "dust/random/normal_ziggurat_tables.hpp"

namespace dust {
namespace random {

__nv_exec_check_disable__
namespace {
template <typename real_type, typename rng_state_type>
__host__ __device__
real_type normal_ziggurat_tail(rng_state_type& rng_state, real_type x1,
bool negative) {
real_type ret;
do {
const auto u1 = random_real<real_type>(rng_state);
const auto u2 = random_real<real_type>(rng_state);
const auto x = std::log(u1) / x1;
const auto y = std::log(u2);
if (- 2 * y > x * x) {
ret = negative ? x - x1 : x1 - x;
break;
}
} while (true);
return ret;
}
}

// TODO: this will not work efficiently for float types because we
// don't have float tables for 'x' and 'y'; getting them is not easy
// without requiring C++14 either. The lower loop using 'x' could be
// rewritten easily as a function taking 'u1', allowing full template
// specialisation. However, by most accounts this performs poorly on a
// GPU due to latency, so it might be ok.

Comment on lines +33 to +35:

Member: How come we are able to manage this with binomial draws?

Author: The binomial algorithm doesn't need to use the random numbers; the tables are only used in stirling_approx_tail, so we have fully specialised templates on real_type (and some ugly names, k_tail_values_d and stirling_approx_tail_f). With C++14 we can make these template variables, which removes the naming problem, but because the algorithm here needs to use one or two random numbers alongside the constants we'd end up trying to make a partially specialised template (providing real_type but leaving rng_state_type open).

There are some solutions, but I'd like to try to implement these separately and compare timings to make sure that we don't end up paying too much in the CPU case.
__nv_exec_check_disable__
template <typename real_type, typename rng_state_type>
__host__ __device__
real_type random_normal_ziggurat(rng_state_type& rng_state) {
using ziggurat::x;
using ziggurat::y;
constexpr size_t n = 256;
Member: Comment to explain this choice and whether it is tuneable?

Author: My first shot at this was fully tunable using std::array<real_type, int> for all sorts of useful n! But it was a pain to work with and we can't have std::array on the GPU...

Author: I've now added a note indicating where this value comes from and why.

const real_type r = x[1];

real_type ret;
do {
const auto i = (next(rng_state) >> 16) % n;
const auto u0 = 2 * random_real<real_type>(rng_state) - 1;
if (std::abs(u0) < y[i]) {
ret = u0 * x[i];
break;
}
if (i == 0) {
ret = normal_ziggurat_tail<real_type>(rng_state, r, u0 < 0);
break;
}
const auto z = u0 * x[i];
const auto f0 = std::exp(-0.5 * (x[i] * x[i] - z * z));
const auto f1 = std::exp(-0.5 * (x[i + 1] * x[i + 1] - z * z));
const auto u1 = random_real<real_type>(rng_state);
if (f1 + u1 * (f0 - f1) < 1.0) {
ret = z;
Member: should this break?

Author: ugh, yes
}
} while (true);
SYNCWARP
return ret;
}

}
}

#endif