From d44fadd218cfc053b7a3b5709d1fd060242f6d84 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Wed, 10 Nov 2021 18:18:54 +0000 Subject: [PATCH 01/27] Start work on a sensible API --- R/cpp11.R | 4 ++ R/rng.R | 21 +++++++++++ inst/include/dust/r/helpers.hpp | 4 +- inst/include/dust/r/random.hpp | 24 ++++++++++++ src/cpp11.cpp | 8 ++++ src/dust_rng.cpp | 39 +++++++++++++++++++- vignettes_src/rng_pi_dust.cpp | 19 ++++++++++ vignettes_src/rng_r_api.cpp | 17 +++++++++ vignettes_src/rng_use.Rmd | 65 +++++++++++++++++++++++++++++++++ 9 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 vignettes_src/rng_pi_dust.cpp create mode 100644 vignettes_src/rng_r_api.cpp create mode 100644 vignettes_src/rng_use.Rmd diff --git a/R/cpp11.R b/R/cpp11.R index e088446ba..26e4829d8 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -72,6 +72,10 @@ dust_rng_state <- function(ptr, is_float) { .Call(`_dust_dust_rng_state`, ptr, is_float) } +dust_rng_pointer_init <- function(n_streams, seed, algorithm) { + .Call(`_dust_dust_rng_pointer_init`, n_streams, seed, algorithm) +} + cpp_openmp_info <- function() { .Call(`_dust_cpp_openmp_info`) } diff --git a/R/rng.R b/R/rng.R index 3b48cfacc..9ae3bdcc1 100644 --- a/R/rng.R +++ b/R/rng.R @@ -137,6 +137,9 @@ dust_rng <- R6::R6Class( ##' expectations and the state is never changed. initialize = function(seed, n_generators = 1L, real_type = "double", deterministic = FALSE) { + ## TODO: n_generators -> n_streams + ## TODO: seed gets default + ## TODO: change order of initialisers? if (!(real_type %in% c("double", "float"))) { stop("Invalid value for 'real_type': must be 'double' or 'float'") } @@ -353,3 +356,21 @@ dust_rng_state_long_jump <- function(state, times = 1L) { } rng$state() } + + +## TODO: store the algorithm alongside the pointer and validate that +## TODO: keep track of if we we're current or not +## TODO: keep a copy of the full state as a raw vector so we survive +## serialisation +## TODO: jump support +dust_rng_pointer <- R6::R6Class( + "dust_rng_pointer", + public = list( + ptr = NULL, + initialize = function(seed = NULL, n_streams = 1L, + algorithm = "xoshiro256plus") { + self$ptr <- dust_rng_pointer_init(n_streams, seed, algorithm) + } + )) + + diff --git a/inst/include/dust/r/helpers.hpp b/inst/include/dust/r/helpers.hpp index c97d1dc59..2eecacf41 100644 --- a/inst/include/dust/r/helpers.hpp +++ b/inst/include/dust/r/helpers.hpp @@ -364,7 +364,7 @@ dust_inputs process_inputs_single(cpp11::list r_pars, int step, dust::r::validate_size(step, "step"); dust::r::validate_positive(n_threads, "n_threads"); std::vector seed = - dust::r::as_rng_seed(r_seed); + dust::random::r::as_rng_seed(r_seed); std::vector> pars; pars.push_back(dust::dust_pars(r_pars)); @@ -389,7 +389,7 @@ dust_inputs process_inputs_multi(cpp11::list r_pars, int step, dust::r::validate_size(step, "step"); dust::r::validate_positive(n_threads, "n_threads"); std::vector seed = - dust::r::as_rng_seed(r_seed); + dust::random::r::as_rng_seed(r_seed); dust::r::check_pars_multi(r_pars); std::vector> pars; diff --git a/inst/include/dust/r/random.hpp b/inst/include/dust/r/random.hpp index 5eebf03a6..0d24873a5 100644 --- a/inst/include/dust/r/random.hpp +++ b/inst/include/dust/r/random.hpp @@ -7,8 +7,10 @@ #include #include "dust/random/generator.hpp" +#include "dust/random/prng.hpp" namespace dust { +namespace random { namespace r { template @@ -40,6 +42,28 @@ std::vector as_rng_seed(cpp11::sexp r_seed) { return seed; } +template +SEXP rng_pointer_init(int n_streams, cpp11::sexp r_seed) { + auto seed = as_rng_seed(r_seed); + auto *rng = new prng(n_streams, seed); + auto ret = cpp11::external_pointer>(rng); + return ret; +} + +template +prng* rng_pointer_get(cpp11::sexp ptr, int n_streams = 0) { + auto *rng = + cpp11::as_cpp>>(ptr).get(); + if (n_streams > 0) { + if (static_cast(rng->size()) < n_streams) { + cpp11::stop("Requested a rng with %d streams but only have %d", + n_streams, rng->size()); + } + } + return rng; +} + +} } } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 80d335008..da718afc7 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -133,6 +133,13 @@ extern "C" SEXP _dust_dust_rng_state(SEXP ptr, SEXP is_float) { return cpp11::as_sexp(dust_rng_state(cpp11::as_cpp>(ptr), cpp11::as_cpp>(is_float))); END_CPP11 } +// dust_rng.cpp +cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, std::string algorithm); +extern "C" SEXP _dust_dust_rng_pointer_init(SEXP n_streams, SEXP seed, SEXP algorithm) { + BEGIN_CPP11 + return cpp11::as_sexp(dust_rng_pointer_init(cpp11::as_cpp>(n_streams), cpp11::as_cpp>(seed), cpp11::as_cpp>(algorithm))); + END_CPP11 +} // openmp.cpp cpp11::writable::list cpp_openmp_info(); extern "C" SEXP _dust_cpp_openmp_info() { @@ -1159,6 +1166,7 @@ static const R_CallMethodDef CallEntries[] = { {"_dust_dust_rng_long_jump", (DL_FUNC) &_dust_dust_rng_long_jump, 2}, {"_dust_dust_rng_multinomial", (DL_FUNC) &_dust_dust_rng_multinomial, 6}, {"_dust_dust_rng_normal", (DL_FUNC) &_dust_dust_rng_normal, 7}, + {"_dust_dust_rng_pointer_init", (DL_FUNC) &_dust_dust_rng_pointer_init, 3}, {"_dust_dust_rng_poisson", (DL_FUNC) &_dust_dust_rng_poisson, 5}, {"_dust_dust_rng_random_normal", (DL_FUNC) &_dust_dust_rng_random_normal, 5}, {"_dust_dust_rng_random_real", (DL_FUNC) &_dust_dust_rng_random_real, 4}, diff --git a/src/dust_rng.cpp b/src/dust_rng.cpp index 30cf24b92..cd241c616 100644 --- a/src/dust_rng.cpp +++ b/src/dust_rng.cpp @@ -18,7 +18,7 @@ using dust_rng32 = dust::random::prng>; template SEXP dust_rng_alloc(cpp11::sexp r_seed, int n_generators, bool deterministic) { - auto seed = dust::r::as_rng_seed(r_seed); + auto seed = dust::random::r::as_rng_seed(r_seed); T *rng = new T(n_generators, seed, deterministic); return cpp11::external_pointer(rng); } @@ -537,3 +537,40 @@ cpp11::sexp dust_rng_state(SEXP ptr, bool is_float) { dust_rng_state(ptr) : dust_rng_state(ptr); } + +[[cpp11::register]] +cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, + std::string algorithm) { + cpp11::sexp ret; + + using namespace dust::random; + if (algorithm == "xoshiro256starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro256plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro256plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else { + cpp11::stop("Unknown algorithm '%s'", algorithm.c_str()); + } + + return ret; +} diff --git a/vignettes_src/rng_pi_dust.cpp b/vignettes_src/rng_pi_dust.cpp new file mode 100644 index 000000000..71d1e6791 --- /dev/null +++ b/vignettes_src/rng_pi_dust.cpp @@ -0,0 +1,19 @@ +#include +#include + +[[cpp11::linking_to(dust)]] +[[cpp11::register]] +double pi_dust(int n, cpp11::sexp ptr) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + auto& state = rng->state(0); + int tot = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + return tot / static_cast(n) * 4.0; +} diff --git a/vignettes_src/rng_r_api.cpp b/vignettes_src/rng_r_api.cpp new file mode 100644 index 000000000..5555315fb --- /dev/null +++ b/vignettes_src/rng_r_api.cpp @@ -0,0 +1,17 @@ +#include +#include + +[[cpp11::register]] +double pi_r_api(int n) { + int tot = 0; + GetRNGstate(); + for (int i = 0; i < n; ++i) { + const double u1 = unif_rand(); + const double u2 = unif_rand(); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + PutRNGstate(); + return tot / static_cast(n) * 4.0; +} diff --git a/vignettes_src/rng_use.Rmd b/vignettes_src/rng_use.Rmd new file mode 100644 index 000000000..658e11f0b --- /dev/null +++ b/vignettes_src/rng_use.Rmd @@ -0,0 +1,65 @@ +--- +title: "Using RNGs from packages" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using RNGs from packages} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + error = FALSE, + collapse = TRUE, + comment = "#>" +) +lang_output <- function(x, lang) { + cat(c(sprintf("```%s", lang), x, "```"), sep = "\n") +} +cc_output <- function(x) lang_output(x, "cc") +r_output <- function(x) lang_output(x, "r") +plain_output <- function(x) lang_output(x, "plain") +``` + +The dust random number generators are suitable for use from other packages and we provide a few helpers in both R and C++ to make this easier. + +For illustration purposes we will assume that you want to estimate pi using rejection sampling. The basic idea of this algorithm is simple; sample two U(0, 1) points `x` and `y` and test if they lie in the unit circle (i.e. `sqrt(x^2 + x^2) < 1`) giving the ratio of the area of a unit circle to a square. Multiplying the fraction of the points that we accept by 4 yields our estimate of pi. + +## Background using R's random number generator + +First, an example that uses R's API for example (note that while the R API is C, we're using the cpp11 package interface here so that the following examples are similar): + +```{r, results = "asis", echo = FALSE} +cc_output(readLines("rng-r_api.cpp")) +``` + +With cpp11 we can load this with `cpp11::cpp_source` + +```{r} +cpp11::cpp_source("rng_r_api.cpp") +``` + +and then run it wih + +```{r} +pi_r_api(1e6) +``` + +The key bits within the code above are that we: + +* included random number support from R via the `R_ext/Random.h` header +* initialised the state of the global random number stream within our C++ context with `GetRNGstate` before drawing any random numbers +* drew some possible large number of numbers using `unif_rand` +* restored the random number state back to R. + +Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not behaving properly. This is explained in detail in the "Writing R Extensions" manual. + +## Implementation using dust + + +obj <- dust:::dust_rng_pointer$new() + +cpp11::cpp_source("rng_pi_dust.cpp", quiet = FALSE) +pi_dust(1e6, obj$ptr) From 7bbbec75faebd6c1f52c7df5f34cd44395918fa3 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 11:56:51 +0000 Subject: [PATCH 02/27] Start on a nice interface for pointers --- R/cpp11.R | 8 +++ R/dust.R | 2 +- R/rng.R | 18 +++++-- inst/include/dust/r/random.hpp | 94 ++++++++++++++++++++++++++++++---- src/cpp11.cpp | 17 ++++++ src/dust_rng.cpp | 33 ++++++++++++ src/sir.cpp | 2 +- src/sirs.cpp | 2 +- src/test_rng.cpp | 21 +++++++- src/variable.cpp | 2 +- src/volatility.cpp | 2 +- src/walk.cpp | 2 +- vignettes_src/rng_use.Rmd | 2 +- 13 files changed, 184 insertions(+), 21 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 26e4829d8..49c3d49eb 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -76,6 +76,10 @@ dust_rng_pointer_init <- function(n_streams, seed, algorithm) { .Call(`_dust_dust_rng_pointer_init`, n_streams, seed, algorithm) } +dust_rng_pointer_sync <- function(obj) { + invisible(.Call(`_dust_dust_rng_pointer_sync`, obj)) +} + cpp_openmp_info <- function() { .Call(`_dust_cpp_openmp_info`) } @@ -296,6 +300,10 @@ test_xoshiro_run <- function(name) { .Call(`_dust_test_xoshiro_run`, name) } +pi_dust <- function(n, ptr) { + .Call(`_dust_pi_dust`, n, ptr) +} + cpp_scale_log_weights <- function(w) { .Call(`_dust_cpp_scale_log_weights`, w) } diff --git a/R/dust.R b/R/dust.R index 55698f97b..5e5628628 100644 --- a/R/dust.R +++ b/R/dust.R @@ -1,4 +1,4 @@ -## Generated by dust (version 0.11.4) - do not edit +## Generated by dust (version 0.11.6) - do not edit sir <- R6::R6Class( "dust", cloneable = FALSE, diff --git a/R/rng.R b/R/rng.R index 9ae3bdcc1..08c4fc087 100644 --- a/R/rng.R +++ b/R/rng.R @@ -363,14 +363,26 @@ dust_rng_state_long_jump <- function(state, times = 1L) { ## TODO: keep a copy of the full state as a raw vector so we survive ## serialisation ## TODO: jump support +## TODO: clone +## TODO: sync +## TODO: work out how to access private from C? dust_rng_pointer <- R6::R6Class( "dust_rng_pointer", public = list( ptr = NULL, + algorithm = NULL, + state = NULL, + current = NULL, initialize = function(seed = NULL, n_streams = 1L, algorithm = "xoshiro256plus") { - self$ptr <- dust_rng_pointer_init(n_streams, seed, algorithm) + dat <- dust_rng_pointer_init(n_streams, seed, algorithm) + self$ptr <- dat[[1]] + self$state <- dat[[2]] + self$algorithm <- algorithm + self$current <- TRUE + }, + + sync = function() { + dust_rng_pointer_sync(self) } )) - - diff --git a/inst/include/dust/r/random.hpp b/inst/include/dust/r/random.hpp index 0d24873a5..90a28948c 100644 --- a/inst/include/dust/r/random.hpp +++ b/inst/include/dust/r/random.hpp @@ -3,7 +3,10 @@ #include // memcpy +#include +#include #include + #include #include "dust/random/generator.hpp" @@ -42,27 +45,98 @@ std::vector as_rng_seed(cpp11::sexp r_seed) { return seed; } +namespace { + +template +std::string algorithm_name() { + std::string ret; + if (std::is_same::value) { + ret = "xoshiro128plus"; + } else if (std::is_same::value) { + ret = "xoshiro128plusplus"; + } else if (std::is_same::value) { + ret = "xoshiro128starstar"; + } else if (std::is_same::value) { + ret = "xoroshiro128plus"; + } else if (std::is_same::value) { + ret = "xoroshiro128plusplus"; + } else if (std::is_same::value) { + ret = "xoroshiro128starstar"; + } else if (std::is_same::value) { + ret = "xoshiro256plus"; + } else if (std::is_same::value) { + ret = "xoshiro256plusplus"; + } else if (std::is_same::value) { + ret = "xoshiro256starstar"; + } else if (std::is_same::value) { + ret = "xoshiro512plus"; + } else if (std::is_same::value) { + ret = "xoshiro512plusplus"; + } else if (std::is_same::value) { + ret = "xoshiro512starstar"; + } + return ret; +} + +template +cpp11::raws rng_state_vector(prng* rng) { + auto state = rng->export_state(); + size_t len = sizeof(typename rng_state_type::int_type) * state.size(); + cpp11::writable::raws r_state(len); + std::memcpy(RAW(r_state), state.data(), len); + return r_state; +} + +} + template SEXP rng_pointer_init(int n_streams, cpp11::sexp r_seed) { auto seed = as_rng_seed(r_seed); auto *rng = new prng(n_streams, seed); - auto ret = cpp11::external_pointer>(rng); - return ret; + auto r_ptr = cpp11::external_pointer>(rng); + auto r_state = rng_state_vector(rng); + return cpp11::writable::list({r_ptr, r_state}); } +// Start with the assumption that we'll pass in the R6 object, might +// write a simpler version later. template -prng* rng_pointer_get(cpp11::sexp ptr, int n_streams = 0) { - auto *rng = - cpp11::as_cpp>>(ptr).get(); - if (n_streams > 0) { - if (static_cast(rng->size()) < n_streams) { - cpp11::stop("Requested a rng with %d streams but only have %d", - n_streams, rng->size()); - } +prng* rng_pointer_get(cpp11::environment obj, + int n_streams = 0) { + // We could probably do this more efficiently if we store an enum + // in the object but this is probably ok. + const auto algorithm_given = cpp11::as_cpp(obj["algorithm"]); + const auto algorithm_expected = algorithm_name(); + if (algorithm_given != algorithm_expected) { + cpp11::stop("Incorrect rng type: given %s, expected %s", + algorithm_given.c_str(), algorithm_expected.c_str()); + } + + using ptr_type = cpp11::external_pointer>; + auto ptr = cpp11::as_cpp(obj["ptr"]); + + auto * rng = ptr.get(); + if (rng == nullptr) { + cpp11::stop("pointer needs rebuilding"); } + + if (n_streams > 0 && static_cast(rng->size()) < n_streams) { + cpp11::stop("Requested a rng with %d streams but only have %d", + n_streams, rng->size()); + } + obj["current"] = cpp11::as_sexp(false); + return rng; } +template +void rng_pointer_sync(cpp11::environment obj) { + using ptr_type = cpp11::external_pointer>; + auto ptr = cpp11::as_cpp(obj["ptr"]); + obj["state"] = rng_state_vector(ptr.get()); + obj["current"] = cpp11::as_sexp(true); +} + } } } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index da718afc7..5f64b3eea 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -140,6 +140,14 @@ extern "C" SEXP _dust_dust_rng_pointer_init(SEXP n_streams, SEXP seed, SEXP algo return cpp11::as_sexp(dust_rng_pointer_init(cpp11::as_cpp>(n_streams), cpp11::as_cpp>(seed), cpp11::as_cpp>(algorithm))); END_CPP11 } +// dust_rng.cpp +void dust_rng_pointer_sync(cpp11::environment obj); +extern "C" SEXP _dust_dust_rng_pointer_sync(SEXP obj) { + BEGIN_CPP11 + dust_rng_pointer_sync(cpp11::as_cpp>(obj)); + return R_NilValue; + END_CPP11 +} // openmp.cpp cpp11::writable::list cpp_openmp_info(); extern "C" SEXP _dust_cpp_openmp_info() { @@ -531,6 +539,13 @@ extern "C" SEXP _dust_test_xoshiro_run(SEXP name) { return cpp11::as_sexp(test_xoshiro_run(cpp11::as_cpp>(name))); END_CPP11 } +// test_rng.cpp +double pi_dust(int n, cpp11::sexp ptr); +extern "C" SEXP _dust_pi_dust(SEXP n, SEXP ptr) { + BEGIN_CPP11 + return cpp11::as_sexp(pi_dust(cpp11::as_cpp>(n), cpp11::as_cpp>(ptr))); + END_CPP11 +} // tools.cpp cpp11::list cpp_scale_log_weights(std::vector w); extern "C" SEXP _dust_cpp_scale_log_weights(SEXP w) { @@ -1167,6 +1182,7 @@ static const R_CallMethodDef CallEntries[] = { {"_dust_dust_rng_multinomial", (DL_FUNC) &_dust_dust_rng_multinomial, 6}, {"_dust_dust_rng_normal", (DL_FUNC) &_dust_dust_rng_normal, 7}, {"_dust_dust_rng_pointer_init", (DL_FUNC) &_dust_dust_rng_pointer_init, 3}, + {"_dust_dust_rng_pointer_sync", (DL_FUNC) &_dust_dust_rng_pointer_sync, 1}, {"_dust_dust_rng_poisson", (DL_FUNC) &_dust_dust_rng_poisson, 5}, {"_dust_dust_rng_random_normal", (DL_FUNC) &_dust_dust_rng_random_normal, 5}, {"_dust_dust_rng_random_real", (DL_FUNC) &_dust_dust_rng_random_real, 4}, @@ -1182,6 +1198,7 @@ static const R_CallMethodDef CallEntries[] = { {"_dust_dust_volatility_gpu_info", (DL_FUNC) &_dust_dust_volatility_gpu_info, 0}, {"_dust_dust_walk_capabilities", (DL_FUNC) &_dust_dust_walk_capabilities, 0}, {"_dust_dust_walk_gpu_info", (DL_FUNC) &_dust_dust_walk_gpu_info, 0}, + {"_dust_pi_dust", (DL_FUNC) &_dust_pi_dust, 2}, {"_dust_test_cuda_pars", (DL_FUNC) &_dust_test_cuda_pars, 9}, {"_dust_test_xoshiro_run", (DL_FUNC) &_dust_test_xoshiro_run, 1}, {NULL, NULL, 0} diff --git a/src/dust_rng.cpp b/src/dust_rng.cpp index cd241c616..387dde6d1 100644 --- a/src/dust_rng.cpp +++ b/src/dust_rng.cpp @@ -574,3 +574,36 @@ cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, return ret; } + +[[cpp11::register]] +void dust_rng_pointer_sync(cpp11::environment obj) { + const auto algorithm = cpp11::as_cpp(obj["algorithm"]); + using namespace dust::random; + if (algorithm == "xoshiro256starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro256plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro256plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512plus") { + r::rng_pointer_sync(obj); + } else { + cpp11::stop("Unknown algorithm '%s'", algorithm.c_str()); + } +} diff --git a/src/sir.cpp b/src/sir.cpp index 1c3445601..1417e5729 100644 --- a/src/sir.cpp +++ b/src/sir.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/src/sirs.cpp b/src/sirs.cpp index d7941b1dd..54262ced3 100644 --- a/src/sirs.cpp +++ b/src/sirs.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/src/test_rng.cpp b/src/test_rng.cpp index a37ac8013..76fad2fe6 100644 --- a/src/test_rng.cpp +++ b/src/test_rng.cpp @@ -5,7 +5,7 @@ #include #include - +#include template std::string to_string(const T& t) { std::ostringstream ss; @@ -64,3 +64,22 @@ std::vector test_xoshiro_run(std::string name) { return ret; } + + +// This is copied over from our example, we'll tidy this up later, but +// we need something that can be easily tested for now. +[[cpp11::register]] +double pi_dust(int n, cpp11::sexp ptr) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + auto& state = rng->state(0); + int tot = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + return tot / static_cast(n) * 4.0; +} diff --git a/src/variable.cpp b/src/variable.cpp index 7dbdc8e2c..f42f7f5b6 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/src/volatility.cpp b/src/volatility.cpp index cfe826068..129cb6e5c 100644 --- a/src/volatility.cpp +++ b/src/volatility.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/src/walk.cpp b/src/walk.cpp index 997a5a898..ba8f0f878 100644 --- a/src/walk.cpp +++ b/src/walk.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/vignettes_src/rng_use.Rmd b/vignettes_src/rng_use.Rmd index 658e11f0b..523b5e99d 100644 --- a/vignettes_src/rng_use.Rmd +++ b/vignettes_src/rng_use.Rmd @@ -62,4 +62,4 @@ Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not b obj <- dust:::dust_rng_pointer$new() cpp11::cpp_source("rng_pi_dust.cpp", quiet = FALSE) -pi_dust(1e6, obj$ptr) +pi_dust(1e3, obj) From 872eb5565ab52214076da645b85cfc83a2784be4 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 13:29:18 +0000 Subject: [PATCH 03/27] Gracefully cope with pointer serialisation --- inst/include/dust/r/random.hpp | 12 +++++++- tests/testthat/helper-dust.R | 7 +++++ tests/testthat/test-rng-interface.R | 45 +++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/test-rng-interface.R diff --git a/inst/include/dust/r/random.hpp b/inst/include/dust/r/random.hpp index 90a28948c..aa103f4d1 100644 --- a/inst/include/dust/r/random.hpp +++ b/inst/include/dust/r/random.hpp @@ -117,7 +117,17 @@ prng* rng_pointer_get(cpp11::environment obj, auto * rng = ptr.get(); if (rng == nullptr) { - cpp11::stop("pointer needs rebuilding"); + if (!cpp11::as_cpp(obj["current"])) { + cpp11::stop("Can't unserialise an rng pointer that was not synced"); + } + + using int_type = typename rng_state_type::int_type; + cpp11::raws seed_data = cpp11::as_cpp(obj["state"]); + std::vector seed(seed_data.size() / sizeof(int_type)); + std::memcpy(seed.data(), RAW(seed_data), seed_data.size()); + const auto n_streams_orig = seed.size() / rng_state_type::size(); + rng = new prng(n_streams_orig, seed); + obj["ptr"] = cpp11::external_pointer>(rng); } if (n_streams > 0 && static_cast(rng->size()) < n_streams) { diff --git a/tests/testthat/helper-dust.R b/tests/testthat/helper-dust.R index 9466a318a..e2cd04b31 100644 --- a/tests/testthat/helper-dust.R +++ b/tests/testthat/helper-dust.R @@ -54,3 +54,10 @@ copy_directory <- function(src, as) { stop("Error copying files") } } + + +## Serialising and restoring an external pointer replaces the pointer +## with one to NULL. +corrupt_pointer <- function(x) { + unserialize(serialize(x, NULL)) +} diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R new file mode 100644 index 000000000..35fa0b094 --- /dev/null +++ b/tests/testthat/test-rng-interface.R @@ -0,0 +1,45 @@ +test_that("Can create pointer object", { + obj <- dust_rng_pointer$new() + expect_true(obj$current) + expect_type(obj$state, "raw") + expect_length(obj$state, 32) + expect_equal(obj$algorithm, "xoshiro256plus") + r <- obj$state + obj$sync() + expect_identical(r, obj$state) +}) + + +test_that("Using object requires sync", { + obj <- dust_rng_pointer$new() + expect_true(obj$current) + r <- obj$state + pi_dust(100, obj) + expect_false(obj$current) + expect_identical(obj$state, r) + obj$sync() + expect_true(obj$current) + expect_false(identical(obj$state, r)) +}) + + +test_that("Invalidated pointers can be rebuilt", { + obj1 <- dust_rng_pointer$new() + obj2 <- corrupt_pointer(obj1) + expect_equal( + pi_dust(100, obj2), + pi_dust(100, obj1)) + + ## This saves that the pointer is invalid: + obj3 <- corrupt_pointer(obj2) + expect_error( + pi_dust(100, obj3), + "Can't unserialise an rng pointer that was not synced") + + ## But if we sync things it's ok: + obj2$sync() + obj4 <- corrupt_pointer(obj2) + expect_equal( + pi_dust(100, obj4), + pi_dust(100, obj1)) +}) From e6f9d7f2878933c82ba0fff51663d2b6b45c0ae0 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 13:40:36 +0000 Subject: [PATCH 04/27] Add some encapsulation --- R/rng.R | 43 +++++++++++++++++------------ inst/include/dust/r/random.hpp | 23 +++++++++------ src/dust_rng.cpp | 2 +- tests/testthat/test-rng-interface.R | 24 ++++++++-------- 4 files changed, 53 insertions(+), 39 deletions(-) diff --git a/R/rng.R b/R/rng.R index 08c4fc087..e807811e9 100644 --- a/R/rng.R +++ b/R/rng.R @@ -358,31 +358,40 @@ dust_rng_state_long_jump <- function(state, times = 1L) { } -## TODO: store the algorithm alongside the pointer and validate that -## TODO: keep track of if we we're current or not -## TODO: keep a copy of the full state as a raw vector so we survive -## serialisation -## TODO: jump support -## TODO: clone -## TODO: sync -## TODO: work out how to access private from C? dust_rng_pointer <- R6::R6Class( "dust_rng_pointer", + cloneable = FALSE, + + private = list( + ptr_ = NULL, + algorithm_ = NULL, + state_ = NULL, + is_current_ = NULL + ), + public = list( - ptr = NULL, - algorithm = NULL, - state = NULL, - current = NULL, initialize = function(seed = NULL, n_streams = 1L, algorithm = "xoshiro256plus") { dat <- dust_rng_pointer_init(n_streams, seed, algorithm) - self$ptr <- dat[[1]] - self$state <- dat[[2]] - self$algorithm <- algorithm - self$current <- TRUE + private$ptr_ <- dat[[1L]] + private$state_ <- dat[[2L]] + private$algorithm_ <- algorithm + private$is_current_ <- TRUE }, sync = function() { - dust_rng_pointer_sync(self) + dust_rng_pointer_sync(private) + }, + + state = function() { + private$state_ + }, + + is_current = function() { + private$is_current_ + }, + + algorithm = function() { + private$algorithm_ } )) diff --git a/inst/include/dust/r/random.hpp b/inst/include/dust/r/random.hpp index aa103f4d1..752b96384 100644 --- a/inst/include/dust/r/random.hpp +++ b/inst/include/dust/r/random.hpp @@ -103,9 +103,14 @@ SEXP rng_pointer_init(int n_streams, cpp11::sexp r_seed) { template prng* rng_pointer_get(cpp11::environment obj, int n_streams = 0) { + cpp11::environment env_enclos = + cpp11::as_cpp(obj[".__enclos_env__"]); + cpp11::environment env = + cpp11::as_cpp(env_enclos["private"]); + // We could probably do this more efficiently if we store an enum // in the object but this is probably ok. - const auto algorithm_given = cpp11::as_cpp(obj["algorithm"]); + const auto algorithm_given = cpp11::as_cpp(env["algorithm_"]); const auto algorithm_expected = algorithm_name(); if (algorithm_given != algorithm_expected) { cpp11::stop("Incorrect rng type: given %s, expected %s", @@ -113,28 +118,28 @@ prng* rng_pointer_get(cpp11::environment obj, } using ptr_type = cpp11::external_pointer>; - auto ptr = cpp11::as_cpp(obj["ptr"]); + auto ptr = cpp11::as_cpp(env["ptr_"]); auto * rng = ptr.get(); if (rng == nullptr) { - if (!cpp11::as_cpp(obj["current"])) { + if (!cpp11::as_cpp(env["is_current_"])) { cpp11::stop("Can't unserialise an rng pointer that was not synced"); } using int_type = typename rng_state_type::int_type; - cpp11::raws seed_data = cpp11::as_cpp(obj["state"]); + cpp11::raws seed_data = cpp11::as_cpp(env["state_"]); std::vector seed(seed_data.size() / sizeof(int_type)); std::memcpy(seed.data(), RAW(seed_data), seed_data.size()); const auto n_streams_orig = seed.size() / rng_state_type::size(); rng = new prng(n_streams_orig, seed); - obj["ptr"] = cpp11::external_pointer>(rng); + env["ptr_"] = cpp11::external_pointer>(rng); } if (n_streams > 0 && static_cast(rng->size()) < n_streams) { cpp11::stop("Requested a rng with %d streams but only have %d", n_streams, rng->size()); } - obj["current"] = cpp11::as_sexp(false); + env["is_current_"] = cpp11::as_sexp(false); return rng; } @@ -142,9 +147,9 @@ prng* rng_pointer_get(cpp11::environment obj, template void rng_pointer_sync(cpp11::environment obj) { using ptr_type = cpp11::external_pointer>; - auto ptr = cpp11::as_cpp(obj["ptr"]); - obj["state"] = rng_state_vector(ptr.get()); - obj["current"] = cpp11::as_sexp(true); + auto ptr = cpp11::as_cpp(obj["ptr_"]); + obj["state_"] = rng_state_vector(ptr.get()); + obj["is_current_"] = cpp11::as_sexp(true); } } diff --git a/src/dust_rng.cpp b/src/dust_rng.cpp index 387dde6d1..98073ccc9 100644 --- a/src/dust_rng.cpp +++ b/src/dust_rng.cpp @@ -577,7 +577,7 @@ cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, [[cpp11::register]] void dust_rng_pointer_sync(cpp11::environment obj) { - const auto algorithm = cpp11::as_cpp(obj["algorithm"]); + const auto algorithm = cpp11::as_cpp(obj["algorithm_"]); using namespace dust::random; if (algorithm == "xoshiro256starstar") { r::rng_pointer_sync(obj); diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R index 35fa0b094..272f5e92f 100644 --- a/tests/testthat/test-rng-interface.R +++ b/tests/testthat/test-rng-interface.R @@ -1,25 +1,25 @@ test_that("Can create pointer object", { obj <- dust_rng_pointer$new() - expect_true(obj$current) - expect_type(obj$state, "raw") - expect_length(obj$state, 32) - expect_equal(obj$algorithm, "xoshiro256plus") - r <- obj$state + expect_true(obj$is_current()) + expect_type(obj$state(), "raw") + expect_length(obj$state(), 32) + expect_equal(obj$algorithm(), "xoshiro256plus") + r <- obj$state() obj$sync() - expect_identical(r, obj$state) + expect_identical(r, obj$state()) }) test_that("Using object requires sync", { obj <- dust_rng_pointer$new() - expect_true(obj$current) - r <- obj$state + expect_true(obj$is_current()) + r <- obj$state() pi_dust(100, obj) - expect_false(obj$current) - expect_identical(obj$state, r) + expect_false(obj$is_current()) + expect_identical(obj$state(), r) obj$sync() - expect_true(obj$current) - expect_false(identical(obj$state, r)) + expect_true(obj$is_current()) + expect_false(identical(obj$state(), r)) }) From 4e858cb46562600c807514f2d9a0d96ecea0942c Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 13:46:39 +0000 Subject: [PATCH 05/27] Start adding docs --- NAMESPACE | 1 + R/rng.R | 29 ++++++++++++ man/dust_rng_pointer.Rd | 102 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 man/dust_rng_pointer.Rd diff --git a/NAMESPACE b/NAMESPACE index 0521a3343..df5069786 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(dust_openmp_support) export(dust_openmp_threads) export(dust_package) export(dust_rng) +export(dust_rng_pointer) export(dust_rng_state_long_jump) importFrom(stats,coef) useDynLib(dust, .registration = TRUE) diff --git a/R/rng.R b/R/rng.R index e807811e9..6b9da56bf 100644 --- a/R/rng.R +++ b/R/rng.R @@ -358,6 +358,13 @@ dust_rng_state_long_jump <- function(state, times = 1L) { } +##' @title Create pointer to random number generator +##' +##' For external use, vignette coming soon. +##' +##' @export +##' @examples +##' dust::dust_rng_pointer$new() dust_rng_pointer <- R6::R6Class( "dust_rng_pointer", cloneable = FALSE, @@ -370,6 +377,16 @@ dust_rng_pointer <- R6::R6Class( ), public = list( + ##' @description Create a new `dust_rng_pointer` object + ##' + ##' @param seed The random number seed to use (see [dust::dust_rng] + ##' for details) + ##' + ##' @param n_streams The number of independent random number streams to + ##' create + ##' + ##' @param algorithm The random number algorithm to use. The default is + ##' `xoshiro256plus` which is a good general choice initialize = function(seed = NULL, n_streams = 1L, algorithm = "xoshiro256plus") { dat <- dust_rng_pointer_init(n_streams, seed, algorithm) @@ -379,18 +396,30 @@ dust_rng_pointer <- R6::R6Class( private$is_current_ <- TRUE }, + ##' @description Synchronise the R copy of the random number state. + ##' Typically this is only needed before serialisation if you have + ##' ever used the object. sync = function() { dust_rng_pointer_sync(private) }, + ##' @description Return a raw vector of state. This can be used to + ##' create other generators with the same state. state = function() { private$state_ }, + ##' @description Return a logical, indicating if the random number + ##' state that would be returned by `state()` is "current" (i.e., the + ##' same as the copy held in the pointer) or not. This is `TRUE` on + ##' creation or immediately after calling `$sync()` and `FALSE` after + ##' any use of the pointer. is_current = function() { private$is_current_ }, + ##' @description Return a string with the name of the generator + ##' algorithm used. algorithm = function() { private$algorithm_ } diff --git a/man/dust_rng_pointer.Rd b/man/dust_rng_pointer.Rd new file mode 100644 index 000000000..bae51da63 --- /dev/null +++ b/man/dust_rng_pointer.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rng.R +\name{dust_rng_pointer} +\alias{dust_rng_pointer} +\title{Create pointer to random number generator + +For external use, vignette coming soon.} +\description{ +Create pointer to random number generator + +For external use, vignette coming soon. + +Create pointer to random number generator + +For external use, vignette coming soon. +} +\examples{ +dust::dust_rng_pointer$new() +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-new}{\code{dust_rng_pointer$new()}} +\item \href{#method-sync}{\code{dust_rng_pointer$sync()}} +\item \href{#method-state}{\code{dust_rng_pointer$state()}} +\item \href{#method-is_current}{\code{dust_rng_pointer$is_current()}} +\item \href{#method-algorithm}{\code{dust_rng_pointer$algorithm()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-new}{}}} +\subsection{Method \code{new()}}{ +Create a new \code{dust_rng_pointer} object +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$new(seed = NULL, n_streams = 1L, algorithm = "xoshiro256plus")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{seed}}{The random number seed to use (see \link{dust_rng} +for details)} + +\item{\code{n_streams}}{The number of independent random number streams to +create} + +\item{\code{algorithm}}{The random number algorithm to use. The default is +\code{xoshiro256plus} which is a good general choice} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-sync}{}}} +\subsection{Method \code{sync()}}{ +Synchronise the R copy of the random number state. +Typically this is only needed before serialisation if you have +ever used the object. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$sync()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-state}{}}} +\subsection{Method \code{state()}}{ +Return a raw vector of state. This can be used to +create other generators with the same state. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$state()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-is_current}{}}} +\subsection{Method \code{is_current()}}{ +Return a logical, indicating if the random number +state that would be returned by \code{state()} is "current" (i.e., the +same as the copy held in the pointer) or not. This is \code{TRUE} on +creation or immediately after calling \verb{$sync()} and \code{FALSE} after +any use of the pointer. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$is_current()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-algorithm}{}}} +\subsection{Method \code{algorithm()}}{ +Return a string with the name of the generator +algorithm used. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$algorithm()}\if{html}{\out{
}} +} + +} +} From 0b99e9710dfc6d70298f02371a84a8b59491413f Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 16:16:31 +0000 Subject: [PATCH 06/27] Tidy up examples --- R/cpp11.R | 8 +-- src/cpp11.cpp | 14 ++--- src/test_rng.cpp | 82 +++++++++++++---------------- tests/testthat/test-rng-interface.R | 12 ++--- tests/testthat/test-xoshiro.R | 3 +- 5 files changed, 49 insertions(+), 70 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 49c3d49eb..298823c55 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -296,12 +296,8 @@ test_cuda_pars <- function(r_gpu_config, n_particles, n_particles_each, n_state, .Call(`_dust_test_cuda_pars`, r_gpu_config, n_particles, n_particles_each, n_state, n_state_full, n_shared_int, n_shared_real, data_size, shared_size) } -test_xoshiro_run <- function(name) { - .Call(`_dust_test_xoshiro_run`, name) -} - -pi_dust <- function(n, ptr) { - .Call(`_dust_pi_dust`, n, ptr) +test_xoshiro_run <- function(obj) { + .Call(`_dust_test_xoshiro_run`, obj) } cpp_scale_log_weights <- function(w) { diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 5f64b3eea..e3b077dd9 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -533,17 +533,10 @@ extern "C" SEXP _dust_test_cuda_pars(SEXP r_gpu_config, SEXP n_particles, SEXP n END_CPP11 } // test_rng.cpp -std::vector test_xoshiro_run(std::string name); -extern "C" SEXP _dust_test_xoshiro_run(SEXP name) { +std::vector test_xoshiro_run(cpp11::environment obj); +extern "C" SEXP _dust_test_xoshiro_run(SEXP obj) { BEGIN_CPP11 - return cpp11::as_sexp(test_xoshiro_run(cpp11::as_cpp>(name))); - END_CPP11 -} -// test_rng.cpp -double pi_dust(int n, cpp11::sexp ptr); -extern "C" SEXP _dust_pi_dust(SEXP n, SEXP ptr) { - BEGIN_CPP11 - return cpp11::as_sexp(pi_dust(cpp11::as_cpp>(n), cpp11::as_cpp>(ptr))); + return cpp11::as_sexp(test_xoshiro_run(cpp11::as_cpp>(obj))); END_CPP11 } // tools.cpp @@ -1198,7 +1191,6 @@ static const R_CallMethodDef CallEntries[] = { {"_dust_dust_volatility_gpu_info", (DL_FUNC) &_dust_dust_volatility_gpu_info, 0}, {"_dust_dust_walk_capabilities", (DL_FUNC) &_dust_dust_walk_capabilities, 0}, {"_dust_dust_walk_gpu_info", (DL_FUNC) &_dust_dust_walk_gpu_info, 0}, - {"_dust_pi_dust", (DL_FUNC) &_dust_pi_dust, 2}, {"_dust_test_cuda_pars", (DL_FUNC) &_dust_test_cuda_pars, 9}, {"_dust_test_xoshiro_run", (DL_FUNC) &_dust_test_xoshiro_run, 1}, {NULL, NULL, 0} diff --git a/src/test_rng.cpp b/src/test_rng.cpp index 76fad2fe6..e743de54d 100644 --- a/src/test_rng.cpp +++ b/src/test_rng.cpp @@ -14,8 +14,9 @@ std::string to_string(const T& t) { } template -std::vector test_xoshiro_run1() { - T state = dust::random::seed(42); +std::vector test_xoshiro_run1(cpp11::environment ptr) { + auto rng = dust::random::r::rng_pointer_get(ptr, 1); + auto& state = rng->state(0); constexpr int n = 10; @@ -34,52 +35,41 @@ std::vector test_xoshiro_run1() { } [[cpp11::register]] -std::vector test_xoshiro_run(std::string name) { +std::vector test_xoshiro_run(cpp11::environment obj) { + // TODO: consider making algorithm a r-o field in self, and + // algorithm_ a field in private? + cpp11::environment env_enclos = + cpp11::as_cpp(obj[".__enclos_env__"]); + cpp11::environment env = + cpp11::as_cpp(env_enclos["private"]); + + const auto algorithm = cpp11::as_cpp(env["algorithm_"]); std::vector ret; - if (name == "xoshiro256starstar") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro256plusplus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro256plus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro128starstar") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro128plusplus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro128plus") { - ret = test_xoshiro_run1(); - } else if (name == "xoroshiro128starstar") { - ret = test_xoshiro_run1(); - } else if (name == "xoroshiro128plusplus") { - ret = test_xoshiro_run1(); - } else if (name == "xoroshiro128plus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro512starstar") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro512plusplus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro512plus") { - ret = test_xoshiro_run1(); + if (algorithm == "xoshiro256starstar") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro256plusplus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro256plus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro128starstar") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro128plusplus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro128plus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoroshiro128starstar") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoroshiro128plusplus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoroshiro128plus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro512starstar") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro512plusplus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro512plus") { + ret = test_xoshiro_run1(obj); } return ret; } - - -// This is copied over from our example, we'll tidy this up later, but -// we need something that can be easily tested for now. -[[cpp11::register]] -double pi_dust(int n, cpp11::sexp ptr) { - auto rng = - dust::random::r::rng_pointer_get(ptr); - auto& state = rng->state(0); - int tot = 0; - for (int i = 0; i < n; ++i) { - const double u1 = dust::random::random_real(state); - const double u2 = dust::random::random_real(state); - if (u1 * u1 + u2 * u2 < 1) { - tot++; - } - } - return tot / static_cast(n) * 4.0; -} diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R index 272f5e92f..5de546d6a 100644 --- a/tests/testthat/test-rng-interface.R +++ b/tests/testthat/test-rng-interface.R @@ -14,7 +14,7 @@ test_that("Using object requires sync", { obj <- dust_rng_pointer$new() expect_true(obj$is_current()) r <- obj$state() - pi_dust(100, obj) + test_xoshiro_run(obj) expect_false(obj$is_current()) expect_identical(obj$state(), r) obj$sync() @@ -27,19 +27,19 @@ test_that("Invalidated pointers can be rebuilt", { obj1 <- dust_rng_pointer$new() obj2 <- corrupt_pointer(obj1) expect_equal( - pi_dust(100, obj2), - pi_dust(100, obj1)) + test_xoshiro_run(obj2), + test_xoshiro_run(obj1)) ## This saves that the pointer is invalid: obj3 <- corrupt_pointer(obj2) expect_error( - pi_dust(100, obj3), + test_xoshiro_run(obj3), "Can't unserialise an rng pointer that was not synced") ## But if we sync things it's ok: obj2$sync() obj4 <- corrupt_pointer(obj2) expect_equal( - pi_dust(100, obj4), - pi_dust(100, obj1)) + test_xoshiro_run(obj4), + test_xoshiro_run(obj1)) }) diff --git a/tests/testthat/test-xoshiro.R b/tests/testthat/test-xoshiro.R index d0dda85f5..608ea7d07 100644 --- a/tests/testthat/test-xoshiro.R +++ b/tests/testthat/test-xoshiro.R @@ -2,7 +2,8 @@ test_that("xoshiro output agrees with reference data", { path <- "xoshiro-ref" status <- list() for (name in dir(path)) { - res <- test_xoshiro_run(name) + obj <- dust_rng_pointer$new(seed = 42, algorithm = name) + res <- test_xoshiro_run(obj) cmp <- readLines(sprintf("%s/%s", path, name)) expect_equal(res, cmp) } From ec82dcf469b95bbf896bc2a672c504409b87a663 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 16:40:05 +0000 Subject: [PATCH 07/27] Tidy up interface --- R/cpp11.R | 4 +- R/rng.R | 68 ----------------------------- R/rng_pointer.R | 64 +++++++++++++++++++++++++++ inst/include/dust/r/random.hpp | 41 ++++++++--------- man/dust_rng_pointer.Rd | 21 ++++----- src/cpp11.cpp | 8 ++-- src/dust_rng.cpp | 3 +- src/test_rng.cpp | 9 +--- tests/testthat/test-rng-interface.R | 2 +- 9 files changed, 102 insertions(+), 118 deletions(-) create mode 100644 R/rng_pointer.R diff --git a/R/cpp11.R b/R/cpp11.R index 298823c55..1e23f5c8e 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -76,8 +76,8 @@ dust_rng_pointer_init <- function(n_streams, seed, algorithm) { .Call(`_dust_dust_rng_pointer_init`, n_streams, seed, algorithm) } -dust_rng_pointer_sync <- function(obj) { - invisible(.Call(`_dust_dust_rng_pointer_sync`, obj)) +dust_rng_pointer_sync <- function(obj, algorithm) { + invisible(.Call(`_dust_dust_rng_pointer_sync`, obj, algorithm)) } cpp_openmp_info <- function() { diff --git a/R/rng.R b/R/rng.R index 6b9da56bf..f7ef7164b 100644 --- a/R/rng.R +++ b/R/rng.R @@ -356,71 +356,3 @@ dust_rng_state_long_jump <- function(state, times = 1L) { } rng$state() } - - -##' @title Create pointer to random number generator -##' -##' For external use, vignette coming soon. -##' -##' @export -##' @examples -##' dust::dust_rng_pointer$new() -dust_rng_pointer <- R6::R6Class( - "dust_rng_pointer", - cloneable = FALSE, - - private = list( - ptr_ = NULL, - algorithm_ = NULL, - state_ = NULL, - is_current_ = NULL - ), - - public = list( - ##' @description Create a new `dust_rng_pointer` object - ##' - ##' @param seed The random number seed to use (see [dust::dust_rng] - ##' for details) - ##' - ##' @param n_streams The number of independent random number streams to - ##' create - ##' - ##' @param algorithm The random number algorithm to use. The default is - ##' `xoshiro256plus` which is a good general choice - initialize = function(seed = NULL, n_streams = 1L, - algorithm = "xoshiro256plus") { - dat <- dust_rng_pointer_init(n_streams, seed, algorithm) - private$ptr_ <- dat[[1L]] - private$state_ <- dat[[2L]] - private$algorithm_ <- algorithm - private$is_current_ <- TRUE - }, - - ##' @description Synchronise the R copy of the random number state. - ##' Typically this is only needed before serialisation if you have - ##' ever used the object. - sync = function() { - dust_rng_pointer_sync(private) - }, - - ##' @description Return a raw vector of state. This can be used to - ##' create other generators with the same state. - state = function() { - private$state_ - }, - - ##' @description Return a logical, indicating if the random number - ##' state that would be returned by `state()` is "current" (i.e., the - ##' same as the copy held in the pointer) or not. This is `TRUE` on - ##' creation or immediately after calling `$sync()` and `FALSE` after - ##' any use of the pointer. - is_current = function() { - private$is_current_ - }, - - ##' @description Return a string with the name of the generator - ##' algorithm used. - algorithm = function() { - private$algorithm_ - } - )) diff --git a/R/rng_pointer.R b/R/rng_pointer.R new file mode 100644 index 000000000..452e10c43 --- /dev/null +++ b/R/rng_pointer.R @@ -0,0 +1,64 @@ +##' @title Create pointer to random number generator +##' +##' For external use, vignette coming soon. +##' +##' @export +##' @examples +##' dust::dust_rng_pointer$new() +dust_rng_pointer <- R6::R6Class( + "dust_rng_pointer", + cloneable = FALSE, + + private = list( + ptr_ = NULL, + state_ = NULL, + is_current_ = NULL + ), + + public = list( + ##' @field algorithm The name of the generator algorithm used (read-only) + algorithm = NULL, + + ##' @description Create a new `dust_rng_pointer` object + ##' + ##' @param seed The random number seed to use (see [dust::dust_rng] + ##' for details) + ##' + ##' @param n_streams The number of independent random number streams to + ##' create + ##' + ##' @param algorithm The random number algorithm to use. The default is + ##' `xoshiro256plus` which is a good general choice + initialize = function(seed = NULL, n_streams = 1L, + algorithm = "xoshiro256plus") { + dat <- dust_rng_pointer_init(n_streams, seed, algorithm) + private$ptr_ <- dat[[1L]] + private$state_ <- dat[[2L]] + private$is_current_ <- TRUE + + self$algorithm <- algorithm + lockBinding("algorithm", self) + }, + + ##' @description Synchronise the R copy of the random number state. + ##' Typically this is only needed before serialisation if you have + ##' ever used the object. + sync = function() { + dust_rng_pointer_sync(private, self$algorithm) + }, + + ##' @description Return a raw vector of state. This can be used to + ##' create other generators with the same state. + state = function() { + private$state_ + }, + + ##' @description Return a logical, indicating if the random number + ##' state that would be returned by `state()` is "current" (i.e., the + ##' same as the copy held in the pointer) or not. This is `TRUE` on + ##' creation or immediately after calling `$sync()` and `FALSE` after + ##' any use of the pointer. + is_current = function() { + private$is_current_ + } + )) diff --git a/inst/include/dust/r/random.hpp b/inst/include/dust/r/random.hpp index 752b96384..c8a419114 100644 --- a/inst/include/dust/r/random.hpp +++ b/inst/include/dust/r/random.hpp @@ -16,6 +16,14 @@ namespace dust { namespace random { namespace r { +template +std::vector raw_seed(cpp11::raws seed_data) { + using int_type = typename rng_state_type::int_type; + std::vector seed(seed_data.size() / sizeof(int_type)); + std::memcpy(seed.data(), RAW(seed_data), seed_data.size()); + return seed; +} + template std::vector as_rng_seed(cpp11::sexp r_seed) { typedef typename rng_state_type::int_type int_type; @@ -26,13 +34,7 @@ std::vector as_rng_seed(cpp11::sexp r_seed) { seed = dust::random::seed_data(seed_int); } else if (seed_type == RAWSXP) { cpp11::raws seed_data = cpp11::as_cpp(r_seed); - constexpr size_t len = sizeof(int_type) * rng_state_type::size(); - if (seed_data.size() == 0 || seed_data.size() % len != 0) { - cpp11::stop("Expected raw vector of length as multiple of %d for 'seed'", - len); - } - seed.resize(seed_data.size() / sizeof(int_type)); - std::memcpy(seed.data(), RAW(seed_data), seed_data.size()); + seed = raw_seed(seed_data); } else if (seed_type == NILSXP) { GetRNGstate(); size_t seed_int = @@ -103,20 +105,20 @@ SEXP rng_pointer_init(int n_streams, cpp11::sexp r_seed) { template prng* rng_pointer_get(cpp11::environment obj, int n_streams = 0) { - cpp11::environment env_enclos = - cpp11::as_cpp(obj[".__enclos_env__"]); - cpp11::environment env = - cpp11::as_cpp(env_enclos["private"]); - // We could probably do this more efficiently if we store an enum // in the object but this is probably ok. - const auto algorithm_given = cpp11::as_cpp(env["algorithm_"]); + const auto algorithm_given = cpp11::as_cpp(obj["algorithm"]); const auto algorithm_expected = algorithm_name(); if (algorithm_given != algorithm_expected) { cpp11::stop("Incorrect rng type: given %s, expected %s", algorithm_given.c_str(), algorithm_expected.c_str()); } + cpp11::environment env_enclos = + cpp11::as_cpp(obj[".__enclos_env__"]); + cpp11::environment env = + cpp11::as_cpp(env_enclos["private"]); + using ptr_type = cpp11::external_pointer>; auto ptr = cpp11::as_cpp(env["ptr_"]); @@ -125,11 +127,8 @@ prng* rng_pointer_get(cpp11::environment obj, if (!cpp11::as_cpp(env["is_current_"])) { cpp11::stop("Can't unserialise an rng pointer that was not synced"); } - - using int_type = typename rng_state_type::int_type; cpp11::raws seed_data = cpp11::as_cpp(env["state_"]); - std::vector seed(seed_data.size() / sizeof(int_type)); - std::memcpy(seed.data(), RAW(seed_data), seed_data.size()); + auto seed = raw_seed(seed_data); const auto n_streams_orig = seed.size() / rng_state_type::size(); rng = new prng(n_streams_orig, seed); env["ptr_"] = cpp11::external_pointer>(rng); @@ -147,9 +146,11 @@ prng* rng_pointer_get(cpp11::environment obj, template void rng_pointer_sync(cpp11::environment obj) { using ptr_type = cpp11::external_pointer>; - auto ptr = cpp11::as_cpp(obj["ptr_"]); - obj["state_"] = rng_state_vector(ptr.get()); - obj["is_current_"] = cpp11::as_sexp(true); + if (!cpp11::as_cpp(obj["is_current_"])) { + auto ptr = cpp11::as_cpp(obj["ptr_"]); + obj["state_"] = rng_state_vector(ptr.get()); + obj["is_current_"] = cpp11::as_sexp(true); + } } } diff --git a/man/dust_rng_pointer.Rd b/man/dust_rng_pointer.Rd index bae51da63..69c308cdc 100644 --- a/man/dust_rng_pointer.Rd +++ b/man/dust_rng_pointer.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rng.R +% Please edit documentation in R/rng_pointer.R \name{dust_rng_pointer} \alias{dust_rng_pointer} \title{Create pointer to random number generator @@ -17,6 +17,13 @@ For external use, vignette coming soon. \examples{ dust::dust_rng_pointer$new() } +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{algorithm}}{The name of the generator algorithm used (read-only)} +} +\if{html}{\out{
}} +} \section{Methods}{ \subsection{Public methods}{ \itemize{ @@ -24,7 +31,6 @@ dust::dust_rng_pointer$new() \item \href{#method-sync}{\code{dust_rng_pointer$sync()}} \item \href{#method-state}{\code{dust_rng_pointer$state()}} \item \href{#method-is_current}{\code{dust_rng_pointer$is_current()}} -\item \href{#method-algorithm}{\code{dust_rng_pointer$algorithm()}} } } \if{html}{\out{
}} @@ -87,16 +93,5 @@ any use of the pointer. \if{html}{\out{
}}\preformatted{dust_rng_pointer$is_current()}\if{html}{\out{
}} } -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-algorithm}{}}} -\subsection{Method \code{algorithm()}}{ -Return a string with the name of the generator -algorithm used. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{dust_rng_pointer$algorithm()}\if{html}{\out{
}} -} - } } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index e3b077dd9..95c1db487 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -141,10 +141,10 @@ extern "C" SEXP _dust_dust_rng_pointer_init(SEXP n_streams, SEXP seed, SEXP algo END_CPP11 } // dust_rng.cpp -void dust_rng_pointer_sync(cpp11::environment obj); -extern "C" SEXP _dust_dust_rng_pointer_sync(SEXP obj) { +void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm); +extern "C" SEXP _dust_dust_rng_pointer_sync(SEXP obj, SEXP algorithm) { BEGIN_CPP11 - dust_rng_pointer_sync(cpp11::as_cpp>(obj)); + dust_rng_pointer_sync(cpp11::as_cpp>(obj), cpp11::as_cpp>(algorithm)); return R_NilValue; END_CPP11 } @@ -1175,7 +1175,7 @@ static const R_CallMethodDef CallEntries[] = { {"_dust_dust_rng_multinomial", (DL_FUNC) &_dust_dust_rng_multinomial, 6}, {"_dust_dust_rng_normal", (DL_FUNC) &_dust_dust_rng_normal, 7}, {"_dust_dust_rng_pointer_init", (DL_FUNC) &_dust_dust_rng_pointer_init, 3}, - {"_dust_dust_rng_pointer_sync", (DL_FUNC) &_dust_dust_rng_pointer_sync, 1}, + {"_dust_dust_rng_pointer_sync", (DL_FUNC) &_dust_dust_rng_pointer_sync, 2}, {"_dust_dust_rng_poisson", (DL_FUNC) &_dust_dust_rng_poisson, 5}, {"_dust_dust_rng_random_normal", (DL_FUNC) &_dust_dust_rng_random_normal, 5}, {"_dust_dust_rng_random_real", (DL_FUNC) &_dust_dust_rng_random_real, 4}, diff --git a/src/dust_rng.cpp b/src/dust_rng.cpp index 98073ccc9..a313f6f4f 100644 --- a/src/dust_rng.cpp +++ b/src/dust_rng.cpp @@ -576,8 +576,7 @@ cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, } [[cpp11::register]] -void dust_rng_pointer_sync(cpp11::environment obj) { - const auto algorithm = cpp11::as_cpp(obj["algorithm_"]); +void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm) { using namespace dust::random; if (algorithm == "xoshiro256starstar") { r::rng_pointer_sync(obj); diff --git a/src/test_rng.cpp b/src/test_rng.cpp index e743de54d..815905227 100644 --- a/src/test_rng.cpp +++ b/src/test_rng.cpp @@ -36,14 +36,7 @@ std::vector test_xoshiro_run1(cpp11::environment ptr) { [[cpp11::register]] std::vector test_xoshiro_run(cpp11::environment obj) { - // TODO: consider making algorithm a r-o field in self, and - // algorithm_ a field in private? - cpp11::environment env_enclos = - cpp11::as_cpp(obj[".__enclos_env__"]); - cpp11::environment env = - cpp11::as_cpp(env_enclos["private"]); - - const auto algorithm = cpp11::as_cpp(env["algorithm_"]); + const auto algorithm = cpp11::as_cpp(obj["algorithm"]); std::vector ret; if (algorithm == "xoshiro256starstar") { ret = test_xoshiro_run1(obj); diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R index 5de546d6a..680287386 100644 --- a/tests/testthat/test-rng-interface.R +++ b/tests/testthat/test-rng-interface.R @@ -3,7 +3,7 @@ test_that("Can create pointer object", { expect_true(obj$is_current()) expect_type(obj$state(), "raw") expect_length(obj$state(), 32) - expect_equal(obj$algorithm(), "xoshiro256plus") + expect_equal(obj$algorithm, "xoshiro256plus") r <- obj$state() obj$sync() expect_identical(r, obj$state()) From bf69eb44de9576ed650f5140fc96a4e996e3e06a Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 16:41:12 +0000 Subject: [PATCH 08/27] Bump version and add news --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6a2b4231c..cfc915d52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: dust Title: Iterate Multiple Realisations of Stochastic Models -Version: 0.11.7 +Version: 0.11.8 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("John", "Lees", role = "aut"), diff --git a/NEWS.md b/NEWS.md index ff1abbfbd..b65e96fb2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# dust 0.11.8 + +* Improved the interface for using dust's random number support from other packages (#329) + # dust 0.11.7 * New polar algorithm for normally distributed random numbers; faster than Box-Muller but slower than Ziggurat From db72276c3a53e8014567cad3cd6b7a4e3b550f6a Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 17:39:16 +0000 Subject: [PATCH 09/27] Expose the number of streams --- R/rng_pointer.R | 6 ++++++ man/dust_rng_pointer.Rd | 3 +++ 2 files changed, 9 insertions(+) diff --git a/R/rng_pointer.R b/R/rng_pointer.R index 452e10c43..99624716d 100644 --- a/R/rng_pointer.R +++ b/R/rng_pointer.R @@ -19,6 +19,10 @@ dust_rng_pointer <- R6::R6Class( ##' @field algorithm The name of the generator algorithm used (read-only) algorithm = NULL, + ##' @field n_streams The number of streams of random numbers provided + ##' (read-only) + n_streams = NULL, + ##' @description Create a new `dust_rng_pointer` object ##' ##' @param seed The random number seed to use (see [dust::dust_rng] @@ -37,7 +41,9 @@ dust_rng_pointer <- R6::R6Class( private$is_current_ <- TRUE self$algorithm <- algorithm + self$n_streams <- n_streams lockBinding("algorithm", self) + lockBinding("n_streams", self) }, ##' @description Synchronise the R copy of the random number state. diff --git a/man/dust_rng_pointer.Rd b/man/dust_rng_pointer.Rd index 69c308cdc..013b79461 100644 --- a/man/dust_rng_pointer.Rd +++ b/man/dust_rng_pointer.Rd @@ -21,6 +21,9 @@ dust::dust_rng_pointer$new() \if{html}{\out{
}} \describe{ \item{\code{algorithm}}{The name of the generator algorithm used (read-only)} + +\item{\code{n_streams}}{The number of streams of random numbers provided +(read-only)} } \if{html}{\out{
}} } From 27a12f1ff2eec345ad68c2043c02cb6d29ab46e2 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 11 Nov 2021 17:39:36 +0000 Subject: [PATCH 10/27] Expand example --- vignettes_src/rng_pi_parallel.cpp | 32 +++++ vignettes_src/{rng_r_api.cpp => rng_pi_r.cpp} | 2 +- vignettes_src/rng_use.Rmd | 119 ++++++++++++++++-- 3 files changed, 145 insertions(+), 8 deletions(-) create mode 100644 vignettes_src/rng_pi_parallel.cpp rename vignettes_src/{rng_r_api.cpp => rng_pi_r.cpp} (92%) diff --git a/vignettes_src/rng_pi_parallel.cpp b/vignettes_src/rng_pi_parallel.cpp new file mode 100644 index 000000000..ba09be323 --- /dev/null +++ b/vignettes_src/rng_pi_parallel.cpp @@ -0,0 +1,32 @@ +#include +#include + +#ifdef _OPENMP +#include +#endif + +[[cpp11::linking_to(dust)]] +[[cpp11::register]] +double pi_dust_parallel(int n, cpp11::sexp ptr, int n_threads) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + const auto n_streams = rng->size(); + int tot = 0; +#ifdef _OPENMP +#pragma omp parallel for schedule(static) num_threads(n_threads) \ + reduction(+:tot) +#endif + for (size_t i = 0; i < n_streams; ++i) { + auto& state = rng->state(0); + int tot_i = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot_i++; + } + } + tot += tot_i; + } + return tot / static_cast(n * n_streams) * 4.0; +} diff --git a/vignettes_src/rng_r_api.cpp b/vignettes_src/rng_pi_r.cpp similarity index 92% rename from vignettes_src/rng_r_api.cpp rename to vignettes_src/rng_pi_r.cpp index 5555315fb..d31f6af06 100644 --- a/vignettes_src/rng_r_api.cpp +++ b/vignettes_src/rng_pi_r.cpp @@ -2,7 +2,7 @@ #include [[cpp11::register]] -double pi_r_api(int n) { +double pi_r(int n) { int tot = 0; GetRNGstate(); for (int i = 0; i < n; ++i) { diff --git a/vignettes_src/rng_use.Rmd b/vignettes_src/rng_use.Rmd index 523b5e99d..b68699e64 100644 --- a/vignettes_src/rng_use.Rmd +++ b/vignettes_src/rng_use.Rmd @@ -32,19 +32,19 @@ For illustration purposes we will assume that you want to estimate pi using reje First, an example that uses R's API for example (note that while the R API is C, we're using the cpp11 package interface here so that the following examples are similar): ```{r, results = "asis", echo = FALSE} -cc_output(readLines("rng-r_api.cpp")) +cc_output(readLines("rng_pi_r.cpp")) ``` With cpp11 we can load this with `cpp11::cpp_source` ```{r} -cpp11::cpp_source("rng_r_api.cpp") +cpp11::cpp_source("rng_pi_r.cpp") ``` and then run it wih ```{r} -pi_r_api(1e6) +pi_r(1e6) ``` The key bits within the code above are that we: @@ -56,10 +56,115 @@ The key bits within the code above are that we: Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not behaving properly. This is explained in detail in the "Writing R Extensions" manual. -## Implementation using dust +## Basic implementation using dust +One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source -obj <- dust:::dust_rng_pointer$new() +```{r} +rng <- dust:::dust_rng_pointer$new() +rng +``` + +This object acts as a "handle" to some random number state that can be passed safely to C++ programs; the state will be updated when the program runs as a side effect. Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use + +```{r} +rng$algorithm +``` + +The default will be suitable for most purposes. + +We can rewrite the pi approximation function as: + +```{r, results = "asis", echo = FALSE} +cc_output(readLines("rng_pi_dust.cpp")) +``` + +This snippet looks much the same as above: + +* We've added `[[cpp::linking_to(dust)]]` and included the dust random interface (`dust/r/random.hpp`) +* The first line of the function safely creates a pointer to the random state data. The template argument here (``) refers to the rng algorithm and matches `rng$algorithm` +* The second line extracts a reference to the first (C++ indexing starting at 0) random number stream - this pair of lines is roughly equivalent to `GetRNGstate()` except that that the random numbers do not come from some global source +* After that the listing proceeds as before proceeds as before, except there is no equivalent to `PutRNGstate()` + + +```{r} +cpp11::cpp_source("rng_pi_dust.cpp") +``` + +and then run it wih + +```{r} +pi_dust(1e6, rng) +``` -cpp11::cpp_source("rng_pi_dust.cpp", quiet = FALSE) -pi_dust(1e3, obj) +## Parallel implementation with dust and OpenMP + +Part of the point of dust's random number generators is that they create independent streams of random numbers that can be safely used in parallel. + +```{r, results = "asis", echo = FALSE} +cc_output(readLines("rng_pi_parallel.cpp")) +``` + +```{r} +cpp11::cpp_source("rng_pi_parallel.cpp") +``` + +Here we've made a number of decisions about how to split the problem up subject to a few constraints about using OpenMP together with R: + +* Generally speaking we want the answer to be independent of the number of threads used to run it, as this will vary in different sessions. As such avoid the temptation to do a loop over the threads; here we instead iterate over *streams* with the idea that there will be one or more streams used per threads. If we ran a single thread we'd get the same answer as if we ran one thread per stream. +* Each thread is going to do it's own loop of length `n` so we need to divide by `n * n_streams` at the end as that's many attempts we have made. +* We use OpenMP's `reduction` clause to safely accumulate the different subtotals (the `tot_i` values) into one `tot` value. +* In order to compile gracefully on machines that do not have OpenMP support both the `#include ` line and the `#pragma omp` line are wrapped in guards that test for `_OPENMP` (see "Writing R Extensions"). +* We let the generator tell us how many streams it has (`n_streams = rng->size()`) but we could as easily specify an ideal number of streams as an argument here and then test that the generator has *at least that many* by adding an argument to the call to `rng_pointer_get` (e.g., if we wanted `m` streams the call would be `rng_pointer_get(ptr, m)`) + +```{r} +rng <- dust:::dust_rng_pointer$new(n_streams = 20) +pi_dust_parallel(1e6, rng, 4) +``` + +Unfortunately [`cpp11::cpp_source` does not support using OpenMP](https://github.com/r-lib/cpp11/issues/243) so in the example above the code will run in serial and we can't see if parallelisation will help. + +In order to compile with support, we need to build a little package and set up an appropriate `Makevars` file + +```{r, include = FALSE} +path <- tempfile() +dir.create(path) +dir.create(file.path(path, "R"), FALSE, TRUE) +dir.create(file.path(path, "src"), FALSE, TRUE) + +writeLines( + c("Package: piparallel", + "LinkingTo: cpp11, dust", + "Version: 0.0.1", + "SystemRequirements: C++11"), + file.path(path, "DESCRIPTION")) +writeLines( + c('useDynLib("piparallel", .registration = TRUE)', + 'exportPattern("^[[:alpha:]]+")'), + file.path(path, "NAMESPACE")) +writeLines( + c("PKG_CXXFLAGS=-DHAVE_INLINE $(SHLIB_OPENMP_CXXFLAGS)", + "PKG_LIBS=$(SHLIB_OPENMP_CXXFLAGS)"), + file.path(path, "src", "Makevars")) +code <- grep("cpp11::linking_to", readLines("rng_pi_parallel.cpp"), + invert = TRUE, value = TRUE) +writeLines(code, file.path(path, "src", "code.cpp")) +pkgbuild::compile_dll(path, quiet = FALSE, debug = FALSE) +pkg <- pkgload::load_all(path, compile = FALSE, recompile = FALSE, + warn_conflicts = FALSE, export_all = FALSE, + helpers = FALSE, attach_testthat = FALSE, + quiet = TRUE) +pi_dust_parallel <- pkg$env$pi_dust_parallel +``` + +Once we have a parallel version we can see a speed-up as we add threads: + +```{r} +rng <- dust:::dust_rng_pointer$new(n_streams = 20) +bench::mark( + pi_dust_parallel(1e6, rng, 1), + pi_dust_parallel(1e6, rng, 2), + pi_dust_parallel(1e6, rng, 3), + pi_dust_parallel(1e6, rng, 4), + check = FALSE) +``` From 394b0ee49a2f9f2853339cf8f71c05058f6cc0c0 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 09:05:58 +0000 Subject: [PATCH 11/27] Restore important error check --- inst/include/dust/r/random.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inst/include/dust/r/random.hpp b/inst/include/dust/r/random.hpp index c8a419114..e42923e60 100644 --- a/inst/include/dust/r/random.hpp +++ b/inst/include/dust/r/random.hpp @@ -19,6 +19,11 @@ namespace r { template std::vector raw_seed(cpp11::raws seed_data) { using int_type = typename rng_state_type::int_type; + constexpr size_t len = sizeof(int_type) * rng_state_type::size(); + if (seed_data.size() == 0 || seed_data.size() % len != 0) { + cpp11::stop("Expected raw vector of length as multiple of %d for 'seed'", + len); + } std::vector seed(seed_data.size() / sizeof(int_type)); std::memcpy(seed.data(), RAW(seed_data), seed_data.size()); return seed; From 4c5cd3330fb01175e23f74c7e7a11061b91b547d Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 09:08:55 +0000 Subject: [PATCH 12/27] Minor cleanup --- R/rng.R | 3 --- R/rng_pointer.R | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/R/rng.R b/R/rng.R index f7ef7164b..3b48cfacc 100644 --- a/R/rng.R +++ b/R/rng.R @@ -137,9 +137,6 @@ dust_rng <- R6::R6Class( ##' expectations and the state is never changed. initialize = function(seed, n_generators = 1L, real_type = "double", deterministic = FALSE) { - ## TODO: n_generators -> n_streams - ## TODO: seed gets default - ## TODO: change order of initialisers? if (!(real_type %in% c("double", "float"))) { stop("Invalid value for 'real_type': must be 'double' or 'float'") } diff --git a/R/rng_pointer.R b/R/rng_pointer.R index 99624716d..3a44a0d62 100644 --- a/R/rng_pointer.R +++ b/R/rng_pointer.R @@ -14,7 +14,7 @@ dust_rng_pointer <- R6::R6Class( state_ = NULL, is_current_ = NULL ), - + public = list( ##' @field algorithm The name of the generator algorithm used (read-only) algorithm = NULL, From 8c58863431d1e6e823fad59ea72cf614f7f36954 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 09:10:50 +0000 Subject: [PATCH 13/27] Move code into its own file --- src/dust_rng.cpp | 69 ----------------------------------- src/dust_rng_pointer.cpp | 77 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 69 deletions(-) create mode 100644 src/dust_rng_pointer.cpp diff --git a/src/dust_rng.cpp b/src/dust_rng.cpp index a313f6f4f..b5d6923ff 100644 --- a/src/dust_rng.cpp +++ b/src/dust_rng.cpp @@ -537,72 +537,3 @@ cpp11::sexp dust_rng_state(SEXP ptr, bool is_float) { dust_rng_state(ptr) : dust_rng_state(ptr); } - -[[cpp11::register]] -cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, - std::string algorithm) { - cpp11::sexp ret; - - using namespace dust::random; - if (algorithm == "xoshiro256starstar") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoshiro256plusplus") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoshiro256plus") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoshiro128starstar") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoshiro128plusplus") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoshiro128plus") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoroshiro128starstar") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoroshiro128plusplus") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoroshiro128plus") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoshiro512starstar") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoshiro512plusplus") { - ret = r::rng_pointer_init(n_streams, seed); - } else if (algorithm == "xoshiro512plus") { - ret = r::rng_pointer_init(n_streams, seed); - } else { - cpp11::stop("Unknown algorithm '%s'", algorithm.c_str()); - } - - return ret; -} - -[[cpp11::register]] -void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm) { - using namespace dust::random; - if (algorithm == "xoshiro256starstar") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoshiro256plusplus") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoshiro256plus") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoshiro128starstar") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoshiro128plusplus") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoshiro128plus") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoroshiro128starstar") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoroshiro128plusplus") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoroshiro128plus") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoshiro512starstar") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoshiro512plusplus") { - r::rng_pointer_sync(obj); - } else if (algorithm == "xoshiro512plus") { - r::rng_pointer_sync(obj); - } else { - cpp11::stop("Unknown algorithm '%s'", algorithm.c_str()); - } -} diff --git a/src/dust_rng_pointer.cpp b/src/dust_rng_pointer.cpp new file mode 100644 index 000000000..671bd444d --- /dev/null +++ b/src/dust_rng_pointer.cpp @@ -0,0 +1,77 @@ +#include + +#ifdef _OPENMP +#include +#endif + +#include +#include + +[[cpp11::register]] +cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, + std::string algorithm) { + cpp11::sexp ret; + + using namespace dust::random; + if (algorithm == "xoshiro256starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro256plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro256plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else { + cpp11::stop("Unknown algorithm '%s'", algorithm.c_str()); + } + + return ret; +} + +[[cpp11::register]] +void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm) { + using namespace dust::random; + if (algorithm == "xoshiro256starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro256plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro256plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512plus") { + r::rng_pointer_sync(obj); + } else { + cpp11::stop("Unknown algorithm '%s'", algorithm.c_str()); + } +} From be595a3a1fa0b4d7fbfe9cc554fec28f6ebbee16 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 10:06:48 +0000 Subject: [PATCH 14/27] Check that all pointer types can be synced --- R/cpp11.R | 16 +++++----- extra/harness.cpp | 12 ++++++-- src/cpp11.cpp | 30 +++++++++---------- tests/testthat/test-xoshiro.R | 6 +++- tests/testthat/xoshiro-ref/xoroshiro128plus | 2 ++ .../testthat/xoshiro-ref/xoroshiro128plusplus | 2 ++ .../testthat/xoshiro-ref/xoroshiro128starstar | 2 ++ tests/testthat/xoshiro-ref/xoshiro128plus | 4 +++ tests/testthat/xoshiro-ref/xoshiro128plusplus | 4 +++ tests/testthat/xoshiro-ref/xoshiro128starstar | 4 +++ tests/testthat/xoshiro-ref/xoshiro256plus | 4 +++ tests/testthat/xoshiro-ref/xoshiro256plusplus | 4 +++ tests/testthat/xoshiro-ref/xoshiro256starstar | 4 +++ tests/testthat/xoshiro-ref/xoshiro512plus | 8 +++++ tests/testthat/xoshiro-ref/xoshiro512plusplus | 8 +++++ tests/testthat/xoshiro-ref/xoshiro512starstar | 8 +++++ 16 files changed, 91 insertions(+), 27 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 1e23f5c8e..9e6e452ce 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -24,6 +24,14 @@ density_poisson <- function(x, lambda, log) { .Call(`_dust_density_poisson`, x, lambda, log) } +dust_rng_pointer_init <- function(n_streams, seed, algorithm) { + .Call(`_dust_dust_rng_pointer_init`, n_streams, seed, algorithm) +} + +dust_rng_pointer_sync <- function(obj, algorithm) { + invisible(.Call(`_dust_dust_rng_pointer_sync`, obj, algorithm)) +} + dust_rng_alloc <- function(r_seed, n_generators, deterministic, is_float) { .Call(`_dust_dust_rng_alloc`, r_seed, n_generators, deterministic, is_float) } @@ -72,14 +80,6 @@ dust_rng_state <- function(ptr, is_float) { .Call(`_dust_dust_rng_state`, ptr, is_float) } -dust_rng_pointer_init <- function(n_streams, seed, algorithm) { - .Call(`_dust_dust_rng_pointer_init`, n_streams, seed, algorithm) -} - -dust_rng_pointer_sync <- function(obj, algorithm) { - invisible(.Call(`_dust_dust_rng_pointer_sync`, obj, algorithm)) -} - cpp_openmp_info <- function() { .Call(`_dust_cpp_openmp_info`) } diff --git a/extra/harness.cpp b/extra/harness.cpp index 7aadb666d..1a7552575 100644 --- a/extra/harness.cpp +++ b/extra/harness.cpp @@ -7,11 +7,11 @@ typedef uint64_t int_type; constexpr size_t data_size = 4; #elif defined(XOSHIRO128) -typedef uint64_t int_type; +typedef uint32_t int_type; constexpr size_t data_size = 4; #elif defined(XOROSHIRO128) typedef uint64_t int_type; -constexpr size_t data_size = 4; +constexpr size_t data_size = 2; #elif defined(XOSHIRO512) typedef uint64_t int_type; constexpr size_t data_size = 8; @@ -51,10 +51,16 @@ int main() { } auto x = next(); std::cout << - //std::setw(16) << std::setfill('0') << std::hex << x << " " << std::dec << x << std::endl; } + // At the end we dump out the model state, in hex: + for (int i = 0; i < data_size; ++i) { + std::cout << std::setw(sizeof(int_type) * 2) << + std::setfill('0') << std::hex << s[i] << + std::endl; + } + return 0; } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 95c1db487..4e3b513db 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -47,6 +47,21 @@ extern "C" SEXP _dust_density_poisson(SEXP x, SEXP lambda, SEXP log) { return cpp11::as_sexp(density_poisson(cpp11::as_cpp>(x), cpp11::as_cpp>(lambda), cpp11::as_cpp>(log))); END_CPP11 } +// dust_rng_pointer.cpp +cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, std::string algorithm); +extern "C" SEXP _dust_dust_rng_pointer_init(SEXP n_streams, SEXP seed, SEXP algorithm) { + BEGIN_CPP11 + return cpp11::as_sexp(dust_rng_pointer_init(cpp11::as_cpp>(n_streams), cpp11::as_cpp>(seed), cpp11::as_cpp>(algorithm))); + END_CPP11 +} +// dust_rng_pointer.cpp +void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm); +extern "C" SEXP _dust_dust_rng_pointer_sync(SEXP obj, SEXP algorithm) { + BEGIN_CPP11 + dust_rng_pointer_sync(cpp11::as_cpp>(obj), cpp11::as_cpp>(algorithm)); + return R_NilValue; + END_CPP11 +} // dust_rng.cpp SEXP dust_rng_alloc(cpp11::sexp r_seed, int n_generators, bool deterministic, bool is_float); extern "C" SEXP _dust_dust_rng_alloc(SEXP r_seed, SEXP n_generators, SEXP deterministic, SEXP is_float) { @@ -133,21 +148,6 @@ extern "C" SEXP _dust_dust_rng_state(SEXP ptr, SEXP is_float) { return cpp11::as_sexp(dust_rng_state(cpp11::as_cpp>(ptr), cpp11::as_cpp>(is_float))); END_CPP11 } -// dust_rng.cpp -cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, std::string algorithm); -extern "C" SEXP _dust_dust_rng_pointer_init(SEXP n_streams, SEXP seed, SEXP algorithm) { - BEGIN_CPP11 - return cpp11::as_sexp(dust_rng_pointer_init(cpp11::as_cpp>(n_streams), cpp11::as_cpp>(seed), cpp11::as_cpp>(algorithm))); - END_CPP11 -} -// dust_rng.cpp -void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm); -extern "C" SEXP _dust_dust_rng_pointer_sync(SEXP obj, SEXP algorithm) { - BEGIN_CPP11 - dust_rng_pointer_sync(cpp11::as_cpp>(obj), cpp11::as_cpp>(algorithm)); - return R_NilValue; - END_CPP11 -} // openmp.cpp cpp11::writable::list cpp_openmp_info(); extern "C" SEXP _dust_cpp_openmp_info() { diff --git a/tests/testthat/test-xoshiro.R b/tests/testthat/test-xoshiro.R index 608ea7d07..879f8a13e 100644 --- a/tests/testthat/test-xoshiro.R +++ b/tests/testthat/test-xoshiro.R @@ -4,7 +4,11 @@ test_that("xoshiro output agrees with reference data", { for (name in dir(path)) { obj <- dust_rng_pointer$new(seed = 42, algorithm = name) res <- test_xoshiro_run(obj) + obj$sync() + len <- if (grepl("^xoshiro128", name)) 4 else 8 + s <- matrix(obj$state(), len)[rev(seq_len(len)), ] + s_str <- apply(s, 2, paste, collapse = "") cmp <- readLines(sprintf("%s/%s", path, name)) - expect_equal(res, cmp) + expect_equal(c(res, s_str), cmp) } }) diff --git a/tests/testthat/xoshiro-ref/xoroshiro128plus b/tests/testthat/xoshiro-ref/xoroshiro128plus index fa2817c08..84f2b1320 100644 --- a/tests/testthat/xoshiro-ref/xoroshiro128plus +++ b/tests/testthat/xoshiro-ref/xoroshiro128plus @@ -28,3 +28,5 @@ 5534324586141643616 12383152334171140518 6818487868096510966 +16d7ba4f38631d13 +457f7c83abfba2dd diff --git a/tests/testthat/xoshiro-ref/xoroshiro128plusplus b/tests/testthat/xoshiro-ref/xoroshiro128plusplus index 214c23def..dfb520946 100644 --- a/tests/testthat/xoshiro-ref/xoroshiro128plusplus +++ b/tests/testthat/xoshiro-ref/xoroshiro128plusplus @@ -28,3 +28,5 @@ 15221866165451680989 13489980999189886949 7198607794995435210 +a6e5ea2f7651cd1c +a354b7a33f8e8b0f diff --git a/tests/testthat/xoshiro-ref/xoroshiro128starstar b/tests/testthat/xoshiro-ref/xoroshiro128starstar index 1fd02ab86..aa280c377 100644 --- a/tests/testthat/xoshiro-ref/xoroshiro128starstar +++ b/tests/testthat/xoshiro-ref/xoroshiro128starstar @@ -28,3 +28,5 @@ 6223406593289342926 8842572349973568536 5532774325646040112 +16d7ba4f38631d13 +457f7c83abfba2dd diff --git a/tests/testthat/xoshiro-ref/xoshiro128plus b/tests/testthat/xoshiro-ref/xoshiro128plus index 078719c3d..680355aba 100644 --- a/tests/testthat/xoshiro-ref/xoshiro128plus +++ b/tests/testthat/xoshiro-ref/xoshiro128plus @@ -28,3 +28,7 @@ 1871045121 2791939146 4201551836 +cea64c5f +db6b6622 +b4d6d7df +f8370366 diff --git a/tests/testthat/xoshiro-ref/xoshiro128plusplus b/tests/testthat/xoshiro-ref/xoshiro128plusplus index e1ab3cecc..c0e6a5308 100644 --- a/tests/testthat/xoshiro-ref/xoshiro128plusplus +++ b/tests/testthat/xoshiro-ref/xoshiro128plusplus @@ -28,3 +28,7 @@ 3392677730 916576617 3653581116 +cea64c5f +db6b6622 +b4d6d7df +f8370366 diff --git a/tests/testthat/xoshiro-ref/xoshiro128starstar b/tests/testthat/xoshiro-ref/xoshiro128starstar index 770f2f9c0..30b317de7 100644 --- a/tests/testthat/xoshiro-ref/xoshiro128starstar +++ b/tests/testthat/xoshiro-ref/xoshiro128starstar @@ -28,3 +28,7 @@ 4105119796 192728354 934133035 +cea64c5f +db6b6622 +b4d6d7df +f8370366 diff --git a/tests/testthat/xoshiro-ref/xoshiro256plus b/tests/testthat/xoshiro-ref/xoshiro256plus index 6d4802b94..ae5950046 100644 --- a/tests/testthat/xoshiro-ref/xoshiro256plus +++ b/tests/testthat/xoshiro-ref/xoshiro256plus @@ -28,3 +28,7 @@ 4016147741394319913 10252714880087829589 4039534024713725545 +80a0fa56688f6d90 +bc1497c72c80d211 +56057dd5c1cdeb94 +c697d77d6ceb8c6f diff --git a/tests/testthat/xoshiro-ref/xoshiro256plusplus b/tests/testthat/xoshiro-ref/xoshiro256plusplus index ba6921b23..ce3076301 100644 --- a/tests/testthat/xoshiro-ref/xoshiro256plusplus +++ b/tests/testthat/xoshiro-ref/xoshiro256plusplus @@ -28,3 +28,7 @@ 5291826448566605555 11793225760405578669 11587817079881687253 +80a0fa56688f6d90 +bc1497c72c80d211 +56057dd5c1cdeb94 +c697d77d6ceb8c6f diff --git a/tests/testthat/xoshiro-ref/xoshiro256starstar b/tests/testthat/xoshiro-ref/xoshiro256starstar index 5f54f96ce..df937aeb5 100644 --- a/tests/testthat/xoshiro-ref/xoshiro256starstar +++ b/tests/testthat/xoshiro-ref/xoshiro256starstar @@ -28,3 +28,7 @@ 3974300978875138071 1780744086601141409 1647406970322170425 +80a0fa56688f6d90 +bc1497c72c80d211 +56057dd5c1cdeb94 +c697d77d6ceb8c6f diff --git a/tests/testthat/xoshiro-ref/xoshiro512plus b/tests/testthat/xoshiro-ref/xoshiro512plus index 0e18792c1..2c7ac3cf4 100644 --- a/tests/testthat/xoshiro-ref/xoshiro512plus +++ b/tests/testthat/xoshiro-ref/xoshiro512plus @@ -28,3 +28,11 @@ 4145877857605170073 9752473752304387592 10718358607918504099 +00b448f275493f8a +5b1367cee3a96bb0 +48b0391817bb975b +8b25dc046efc11a6 +d093498b8e575238 +d93c6d39fdc2f1a4 +36b2328cabb0ab34 +0c9338eb42605ae9 diff --git a/tests/testthat/xoshiro-ref/xoshiro512plusplus b/tests/testthat/xoshiro-ref/xoshiro512plusplus index 4a9f5ed0e..8a87e81f1 100644 --- a/tests/testthat/xoshiro-ref/xoshiro512plusplus +++ b/tests/testthat/xoshiro-ref/xoshiro512plusplus @@ -28,3 +28,11 @@ 2283681299546516403 3808647405580472584 16956876242805140340 +00b448f275493f8a +5b1367cee3a96bb0 +48b0391817bb975b +8b25dc046efc11a6 +d093498b8e575238 +d93c6d39fdc2f1a4 +36b2328cabb0ab34 +0c9338eb42605ae9 diff --git a/tests/testthat/xoshiro-ref/xoshiro512starstar b/tests/testthat/xoshiro-ref/xoshiro512starstar index 4839d97a4..585f79d27 100644 --- a/tests/testthat/xoshiro-ref/xoshiro512starstar +++ b/tests/testthat/xoshiro-ref/xoshiro512starstar @@ -28,3 +28,11 @@ 12006650672579575208 2731211645731671501 15840818449790708025 +00b448f275493f8a +5b1367cee3a96bb0 +48b0391817bb975b +8b25dc046efc11a6 +d093498b8e575238 +d93c6d39fdc2f1a4 +36b2328cabb0ab34 +0c9338eb42605ae9 From e3b687b07c7da54ecbce2709f0ae43a0638791fc Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 10:08:57 +0000 Subject: [PATCH 15/27] Check we can hit all branches in pointer code --- src/dust_rng_pointer.cpp | 2 -- tests/testthat/test-rng-interface.R | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/dust_rng_pointer.cpp b/src/dust_rng_pointer.cpp index 671bd444d..7f9e6db34 100644 --- a/src/dust_rng_pointer.cpp +++ b/src/dust_rng_pointer.cpp @@ -71,7 +71,5 @@ void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm) { r::rng_pointer_sync(obj); } else if (algorithm == "xoshiro512plus") { r::rng_pointer_sync(obj); - } else { - cpp11::stop("Unknown algorithm '%s'", algorithm.c_str()); } } diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R index 680287386..983ce25e0 100644 --- a/tests/testthat/test-rng-interface.R +++ b/tests/testthat/test-rng-interface.R @@ -43,3 +43,10 @@ test_that("Invalidated pointers can be rebuilt", { test_xoshiro_run(obj4), test_xoshiro_run(obj1)) }) + + +test_that("can't create invalid pointer types", { + expect_error( + dust_rng_pointer$new(algorithm = "mt19937"), + "Unknown algorithm 'mt19937'") +}) From 34cd0625c9b07d07e4fa48eb2b26ece0ed37787c Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 10:12:37 +0000 Subject: [PATCH 16/27] Tidy up headers --- inst/include/dust/r/random.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/include/dust/r/random.hpp b/inst/include/dust/r/random.hpp index e42923e60..c3c9a0ed0 100644 --- a/inst/include/dust/r/random.hpp +++ b/inst/include/dust/r/random.hpp @@ -4,6 +4,7 @@ #include // memcpy #include +#include #include #include From 827ca85eb23e54faecb3c5aebf7dba3b80f955d8 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 10:50:56 +0000 Subject: [PATCH 17/27] Test pointer handling --- R/cpp11.R | 4 ++++ src/cpp11.cpp | 8 ++++++++ src/dust_rng_pointer.cpp | 9 +++++++++ tests/testthat/test-rng-interface.R | 16 ++++++++++++++++ 4 files changed, 37 insertions(+) diff --git a/R/cpp11.R b/R/cpp11.R index 9e6e452ce..8320d6c16 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -32,6 +32,10 @@ dust_rng_pointer_sync <- function(obj, algorithm) { invisible(.Call(`_dust_dust_rng_pointer_sync`, obj, algorithm)) } +test_rng_pointer_get <- function(obj, n_streams) { + .Call(`_dust_test_rng_pointer_get`, obj, n_streams) +} + dust_rng_alloc <- function(r_seed, n_generators, deterministic, is_float) { .Call(`_dust_dust_rng_alloc`, r_seed, n_generators, deterministic, is_float) } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 4e3b513db..d4735c866 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -62,6 +62,13 @@ extern "C" SEXP _dust_dust_rng_pointer_sync(SEXP obj, SEXP algorithm) { return R_NilValue; END_CPP11 } +// dust_rng_pointer.cpp +double test_rng_pointer_get(cpp11::environment obj, int n_streams); +extern "C" SEXP _dust_test_rng_pointer_get(SEXP obj, SEXP n_streams) { + BEGIN_CPP11 + return cpp11::as_sexp(test_rng_pointer_get(cpp11::as_cpp>(obj), cpp11::as_cpp>(n_streams))); + END_CPP11 +} // dust_rng.cpp SEXP dust_rng_alloc(cpp11::sexp r_seed, int n_generators, bool deterministic, bool is_float); extern "C" SEXP _dust_dust_rng_alloc(SEXP r_seed, SEXP n_generators, SEXP deterministic, SEXP is_float) { @@ -1192,6 +1199,7 @@ static const R_CallMethodDef CallEntries[] = { {"_dust_dust_walk_capabilities", (DL_FUNC) &_dust_dust_walk_capabilities, 0}, {"_dust_dust_walk_gpu_info", (DL_FUNC) &_dust_dust_walk_gpu_info, 0}, {"_dust_test_cuda_pars", (DL_FUNC) &_dust_test_cuda_pars, 9}, + {"_dust_test_rng_pointer_get", (DL_FUNC) &_dust_test_rng_pointer_get, 2}, {"_dust_test_xoshiro_run", (DL_FUNC) &_dust_test_xoshiro_run, 1}, {NULL, NULL, 0} }; diff --git a/src/dust_rng_pointer.cpp b/src/dust_rng_pointer.cpp index 7f9e6db34..a4f0c7cab 100644 --- a/src/dust_rng_pointer.cpp +++ b/src/dust_rng_pointer.cpp @@ -73,3 +73,12 @@ void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm) { r::rng_pointer_sync(obj); } } + +// This exists to check some error paths in rng_pointer_get; it is not +// for use by users. +[[cpp11::register]] +double test_rng_pointer_get(cpp11::environment obj, int n_streams) { + using namespace dust::random; + auto rng = r::rng_pointer_get(obj, n_streams); + return random_real(rng->state(0)); +} diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R index 983ce25e0..1404301da 100644 --- a/tests/testthat/test-rng-interface.R +++ b/tests/testthat/test-rng-interface.R @@ -50,3 +50,19 @@ test_that("can't create invalid pointer types", { dust_rng_pointer$new(algorithm = "mt19937"), "Unknown algorithm 'mt19937'") }) + + +test_that("Validate pointers on fetch", { + obj <- dust_rng_pointer$new(algorithm = "xoshiro256starstar") + expect_error( + test_rng_pointer_get(obj, 1), + "Incorrect rng type: given xoshiro256starstar, expected xoshiro256plus") + obj <- dust_rng_pointer$new(algorithm = "xoshiro256plus", n_streams = 4) + expect_error( + test_rng_pointer_get(obj, 20), + "Requested a rng with 20 streams but only have 4") + expect_silent( + test_rng_pointer_get(obj, 0)) + expect_silent( + test_rng_pointer_get(obj, 1)) +}) From 3e684799e7dd81bd00057af6c4300e616a1b7fb5 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 11:12:02 +0000 Subject: [PATCH 18/27] Make vignettes buildable --- .github/workflows/R-CMD-check.yaml | 5 + .gitignore | 2 +- Makefile | 5 +- scripts/build_gpu_vignette | 11 - scripts/build_vignette | 10 + vignettes/rng_package.Rmd | 226 ++++++++++++++++++ .../{rng_use.Rmd => rng_package.Rmd} | 4 +- 7 files changed, 248 insertions(+), 15 deletions(-) delete mode 100755 scripts/build_gpu_vignette create mode 100755 scripts/build_vignette create mode 100644 vignettes/rng_package.Rmd rename vignettes_src/{rng_use.Rmd => rng_package.Rmd} (98%) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a7a48e945..bdda29fe9 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -70,6 +70,11 @@ jobs: remotes::install_cran("rcmdcheck") shell: Rscript {0} + - name: Move real vignettes + run: | + cp vignettes_src/rng_package.Rmd vignettes + cp vignettes/rng_pi*.cpp vignettes + - name: Check env: _R_CHECK_CRAN_INCOMING_REMOTE_: false diff --git a/.gitignore b/.gitignore index 0a3dce8a6..c404599f5 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ inst/doc pkgdown inst/include/cub *.gcov -vignettes_src/gpu.md +vignettes_src/*.md .vscode/ diff --git a/Makefile b/Makefile index 35483f202..1fed3392f 100644 --- a/Makefile +++ b/Makefile @@ -49,7 +49,10 @@ clean: src/*.gcov src/*.gcda src/*.gcno vignettes/gpu.Rmd: vignettes_src/gpu.Rmd - ./scripts/build_gpu_vignette + ./scripts/build_vignette gpu + +vignettes/rng_package.Rmd: vignettes_src/rng_package.Rmd + ./scripts/build_vignette rng_package vignettes: vignettes/dust.Rmd vignettes/rng.Rmd ${RSCRIPT} -e 'tools::buildVignettes(dir = ".")' diff --git a/scripts/build_gpu_vignette b/scripts/build_gpu_vignette deleted file mode 100755 index 408f793d9..000000000 --- a/scripts/build_gpu_vignette +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# We can't run the gpu vignette meaningfully on any computer that -# lacks a suitable graphics card and toolchain (including CI or CRAN), -# so we pre-compile it occasionally here. The version in vignettes_src -# should be edited, then this script run to create the version in -# vignettes -(cd vignettes_src && Rscript -e 'knitr::knit("gpu.Rmd")') -header="DO NOT EDIT THIS FILE - see vignettes_src and make changes there" -sed -s 's/[[:space:]]*$//' vignettes_src/gpu.md | - sed 's/\r//g' | - sed "s/HEADER/$header/" > vignettes/gpu.Rmd diff --git a/scripts/build_vignette b/scripts/build_vignette new file mode 100755 index 000000000..845fee447 --- /dev/null +++ b/scripts/build_vignette @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -e +SRC="$1.Rmd" +DEST="$1.md" + +(cd vignettes_src && Rscript -e "knitr::knit(\"${SRC}\")") +header="DO NOT EDIT THIS FILE - see vignettes_src and make changes there" +sed -s 's/[[:space:]]*$//' vignettes_src/$DEST | + sed 's/\r//g' | + sed "s/HEADER/$header/" > vignettes/$SRC diff --git a/vignettes/rng_package.Rmd b/vignettes/rng_package.Rmd new file mode 100644 index 000000000..4a0517dc8 --- /dev/null +++ b/vignettes/rng_package.Rmd @@ -0,0 +1,226 @@ +--- +title: "Using RNGs from packages" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using RNGs from packages} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + + + +The dust random number generators are suitable for use from other packages and we provide a few helpers in both R and C++ to make this easier. + +For illustration purposes we will assume that you want to estimate pi using rejection sampling. The basic idea of this algorithm is simple; sample two U(0, 1) points `x` and `y` and test if they lie in the unit circle (i.e. `sqrt(x^2 + x^2) < 1`) giving the ratio of the area of a unit circle to a square. Multiplying the fraction of the points that we accept by 4 yields our estimate of pi. + +## Background using R's random number generator + +First, an example that uses R's API for example (note that while the R API is C, we're using the cpp11 package interface here so that the following examples are similar): + +```cc +#include +#include + +[[cpp11::register]] +double pi_r(int n) { + int tot = 0; + GetRNGstate(); + for (int i = 0; i < n; ++i) { + const double u1 = unif_rand(); + const double u2 = unif_rand(); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + PutRNGstate(); + return tot / static_cast(n) * 4.0; +} +``` + +With cpp11 we can load this with `cpp11::cpp_source` + + +```r +cpp11::cpp_source("rng_pi_r.cpp") +``` + +and then run it wih + + +```r +pi_r(1e6) +#> [1] 3.143772 +``` + +The key bits within the code above are that we: + +* included random number support from R via the `R_ext/Random.h` header +* initialised the state of the global random number stream within our C++ context with `GetRNGstate` before drawing any random numbers +* drew some possible large number of numbers using `unif_rand` +* restored the random number state back to R. + +Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not behaving properly. This is explained in detail in the "Writing R Extensions" manual. + +## Basic implementation using dust + +One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source + + +```r +rng <- dust:::dust_rng_pointer$new(seed = 42) +rng +#> +#> Public: +#> algorithm: xoshiro256plus +#> initialize: function (seed = NULL, n_streams = 1L, algorithm = "xoshiro256plus") +#> is_current: function () +#> n_streams: 1 +#> state: function () +#> sync: function () +#> Private: +#> is_current_: TRUE +#> ptr_: externalptr +#> state_: 95 6e eb 2f 26 32 d7 bd 04 72 10 65 ba fa e1 57 46 7f 20 ... +``` + +This object acts as a "handle" to some random number state that can be passed safely to C++ programs; the state will be updated when the program runs as a side effect. Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use + + +```r +rng$algorithm +#> [1] "xoshiro256plus" +``` + +The default will be suitable for most purposes. + +We can rewrite the pi approximation function as: + +```cc +#include +#include + +[[cpp11::linking_to(dust)]] +[[cpp11::register]] +double pi_dust(int n, cpp11::sexp ptr) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + auto& state = rng->state(0); + int tot = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + return tot / static_cast(n) * 4.0; +} +``` + +This snippet looks much the same as above: + +* We've added `[[cpp::linking_to(dust)]]` and included the dust random interface (`dust/r/random.hpp`) +* The first line of the function safely creates a pointer to the random state data. The template argument here (``) refers to the rng algorithm and matches `rng$algorithm` +* The second line extracts a reference to the first (C++ indexing starting at 0) random number stream - this pair of lines is roughly equivalent to `GetRNGstate()` except that that the random numbers do not come from some global source +* After that the listing proceeds as before proceeds as before, except there is no equivalent to `PutRNGstate()` + + + +```r +cpp11::cpp_source("rng_pi_dust.cpp") +``` + +and then run it wih + + +```r +pi_dust(1e6, rng) +#> [1] 3.14098 +``` + +## Parallel implementation with dust and OpenMP + +Part of the point of dust's random number generators is that they create independent streams of random numbers that can be safely used in parallel. + +```cc +#include +#include + +#ifdef _OPENMP +#include +#endif + +[[cpp11::linking_to(dust)]] +[[cpp11::register]] +double pi_dust_parallel(int n, cpp11::sexp ptr, int n_threads) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + const auto n_streams = rng->size(); + int tot = 0; +#ifdef _OPENMP +#pragma omp parallel for schedule(static) num_threads(n_threads) \ + reduction(+:tot) +#endif + for (size_t i = 0; i < n_streams; ++i) { + auto& state = rng->state(0); + int tot_i = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot_i++; + } + } + tot += tot_i; + } + return tot / static_cast(n * n_streams) * 4.0; +} +``` + + +```r +cpp11::cpp_source("rng_pi_parallel.cpp") +``` + +Here we've made a number of decisions about how to split the problem up subject to a few constraints about using OpenMP together with R: + +* Generally speaking we want the answer to be independent of the number of threads used to run it, as this will vary in different sessions. As such avoid the temptation to do a loop over the threads; here we instead iterate over *streams* with the idea that there will be one or more streams used per threads. If we ran a single thread we'd get the same answer as if we ran one thread per stream. +* Each thread is going to do it's own loop of length `n` so we need to divide by `n * n_streams` at the end as that's many attempts we have made. +* We use OpenMP's `reduction` clause to safely accumulate the different subtotals (the `tot_i` values) into one `tot` value. +* In order to compile gracefully on machines that do not have OpenMP support both the `#include ` line and the `#pragma omp` line are wrapped in guards that test for `_OPENMP` (see "Writing R Extensions"). +* We let the generator tell us how many streams it has (`n_streams = rng->size()`) but we could as easily specify an ideal number of streams as an argument here and then test that the generator has *at least that many* by adding an argument to the call to `rng_pointer_get` (e.g., if we wanted `m` streams the call would be `rng_pointer_get(ptr, m)`) + + +```r +rng <- dust:::dust_rng_pointer$new(seed = 42, n_streams = 20) +pi_dust_parallel(1e6, rng, 4) +#> [1] 3.141703 +``` + +Unfortunately [`cpp11::cpp_source` does not support using OpenMP](https://github.com/r-lib/cpp11/issues/243) so in the example above the code will run in serial and we can't see if parallelisation will help. + +In order to compile with support, we need to build a little package and set up an appropriate `Makevars` file + + + +Once we have a parallel version we can see a speed-up as we add threads: + + +```r +rng <- dust:::dust_rng_pointer$new(n_streams = 20) +bench::mark( + pi_dust_parallel(1e6, rng, 1), + pi_dust_parallel(1e6, rng, 2), + pi_dust_parallel(1e6, rng, 3), + pi_dust_parallel(1e6, rng, 4), + check = FALSE) +#> # A tibble: 4 x 6 +#> expression min median `itr/sec` mem_alloc `gc/sec` +#> +#> 1 pi_dust_parallel(1e+06, rng, 1) 44.3ms 44.7ms 22.4 0B 0 +#> 2 pi_dust_parallel(1e+06, rng, 2) 22.5ms 23ms 43.4 0B 0 +#> 3 pi_dust_parallel(1e+06, rng, 3) 15.8ms 15.9ms 62.7 0B 0 +#> 4 pi_dust_parallel(1e+06, rng, 4) 11.5ms 11.6ms 85.5 0B 0 +``` diff --git a/vignettes_src/rng_use.Rmd b/vignettes_src/rng_package.Rmd similarity index 98% rename from vignettes_src/rng_use.Rmd rename to vignettes_src/rng_package.Rmd index b68699e64..6e8294457 100644 --- a/vignettes_src/rng_use.Rmd +++ b/vignettes_src/rng_package.Rmd @@ -61,7 +61,7 @@ Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not b One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source ```{r} -rng <- dust:::dust_rng_pointer$new() +rng <- dust:::dust_rng_pointer$new(seed = 42) rng ``` @@ -118,7 +118,7 @@ Here we've made a number of decisions about how to split the problem up subject * We let the generator tell us how many streams it has (`n_streams = rng->size()`) but we could as easily specify an ideal number of streams as an argument here and then test that the generator has *at least that many* by adding an argument to the call to `rng_pointer_get` (e.g., if we wanted `m` streams the call would be `rng_pointer_get(ptr, m)`) ```{r} -rng <- dust:::dust_rng_pointer$new(n_streams = 20) +rng <- dust:::dust_rng_pointer$new(seed = 42, n_streams = 20) pi_dust_parallel(1e6, rng, 4) ``` From 0c43a240405549893e04b9e5a93b4f7f0c6b12b8 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 11:27:33 +0000 Subject: [PATCH 19/27] Tidy vignette --- inst/WORDLIST | 1 + vignettes/rng.Rmd | 2 + vignettes/rng_package.Rmd | 101 +++++++++++++++++++++++++++++----- vignettes_src/rng_package.Rmd | 48 +++++++++++++--- 4 files changed, 132 insertions(+), 20 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index f740c6591..e71357175 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -23,6 +23,7 @@ Marsaglia Mersenne OMP OpenMP +OpenMP's Perez Poisson R's diff --git a/vignettes/rng.Rmd b/vignettes/rng.Rmd index a3df8242b..50e39f38e 100644 --- a/vignettes/rng.Rmd +++ b/vignettes/rng.Rmd @@ -321,6 +321,8 @@ plain_output(readLines(file.path(path_pkg, "NAMESPACE"))) Finally, run `cpp11::cpp_register()` before compiling your package so that the relevant interfaces are created (`R/cpp11.R` and `cpp11/cpp11.cpp`). A similar process would likely work with Rcpp without any dependency on cpp11. +More interesting use with persistant streams is described in `vignette("rng_package.Rmd")` + ### Standalone, parallel with OpenMP *This is somewhat more experimental, so let us know if you have success using the library this way.* diff --git a/vignettes/rng_package.Rmd b/vignettes/rng_package.Rmd index 4a0517dc8..4f913b21b 100644 --- a/vignettes/rng_package.Rmd +++ b/vignettes/rng_package.Rmd @@ -46,12 +46,12 @@ With cpp11 we can load this with `cpp11::cpp_source` cpp11::cpp_source("rng_pi_r.cpp") ``` -and then run it wih +and then run it with ```r pi_r(1e6) -#> [1] 3.143772 +#> [1] 3.143272 ``` The key bits within the code above are that we: @@ -69,7 +69,7 @@ One of the design ideas in dust is that there is no single global source of rand ```r -rng <- dust:::dust_rng_pointer$new(seed = 42) +rng <- dust:::dust_rng_pointer$new() rng #> #> Public: @@ -82,7 +82,7 @@ rng #> Private: #> is_current_: TRUE #> ptr_: externalptr -#> state_: 95 6e eb 2f 26 32 d7 bd 04 72 10 65 ba fa e1 57 46 7f 20 ... +#> state_: c2 65 5d bc c6 fd 34 2f 8e 92 da ae cb 41 a8 b9 cf af e0 ... ``` This object acts as a "handle" to some random number state that can be passed safely to C++ programs; the state will be updated when the program runs as a side effect. Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use @@ -132,12 +132,12 @@ This snippet looks much the same as above: cpp11::cpp_source("rng_pi_dust.cpp") ``` -and then run it wih +and then run it with ```r pi_dust(1e6, rng) -#> [1] 3.14098 +#> [1] 3.14428 ``` ## Parallel implementation with dust and OpenMP @@ -194,9 +194,9 @@ Here we've made a number of decisions about how to split the problem up subject ```r -rng <- dust:::dust_rng_pointer$new(seed = 42, n_streams = 20) +rng <- dust:::dust_rng_pointer$new(n_streams = 20) pi_dust_parallel(1e6, rng, 4) -#> [1] 3.141703 +#> [1] 3.141316 ``` Unfortunately [`cpp11::cpp_source` does not support using OpenMP](https://github.com/r-lib/cpp11/issues/243) so in the example above the code will run in serial and we can't see if parallelisation will help. @@ -205,7 +205,82 @@ In order to compile with support, we need to build a little package and set up a -Once we have a parallel version we can see a speed-up as we add threads: +The package is fairly minimal: + + +``` +#> . +#> ├── DESCRIPTION +#> ├── NAMESPACE +#> └── src +#> ├── Makevars +#> └── code.cpp +``` + +We have an extremely minimal `DESCRIPTION`, which contains line `LinkingTo: cpp11, dust` from which R will arrange compiler flags to find both packages' headers: + +```plain +Package: piparallel +LinkingTo: cpp11, dust +Version: 0.0.1 +SystemRequirements: C++11 +``` + +The `NAMESPACE` loads the dynamic library + +```plain +useDynLib("piparallel", .registration = TRUE) +exportPattern("^[[:alpha:]]+") +``` + +The `src/Makevars` file contains important flags to pick up OpenMP support: + +```make +PKG_CXXFLAGS=-DHAVE_INLINE $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS=$(SHLIB_OPENMP_CXXFLAGS) +``` + +And `src/code.cpp` contains the file above but without the `[[cpp11::linking_to(dust)]]` line: + +```cc +#include +#include + +#ifdef _OPENMP +#include +#endif + +[[cpp11::register]] +double pi_dust_parallel(int n, cpp11::sexp ptr, int n_threads) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + const auto n_streams = rng->size(); + int tot = 0; +#ifdef _OPENMP +#pragma omp parallel for schedule(static) num_threads(n_threads) \ + reduction(+:tot) +#endif + for (size_t i = 0; i < n_streams; ++i) { + auto& state = rng->state(0); + int tot_i = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot_i++; + } + } + tot += tot_i; + } + return tot / static_cast(n * n_streams) * 4.0; +} +``` + +After compiling and installing the package, `pi_dust_parallel` will be available + + + +Now we have a parallel version we can see a speed-up as we add threads: ```r @@ -219,8 +294,8 @@ bench::mark( #> # A tibble: 4 x 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> -#> 1 pi_dust_parallel(1e+06, rng, 1) 44.3ms 44.7ms 22.4 0B 0 -#> 2 pi_dust_parallel(1e+06, rng, 2) 22.5ms 23ms 43.4 0B 0 -#> 3 pi_dust_parallel(1e+06, rng, 3) 15.8ms 15.9ms 62.7 0B 0 -#> 4 pi_dust_parallel(1e+06, rng, 4) 11.5ms 11.6ms 85.5 0B 0 +#> 1 pi_dust_parallel(1e+06, rng, 1) 44.9ms 45.1ms 21.7 0B 0 +#> 2 pi_dust_parallel(1e+06, rng, 2) 22.6ms 22.7ms 43.4 0B 0 +#> 3 pi_dust_parallel(1e+06, rng, 3) 15.8ms 16ms 62.5 0B 0 +#> 4 pi_dust_parallel(1e+06, rng, 4) 11.5ms 11.6ms 84.5 0B 0 ``` diff --git a/vignettes_src/rng_package.Rmd b/vignettes_src/rng_package.Rmd index 6e8294457..773003ab3 100644 --- a/vignettes_src/rng_package.Rmd +++ b/vignettes_src/rng_package.Rmd @@ -41,7 +41,7 @@ With cpp11 we can load this with `cpp11::cpp_source` cpp11::cpp_source("rng_pi_r.cpp") ``` -and then run it wih +and then run it with ```{r} pi_r(1e6) @@ -61,7 +61,7 @@ Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not b One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source ```{r} -rng <- dust:::dust_rng_pointer$new(seed = 42) +rng <- dust:::dust_rng_pointer$new() rng ``` @@ -91,7 +91,7 @@ This snippet looks much the same as above: cpp11::cpp_source("rng_pi_dust.cpp") ``` -and then run it wih +and then run it with ```{r} pi_dust(1e6, rng) @@ -118,7 +118,7 @@ Here we've made a number of decisions about how to split the problem up subject * We let the generator tell us how many streams it has (`n_streams = rng->size()`) but we could as easily specify an ideal number of streams as an argument here and then test that the generator has *at least that many* by adding an argument to the call to `rng_pointer_get` (e.g., if we wanted `m` streams the call would be `rng_pointer_get(ptr, m)`) ```{r} -rng <- dust:::dust_rng_pointer$new(seed = 42, n_streams = 20) +rng <- dust:::dust_rng_pointer$new(n_streams = 20) pi_dust_parallel(1e6, rng, 4) ``` @@ -129,7 +129,6 @@ In order to compile with support, we need to build a little package and set up a ```{r, include = FALSE} path <- tempfile() dir.create(path) -dir.create(file.path(path, "R"), FALSE, TRUE) dir.create(file.path(path, "src"), FALSE, TRUE) writeLines( @@ -149,7 +148,42 @@ writeLines( code <- grep("cpp11::linking_to", readLines("rng_pi_parallel.cpp"), invert = TRUE, value = TRUE) writeLines(code, file.path(path, "src", "code.cpp")) -pkgbuild::compile_dll(path, quiet = FALSE, debug = FALSE) +``` + +The package is fairly minimal: + +```{r pkg_tree, echo = FALSE} +withr::with_dir(path, fs::dir_tree()) +``` + +We have an extremely minimal `DESCRIPTION`, which contains line `LinkingTo: cpp11, dust` from which R will arrange compiler flags to find both packages' headers: + +```{r, results = "asis", echo = FALSE} +plain_output(readLines(file.path(path, "DESCRIPTION"))) +``` + +The `NAMESPACE` loads the dynamic library + +```{r, results = "asis", echo = FALSE} +plain_output(readLines(file.path(path, "NAMESPACE"))) +``` + +The `src/Makevars` file contains important flags to pick up OpenMP support: + +```{r, results = "asis", echo = FALSE} +lang_output(readLines(file.path(path, "src/Makevars")), "make") +``` + +And `src/code.cpp` contains the file above but without the `[[cpp11::linking_to(dust)]]` line: + +```{r, results = "asis", echo = FALSE} +cc_output(readLines(file.path(path, "src/code.cpp"))) +``` + +After compiling and installing the package, `pi_dust_parallel` will be available + +```{r, include = FALSE} +pkgbuild::compile_dll(path, quiet = TRUE, debug = FALSE) pkg <- pkgload::load_all(path, compile = FALSE, recompile = FALSE, warn_conflicts = FALSE, export_all = FALSE, helpers = FALSE, attach_testthat = FALSE, @@ -157,7 +191,7 @@ pkg <- pkgload::load_all(path, compile = FALSE, recompile = FALSE, pi_dust_parallel <- pkg$env$pi_dust_parallel ``` -Once we have a parallel version we can see a speed-up as we add threads: +Now we have a parallel version we can see a speed-up as we add threads: ```{r} rng <- dust:::dust_rng_pointer$new(n_streams = 20) From 73920e9a90e7b0727ef0e9fabf141cf2a4ba72da Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 11:34:48 +0000 Subject: [PATCH 20/27] Copy files from right place --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index bdda29fe9..ca25437e9 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -73,7 +73,7 @@ jobs: - name: Move real vignettes run: | cp vignettes_src/rng_package.Rmd vignettes - cp vignettes/rng_pi*.cpp vignettes + cp vignettes_src/rng_pi*.cpp vignettes - name: Check env: From de322625864bc43b47bfcdb97e366397b182742b Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 11:39:14 +0000 Subject: [PATCH 21/27] Spelling --- vignettes/rng.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/rng.Rmd b/vignettes/rng.Rmd index 50e39f38e..edd124a1f 100644 --- a/vignettes/rng.Rmd +++ b/vignettes/rng.Rmd @@ -321,7 +321,7 @@ plain_output(readLines(file.path(path_pkg, "NAMESPACE"))) Finally, run `cpp11::cpp_register()` before compiling your package so that the relevant interfaces are created (`R/cpp11.R` and `cpp11/cpp11.cpp`). A similar process would likely work with Rcpp without any dependency on cpp11. -More interesting use with persistant streams is described in `vignette("rng_package.Rmd")` +More interesting use with persistent streams is described in `vignette("rng_package.Rmd")` ### Standalone, parallel with OpenMP From 30da86af94e797a5f488c774a37503a770db8b94 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 11:43:12 +0000 Subject: [PATCH 22/27] Avoid windows pain --- .github/workflows/R-CMD-check.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ca25437e9..0564eba65 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -71,6 +71,7 @@ jobs: shell: Rscript {0} - name: Move real vignettes + if: runner.os != 'Windows' run: | cp vignettes_src/rng_package.Rmd vignettes cp vignettes_src/rng_pi*.cpp vignettes From 57688d393d863bc55df19bafb1913d9393dca230 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 13:48:37 +0000 Subject: [PATCH 23/27] Better default behaviour of state() --- R/rng_pointer.R | 7 +++++-- man/dust_rng_pointer.Rd | 4 ++-- tests/testthat/test-rng-interface.R | 1 - 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/rng_pointer.R b/R/rng_pointer.R index 3a44a0d62..cd29d7a71 100644 --- a/R/rng_pointer.R +++ b/R/rng_pointer.R @@ -56,14 +56,17 @@ dust_rng_pointer <- R6::R6Class( ##' @description Return a raw vector of state. This can be used to ##' create other generators with the same state. state = function() { + if (!private$is_current_) { + self$sync() + } private$state_ }, ##' @description Return a logical, indicating if the random number ##' state that would be returned by `state()` is "current" (i.e., the ##' same as the copy held in the pointer) or not. This is `TRUE` on - ##' creation or immediately after calling `$sync()` and `FALSE` after - ##' any use of the pointer. + ##' creation or immediately after calling `$sync()` or `$state()` + ##' and `FALSE` after any use of the pointer. is_current = function() { private$is_current_ } diff --git a/man/dust_rng_pointer.Rd b/man/dust_rng_pointer.Rd index 013b79461..f174ca9e8 100644 --- a/man/dust_rng_pointer.Rd +++ b/man/dust_rng_pointer.Rd @@ -90,8 +90,8 @@ create other generators with the same state. Return a logical, indicating if the random number state that would be returned by \code{state()} is "current" (i.e., the same as the copy held in the pointer) or not. This is \code{TRUE} on -creation or immediately after calling \verb{$sync()} and \code{FALSE} after -any use of the pointer. +creation or immediately after calling \verb{$sync()} or \verb{$state()} +and \code{FALSE} after any use of the pointer. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{dust_rng_pointer$is_current()}\if{html}{\out{
}} } diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R index 1404301da..8fa6f8982 100644 --- a/tests/testthat/test-rng-interface.R +++ b/tests/testthat/test-rng-interface.R @@ -16,7 +16,6 @@ test_that("Using object requires sync", { r <- obj$state() test_xoshiro_run(obj) expect_false(obj$is_current()) - expect_identical(obj$state(), r) obj$sync() expect_true(obj$is_current()) expect_false(identical(obj$state(), r)) From 2c1935b4a1fb8f40a3ea9dc92ebf2351d48fc98a Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 13:50:09 +0000 Subject: [PATCH 24/27] Documentation improvement from review --- R/rng_pointer.R | 7 ++++-- man/dust_rng_pointer.Rd | 21 +++++++++++----- vignettes/rng.Rmd | 2 +- vignettes_src/rng_package.Rmd | 46 ++++++++++++++++++++++++++++++++--- 4 files changed, 64 insertions(+), 12 deletions(-) diff --git a/R/rng_pointer.R b/R/rng_pointer.R index cd29d7a71..b87f4a8ae 100644 --- a/R/rng_pointer.R +++ b/R/rng_pointer.R @@ -1,6 +1,9 @@ -##' @title Create pointer to random number generator +##' @title Create pointer to random number generator stream ##' -##' For external use, vignette coming soon. +##' This function exists to support use from other packages that wish +##' to use dust's random number support, and creates an opaque +##' pointer to a set of random number streams. It is desribed more +##' fully in `vignette("rng_package.Rmd")` ##' ##' @export ##' @examples diff --git a/man/dust_rng_pointer.Rd b/man/dust_rng_pointer.Rd index f174ca9e8..bdcf33265 100644 --- a/man/dust_rng_pointer.Rd +++ b/man/dust_rng_pointer.Rd @@ -2,17 +2,26 @@ % Please edit documentation in R/rng_pointer.R \name{dust_rng_pointer} \alias{dust_rng_pointer} -\title{Create pointer to random number generator +\title{Create pointer to random number generator stream -For external use, vignette coming soon.} +This function exists to support use from other packages that wish +to use dust's random number support, and creates an opaque +pointer to a set of random number streams. It is desribed more +fully in \code{vignette("rng_package.Rmd")}} \description{ -Create pointer to random number generator +Create pointer to random number generator stream -For external use, vignette coming soon. +This function exists to support use from other packages that wish +to use dust's random number support, and creates an opaque +pointer to a set of random number streams. It is desribed more +fully in \code{vignette("rng_package.Rmd")} -Create pointer to random number generator +Create pointer to random number generator stream -For external use, vignette coming soon. +This function exists to support use from other packages that wish +to use dust's random number support, and creates an opaque +pointer to a set of random number streams. It is desribed more +fully in \code{vignette("rng_package.Rmd")} } \examples{ dust::dust_rng_pointer$new() diff --git a/vignettes/rng.Rmd b/vignettes/rng.Rmd index edd124a1f..458ee2f1b 100644 --- a/vignettes/rng.Rmd +++ b/vignettes/rng.Rmd @@ -321,7 +321,7 @@ plain_output(readLines(file.path(path_pkg, "NAMESPACE"))) Finally, run `cpp11::cpp_register()` before compiling your package so that the relevant interfaces are created (`R/cpp11.R` and `cpp11/cpp11.cpp`). A similar process would likely work with Rcpp without any dependency on cpp11. -More interesting use with persistent streams is described in `vignette("rng_package.Rmd")` +The issue with this example is that every call to `random_normal` must provdide its own `seed` argument, so the random number streams are not continued with each call to the function. This is is not very useful in practice and we describe more fully how to do this properly in `vignette("rng_package.Rmd")`. ### Standalone, parallel with OpenMP diff --git a/vignettes_src/rng_package.Rmd b/vignettes_src/rng_package.Rmd index 773003ab3..1f2176f47 100644 --- a/vignettes_src/rng_package.Rmd +++ b/vignettes_src/rng_package.Rmd @@ -58,14 +58,20 @@ Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not b ## Basic implementation using dust -One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source +The implementation in dust will look very similar to above, but the way that we cope with the random number stream will look quite different. With the R version above we are looking after R's global stream (stored in the variable `.Random.seed`) and making sure that it is fetched and set on entry and exit to the function. + +One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source that our function can use. If we were to use the simulation function multiple times we would want the stream to pick up where it left off last time, so the act of calling the function should update the seed as a "side effect". + +The way we expose this for use within other packages is that the user (either package developer or user of the package) creates a "pointer" to some random number state. Passing that state into a C++ function will allow use of the random functions within dust, and will update the state correctly (see the following section for details). + +First we create a pointer object: ```{r} rng <- dust:::dust_rng_pointer$new() rng ``` -This object acts as a "handle" to some random number state that can be passed safely to C++ programs; the state will be updated when the program runs as a side effect. Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use +Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use ```{r} rng$algorithm @@ -84,7 +90,7 @@ This snippet looks much the same as above: * We've added `[[cpp::linking_to(dust)]]` and included the dust random interface (`dust/r/random.hpp`) * The first line of the function safely creates a pointer to the random state data. The template argument here (``) refers to the rng algorithm and matches `rng$algorithm` * The second line extracts a reference to the first (C++ indexing starting at 0) random number stream - this pair of lines is roughly equivalent to `GetRNGstate()` except that that the random numbers do not come from some global source -* After that the listing proceeds as before proceeds as before, except there is no equivalent to `PutRNGstate()` +* After that the listing proceeds as before proceeds as before, except there is no equivalent to `PutRNGstate()` because the pointer object takes care of this automatically. ```{r} @@ -202,3 +208,37 @@ bench::mark( pi_dust_parallel(1e6, rng, 4), check = FALSE) ``` + +## More on the pointer object + +This section aims to de-mystify the pointer objects a little. The dust random number state is a series of integers (by default 64-bit unsigned integers) that are updated each time a state is drawn (see `vignette("rng.Rmd")`). We expose this state to R as a vector of "raw" values (literally a series of bytes of data). + +```{r} +rng <- dust::dust_rng$new(seed = 1) +rng$state() +``` + +When numbers are drawn from the stream, the state is modified as a side-effect: + +```{r} +rng$random_real(20) +rng$state() +``` + +The same happens with our `dust_rng_pointer` objects used above: + +```{r} +ptr <- dust::dust_rng_pointer$new(seed = 1) +ptr$state() +``` + +Note that `ptr` starts with the same state here as `rng` did because we started from the same seed. When we draw 20 numbers from the stream (by drawing 10 pairs of numbers with our pi-estimation algorithm), we will advance the state + +```{r} +pi_dust(10, ptr) +ptr$state() +``` + +Note that the state here now matches the value returned by `rng`. + +Normally nobody needs to know this - treat the pointer as an object that you pass to functions and ignore the details. From b4bf97e55faafd56f8c07734fb057ff24f806795 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 13:51:50 +0000 Subject: [PATCH 25/27] Fix docs --- R/rng_pointer.R | 8 ++++---- man/dust_rng_pointer.Rd | 24 +++++------------------- vignettes/rng.Rmd | 2 +- 3 files changed, 10 insertions(+), 24 deletions(-) diff --git a/R/rng_pointer.R b/R/rng_pointer.R index b87f4a8ae..fa785f0b4 100644 --- a/R/rng_pointer.R +++ b/R/rng_pointer.R @@ -1,9 +1,9 @@ ##' @title Create pointer to random number generator stream ##' -##' This function exists to support use from other packages that wish -##' to use dust's random number support, and creates an opaque -##' pointer to a set of random number streams. It is desribed more -##' fully in `vignette("rng_package.Rmd")` +##' @description This function exists to support use from other +##' packages that wish to use dust's random number support, and +##' creates an opaque pointer to a set of random number streams. It +##' is described more fully in `vignette("rng_package.Rmd")` ##' ##' @export ##' @examples diff --git a/man/dust_rng_pointer.Rd b/man/dust_rng_pointer.Rd index bdcf33265..822b81fbe 100644 --- a/man/dust_rng_pointer.Rd +++ b/man/dust_rng_pointer.Rd @@ -2,26 +2,12 @@ % Please edit documentation in R/rng_pointer.R \name{dust_rng_pointer} \alias{dust_rng_pointer} -\title{Create pointer to random number generator stream - -This function exists to support use from other packages that wish -to use dust's random number support, and creates an opaque -pointer to a set of random number streams. It is desribed more -fully in \code{vignette("rng_package.Rmd")}} +\title{Create pointer to random number generator stream} \description{ -Create pointer to random number generator stream - -This function exists to support use from other packages that wish -to use dust's random number support, and creates an opaque -pointer to a set of random number streams. It is desribed more -fully in \code{vignette("rng_package.Rmd")} - -Create pointer to random number generator stream - -This function exists to support use from other packages that wish -to use dust's random number support, and creates an opaque -pointer to a set of random number streams. It is desribed more -fully in \code{vignette("rng_package.Rmd")} +This function exists to support use from other +packages that wish to use dust's random number support, and +creates an opaque pointer to a set of random number streams. It +is described more fully in \code{vignette("rng_package.Rmd")} } \examples{ dust::dust_rng_pointer$new() diff --git a/vignettes/rng.Rmd b/vignettes/rng.Rmd index 458ee2f1b..9a017d9ae 100644 --- a/vignettes/rng.Rmd +++ b/vignettes/rng.Rmd @@ -321,7 +321,7 @@ plain_output(readLines(file.path(path_pkg, "NAMESPACE"))) Finally, run `cpp11::cpp_register()` before compiling your package so that the relevant interfaces are created (`R/cpp11.R` and `cpp11/cpp11.cpp`). A similar process would likely work with Rcpp without any dependency on cpp11. -The issue with this example is that every call to `random_normal` must provdide its own `seed` argument, so the random number streams are not continued with each call to the function. This is is not very useful in practice and we describe more fully how to do this properly in `vignette("rng_package.Rmd")`. +The issue with this example is that every call to `random_normal` must provide its own `seed` argument, so the random number streams are not continued with each call to the function. This is is not very useful in practice and we describe more fully how to do this properly in `vignette("rng_package.Rmd")`. ### Standalone, parallel with OpenMP From 369fc2981645186c6b359d96c6c07356eed50185 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 13:56:17 +0000 Subject: [PATCH 26/27] Regenerate vignette --- vignettes/rng_package.Rmd | 79 +++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 11 deletions(-) diff --git a/vignettes/rng_package.Rmd b/vignettes/rng_package.Rmd index 4f913b21b..3e6765cd2 100644 --- a/vignettes/rng_package.Rmd +++ b/vignettes/rng_package.Rmd @@ -51,7 +51,7 @@ and then run it with ```r pi_r(1e6) -#> [1] 3.143272 +#> [1] 3.140756 ``` The key bits within the code above are that we: @@ -65,7 +65,13 @@ Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not b ## Basic implementation using dust -One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source +The implementation in dust will look very similar to above, but the way that we cope with the random number stream will look quite different. With the R version above we are looking after R's global stream (stored in the variable `.Random.seed`) and making sure that it is fetched and set on entry and exit to the function. + +One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source that our function can use. If we were to use the simulation function multiple times we would want the stream to pick up where it left off last time, so the act of calling the function should update the seed as a "side effect". + +The way we expose this for use within other packages is that the user (either package developer or user of the package) creates a "pointer" to some random number state. Passing that state into a C++ function will allow use of the random functions within dust, and will update the state correctly (see the following section for details). + +First we create a pointer object: ```r @@ -82,10 +88,10 @@ rng #> Private: #> is_current_: TRUE #> ptr_: externalptr -#> state_: c2 65 5d bc c6 fd 34 2f 8e 92 da ae cb 41 a8 b9 cf af e0 ... +#> state_: 2b af 1c 0b 86 40 6b 1b 5d 9b 94 99 3a 71 ff be 63 09 89 ... ``` -This object acts as a "handle" to some random number state that can be passed safely to C++ programs; the state will be updated when the program runs as a side effect. Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use +Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use ```r @@ -124,7 +130,7 @@ This snippet looks much the same as above: * We've added `[[cpp::linking_to(dust)]]` and included the dust random interface (`dust/r/random.hpp`) * The first line of the function safely creates a pointer to the random state data. The template argument here (``) refers to the rng algorithm and matches `rng$algorithm` * The second line extracts a reference to the first (C++ indexing starting at 0) random number stream - this pair of lines is roughly equivalent to `GetRNGstate()` except that that the random numbers do not come from some global source -* After that the listing proceeds as before proceeds as before, except there is no equivalent to `PutRNGstate()` +* After that the listing proceeds as before proceeds as before, except there is no equivalent to `PutRNGstate()` because the pointer object takes care of this automatically. @@ -137,7 +143,7 @@ and then run it with ```r pi_dust(1e6, rng) -#> [1] 3.14428 +#> [1] 3.14102 ``` ## Parallel implementation with dust and OpenMP @@ -196,7 +202,7 @@ Here we've made a number of decisions about how to split the problem up subject ```r rng <- dust:::dust_rng_pointer$new(n_streams = 20) pi_dust_parallel(1e6, rng, 4) -#> [1] 3.141316 +#> [1] 3.142025 ``` Unfortunately [`cpp11::cpp_source` does not support using OpenMP](https://github.com/r-lib/cpp11/issues/243) so in the example above the code will run in serial and we can't see if parallelisation will help. @@ -294,8 +300,59 @@ bench::mark( #> # A tibble: 4 x 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> -#> 1 pi_dust_parallel(1e+06, rng, 1) 44.9ms 45.1ms 21.7 0B 0 -#> 2 pi_dust_parallel(1e+06, rng, 2) 22.6ms 22.7ms 43.4 0B 0 -#> 3 pi_dust_parallel(1e+06, rng, 3) 15.8ms 16ms 62.5 0B 0 -#> 4 pi_dust_parallel(1e+06, rng, 4) 11.5ms 11.6ms 84.5 0B 0 +#> 1 pi_dust_parallel(1e+06, rng, 1) 44.1ms 44.4ms 22.5 0B 0 +#> 2 pi_dust_parallel(1e+06, rng, 2) 22.3ms 22.5ms 44.4 0B 0 +#> 3 pi_dust_parallel(1e+06, rng, 3) 15.7ms 15.8ms 63.2 0B 0 +#> 4 pi_dust_parallel(1e+06, rng, 4) 11.2ms 11.5ms 87.1 0B 0 +``` + +## More on the pointer object + +This section aims to de-mystify the pointer objects a little. The dust random number state is a series of integers (by default 64-bit unsigned integers) that are updated each time a state is drawn (see `vignette("rng.Rmd")`). We expose this state to R as a vector of "raw" values (literally a series of bytes of data). + + +```r +rng <- dust::dust_rng$new(seed = 1) +rng$state() +#> [1] c1 5c 02 89 ec 2d 0a 91 1e 61 39 74 08 ab 41 5e c3 86 8d 6d f4 02 8a b1 56 +#> [26] 49 ee d9 dd 95 81 e2 +``` + +When numbers are drawn from the stream, the state is modified as a side-effect: + + +```r +rng$random_real(20) +#> [1] 0.451351392 0.073534657 0.218129406 0.200139527 0.017172743 0.323288520 +#> [7] 0.338823899 0.862898581 0.005702738 0.006453255 0.844823829 0.222676329 +#> [13] 0.665130023 0.283016916 0.502155564 0.290045309 0.055841255 0.083163285 +#> [19] 0.391729373 0.744045249 +rng$state() +#> [1] 2c ed 49 d0 8f 7e 97 2c 2a 5e 9e a1 78 85 47 80 06 d5 05 38 a3 5b 08 12 59 +#> [26] 12 03 5c b3 ea 87 c8 ``` + +The same happens with our `dust_rng_pointer` objects used above: + + +```r +ptr <- dust::dust_rng_pointer$new(seed = 1) +ptr$state() +#> [1] c1 5c 02 89 ec 2d 0a 91 1e 61 39 74 08 ab 41 5e c3 86 8d 6d f4 02 8a b1 56 +#> [26] 49 ee d9 dd 95 81 e2 +``` + +Note that `ptr` starts with the same state here as `rng` did because we started from the same seed. When we draw 20 numbers from the stream (by drawing 10 pairs of numbers with our pi-estimation algorithm), we will advance the state + + +```r +pi_dust(10, ptr) +#> [1] 4 +ptr$state() +#> [1] 2c ed 49 d0 8f 7e 97 2c 2a 5e 9e a1 78 85 47 80 06 d5 05 38 a3 5b 08 12 59 +#> [26] 12 03 5c b3 ea 87 c8 +``` + +Note that the state here now matches the value returned by `rng`. + +Normally nobody needs to know this - treat the pointer as an object that you pass to functions and ignore the details. From 621e602d1c3c990aa4376d96d3b4a32c868c5af6 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 12 Nov 2021 14:07:39 +0000 Subject: [PATCH 27/27] Add test that state() syncs rng state --- tests/testthat/test-rng-interface.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R index 8fa6f8982..583063f6d 100644 --- a/tests/testthat/test-rng-interface.R +++ b/tests/testthat/test-rng-interface.R @@ -22,6 +22,17 @@ test_that("Using object requires sync", { }) +test_that("Fetching state syncs", { + obj <- dust_rng_pointer$new() + expect_true(obj$is_current()) + r <- obj$state() + test_xoshiro_run(obj) + expect_false(obj$is_current()) + expect_false(identical(obj$state(), r)) + expect_true(obj$is_current()) +}) + + test_that("Invalidated pointers can be rebuilt", { obj1 <- dust_rng_pointer$new() obj2 <- corrupt_pointer(obj1)