diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a7a48e945..0564eba65 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -70,6 +70,12 @@ jobs: remotes::install_cran("rcmdcheck") shell: Rscript {0} + - name: Move real vignettes + if: runner.os != 'Windows' + run: | + cp vignettes_src/rng_package.Rmd vignettes + cp vignettes_src/rng_pi*.cpp vignettes + - name: Check env: _R_CHECK_CRAN_INCOMING_REMOTE_: false diff --git a/.gitignore b/.gitignore index 0a3dce8a6..c404599f5 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ inst/doc pkgdown inst/include/cub *.gcov -vignettes_src/gpu.md +vignettes_src/*.md .vscode/ diff --git a/DESCRIPTION b/DESCRIPTION index 6a2b4231c..cfc915d52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: dust Title: Iterate Multiple Realisations of Stochastic Models -Version: 0.11.7 +Version: 0.11.8 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("John", "Lees", role = "aut"), diff --git a/Makefile b/Makefile index 35483f202..1fed3392f 100644 --- a/Makefile +++ b/Makefile @@ -49,7 +49,10 @@ clean: src/*.gcov src/*.gcda src/*.gcno vignettes/gpu.Rmd: vignettes_src/gpu.Rmd - ./scripts/build_gpu_vignette + ./scripts/build_vignette gpu + +vignettes/rng_package.Rmd: vignettes_src/rng_package.Rmd + ./scripts/build_vignette rng_package vignettes: vignettes/dust.Rmd vignettes/rng.Rmd ${RSCRIPT} -e 'tools::buildVignettes(dir = ".")' diff --git a/NAMESPACE b/NAMESPACE index 0521a3343..df5069786 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(dust_openmp_support) export(dust_openmp_threads) export(dust_package) export(dust_rng) +export(dust_rng_pointer) export(dust_rng_state_long_jump) importFrom(stats,coef) useDynLib(dust, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md index ff1abbfbd..b65e96fb2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# dust 0.11.8 + +* Improved the interface for using dust's random number support from other packages (#329) + # dust 0.11.7 * New polar algorithm for normally distributed random numbers; faster than Box-Muller but slower than Ziggurat diff --git a/R/cpp11.R b/R/cpp11.R index e088446ba..8320d6c16 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -24,6 +24,18 @@ density_poisson <- function(x, lambda, log) { .Call(`_dust_density_poisson`, x, lambda, log) } +dust_rng_pointer_init <- function(n_streams, seed, algorithm) { + .Call(`_dust_dust_rng_pointer_init`, n_streams, seed, algorithm) +} + +dust_rng_pointer_sync <- function(obj, algorithm) { + invisible(.Call(`_dust_dust_rng_pointer_sync`, obj, algorithm)) +} + +test_rng_pointer_get <- function(obj, n_streams) { + .Call(`_dust_test_rng_pointer_get`, obj, n_streams) +} + dust_rng_alloc <- function(r_seed, n_generators, deterministic, is_float) { .Call(`_dust_dust_rng_alloc`, r_seed, n_generators, deterministic, is_float) } @@ -288,8 +300,8 @@ test_cuda_pars <- function(r_gpu_config, n_particles, n_particles_each, n_state, .Call(`_dust_test_cuda_pars`, r_gpu_config, n_particles, n_particles_each, n_state, n_state_full, n_shared_int, n_shared_real, data_size, shared_size) } -test_xoshiro_run <- function(name) { - .Call(`_dust_test_xoshiro_run`, name) +test_xoshiro_run <- function(obj) { + .Call(`_dust_test_xoshiro_run`, obj) } cpp_scale_log_weights <- function(w) { diff --git a/R/dust.R b/R/dust.R index 55698f97b..5e5628628 100644 --- a/R/dust.R +++ b/R/dust.R @@ -1,4 +1,4 @@ -## Generated by dust (version 0.11.4) - do not edit +## Generated by dust (version 0.11.6) - do not edit sir <- R6::R6Class( "dust", cloneable = FALSE, diff --git a/R/rng_pointer.R b/R/rng_pointer.R new file mode 100644 index 000000000..fa785f0b4 --- /dev/null +++ b/R/rng_pointer.R @@ -0,0 +1,76 @@ +##' @title Create pointer to random number generator stream +##' +##' @description This function exists to support use from other +##' packages that wish to use dust's random number support, and +##' creates an opaque pointer to a set of random number streams. It +##' is described more fully in `vignette("rng_package.Rmd")` +##' +##' @export +##' @examples +##' dust::dust_rng_pointer$new() +dust_rng_pointer <- R6::R6Class( + "dust_rng_pointer", + cloneable = FALSE, + + private = list( + ptr_ = NULL, + state_ = NULL, + is_current_ = NULL + ), + + public = list( + ##' @field algorithm The name of the generator algorithm used (read-only) + algorithm = NULL, + + ##' @field n_streams The number of streams of random numbers provided + ##' (read-only) + n_streams = NULL, + + ##' @description Create a new `dust_rng_pointer` object + ##' + ##' @param seed The random number seed to use (see [dust::dust_rng] + ##' for details) + ##' + ##' @param n_streams The number of independent random number streams to + ##' create + ##' + ##' @param algorithm The random number algorithm to use. The default is + ##' `xoshiro256plus` which is a good general choice + initialize = function(seed = NULL, n_streams = 1L, + algorithm = "xoshiro256plus") { + dat <- dust_rng_pointer_init(n_streams, seed, algorithm) + private$ptr_ <- dat[[1L]] + private$state_ <- dat[[2L]] + private$is_current_ <- TRUE + + self$algorithm <- algorithm + self$n_streams <- n_streams + lockBinding("algorithm", self) + lockBinding("n_streams", self) + }, + + ##' @description Synchronise the R copy of the random number state. + ##' Typically this is only needed before serialisation if you have + ##' ever used the object. + sync = function() { + dust_rng_pointer_sync(private, self$algorithm) + }, + + ##' @description Return a raw vector of state. This can be used to + ##' create other generators with the same state. + state = function() { + if (!private$is_current_) { + self$sync() + } + private$state_ + }, + + ##' @description Return a logical, indicating if the random number + ##' state that would be returned by `state()` is "current" (i.e., the + ##' same as the copy held in the pointer) or not. This is `TRUE` on + ##' creation or immediately after calling `$sync()` or `$state()` + ##' and `FALSE` after any use of the pointer. + is_current = function() { + private$is_current_ + } + )) diff --git a/extra/harness.cpp b/extra/harness.cpp index 7aadb666d..1a7552575 100644 --- a/extra/harness.cpp +++ b/extra/harness.cpp @@ -7,11 +7,11 @@ typedef uint64_t int_type; constexpr size_t data_size = 4; #elif defined(XOSHIRO128) -typedef uint64_t int_type; +typedef uint32_t int_type; constexpr size_t data_size = 4; #elif defined(XOROSHIRO128) typedef uint64_t int_type; -constexpr size_t data_size = 4; +constexpr size_t data_size = 2; #elif defined(XOSHIRO512) typedef uint64_t int_type; constexpr size_t data_size = 8; @@ -51,10 +51,16 @@ int main() { } auto x = next(); std::cout << - //std::setw(16) << std::setfill('0') << std::hex << x << " " << std::dec << x << std::endl; } + // At the end we dump out the model state, in hex: + for (int i = 0; i < data_size; ++i) { + std::cout << std::setw(sizeof(int_type) * 2) << + std::setfill('0') << std::hex << s[i] << + std::endl; + } + return 0; } diff --git a/inst/WORDLIST b/inst/WORDLIST index f740c6591..e71357175 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -23,6 +23,7 @@ Marsaglia Mersenne OMP OpenMP +OpenMP's Perez Poisson R's diff --git a/inst/include/dust/r/helpers.hpp b/inst/include/dust/r/helpers.hpp index c97d1dc59..2eecacf41 100644 --- a/inst/include/dust/r/helpers.hpp +++ b/inst/include/dust/r/helpers.hpp @@ -364,7 +364,7 @@ dust_inputs process_inputs_single(cpp11::list r_pars, int step, dust::r::validate_size(step, "step"); dust::r::validate_positive(n_threads, "n_threads"); std::vector seed = - dust::r::as_rng_seed(r_seed); + dust::random::r::as_rng_seed(r_seed); std::vector> pars; pars.push_back(dust::dust_pars(r_pars)); @@ -389,7 +389,7 @@ dust_inputs process_inputs_multi(cpp11::list r_pars, int step, dust::r::validate_size(step, "step"); dust::r::validate_positive(n_threads, "n_threads"); std::vector seed = - dust::r::as_rng_seed(r_seed); + dust::random::r::as_rng_seed(r_seed); dust::r::check_pars_multi(r_pars); std::vector> pars; diff --git a/inst/include/dust/r/random.hpp b/inst/include/dust/r/random.hpp index 5eebf03a6..c3c9a0ed0 100644 --- a/inst/include/dust/r/random.hpp +++ b/inst/include/dust/r/random.hpp @@ -3,14 +3,33 @@ #include // memcpy +#include +#include +#include #include + #include #include "dust/random/generator.hpp" +#include "dust/random/prng.hpp" namespace dust { +namespace random { namespace r { +template +std::vector raw_seed(cpp11::raws seed_data) { + using int_type = typename rng_state_type::int_type; + constexpr size_t len = sizeof(int_type) * rng_state_type::size(); + if (seed_data.size() == 0 || seed_data.size() % len != 0) { + cpp11::stop("Expected raw vector of length as multiple of %d for 'seed'", + len); + } + std::vector seed(seed_data.size() / sizeof(int_type)); + std::memcpy(seed.data(), RAW(seed_data), seed_data.size()); + return seed; +} + template std::vector as_rng_seed(cpp11::sexp r_seed) { typedef typename rng_state_type::int_type int_type; @@ -21,13 +40,7 @@ std::vector as_rng_seed(cpp11::sexp r_seed) { seed = dust::random::seed_data(seed_int); } else if (seed_type == RAWSXP) { cpp11::raws seed_data = cpp11::as_cpp(r_seed); - constexpr size_t len = sizeof(int_type) * rng_state_type::size(); - if (seed_data.size() == 0 || seed_data.size() % len != 0) { - cpp11::stop("Expected raw vector of length as multiple of %d for 'seed'", - len); - } - seed.resize(seed_data.size() / sizeof(int_type)); - std::memcpy(seed.data(), RAW(seed_data), seed_data.size()); + seed = raw_seed(seed_data); } else if (seed_type == NILSXP) { GetRNGstate(); size_t seed_int = @@ -40,6 +53,113 @@ std::vector as_rng_seed(cpp11::sexp r_seed) { return seed; } +namespace { + +template +std::string algorithm_name() { + std::string ret; + if (std::is_same::value) { + ret = "xoshiro128plus"; + } else if (std::is_same::value) { + ret = "xoshiro128plusplus"; + } else if (std::is_same::value) { + ret = "xoshiro128starstar"; + } else if (std::is_same::value) { + ret = "xoroshiro128plus"; + } else if (std::is_same::value) { + ret = "xoroshiro128plusplus"; + } else if (std::is_same::value) { + ret = "xoroshiro128starstar"; + } else if (std::is_same::value) { + ret = "xoshiro256plus"; + } else if (std::is_same::value) { + ret = "xoshiro256plusplus"; + } else if (std::is_same::value) { + ret = "xoshiro256starstar"; + } else if (std::is_same::value) { + ret = "xoshiro512plus"; + } else if (std::is_same::value) { + ret = "xoshiro512plusplus"; + } else if (std::is_same::value) { + ret = "xoshiro512starstar"; + } + return ret; +} + +template +cpp11::raws rng_state_vector(prng* rng) { + auto state = rng->export_state(); + size_t len = sizeof(typename rng_state_type::int_type) * state.size(); + cpp11::writable::raws r_state(len); + std::memcpy(RAW(r_state), state.data(), len); + return r_state; +} + +} + +template +SEXP rng_pointer_init(int n_streams, cpp11::sexp r_seed) { + auto seed = as_rng_seed(r_seed); + auto *rng = new prng(n_streams, seed); + auto r_ptr = cpp11::external_pointer>(rng); + auto r_state = rng_state_vector(rng); + return cpp11::writable::list({r_ptr, r_state}); +} + +// Start with the assumption that we'll pass in the R6 object, might +// write a simpler version later. +template +prng* rng_pointer_get(cpp11::environment obj, + int n_streams = 0) { + // We could probably do this more efficiently if we store an enum + // in the object but this is probably ok. + const auto algorithm_given = cpp11::as_cpp(obj["algorithm"]); + const auto algorithm_expected = algorithm_name(); + if (algorithm_given != algorithm_expected) { + cpp11::stop("Incorrect rng type: given %s, expected %s", + algorithm_given.c_str(), algorithm_expected.c_str()); + } + + cpp11::environment env_enclos = + cpp11::as_cpp(obj[".__enclos_env__"]); + cpp11::environment env = + cpp11::as_cpp(env_enclos["private"]); + + using ptr_type = cpp11::external_pointer>; + auto ptr = cpp11::as_cpp(env["ptr_"]); + + auto * rng = ptr.get(); + if (rng == nullptr) { + if (!cpp11::as_cpp(env["is_current_"])) { + cpp11::stop("Can't unserialise an rng pointer that was not synced"); + } + cpp11::raws seed_data = cpp11::as_cpp(env["state_"]); + auto seed = raw_seed(seed_data); + const auto n_streams_orig = seed.size() / rng_state_type::size(); + rng = new prng(n_streams_orig, seed); + env["ptr_"] = cpp11::external_pointer>(rng); + } + + if (n_streams > 0 && static_cast(rng->size()) < n_streams) { + cpp11::stop("Requested a rng with %d streams but only have %d", + n_streams, rng->size()); + } + env["is_current_"] = cpp11::as_sexp(false); + + return rng; +} + +template +void rng_pointer_sync(cpp11::environment obj) { + using ptr_type = cpp11::external_pointer>; + if (!cpp11::as_cpp(obj["is_current_"])) { + auto ptr = cpp11::as_cpp(obj["ptr_"]); + obj["state_"] = rng_state_vector(ptr.get()); + obj["is_current_"] = cpp11::as_sexp(true); + } +} + +} } } diff --git a/man/dust_rng_pointer.Rd b/man/dust_rng_pointer.Rd new file mode 100644 index 000000000..822b81fbe --- /dev/null +++ b/man/dust_rng_pointer.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rng_pointer.R +\name{dust_rng_pointer} +\alias{dust_rng_pointer} +\title{Create pointer to random number generator stream} +\description{ +This function exists to support use from other +packages that wish to use dust's random number support, and +creates an opaque pointer to a set of random number streams. It +is described more fully in \code{vignette("rng_package.Rmd")} +} +\examples{ +dust::dust_rng_pointer$new() +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{algorithm}}{The name of the generator algorithm used (read-only)} + +\item{\code{n_streams}}{The number of streams of random numbers provided +(read-only)} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-new}{\code{dust_rng_pointer$new()}} +\item \href{#method-sync}{\code{dust_rng_pointer$sync()}} +\item \href{#method-state}{\code{dust_rng_pointer$state()}} +\item \href{#method-is_current}{\code{dust_rng_pointer$is_current()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-new}{}}} +\subsection{Method \code{new()}}{ +Create a new \code{dust_rng_pointer} object +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$new(seed = NULL, n_streams = 1L, algorithm = "xoshiro256plus")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{seed}}{The random number seed to use (see \link{dust_rng} +for details)} + +\item{\code{n_streams}}{The number of independent random number streams to +create} + +\item{\code{algorithm}}{The random number algorithm to use. The default is +\code{xoshiro256plus} which is a good general choice} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-sync}{}}} +\subsection{Method \code{sync()}}{ +Synchronise the R copy of the random number state. +Typically this is only needed before serialisation if you have +ever used the object. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$sync()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-state}{}}} +\subsection{Method \code{state()}}{ +Return a raw vector of state. This can be used to +create other generators with the same state. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$state()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-is_current}{}}} +\subsection{Method \code{is_current()}}{ +Return a logical, indicating if the random number +state that would be returned by \code{state()} is "current" (i.e., the +same as the copy held in the pointer) or not. This is \code{TRUE} on +creation or immediately after calling \verb{$sync()} or \verb{$state()} +and \code{FALSE} after any use of the pointer. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{dust_rng_pointer$is_current()}\if{html}{\out{
}} +} + +} +} diff --git a/scripts/build_gpu_vignette b/scripts/build_gpu_vignette deleted file mode 100755 index 408f793d9..000000000 --- a/scripts/build_gpu_vignette +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# We can't run the gpu vignette meaningfully on any computer that -# lacks a suitable graphics card and toolchain (including CI or CRAN), -# so we pre-compile it occasionally here. The version in vignettes_src -# should be edited, then this script run to create the version in -# vignettes -(cd vignettes_src && Rscript -e 'knitr::knit("gpu.Rmd")') -header="DO NOT EDIT THIS FILE - see vignettes_src and make changes there" -sed -s 's/[[:space:]]*$//' vignettes_src/gpu.md | - sed 's/\r//g' | - sed "s/HEADER/$header/" > vignettes/gpu.Rmd diff --git a/scripts/build_vignette b/scripts/build_vignette new file mode 100755 index 000000000..845fee447 --- /dev/null +++ b/scripts/build_vignette @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -e +SRC="$1.Rmd" +DEST="$1.md" + +(cd vignettes_src && Rscript -e "knitr::knit(\"${SRC}\")") +header="DO NOT EDIT THIS FILE - see vignettes_src and make changes there" +sed -s 's/[[:space:]]*$//' vignettes_src/$DEST | + sed 's/\r//g' | + sed "s/HEADER/$header/" > vignettes/$SRC diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 80d335008..d4735c866 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -47,6 +47,28 @@ extern "C" SEXP _dust_density_poisson(SEXP x, SEXP lambda, SEXP log) { return cpp11::as_sexp(density_poisson(cpp11::as_cpp>(x), cpp11::as_cpp>(lambda), cpp11::as_cpp>(log))); END_CPP11 } +// dust_rng_pointer.cpp +cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, std::string algorithm); +extern "C" SEXP _dust_dust_rng_pointer_init(SEXP n_streams, SEXP seed, SEXP algorithm) { + BEGIN_CPP11 + return cpp11::as_sexp(dust_rng_pointer_init(cpp11::as_cpp>(n_streams), cpp11::as_cpp>(seed), cpp11::as_cpp>(algorithm))); + END_CPP11 +} +// dust_rng_pointer.cpp +void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm); +extern "C" SEXP _dust_dust_rng_pointer_sync(SEXP obj, SEXP algorithm) { + BEGIN_CPP11 + dust_rng_pointer_sync(cpp11::as_cpp>(obj), cpp11::as_cpp>(algorithm)); + return R_NilValue; + END_CPP11 +} +// dust_rng_pointer.cpp +double test_rng_pointer_get(cpp11::environment obj, int n_streams); +extern "C" SEXP _dust_test_rng_pointer_get(SEXP obj, SEXP n_streams) { + BEGIN_CPP11 + return cpp11::as_sexp(test_rng_pointer_get(cpp11::as_cpp>(obj), cpp11::as_cpp>(n_streams))); + END_CPP11 +} // dust_rng.cpp SEXP dust_rng_alloc(cpp11::sexp r_seed, int n_generators, bool deterministic, bool is_float); extern "C" SEXP _dust_dust_rng_alloc(SEXP r_seed, SEXP n_generators, SEXP deterministic, SEXP is_float) { @@ -518,10 +540,10 @@ extern "C" SEXP _dust_test_cuda_pars(SEXP r_gpu_config, SEXP n_particles, SEXP n END_CPP11 } // test_rng.cpp -std::vector test_xoshiro_run(std::string name); -extern "C" SEXP _dust_test_xoshiro_run(SEXP name) { +std::vector test_xoshiro_run(cpp11::environment obj); +extern "C" SEXP _dust_test_xoshiro_run(SEXP obj) { BEGIN_CPP11 - return cpp11::as_sexp(test_xoshiro_run(cpp11::as_cpp>(name))); + return cpp11::as_sexp(test_xoshiro_run(cpp11::as_cpp>(obj))); END_CPP11 } // tools.cpp @@ -1159,6 +1181,8 @@ static const R_CallMethodDef CallEntries[] = { {"_dust_dust_rng_long_jump", (DL_FUNC) &_dust_dust_rng_long_jump, 2}, {"_dust_dust_rng_multinomial", (DL_FUNC) &_dust_dust_rng_multinomial, 6}, {"_dust_dust_rng_normal", (DL_FUNC) &_dust_dust_rng_normal, 7}, + {"_dust_dust_rng_pointer_init", (DL_FUNC) &_dust_dust_rng_pointer_init, 3}, + {"_dust_dust_rng_pointer_sync", (DL_FUNC) &_dust_dust_rng_pointer_sync, 2}, {"_dust_dust_rng_poisson", (DL_FUNC) &_dust_dust_rng_poisson, 5}, {"_dust_dust_rng_random_normal", (DL_FUNC) &_dust_dust_rng_random_normal, 5}, {"_dust_dust_rng_random_real", (DL_FUNC) &_dust_dust_rng_random_real, 4}, @@ -1175,6 +1199,7 @@ static const R_CallMethodDef CallEntries[] = { {"_dust_dust_walk_capabilities", (DL_FUNC) &_dust_dust_walk_capabilities, 0}, {"_dust_dust_walk_gpu_info", (DL_FUNC) &_dust_dust_walk_gpu_info, 0}, {"_dust_test_cuda_pars", (DL_FUNC) &_dust_test_cuda_pars, 9}, + {"_dust_test_rng_pointer_get", (DL_FUNC) &_dust_test_rng_pointer_get, 2}, {"_dust_test_xoshiro_run", (DL_FUNC) &_dust_test_xoshiro_run, 1}, {NULL, NULL, 0} }; diff --git a/src/dust_rng.cpp b/src/dust_rng.cpp index 30cf24b92..b5d6923ff 100644 --- a/src/dust_rng.cpp +++ b/src/dust_rng.cpp @@ -18,7 +18,7 @@ using dust_rng32 = dust::random::prng>; template SEXP dust_rng_alloc(cpp11::sexp r_seed, int n_generators, bool deterministic) { - auto seed = dust::r::as_rng_seed(r_seed); + auto seed = dust::random::r::as_rng_seed(r_seed); T *rng = new T(n_generators, seed, deterministic); return cpp11::external_pointer(rng); } diff --git a/src/dust_rng_pointer.cpp b/src/dust_rng_pointer.cpp new file mode 100644 index 000000000..a4f0c7cab --- /dev/null +++ b/src/dust_rng_pointer.cpp @@ -0,0 +1,84 @@ +#include + +#ifdef _OPENMP +#include +#endif + +#include +#include + +[[cpp11::register]] +cpp11::sexp dust_rng_pointer_init(int n_streams, cpp11::sexp seed, + std::string algorithm) { + cpp11::sexp ret; + + using namespace dust::random; + if (algorithm == "xoshiro256starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro256plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro256plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro128plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoroshiro128plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512starstar") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512plusplus") { + ret = r::rng_pointer_init(n_streams, seed); + } else if (algorithm == "xoshiro512plus") { + ret = r::rng_pointer_init(n_streams, seed); + } else { + cpp11::stop("Unknown algorithm '%s'", algorithm.c_str()); + } + + return ret; +} + +[[cpp11::register]] +void dust_rng_pointer_sync(cpp11::environment obj, std::string algorithm) { + using namespace dust::random; + if (algorithm == "xoshiro256starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro256plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro256plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro128plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoroshiro128plus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512starstar") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512plusplus") { + r::rng_pointer_sync(obj); + } else if (algorithm == "xoshiro512plus") { + r::rng_pointer_sync(obj); + } +} + +// This exists to check some error paths in rng_pointer_get; it is not +// for use by users. +[[cpp11::register]] +double test_rng_pointer_get(cpp11::environment obj, int n_streams) { + using namespace dust::random; + auto rng = r::rng_pointer_get(obj, n_streams); + return random_real(rng->state(0)); +} diff --git a/src/sir.cpp b/src/sir.cpp index 1c3445601..1417e5729 100644 --- a/src/sir.cpp +++ b/src/sir.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/src/sirs.cpp b/src/sirs.cpp index d7941b1dd..54262ced3 100644 --- a/src/sirs.cpp +++ b/src/sirs.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/src/test_rng.cpp b/src/test_rng.cpp index a37ac8013..815905227 100644 --- a/src/test_rng.cpp +++ b/src/test_rng.cpp @@ -5,7 +5,7 @@ #include #include - +#include template std::string to_string(const T& t) { std::ostringstream ss; @@ -14,8 +14,9 @@ std::string to_string(const T& t) { } template -std::vector test_xoshiro_run1() { - T state = dust::random::seed(42); +std::vector test_xoshiro_run1(cpp11::environment ptr) { + auto rng = dust::random::r::rng_pointer_get(ptr, 1); + auto& state = rng->state(0); constexpr int n = 10; @@ -34,32 +35,33 @@ std::vector test_xoshiro_run1() { } [[cpp11::register]] -std::vector test_xoshiro_run(std::string name) { +std::vector test_xoshiro_run(cpp11::environment obj) { + const auto algorithm = cpp11::as_cpp(obj["algorithm"]); std::vector ret; - if (name == "xoshiro256starstar") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro256plusplus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro256plus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro128starstar") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro128plusplus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro128plus") { - ret = test_xoshiro_run1(); - } else if (name == "xoroshiro128starstar") { - ret = test_xoshiro_run1(); - } else if (name == "xoroshiro128plusplus") { - ret = test_xoshiro_run1(); - } else if (name == "xoroshiro128plus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro512starstar") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro512plusplus") { - ret = test_xoshiro_run1(); - } else if (name == "xoshiro512plus") { - ret = test_xoshiro_run1(); + if (algorithm == "xoshiro256starstar") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro256plusplus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro256plus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro128starstar") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro128plusplus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro128plus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoroshiro128starstar") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoroshiro128plusplus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoroshiro128plus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro512starstar") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro512plusplus") { + ret = test_xoshiro_run1(obj); + } else if (algorithm == "xoshiro512plus") { + ret = test_xoshiro_run1(obj); } return ret; diff --git a/src/variable.cpp b/src/variable.cpp index 7dbdc8e2c..f42f7f5b6 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/src/volatility.cpp b/src/volatility.cpp index cfe826068..129cb6e5c 100644 --- a/src/volatility.cpp +++ b/src/volatility.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/src/walk.cpp b/src/walk.cpp index 997a5a898..ba8f0f878 100644 --- a/src/walk.cpp +++ b/src/walk.cpp @@ -1,4 +1,4 @@ -// Generated by dust (version 0.11.4) - do not edit +// Generated by dust (version 0.11.6) - do not edit #include [[cpp11::register]] diff --git a/tests/testthat/helper-dust.R b/tests/testthat/helper-dust.R index 9466a318a..e2cd04b31 100644 --- a/tests/testthat/helper-dust.R +++ b/tests/testthat/helper-dust.R @@ -54,3 +54,10 @@ copy_directory <- function(src, as) { stop("Error copying files") } } + + +## Serialising and restoring an external pointer replaces the pointer +## with one to NULL. +corrupt_pointer <- function(x) { + unserialize(serialize(x, NULL)) +} diff --git a/tests/testthat/test-rng-interface.R b/tests/testthat/test-rng-interface.R new file mode 100644 index 000000000..583063f6d --- /dev/null +++ b/tests/testthat/test-rng-interface.R @@ -0,0 +1,78 @@ +test_that("Can create pointer object", { + obj <- dust_rng_pointer$new() + expect_true(obj$is_current()) + expect_type(obj$state(), "raw") + expect_length(obj$state(), 32) + expect_equal(obj$algorithm, "xoshiro256plus") + r <- obj$state() + obj$sync() + expect_identical(r, obj$state()) +}) + + +test_that("Using object requires sync", { + obj <- dust_rng_pointer$new() + expect_true(obj$is_current()) + r <- obj$state() + test_xoshiro_run(obj) + expect_false(obj$is_current()) + obj$sync() + expect_true(obj$is_current()) + expect_false(identical(obj$state(), r)) +}) + + +test_that("Fetching state syncs", { + obj <- dust_rng_pointer$new() + expect_true(obj$is_current()) + r <- obj$state() + test_xoshiro_run(obj) + expect_false(obj$is_current()) + expect_false(identical(obj$state(), r)) + expect_true(obj$is_current()) +}) + + +test_that("Invalidated pointers can be rebuilt", { + obj1 <- dust_rng_pointer$new() + obj2 <- corrupt_pointer(obj1) + expect_equal( + test_xoshiro_run(obj2), + test_xoshiro_run(obj1)) + + ## This saves that the pointer is invalid: + obj3 <- corrupt_pointer(obj2) + expect_error( + test_xoshiro_run(obj3), + "Can't unserialise an rng pointer that was not synced") + + ## But if we sync things it's ok: + obj2$sync() + obj4 <- corrupt_pointer(obj2) + expect_equal( + test_xoshiro_run(obj4), + test_xoshiro_run(obj1)) +}) + + +test_that("can't create invalid pointer types", { + expect_error( + dust_rng_pointer$new(algorithm = "mt19937"), + "Unknown algorithm 'mt19937'") +}) + + +test_that("Validate pointers on fetch", { + obj <- dust_rng_pointer$new(algorithm = "xoshiro256starstar") + expect_error( + test_rng_pointer_get(obj, 1), + "Incorrect rng type: given xoshiro256starstar, expected xoshiro256plus") + obj <- dust_rng_pointer$new(algorithm = "xoshiro256plus", n_streams = 4) + expect_error( + test_rng_pointer_get(obj, 20), + "Requested a rng with 20 streams but only have 4") + expect_silent( + test_rng_pointer_get(obj, 0)) + expect_silent( + test_rng_pointer_get(obj, 1)) +}) diff --git a/tests/testthat/test-xoshiro.R b/tests/testthat/test-xoshiro.R index d0dda85f5..879f8a13e 100644 --- a/tests/testthat/test-xoshiro.R +++ b/tests/testthat/test-xoshiro.R @@ -2,8 +2,13 @@ test_that("xoshiro output agrees with reference data", { path <- "xoshiro-ref" status <- list() for (name in dir(path)) { - res <- test_xoshiro_run(name) + obj <- dust_rng_pointer$new(seed = 42, algorithm = name) + res <- test_xoshiro_run(obj) + obj$sync() + len <- if (grepl("^xoshiro128", name)) 4 else 8 + s <- matrix(obj$state(), len)[rev(seq_len(len)), ] + s_str <- apply(s, 2, paste, collapse = "") cmp <- readLines(sprintf("%s/%s", path, name)) - expect_equal(res, cmp) + expect_equal(c(res, s_str), cmp) } }) diff --git a/tests/testthat/xoshiro-ref/xoroshiro128plus b/tests/testthat/xoshiro-ref/xoroshiro128plus index fa2817c08..84f2b1320 100644 --- a/tests/testthat/xoshiro-ref/xoroshiro128plus +++ b/tests/testthat/xoshiro-ref/xoroshiro128plus @@ -28,3 +28,5 @@ 5534324586141643616 12383152334171140518 6818487868096510966 +16d7ba4f38631d13 +457f7c83abfba2dd diff --git a/tests/testthat/xoshiro-ref/xoroshiro128plusplus b/tests/testthat/xoshiro-ref/xoroshiro128plusplus index 214c23def..dfb520946 100644 --- a/tests/testthat/xoshiro-ref/xoroshiro128plusplus +++ b/tests/testthat/xoshiro-ref/xoroshiro128plusplus @@ -28,3 +28,5 @@ 15221866165451680989 13489980999189886949 7198607794995435210 +a6e5ea2f7651cd1c +a354b7a33f8e8b0f diff --git a/tests/testthat/xoshiro-ref/xoroshiro128starstar b/tests/testthat/xoshiro-ref/xoroshiro128starstar index 1fd02ab86..aa280c377 100644 --- a/tests/testthat/xoshiro-ref/xoroshiro128starstar +++ b/tests/testthat/xoshiro-ref/xoroshiro128starstar @@ -28,3 +28,5 @@ 6223406593289342926 8842572349973568536 5532774325646040112 +16d7ba4f38631d13 +457f7c83abfba2dd diff --git a/tests/testthat/xoshiro-ref/xoshiro128plus b/tests/testthat/xoshiro-ref/xoshiro128plus index 078719c3d..680355aba 100644 --- a/tests/testthat/xoshiro-ref/xoshiro128plus +++ b/tests/testthat/xoshiro-ref/xoshiro128plus @@ -28,3 +28,7 @@ 1871045121 2791939146 4201551836 +cea64c5f +db6b6622 +b4d6d7df +f8370366 diff --git a/tests/testthat/xoshiro-ref/xoshiro128plusplus b/tests/testthat/xoshiro-ref/xoshiro128plusplus index e1ab3cecc..c0e6a5308 100644 --- a/tests/testthat/xoshiro-ref/xoshiro128plusplus +++ b/tests/testthat/xoshiro-ref/xoshiro128plusplus @@ -28,3 +28,7 @@ 3392677730 916576617 3653581116 +cea64c5f +db6b6622 +b4d6d7df +f8370366 diff --git a/tests/testthat/xoshiro-ref/xoshiro128starstar b/tests/testthat/xoshiro-ref/xoshiro128starstar index 770f2f9c0..30b317de7 100644 --- a/tests/testthat/xoshiro-ref/xoshiro128starstar +++ b/tests/testthat/xoshiro-ref/xoshiro128starstar @@ -28,3 +28,7 @@ 4105119796 192728354 934133035 +cea64c5f +db6b6622 +b4d6d7df +f8370366 diff --git a/tests/testthat/xoshiro-ref/xoshiro256plus b/tests/testthat/xoshiro-ref/xoshiro256plus index 6d4802b94..ae5950046 100644 --- a/tests/testthat/xoshiro-ref/xoshiro256plus +++ b/tests/testthat/xoshiro-ref/xoshiro256plus @@ -28,3 +28,7 @@ 4016147741394319913 10252714880087829589 4039534024713725545 +80a0fa56688f6d90 +bc1497c72c80d211 +56057dd5c1cdeb94 +c697d77d6ceb8c6f diff --git a/tests/testthat/xoshiro-ref/xoshiro256plusplus b/tests/testthat/xoshiro-ref/xoshiro256plusplus index ba6921b23..ce3076301 100644 --- a/tests/testthat/xoshiro-ref/xoshiro256plusplus +++ b/tests/testthat/xoshiro-ref/xoshiro256plusplus @@ -28,3 +28,7 @@ 5291826448566605555 11793225760405578669 11587817079881687253 +80a0fa56688f6d90 +bc1497c72c80d211 +56057dd5c1cdeb94 +c697d77d6ceb8c6f diff --git a/tests/testthat/xoshiro-ref/xoshiro256starstar b/tests/testthat/xoshiro-ref/xoshiro256starstar index 5f54f96ce..df937aeb5 100644 --- a/tests/testthat/xoshiro-ref/xoshiro256starstar +++ b/tests/testthat/xoshiro-ref/xoshiro256starstar @@ -28,3 +28,7 @@ 3974300978875138071 1780744086601141409 1647406970322170425 +80a0fa56688f6d90 +bc1497c72c80d211 +56057dd5c1cdeb94 +c697d77d6ceb8c6f diff --git a/tests/testthat/xoshiro-ref/xoshiro512plus b/tests/testthat/xoshiro-ref/xoshiro512plus index 0e18792c1..2c7ac3cf4 100644 --- a/tests/testthat/xoshiro-ref/xoshiro512plus +++ b/tests/testthat/xoshiro-ref/xoshiro512plus @@ -28,3 +28,11 @@ 4145877857605170073 9752473752304387592 10718358607918504099 +00b448f275493f8a +5b1367cee3a96bb0 +48b0391817bb975b +8b25dc046efc11a6 +d093498b8e575238 +d93c6d39fdc2f1a4 +36b2328cabb0ab34 +0c9338eb42605ae9 diff --git a/tests/testthat/xoshiro-ref/xoshiro512plusplus b/tests/testthat/xoshiro-ref/xoshiro512plusplus index 4a9f5ed0e..8a87e81f1 100644 --- a/tests/testthat/xoshiro-ref/xoshiro512plusplus +++ b/tests/testthat/xoshiro-ref/xoshiro512plusplus @@ -28,3 +28,11 @@ 2283681299546516403 3808647405580472584 16956876242805140340 +00b448f275493f8a +5b1367cee3a96bb0 +48b0391817bb975b +8b25dc046efc11a6 +d093498b8e575238 +d93c6d39fdc2f1a4 +36b2328cabb0ab34 +0c9338eb42605ae9 diff --git a/tests/testthat/xoshiro-ref/xoshiro512starstar b/tests/testthat/xoshiro-ref/xoshiro512starstar index 4839d97a4..585f79d27 100644 --- a/tests/testthat/xoshiro-ref/xoshiro512starstar +++ b/tests/testthat/xoshiro-ref/xoshiro512starstar @@ -28,3 +28,11 @@ 12006650672579575208 2731211645731671501 15840818449790708025 +00b448f275493f8a +5b1367cee3a96bb0 +48b0391817bb975b +8b25dc046efc11a6 +d093498b8e575238 +d93c6d39fdc2f1a4 +36b2328cabb0ab34 +0c9338eb42605ae9 diff --git a/vignettes/rng.Rmd b/vignettes/rng.Rmd index a3df8242b..9a017d9ae 100644 --- a/vignettes/rng.Rmd +++ b/vignettes/rng.Rmd @@ -321,6 +321,8 @@ plain_output(readLines(file.path(path_pkg, "NAMESPACE"))) Finally, run `cpp11::cpp_register()` before compiling your package so that the relevant interfaces are created (`R/cpp11.R` and `cpp11/cpp11.cpp`). A similar process would likely work with Rcpp without any dependency on cpp11. +The issue with this example is that every call to `random_normal` must provide its own `seed` argument, so the random number streams are not continued with each call to the function. This is is not very useful in practice and we describe more fully how to do this properly in `vignette("rng_package.Rmd")`. + ### Standalone, parallel with OpenMP *This is somewhat more experimental, so let us know if you have success using the library this way.* diff --git a/vignettes/rng_package.Rmd b/vignettes/rng_package.Rmd new file mode 100644 index 000000000..3e6765cd2 --- /dev/null +++ b/vignettes/rng_package.Rmd @@ -0,0 +1,358 @@ +--- +title: "Using RNGs from packages" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using RNGs from packages} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + + + +The dust random number generators are suitable for use from other packages and we provide a few helpers in both R and C++ to make this easier. + +For illustration purposes we will assume that you want to estimate pi using rejection sampling. The basic idea of this algorithm is simple; sample two U(0, 1) points `x` and `y` and test if they lie in the unit circle (i.e. `sqrt(x^2 + x^2) < 1`) giving the ratio of the area of a unit circle to a square. Multiplying the fraction of the points that we accept by 4 yields our estimate of pi. + +## Background using R's random number generator + +First, an example that uses R's API for example (note that while the R API is C, we're using the cpp11 package interface here so that the following examples are similar): + +```cc +#include +#include + +[[cpp11::register]] +double pi_r(int n) { + int tot = 0; + GetRNGstate(); + for (int i = 0; i < n; ++i) { + const double u1 = unif_rand(); + const double u2 = unif_rand(); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + PutRNGstate(); + return tot / static_cast(n) * 4.0; +} +``` + +With cpp11 we can load this with `cpp11::cpp_source` + + +```r +cpp11::cpp_source("rng_pi_r.cpp") +``` + +and then run it with + + +```r +pi_r(1e6) +#> [1] 3.140756 +``` + +The key bits within the code above are that we: + +* included random number support from R via the `R_ext/Random.h` header +* initialised the state of the global random number stream within our C++ context with `GetRNGstate` before drawing any random numbers +* drew some possible large number of numbers using `unif_rand` +* restored the random number state back to R. + +Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not behaving properly. This is explained in detail in the "Writing R Extensions" manual. + +## Basic implementation using dust + +The implementation in dust will look very similar to above, but the way that we cope with the random number stream will look quite different. With the R version above we are looking after R's global stream (stored in the variable `.Random.seed`) and making sure that it is fetched and set on entry and exit to the function. + +One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source that our function can use. If we were to use the simulation function multiple times we would want the stream to pick up where it left off last time, so the act of calling the function should update the seed as a "side effect". + +The way we expose this for use within other packages is that the user (either package developer or user of the package) creates a "pointer" to some random number state. Passing that state into a C++ function will allow use of the random functions within dust, and will update the state correctly (see the following section for details). + +First we create a pointer object: + + +```r +rng <- dust:::dust_rng_pointer$new() +rng +#> +#> Public: +#> algorithm: xoshiro256plus +#> initialize: function (seed = NULL, n_streams = 1L, algorithm = "xoshiro256plus") +#> is_current: function () +#> n_streams: 1 +#> state: function () +#> sync: function () +#> Private: +#> is_current_: TRUE +#> ptr_: externalptr +#> state_: 2b af 1c 0b 86 40 6b 1b 5d 9b 94 99 3a 71 ff be 63 09 89 ... +``` + +Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use + + +```r +rng$algorithm +#> [1] "xoshiro256plus" +``` + +The default will be suitable for most purposes. + +We can rewrite the pi approximation function as: + +```cc +#include +#include + +[[cpp11::linking_to(dust)]] +[[cpp11::register]] +double pi_dust(int n, cpp11::sexp ptr) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + auto& state = rng->state(0); + int tot = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + return tot / static_cast(n) * 4.0; +} +``` + +This snippet looks much the same as above: + +* We've added `[[cpp::linking_to(dust)]]` and included the dust random interface (`dust/r/random.hpp`) +* The first line of the function safely creates a pointer to the random state data. The template argument here (``) refers to the rng algorithm and matches `rng$algorithm` +* The second line extracts a reference to the first (C++ indexing starting at 0) random number stream - this pair of lines is roughly equivalent to `GetRNGstate()` except that that the random numbers do not come from some global source +* After that the listing proceeds as before proceeds as before, except there is no equivalent to `PutRNGstate()` because the pointer object takes care of this automatically. + + + +```r +cpp11::cpp_source("rng_pi_dust.cpp") +``` + +and then run it with + + +```r +pi_dust(1e6, rng) +#> [1] 3.14102 +``` + +## Parallel implementation with dust and OpenMP + +Part of the point of dust's random number generators is that they create independent streams of random numbers that can be safely used in parallel. + +```cc +#include +#include + +#ifdef _OPENMP +#include +#endif + +[[cpp11::linking_to(dust)]] +[[cpp11::register]] +double pi_dust_parallel(int n, cpp11::sexp ptr, int n_threads) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + const auto n_streams = rng->size(); + int tot = 0; +#ifdef _OPENMP +#pragma omp parallel for schedule(static) num_threads(n_threads) \ + reduction(+:tot) +#endif + for (size_t i = 0; i < n_streams; ++i) { + auto& state = rng->state(0); + int tot_i = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot_i++; + } + } + tot += tot_i; + } + return tot / static_cast(n * n_streams) * 4.0; +} +``` + + +```r +cpp11::cpp_source("rng_pi_parallel.cpp") +``` + +Here we've made a number of decisions about how to split the problem up subject to a few constraints about using OpenMP together with R: + +* Generally speaking we want the answer to be independent of the number of threads used to run it, as this will vary in different sessions. As such avoid the temptation to do a loop over the threads; here we instead iterate over *streams* with the idea that there will be one or more streams used per threads. If we ran a single thread we'd get the same answer as if we ran one thread per stream. +* Each thread is going to do it's own loop of length `n` so we need to divide by `n * n_streams` at the end as that's many attempts we have made. +* We use OpenMP's `reduction` clause to safely accumulate the different subtotals (the `tot_i` values) into one `tot` value. +* In order to compile gracefully on machines that do not have OpenMP support both the `#include ` line and the `#pragma omp` line are wrapped in guards that test for `_OPENMP` (see "Writing R Extensions"). +* We let the generator tell us how many streams it has (`n_streams = rng->size()`) but we could as easily specify an ideal number of streams as an argument here and then test that the generator has *at least that many* by adding an argument to the call to `rng_pointer_get` (e.g., if we wanted `m` streams the call would be `rng_pointer_get(ptr, m)`) + + +```r +rng <- dust:::dust_rng_pointer$new(n_streams = 20) +pi_dust_parallel(1e6, rng, 4) +#> [1] 3.142025 +``` + +Unfortunately [`cpp11::cpp_source` does not support using OpenMP](https://github.com/r-lib/cpp11/issues/243) so in the example above the code will run in serial and we can't see if parallelisation will help. + +In order to compile with support, we need to build a little package and set up an appropriate `Makevars` file + + + +The package is fairly minimal: + + +``` +#> . +#> ├── DESCRIPTION +#> ├── NAMESPACE +#> └── src +#> ├── Makevars +#> └── code.cpp +``` + +We have an extremely minimal `DESCRIPTION`, which contains line `LinkingTo: cpp11, dust` from which R will arrange compiler flags to find both packages' headers: + +```plain +Package: piparallel +LinkingTo: cpp11, dust +Version: 0.0.1 +SystemRequirements: C++11 +``` + +The `NAMESPACE` loads the dynamic library + +```plain +useDynLib("piparallel", .registration = TRUE) +exportPattern("^[[:alpha:]]+") +``` + +The `src/Makevars` file contains important flags to pick up OpenMP support: + +```make +PKG_CXXFLAGS=-DHAVE_INLINE $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS=$(SHLIB_OPENMP_CXXFLAGS) +``` + +And `src/code.cpp` contains the file above but without the `[[cpp11::linking_to(dust)]]` line: + +```cc +#include +#include + +#ifdef _OPENMP +#include +#endif + +[[cpp11::register]] +double pi_dust_parallel(int n, cpp11::sexp ptr, int n_threads) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + const auto n_streams = rng->size(); + int tot = 0; +#ifdef _OPENMP +#pragma omp parallel for schedule(static) num_threads(n_threads) \ + reduction(+:tot) +#endif + for (size_t i = 0; i < n_streams; ++i) { + auto& state = rng->state(0); + int tot_i = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot_i++; + } + } + tot += tot_i; + } + return tot / static_cast(n * n_streams) * 4.0; +} +``` + +After compiling and installing the package, `pi_dust_parallel` will be available + + + +Now we have a parallel version we can see a speed-up as we add threads: + + +```r +rng <- dust:::dust_rng_pointer$new(n_streams = 20) +bench::mark( + pi_dust_parallel(1e6, rng, 1), + pi_dust_parallel(1e6, rng, 2), + pi_dust_parallel(1e6, rng, 3), + pi_dust_parallel(1e6, rng, 4), + check = FALSE) +#> # A tibble: 4 x 6 +#> expression min median `itr/sec` mem_alloc `gc/sec` +#> +#> 1 pi_dust_parallel(1e+06, rng, 1) 44.1ms 44.4ms 22.5 0B 0 +#> 2 pi_dust_parallel(1e+06, rng, 2) 22.3ms 22.5ms 44.4 0B 0 +#> 3 pi_dust_parallel(1e+06, rng, 3) 15.7ms 15.8ms 63.2 0B 0 +#> 4 pi_dust_parallel(1e+06, rng, 4) 11.2ms 11.5ms 87.1 0B 0 +``` + +## More on the pointer object + +This section aims to de-mystify the pointer objects a little. The dust random number state is a series of integers (by default 64-bit unsigned integers) that are updated each time a state is drawn (see `vignette("rng.Rmd")`). We expose this state to R as a vector of "raw" values (literally a series of bytes of data). + + +```r +rng <- dust::dust_rng$new(seed = 1) +rng$state() +#> [1] c1 5c 02 89 ec 2d 0a 91 1e 61 39 74 08 ab 41 5e c3 86 8d 6d f4 02 8a b1 56 +#> [26] 49 ee d9 dd 95 81 e2 +``` + +When numbers are drawn from the stream, the state is modified as a side-effect: + + +```r +rng$random_real(20) +#> [1] 0.451351392 0.073534657 0.218129406 0.200139527 0.017172743 0.323288520 +#> [7] 0.338823899 0.862898581 0.005702738 0.006453255 0.844823829 0.222676329 +#> [13] 0.665130023 0.283016916 0.502155564 0.290045309 0.055841255 0.083163285 +#> [19] 0.391729373 0.744045249 +rng$state() +#> [1] 2c ed 49 d0 8f 7e 97 2c 2a 5e 9e a1 78 85 47 80 06 d5 05 38 a3 5b 08 12 59 +#> [26] 12 03 5c b3 ea 87 c8 +``` + +The same happens with our `dust_rng_pointer` objects used above: + + +```r +ptr <- dust::dust_rng_pointer$new(seed = 1) +ptr$state() +#> [1] c1 5c 02 89 ec 2d 0a 91 1e 61 39 74 08 ab 41 5e c3 86 8d 6d f4 02 8a b1 56 +#> [26] 49 ee d9 dd 95 81 e2 +``` + +Note that `ptr` starts with the same state here as `rng` did because we started from the same seed. When we draw 20 numbers from the stream (by drawing 10 pairs of numbers with our pi-estimation algorithm), we will advance the state + + +```r +pi_dust(10, ptr) +#> [1] 4 +ptr$state() +#> [1] 2c ed 49 d0 8f 7e 97 2c 2a 5e 9e a1 78 85 47 80 06 d5 05 38 a3 5b 08 12 59 +#> [26] 12 03 5c b3 ea 87 c8 +``` + +Note that the state here now matches the value returned by `rng`. + +Normally nobody needs to know this - treat the pointer as an object that you pass to functions and ignore the details. diff --git a/vignettes_src/rng_package.Rmd b/vignettes_src/rng_package.Rmd new file mode 100644 index 000000000..1f2176f47 --- /dev/null +++ b/vignettes_src/rng_package.Rmd @@ -0,0 +1,244 @@ +--- +title: "Using RNGs from packages" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using RNGs from packages} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + error = FALSE, + collapse = TRUE, + comment = "#>" +) +lang_output <- function(x, lang) { + cat(c(sprintf("```%s", lang), x, "```"), sep = "\n") +} +cc_output <- function(x) lang_output(x, "cc") +r_output <- function(x) lang_output(x, "r") +plain_output <- function(x) lang_output(x, "plain") +``` + +The dust random number generators are suitable for use from other packages and we provide a few helpers in both R and C++ to make this easier. + +For illustration purposes we will assume that you want to estimate pi using rejection sampling. The basic idea of this algorithm is simple; sample two U(0, 1) points `x` and `y` and test if they lie in the unit circle (i.e. `sqrt(x^2 + x^2) < 1`) giving the ratio of the area of a unit circle to a square. Multiplying the fraction of the points that we accept by 4 yields our estimate of pi. + +## Background using R's random number generator + +First, an example that uses R's API for example (note that while the R API is C, we're using the cpp11 package interface here so that the following examples are similar): + +```{r, results = "asis", echo = FALSE} +cc_output(readLines("rng_pi_r.cpp")) +``` + +With cpp11 we can load this with `cpp11::cpp_source` + +```{r} +cpp11::cpp_source("rng_pi_r.cpp") +``` + +and then run it with + +```{r} +pi_r(1e6) +``` + +The key bits within the code above are that we: + +* included random number support from R via the `R_ext/Random.h` header +* initialised the state of the global random number stream within our C++ context with `GetRNGstate` before drawing any random numbers +* drew some possible large number of numbers using `unif_rand` +* restored the random number state back to R. + +Failure to run the `GetRNGstate` / `PutRNGstate` will result in the stream not behaving properly. This is explained in detail in the "Writing R Extensions" manual. + +## Basic implementation using dust + +The implementation in dust will look very similar to above, but the way that we cope with the random number stream will look quite different. With the R version above we are looking after R's global stream (stored in the variable `.Random.seed`) and making sure that it is fetched and set on entry and exit to the function. + +One of the design ideas in dust is that there is no single global source of random numbers, so we need to create a source that our function can use. If we were to use the simulation function multiple times we would want the stream to pick up where it left off last time, so the act of calling the function should update the seed as a "side effect". + +The way we expose this for use within other packages is that the user (either package developer or user of the package) creates a "pointer" to some random number state. Passing that state into a C++ function will allow use of the random functions within dust, and will update the state correctly (see the following section for details). + +First we create a pointer object: + +```{r} +rng <- dust:::dust_rng_pointer$new() +rng +``` + +Unlike the `dust::dust_rng` object there are no real useful methods on this object and from the R side we'll treat it as a black box. Importantly the `rng` object knows which algorithm it has been created to use + +```{r} +rng$algorithm +``` + +The default will be suitable for most purposes. + +We can rewrite the pi approximation function as: + +```{r, results = "asis", echo = FALSE} +cc_output(readLines("rng_pi_dust.cpp")) +``` + +This snippet looks much the same as above: + +* We've added `[[cpp::linking_to(dust)]]` and included the dust random interface (`dust/r/random.hpp`) +* The first line of the function safely creates a pointer to the random state data. The template argument here (``) refers to the rng algorithm and matches `rng$algorithm` +* The second line extracts a reference to the first (C++ indexing starting at 0) random number stream - this pair of lines is roughly equivalent to `GetRNGstate()` except that that the random numbers do not come from some global source +* After that the listing proceeds as before proceeds as before, except there is no equivalent to `PutRNGstate()` because the pointer object takes care of this automatically. + + +```{r} +cpp11::cpp_source("rng_pi_dust.cpp") +``` + +and then run it with + +```{r} +pi_dust(1e6, rng) +``` + +## Parallel implementation with dust and OpenMP + +Part of the point of dust's random number generators is that they create independent streams of random numbers that can be safely used in parallel. + +```{r, results = "asis", echo = FALSE} +cc_output(readLines("rng_pi_parallel.cpp")) +``` + +```{r} +cpp11::cpp_source("rng_pi_parallel.cpp") +``` + +Here we've made a number of decisions about how to split the problem up subject to a few constraints about using OpenMP together with R: + +* Generally speaking we want the answer to be independent of the number of threads used to run it, as this will vary in different sessions. As such avoid the temptation to do a loop over the threads; here we instead iterate over *streams* with the idea that there will be one or more streams used per threads. If we ran a single thread we'd get the same answer as if we ran one thread per stream. +* Each thread is going to do it's own loop of length `n` so we need to divide by `n * n_streams` at the end as that's many attempts we have made. +* We use OpenMP's `reduction` clause to safely accumulate the different subtotals (the `tot_i` values) into one `tot` value. +* In order to compile gracefully on machines that do not have OpenMP support both the `#include ` line and the `#pragma omp` line are wrapped in guards that test for `_OPENMP` (see "Writing R Extensions"). +* We let the generator tell us how many streams it has (`n_streams = rng->size()`) but we could as easily specify an ideal number of streams as an argument here and then test that the generator has *at least that many* by adding an argument to the call to `rng_pointer_get` (e.g., if we wanted `m` streams the call would be `rng_pointer_get(ptr, m)`) + +```{r} +rng <- dust:::dust_rng_pointer$new(n_streams = 20) +pi_dust_parallel(1e6, rng, 4) +``` + +Unfortunately [`cpp11::cpp_source` does not support using OpenMP](https://github.com/r-lib/cpp11/issues/243) so in the example above the code will run in serial and we can't see if parallelisation will help. + +In order to compile with support, we need to build a little package and set up an appropriate `Makevars` file + +```{r, include = FALSE} +path <- tempfile() +dir.create(path) +dir.create(file.path(path, "src"), FALSE, TRUE) + +writeLines( + c("Package: piparallel", + "LinkingTo: cpp11, dust", + "Version: 0.0.1", + "SystemRequirements: C++11"), + file.path(path, "DESCRIPTION")) +writeLines( + c('useDynLib("piparallel", .registration = TRUE)', + 'exportPattern("^[[:alpha:]]+")'), + file.path(path, "NAMESPACE")) +writeLines( + c("PKG_CXXFLAGS=-DHAVE_INLINE $(SHLIB_OPENMP_CXXFLAGS)", + "PKG_LIBS=$(SHLIB_OPENMP_CXXFLAGS)"), + file.path(path, "src", "Makevars")) +code <- grep("cpp11::linking_to", readLines("rng_pi_parallel.cpp"), + invert = TRUE, value = TRUE) +writeLines(code, file.path(path, "src", "code.cpp")) +``` + +The package is fairly minimal: + +```{r pkg_tree, echo = FALSE} +withr::with_dir(path, fs::dir_tree()) +``` + +We have an extremely minimal `DESCRIPTION`, which contains line `LinkingTo: cpp11, dust` from which R will arrange compiler flags to find both packages' headers: + +```{r, results = "asis", echo = FALSE} +plain_output(readLines(file.path(path, "DESCRIPTION"))) +``` + +The `NAMESPACE` loads the dynamic library + +```{r, results = "asis", echo = FALSE} +plain_output(readLines(file.path(path, "NAMESPACE"))) +``` + +The `src/Makevars` file contains important flags to pick up OpenMP support: + +```{r, results = "asis", echo = FALSE} +lang_output(readLines(file.path(path, "src/Makevars")), "make") +``` + +And `src/code.cpp` contains the file above but without the `[[cpp11::linking_to(dust)]]` line: + +```{r, results = "asis", echo = FALSE} +cc_output(readLines(file.path(path, "src/code.cpp"))) +``` + +After compiling and installing the package, `pi_dust_parallel` will be available + +```{r, include = FALSE} +pkgbuild::compile_dll(path, quiet = TRUE, debug = FALSE) +pkg <- pkgload::load_all(path, compile = FALSE, recompile = FALSE, + warn_conflicts = FALSE, export_all = FALSE, + helpers = FALSE, attach_testthat = FALSE, + quiet = TRUE) +pi_dust_parallel <- pkg$env$pi_dust_parallel +``` + +Now we have a parallel version we can see a speed-up as we add threads: + +```{r} +rng <- dust:::dust_rng_pointer$new(n_streams = 20) +bench::mark( + pi_dust_parallel(1e6, rng, 1), + pi_dust_parallel(1e6, rng, 2), + pi_dust_parallel(1e6, rng, 3), + pi_dust_parallel(1e6, rng, 4), + check = FALSE) +``` + +## More on the pointer object + +This section aims to de-mystify the pointer objects a little. The dust random number state is a series of integers (by default 64-bit unsigned integers) that are updated each time a state is drawn (see `vignette("rng.Rmd")`). We expose this state to R as a vector of "raw" values (literally a series of bytes of data). + +```{r} +rng <- dust::dust_rng$new(seed = 1) +rng$state() +``` + +When numbers are drawn from the stream, the state is modified as a side-effect: + +```{r} +rng$random_real(20) +rng$state() +``` + +The same happens with our `dust_rng_pointer` objects used above: + +```{r} +ptr <- dust::dust_rng_pointer$new(seed = 1) +ptr$state() +``` + +Note that `ptr` starts with the same state here as `rng` did because we started from the same seed. When we draw 20 numbers from the stream (by drawing 10 pairs of numbers with our pi-estimation algorithm), we will advance the state + +```{r} +pi_dust(10, ptr) +ptr$state() +``` + +Note that the state here now matches the value returned by `rng`. + +Normally nobody needs to know this - treat the pointer as an object that you pass to functions and ignore the details. diff --git a/vignettes_src/rng_pi_dust.cpp b/vignettes_src/rng_pi_dust.cpp new file mode 100644 index 000000000..71d1e6791 --- /dev/null +++ b/vignettes_src/rng_pi_dust.cpp @@ -0,0 +1,19 @@ +#include +#include + +[[cpp11::linking_to(dust)]] +[[cpp11::register]] +double pi_dust(int n, cpp11::sexp ptr) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + auto& state = rng->state(0); + int tot = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + return tot / static_cast(n) * 4.0; +} diff --git a/vignettes_src/rng_pi_parallel.cpp b/vignettes_src/rng_pi_parallel.cpp new file mode 100644 index 000000000..ba09be323 --- /dev/null +++ b/vignettes_src/rng_pi_parallel.cpp @@ -0,0 +1,32 @@ +#include +#include + +#ifdef _OPENMP +#include +#endif + +[[cpp11::linking_to(dust)]] +[[cpp11::register]] +double pi_dust_parallel(int n, cpp11::sexp ptr, int n_threads) { + auto rng = + dust::random::r::rng_pointer_get(ptr); + const auto n_streams = rng->size(); + int tot = 0; +#ifdef _OPENMP +#pragma omp parallel for schedule(static) num_threads(n_threads) \ + reduction(+:tot) +#endif + for (size_t i = 0; i < n_streams; ++i) { + auto& state = rng->state(0); + int tot_i = 0; + for (int i = 0; i < n; ++i) { + const double u1 = dust::random::random_real(state); + const double u2 = dust::random::random_real(state); + if (u1 * u1 + u2 * u2 < 1) { + tot_i++; + } + } + tot += tot_i; + } + return tot / static_cast(n * n_streams) * 4.0; +} diff --git a/vignettes_src/rng_pi_r.cpp b/vignettes_src/rng_pi_r.cpp new file mode 100644 index 000000000..d31f6af06 --- /dev/null +++ b/vignettes_src/rng_pi_r.cpp @@ -0,0 +1,17 @@ +#include +#include + +[[cpp11::register]] +double pi_r(int n) { + int tot = 0; + GetRNGstate(); + for (int i = 0; i < n; ++i) { + const double u1 = unif_rand(); + const double u2 = unif_rand(); + if (u1 * u1 + u2 * u2 < 1) { + tot++; + } + } + PutRNGstate(); + return tot / static_cast(n) * 4.0; +}