From 43267f7d931fb438cfa627c923d3bb612981d5b4 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Thu, 7 Dec 2017 20:35:58 +0100
Subject: [PATCH 1/5] Use a sign test for bools

---
 src/distributions/uniform.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index 5dc9fc54cfb..fce3b6c9746 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -184,7 +184,11 @@ impl Distribution for Uniform {
 impl Distribution<bool> for Uniform {
     #[inline]
     fn sample<R: Rng>(&self, rng: &mut R) -> bool {
-        rng.next_u32() & 1 == 1
+        // We can compare against an arbitrary bit of a u32 to get a bool.
+        // Because the least significant bits of a lower-quality RNG can
+        // have simple patterns, we compare against the most significant
+        // bit. This is most easily done with a sign test.
+        (rng.next_u32() as i32) < 0
     }
 }
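
A quick standalone check of the sign test above (a minimal sketch; the
`bool_from_msb` helper, the test values and the `main` harness are mine,
not part of the patch):

    // `(x as i32) < 0` is true exactly when bit 31, the most
    // significant bit of the u32, is set.
    fn bool_from_msb(x: u32) -> bool {
        (x as i32) < 0
    }

    fn main() {
        for &x in &[0u32, 1, 0x7fff_ffff, 0x8000_0000, 0xffff_ffff] {
            assert_eq!(bool_from_msb(x), x >> 31 == 1);
        }
        println!("sign test agrees with bit 31 on all samples");
    }
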
From c8c720ebdc7f16f498738b02947bc0dea64e97aa Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Thu, 7 Dec 2017 20:40:00 +0100
Subject: [PATCH 2/5] Ignore the 3 least significant bits for the ziggurat
 layer

---
 src/distributions/mod.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
index 2f5573a40d2..4c93fe5cc04 100644
--- a/src/distributions/mod.rs
+++ b/src/distributions/mod.rs
@@ -165,6 +165,8 @@ fn ziggurat(
     // Of the remaining 11 least significant bits we use 8 to construct `i`.
     // This saves us generating a whole extra random number, while the added
     // precision of using 64 bits for f64 does not buy us much.
+    // Because for some RNGs the least significant bits can be of lower
+    // statistical quality, we use bits 3..=10 for `i`.
     let bits: u64 = uniform(rng);

     // u is either U(-1, 1) or U(0, 1) depending on if this is a
@@ -176,7 +178,7 @@ fn ziggurat(
     } else {
         bits.closed_open01_fixed()
     };
-    let i = (bits & 0xff) as usize;
+    let i = ((bits >> 3) & 0xff) as usize;

     let x = u * x_tab[i];
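
To make the bit selection concrete, a small sketch of what
`(bits >> 3) & 0xff` extracts (the helper name and test pattern are mine):

    // Keeps bits 3..=10 of the raw value, so the three lowest bits
    // never influence the ziggurat table index.
    fn ziggurat_index(bits: u64) -> usize {
        ((bits >> 3) & 0xff) as usize
    }

    fn main() {
        let bits = 0b101_1001_0110u64;
        assert_eq!(ziggurat_index(bits), 0b1011_0010);
        // Flipping the three low bits leaves the index unchanged:
        assert_eq!(ziggurat_index(bits ^ 0b111), ziggurat_index(bits));
    }
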
From 3f22ddd7eabc6d5bc500a20d5bfa648d05007305 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Sun, 10 Dec 2017 09:02:59 +0100
Subject: [PATCH 3/5] Use widening multiply instead of modulus in RangeInt

---
 benches/distributions.rs   |   5 +-
 src/distributions/range.rs | 146 +++++++++++++++++++++++++++++++++----
 src/sequences/weighted.rs  |  19 +++--
 3 files changed, 147 insertions(+), 23 deletions(-)

diff --git a/benches/distributions.rs b/benches/distributions.rs
index 8424bd6eb3f..d7415e9fb74 100644
--- a/benches/distributions.rs
+++ b/benches/distributions.rs
@@ -1,4 +1,5 @@
 #![feature(test)]
+#![cfg_attr(feature = "i128_support", feature(i128_type, i128))]
 extern crate test;
 extern crate rand;

@@ -46,7 +47,9 @@ macro_rules! distr_range_int {
 distr_range_int!(distr_range_i8, i8, 20i8, 100);
 distr_range_int!(distr_range_i16, i16, -500i16, 2000);
 distr_range_int!(distr_range_i32, i32, -200_000_000i32, 800_000_000);
-distr_range_int!(distr_range_i64, i64, 3i64, 134217671);
+distr_range_int!(distr_range_i64, i64, 3i64, 12345678901234);
+#[cfg(feature = "i128_support")]
+distr_range_int!(distr_range_i128, i128, -12345678901234i128, 12345678901234567890);

 macro_rules! distr_float {

diff --git a/src/distributions/range.rs b/src/distributions/range.rs
index e30d69fbace..06d8ddaf464 100644
--- a/src/distributions/range.rs
+++ b/src/distributions/range.rs
@@ -106,7 +106,7 @@ pub struct RangeInt {
 macro_rules! range_int_impl {
     ($ty:ty, $signed:ident, $unsigned:ident,
-     $i_large:ident, $u_large:ident) => {
+     $i_large:ident, $u_large:ident, $use_mult:expr) => {
         impl SampleRange for $ty {
             type T = RangeInt<$ty>;
         }
@@ -159,6 +159,16 @@ macro_rules! range_int_impl {
             // small integer. For a u8 it only ever uses 7 bits. This means
             // that all but the last 7 bits of `zone` are always 1s (or 15
             // in the case of u16). So nothing is lost by truncating `zone`.
+            //
+            // An alternative to using a modulus is a widening multiply:
+            // after a widening multiply by `range`, the result is in the
+            // high word. Then comparing the low word against `zone` makes
+            // sure our distribution is uniform.
+            //
+            // FIXME: This method is not used for i128/u128. It would
+            // probably help performance a lot, because a 128-bit modulus
+            // is terribly slow, but it means hand-coding a big-integer
+            // widening multiply.

             let unsigned_max: $u_large = ::core::$u_large::MAX;
@@ -192,8 +202,15 @@ macro_rules! range_int_impl {
                 let zone = self.zone as $signed as $i_large as $u_large;
                 loop {
                     let v: $u_large = Rand::rand(rng, Uniform);
-                    if v <= zone {
-                        return self.low.wrapping_add((v % range) as $ty);
+                    if $use_mult {
+                        let (high, low) = v.wmul(range);
+                        if low <= zone {
+                            return self.low.wrapping_add(high as $ty);
+                        }
+                    } else {
+                        if v <= zone {
+                            return self.low.wrapping_add((v % range) as $ty);
+                        }
                     }
                 }
             } else {
@@ -205,20 +222,119 @@ macro_rules! range_int_impl {
         }
     }
 }

-range_int_impl! { i8, i8, u8, i32, u32 }
-range_int_impl! { i16, i16, u16, i32, u32 }
-range_int_impl! { i32, i32, u32, i32, u32 }
-range_int_impl! { i64, i64, u64, i64, u64 }
+range_int_impl! { i8, i8, u8, i32, u32, true }
+range_int_impl! { i16, i16, u16, i32, u32, true }
+range_int_impl! { i32, i32, u32, i32, u32, true }
+range_int_impl! { i64, i64, u64, i64, u64, true }
 #[cfg(feature = "i128_support")]
-range_int_impl! { i128, i128, u128, u128, u128 }
-range_int_impl! { isize, isize, usize, isize, usize }
-range_int_impl! { u8, i8, u8, i32, u32 }
-range_int_impl! { u16, i16, u16, i32, u32 }
-range_int_impl! { u32, i32, u32, i32, u32 }
-range_int_impl! { u64, i64, u64, i64, u64 }
+range_int_impl! { i128, i128, u128, u128, u128, false }
+range_int_impl! { isize, isize, usize, isize, usize, true }
+range_int_impl! { u8, i8, u8, i32, u32, true }
+range_int_impl! { u16, i16, u16, i32, u32, true }
+range_int_impl! { u32, i32, u32, i32, u32, true }
+range_int_impl! { u64, i64, u64, i64, u64, true }
+range_int_impl! { usize, isize, usize, isize, usize, true }
 #[cfg(feature = "i128_support")]
-range_int_impl! { u128, u128, u128, i128, u128 }
-range_int_impl! { usize, isize, usize, isize, usize }
+range_int_impl! { u128, u128, u128, i128, u128, false }
+
+
+trait WideningMultiply<RHS = Self> {
+    type Output;
+
+    fn wmul(self, x: RHS) -> Self::Output;
+}
+
+macro_rules! wmul_impl {
+    ($ty:ty, $wide:ident, $shift:expr) => {
+        impl WideningMultiply for $ty {
+            type Output = ($ty, $ty);
+
+            #[inline(always)]
+            fn wmul(self, x: $ty) -> Self::Output {
+                let tmp = (self as $wide) * (x as $wide);
+                ((tmp >> $shift) as $ty, tmp as $ty)
+            }
+        }
+    }
+}
+
+wmul_impl! { u8, u16, 8 }
+wmul_impl! { u16, u32, 16 }
+wmul_impl! { u32, u64, 32 }
+
+#[cfg(feature = "i128_support")]
+wmul_impl! { u64, u128, 64 }
+
+#[cfg(not(feature = "i128_support"))]
+impl WideningMultiply for u64 {
+    type Output = (u64, u64);
+
+    // This code is a translation of the __mulddi3 function in LLVM's
+    // compiler-rt. It is an optimised variant of the common method
+    // `(a + b) * (c + d) = ac + ad + bc + bd`.
+    //
+    // For some reason LLVM can optimise the C version very well, but
+    // keeps shuffling registers in this Rust translation.
+    #[inline(always)]
+    fn wmul(self, b: u64) -> Self::Output {
+        const LOWER_MASK: u64 = !0u64 >> 32;
+        let mut low = (self & LOWER_MASK).wrapping_mul(b & LOWER_MASK);
+        let mut t = low >> 32;
+        low &= LOWER_MASK;
+        t += (self >> 32).wrapping_mul(b & LOWER_MASK);
+        low += (t & LOWER_MASK) << 32;
+        let mut high = (t >> 32) as i64;
+        t = low >> 32;
+        low &= LOWER_MASK;
+        t += (b >> 32).wrapping_mul(self & LOWER_MASK);
+        low += (t & LOWER_MASK) << 32;
+        high += (t >> 32) as i64;
+        high += (self >> 32).wrapping_mul(b >> 32) as i64;
+
+        (high as u64, low)
+    }
+}
+
+
+macro_rules! wmul_impl_usize {
+    ($ty:ty) => {
+        impl WideningMultiply for usize {
+            type Output = (usize, usize);
+
+            #[inline(always)]
+            fn wmul(self, x: usize) -> Self::Output {
+                let (high, low) = (self as $ty).wmul(x as $ty);
+                (high as usize, low as usize)
+            }
+        }
+    }
+}
+
+#[cfg(target_pointer_width = "32")]
+wmul_impl_usize! { u32 }
+#[cfg(target_pointer_width = "64")]
+wmul_impl_usize! { u64 }
+
+#[cfg(feature = "i128_support")]
+macro_rules! wmul_impl_128 {
+    ($ty:ty) => {
+        impl WideningMultiply for $ty {
+            type Output = ($ty, $ty);
+
+            #[inline(always)]
+            fn wmul(self, _: $ty) -> Self::Output {
+                unimplemented!();
+            }
+        }
+    }
+}
+
+#[cfg(feature = "i128_support")]
+wmul_impl_128! { i128 }
+#[cfg(feature = "i128_support")]
+wmul_impl_128! { u128 }
+
+
 /// Implementation of `RangeImpl` for float types.
 #[derive(Clone, Copy, Debug)]

diff --git a/src/sequences/weighted.rs b/src/sequences/weighted.rs
index 6589fa0a24b..37d88c0e610 100644
--- a/src/sequences/weighted.rs
+++ b/src/sequences/weighted.rs
@@ -141,17 +141,22 @@ mod tests {
     #[test]
     fn test_weighted_choice() {
         // This makes assumptions about the internal implementation of
-        // WeightedChoice, specifically: it doesn't reorder the items,
-        // it doesn't do weird things to the RNG (so 0 maps to 0, 1 to
-        // 1, internally; modulo a modulo operation).
+        // WeightedChoice. It may fail when the implementation of
+        // `distributions::range::RangeInt` changes.
         macro_rules! t {
             ($items:expr, $expected:expr) => {{
                 let items = $items;
+                let mut total_weight = 0;
+                for item in &items { total_weight += item.weight; }
+
                 let wc = WeightedChoice::new(items);
                 let expected = $expected;

-                let mut rng = MockAddRng::new(0, 1);
+                // Use extremely large steps between the random numbers,
+                // because we test with small ranges and RangeInt is
+                // designed to prefer the most significant bits.
+                let mut rng = MockAddRng::new(0, !0 / (total_weight as u64));

                 for &val in expected.iter() {
                     assert_eq!(wc.sample(&mut rng), val)
@@ -166,12 +171,12 @@ fn test_weighted_choice() {
                 Weighted { weight: 2, item: 21},
                 Weighted { weight: 0, item: 22},
                 Weighted { weight: 1, item: 23}),
-           [21,21, 23]);
+           [21, 21, 23]);

         // different weights
         t!(vec!(Weighted { weight: 4, item: 30},
                 Weighted { weight: 3, item: 31}),
-           [30,30,30,30, 31,31,31]);
+           [30, 31, 30, 31, 30, 31, 30]);

         // check that we're binary searching
         // correctly with some vectors of odd
@@ -189,7 +194,7 @@ fn test_weighted_choice() {
                 Weighted { weight: 1, item: 54},
                 Weighted { weight: 1, item: 55},
                 Weighted { weight: 1, item: 56}),
-           [50, 51, 52, 53, 54, 55, 56]);
+           [50, 54, 51, 55, 52, 56, 53]);
     }

     #[test]
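
For reference, a self-contained sketch of the rejection scheme this patch
implements, specialised to u32 with a u64 widening multiply (the function
names and the toy xorshift generator are mine; the real code is generic
over the integer types and carries `zone` inside the range object):

    // Widening multiply: compute `v * range` in 64 bits and split it.
    // The high word is the candidate sample in 0..range; the low word
    // decides acceptance.
    fn wmul_u32(a: u32, b: u32) -> (u32, u32) {
        let t = (a as u64) * (b as u64);
        ((t >> 32) as u32, t as u32)
    }

    fn sample_range(range: u32, mut next_u32: impl FnMut() -> u32) -> u32 {
        // Reject the partial stripe at the top of the input space, so
        // every value in 0..range is produced by the same number of
        // inputs.
        let ints_to_reject = (u32::MAX - range + 1) % range;
        let zone = u32::MAX - ints_to_reject;
        loop {
            let v = next_u32();
            let (high, low) = wmul_u32(v, range);
            if low <= zone {
                return high;
            }
        }
    }

    fn main() {
        let mut state = 0x9E37_79B9u32;
        let mut rng = move || {
            // A toy xorshift32, good enough for a demo.
            state ^= state << 13;
            state ^= state >> 17;
            state ^= state << 5;
            state
        };
        let mut counts = [0u32; 7];
        for _ in 0..70_000 {
            counts[sample_range(7, &mut rng) as usize] += 1;
        }
        println!("{:?}", counts); // buckets come out roughly equal
    }
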
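And a property check of the portable `__mulddi3`-style 64-bit widening
multiply against native 128-bit arithmetic (the harness is mine, and the
arithmetic is simplified to unsigned u64 where the patch uses i64
intermediates; it assumes a toolchain with `u128`, i.e. any modern Rust):

    // Same halving scheme as the u64 fallback in the patch: split both
    // operands into 32-bit halves and combine the four partial products.
    fn wmul_u64(a: u64, b: u64) -> (u64, u64) {
        const LOWER_MASK: u64 = !0u64 >> 32;
        let mut low = (a & LOWER_MASK).wrapping_mul(b & LOWER_MASK);
        let mut t = low >> 32;
        low &= LOWER_MASK;
        t += (a >> 32).wrapping_mul(b & LOWER_MASK);
        low += (t & LOWER_MASK) << 32;
        let mut high = t >> 32;
        t = low >> 32;
        low &= LOWER_MASK;
        t += (b >> 32).wrapping_mul(a & LOWER_MASK);
        low += (t & LOWER_MASK) << 32;
        high += t >> 32;
        high += (a >> 32).wrapping_mul(b >> 32);
        (high, low)
    }

    fn main() {
        let pairs = [
            (0u64, 0u64),
            (1 << 32, 1 << 32),
            (u64::MAX, u64::MAX),
            (0xDEAD_BEEF_CAFE_F00D, 0x0123_4567_89AB_CDEF),
        ];
        for &(a, b) in &pairs {
            let (high, low) = wmul_u64(a, b);
            assert_eq!(((high as u128) << 64) | low as u128,
                       (a as u128) * (b as u128));
        }
        println!("wmul_u64 agrees with u128 multiplication");
    }
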
From 68edc9fc4e67b9ebe6b6d36035b0059bee27cd78 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Mon, 11 Dec 2017 12:34:44 +0100
Subject: [PATCH 4/5] Add a few extra distr benchmarks

---
 benches/distributions.rs | 85 +++++++++++++++++-----------------------
 1 file changed, 35 insertions(+), 50 deletions(-)

diff --git a/benches/distributions.rs b/benches/distributions.rs
index d7415e9fb74..7fded29c9ea 100644
--- a/benches/distributions.rs
+++ b/benches/distributions.rs
@@ -9,29 +9,16 @@ const RAND_BENCH_N: u64 = 1000;
 use std::mem::size_of;
 use test::{black_box, Bencher};

-use rand::{Default, Rand, NewSeeded};
+use rand::NewSeeded;
 use rand::prng::XorShiftRng;
 use rand::distributions::*;

-#[bench]
-fn distr_baseline(b: &mut Bencher) {
-    let mut rng = XorShiftRng::new().unwrap();
-
-    b.iter(|| {
-        for _ in 0..::RAND_BENCH_N {
-            black_box(u64::rand(&mut rng, Default));
-        }
-    });
-    b.bytes = size_of::<u64>() as u64 * ::RAND_BENCH_N;
-}
-
-macro_rules! distr_range_int {
-    ($fnn:ident, $ty:ty, $low:expr, $high:expr) => {
+macro_rules! distr {
+    ($fnn:ident, $ty:ty, $distr:expr) => {
         #[bench]
         fn $fnn(b: &mut Bencher) {
             let mut rng = XorShiftRng::new().unwrap();
-            let distr = Range::new($low, $high);
+            let distr = $distr;

             b.iter(|| {
                 for _ in 0..::RAND_BENCH_N {
@@ -44,41 +31,39 @@
 }

-distr_range_int!(distr_range_i8, i8, 20i8, 100);
-distr_range_int!(distr_range_i16, i16, -500i16, 2000);
-distr_range_int!(distr_range_i32, i32, -200_000_000i32, 800_000_000);
-distr_range_int!(distr_range_i64, i64, 3i64, 12345678901234);
+// range
+distr!(distr_range_i8, i8, Range::new(20i8, 100));
+distr!(distr_range_i16, i16, Range::new(-500i16, 2000));
+distr!(distr_range_i32, i32, Range::new(-200_000_000i32, 800_000_000));
+distr!(distr_range_i64, i64, Range::new(3i64, 12345678901234));
 #[cfg(feature = "i128_support")]
-distr_range_int!(distr_range_i128, i128, -12345678901234i128, 12345678901234567890);
+distr!(distr_range_i128, i128, Range::new(-12345678901234i128, 12345678901234567890));
+
+distr!(distr_range_float32, f32, Range::new(2.26f32, 2.319));
+distr!(distr_range_float, f64, Range::new(2.26f64, 2.319));

-macro_rules! distr_float {
-    ($fnn:ident, $ty:ty, $distr:expr) => {
-        #[bench]
-        fn $fnn(b: &mut Bencher) {
-            let mut rng = XorShiftRng::new().unwrap();
-            let distr = $distr;
-
-            b.iter(|| {
-                for _ in 0..::RAND_BENCH_N {
-                    let x: $ty = distr.sample(&mut rng);
-                    black_box(x);
-                }
-            });
-            b.bytes = size_of::<$ty>() as u64 * ::RAND_BENCH_N;
-        }
-    }
-}
+// uniform
+distr!(distr_uniform_i8, i8, Uniform);
+distr!(distr_uniform_i16, i16, Uniform);
+distr!(distr_uniform_i32, i32, Uniform);
+distr!(distr_uniform_i64, i64, Uniform);
+#[cfg(feature = "i128_support")]
+distr!(distr_uniform_i128, i128, Uniform);
+
+distr!(distr_uniform_bool, bool, Uniform);
+distr!(distr_uniform_ascii_char, char, AsciiWordChar);
+
+distr!(distr_uniform01_float32, f32, Uniform01);
+distr!(distr_closed01_float32, f32, Closed01);
+distr!(distr_open01_float32, f32, Open01);

-distr_float!(distr_uniform01_float32, f32, Uniform01);
-distr_float!(distr_closed01_float32, f32, Closed01);
-distr_float!(distr_open01_float32, f32, Open01);
+distr!(distr_uniform01_float, f64, Uniform01);
+distr!(distr_closed01_float, f64, Closed01);
+distr!(distr_open01_float, f64, Open01);

-distr_float!(distr_uniform01_float, f64, Uniform01);
-distr_float!(distr_closed01_float, f64, Closed01);
-distr_float!(distr_open01_float, f64, Open01);
-distr_float!(distr_range_float, f64, Range::new(2.26f64, 2.319f64));
-distr_float!(distr_exp, f64, Exp::new(2.71828 * 3.14159));
-distr_float!(distr_normal, f64, Normal::new(-2.71828, 3.14159));
-distr_float!(distr_log_normal, f64, LogNormal::new(-2.71828, 3.14159));
-distr_float!(distr_gamma_large_shape, f64, Gamma::new(10., 1.0));
-distr_float!(distr_gamma_small_shape, f64, Gamma::new(0.1, 1.0));
+// distributions
+distr!(distr_exp, f64, Exp::new(2.71828 * 3.14159));
+distr!(distr_normal, f64, Normal::new(-2.71828, 3.14159));
+distr!(distr_log_normal, f64, LogNormal::new(-2.71828, 3.14159));
+distr!(distr_gamma_large_shape, f64, Gamma::new(10., 1.0));
+distr!(distr_gamma_small_shape, f64, Gamma::new(0.1, 1.0));
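
For readers skimming the macro, here is one instance of `distr!`
hand-expanded from the macro body above, in the context of this bench
file (my manual substitution, not compiler output):

    #[bench]
    fn distr_range_i8(b: &mut Bencher) {
        let mut rng = XorShiftRng::new().unwrap();
        let distr = Range::new(20i8, 100);

        b.iter(|| {
            for _ in 0..::RAND_BENCH_N {
                let x: i8 = distr.sample(&mut rng);
                black_box(x);
            }
        });
        b.bytes = size_of::<i8>() as u64 * ::RAND_BENCH_N;
    }
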
From 173c2b4c64ad08584043a179f1ac0697946c4cbe Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Mon, 11 Dec 2017 13:40:56 +0100
Subject: [PATCH 5/5] Benchmark constructing and sampling from a range

---
 benches/distributions.rs | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/benches/distributions.rs b/benches/distributions.rs
index 7fded29c9ea..ec2906a7353 100644
--- a/benches/distributions.rs
+++ b/benches/distributions.rs
@@ -67,3 +67,29 @@ distr!(distr_normal, f64, Normal::new(-2.71828, 3.14159));
 distr!(distr_log_normal, f64, LogNormal::new(-2.71828, 3.14159));
 distr!(distr_gamma_large_shape, f64, Gamma::new(10., 1.0));
 distr!(distr_gamma_small_shape, f64, Gamma::new(0.1, 1.0));
+
+
+// construct and sample from a range
+macro_rules! gen_range_int {
+    ($fnn:ident, $ty:ty, $low:expr, $high:expr) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng = XorShiftRng::new().unwrap();
+
+            b.iter(|| {
+                for _ in 0..::RAND_BENCH_N {
+                    let x: $ty = Range::new($low, $high).sample(&mut rng);
+                    black_box(x);
+                }
+            });
+            b.bytes = size_of::<$ty>() as u64 * ::RAND_BENCH_N;
+        }
+    }
+}
+
+gen_range_int!(gen_range_i8, i8, 20i8, 100);
+gen_range_int!(gen_range_i16, i16, -500i16, 2000);
+gen_range_int!(gen_range_i32, i32, -200_000_000i32, 800_000_000);
+gen_range_int!(gen_range_i64, i64, 3i64, 12345678901234);
+#[cfg(feature = "i128_support")]
+gen_range_int!(gen_range_i128, i128, -12345678901234i128, 12345678901234567890);
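
What these `gen_range_*` benchmarks isolate, compared with the
`distr_range_*` ones from patch 4: `Range::new` pays for the `zone`
modulus on every iteration, while the `distr` benchmarks pay it once and
then only multiply and compare. A runnable toy model of the two usage
patterns, reusing the widening-multiply acceptance from patch 3 (the
`ToyRange` type and the fake RNG are mine, standing in for `RangeInt`;
this is not the rand API):

    // `new` does the expensive division; `sample` only multiplies,
    // compares and adds -- mirroring RangeInt's split of work.
    struct ToyRange { low: u32, range: u32, zone: u32 }

    impl ToyRange {
        fn new(low: u32, high: u32) -> ToyRange {
            let range = high - low;
            let ints_to_reject = (u32::MAX - range + 1) % range;
            ToyRange { low, range, zone: u32::MAX - ints_to_reject }
        }

        fn sample(&self, mut next_u32: impl FnMut() -> u32) -> u32 {
            loop {
                let t = (next_u32() as u64) * (self.range as u64);
                if (t as u32) <= self.zone {
                    return self.low + (t >> 32) as u32;
                }
            }
        }
    }

    fn main() {
        let mut v = 0u32;
        let mut fake_rng = move || { v = v.wrapping_add(0x9E37_79B9); v };

        // distr_range_*: construct once, sample in a loop.
        let r = ToyRange::new(20, 100);
        let a: u32 = (0..1000).map(|_| r.sample(&mut fake_rng)).sum();

        // gen_range_*: construct inside the loop, paying the modulus
        // on every iteration.
        let b: u32 = (0..1000)
            .map(|_| ToyRange::new(20, 100).sample(&mut fake_rng))
            .sum();

        println!("sums: {} {}", a, b);
    }
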