Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid low bits #68

Merged
merged 5 commits into from
Dec 11, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 52 additions & 38 deletions benches/distributions.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![feature(test)]
#![cfg_attr(feature = "i128_support", feature(i128_type, i128))]

extern crate test;
extern crate rand;
Expand All @@ -8,29 +9,16 @@ const RAND_BENCH_N: u64 = 1000;
use std::mem::size_of;
use test::{black_box, Bencher};

use rand::{Default, Rand, NewSeeded};
use rand::NewSeeded;
use rand::prng::XorShiftRng;
use rand::distributions::*;


#[bench]
fn distr_baseline(b: &mut Bencher) {
let mut rng = XorShiftRng::new().unwrap();

b.iter(|| {
for _ in 0..::RAND_BENCH_N {
black_box(u64::rand(&mut rng, Default));
}
});
b.bytes = size_of::<u64>() as u64 * ::RAND_BENCH_N;
}

macro_rules! distr_range_int {
($fnn:ident, $ty:ty, $low:expr, $high:expr) => {
macro_rules! distr {
($fnn:ident, $ty:ty, $distr:expr) => {
#[bench]
fn $fnn(b: &mut Bencher) {
let mut rng = XorShiftRng::new().unwrap();
let distr = Range::new($low, $high);
let distr = $distr;

b.iter(|| {
for _ in 0..::RAND_BENCH_N {
Expand All @@ -43,21 +31,54 @@ macro_rules! distr_range_int {
}
}

distr_range_int!(distr_range_i8, i8, 20i8, 100);
distr_range_int!(distr_range_i16, i16, -500i16, 2000);
distr_range_int!(distr_range_i32, i32, -200_000_000i32, 800_000_000);
distr_range_int!(distr_range_i64, i64, 3i64, 134217671);
// range
distr!(distr_range_i8, i8, Range::new(20i8, 100));
distr!(distr_range_i16, i16, Range::new(-500i16, 2000));
distr!(distr_range_i32, i32, Range::new(-200_000_000i32, 800_000_000));
distr!(distr_range_i64, i64, Range::new(3i64, 12345678901234));
#[cfg(feature = "i128_support")]
distr!(distr_range_i128, i128, Range::new(-12345678901234i128, 12345678901234567890));

macro_rules! distr_float {
($fnn:ident, $ty:ty, $distr:expr) => {
distr!(distr_range_float32, f32, Range::new(2.26f32, 2.319));
distr!(distr_range_float, f64, Range::new(2.26f64, 2.319));

// uniform
distr!(distr_uniform_i8, i8, Uniform);
distr!(distr_uniform_i16, i16, Uniform);
distr!(distr_uniform_i32, i32, Uniform);
distr!(distr_uniform_i64, i64, Uniform);
#[cfg(feature = "i128_support")]
distr!(distr_uniform_i128, i128, Uniform);

distr!(distr_uniform_bool, bool, Uniform);
distr!(distr_uniform_ascii_char, char, AsciiWordChar);

distr!(distr_uniform01_float32, f32, Uniform01);
distr!(distr_closed01_float32, f32, Closed01);
distr!(distr_open01_float32, f32, Open01);

distr!(distr_uniform01_float, f64, Uniform01);
distr!(distr_closed01_float, f64, Closed01);
distr!(distr_open01_float, f64, Open01);

// distributions
distr!(distr_exp, f64, Exp::new(2.71828 * 3.14159));
distr!(distr_normal, f64, Normal::new(-2.71828, 3.14159));
distr!(distr_log_normal, f64, LogNormal::new(-2.71828, 3.14159));
distr!(distr_gamma_large_shape, f64, Gamma::new(10., 1.0));
distr!(distr_gamma_small_shape, f64, Gamma::new(0.1, 1.0));


// construct and sample from a range
macro_rules! gen_range_int {
($fnn:ident, $ty:ty, $low:expr, $high:expr) => {
#[bench]
fn $fnn(b: &mut Bencher) {
let mut rng = XorShiftRng::new().unwrap();
let distr = $distr;

b.iter(|| {
for _ in 0..::RAND_BENCH_N {
let x: $ty = distr.sample(&mut rng);
let x: $ty = Range::new($low, $high).sample(&mut rng);
black_box(x);
}
});
Expand All @@ -66,16 +87,9 @@ macro_rules! distr_float {
}
}

distr_float!(distr_uniform01_float32, f32, Uniform01);
distr_float!(distr_closed01_float32, f32, Closed01);
distr_float!(distr_open01_float32, f32, Open01);

distr_float!(distr_uniform01_float, f64, Uniform01);
distr_float!(distr_closed01_float, f64, Closed01);
distr_float!(distr_open01_float, f64, Open01);
distr_float!(distr_range_float, f64, Range::new(2.26f64, 2.319f64));
distr_float!(distr_exp, f64, Exp::new(2.71828 * 3.14159));
distr_float!(distr_normal, f64, Normal::new(-2.71828, 3.14159));
distr_float!(distr_log_normal, f64, LogNormal::new(-2.71828, 3.14159));
distr_float!(distr_gamma_large_shape, f64, Gamma::new(10., 1.0));
distr_float!(distr_gamma_small_shape, f64, Gamma::new(0.1, 1.0));
gen_range_int!(gen_range_i8, i8, 20i8, 100);
gen_range_int!(gen_range_i16, i16, -500i16, 2000);
gen_range_int!(gen_range_i32, i32, -200_000_000i32, 800_000_000);
gen_range_int!(gen_range_i64, i64, 3i64, 12345678901234);
#[cfg(feature = "i128_support")]
gen_range_int!(gen_range_i128, i128, -12345678901234i128, 12345678901234567890);
4 changes: 3 additions & 1 deletion src/distributions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ fn ziggurat<R: Rng+?Sized, P, Z>(
// Of the remaining 11 least significant bits we use 8 to construct `i`.
// This saves us generating a whole extra random number, while the added
// precision of using 64 bits for f64 does not buy us much.
// Because for some RNGs the least significant bits can be of lower
// statistical quality, we use bits 3..10 for `i`.
let bits: u64 = uniform(rng);

// u is either U(-1, 1) or U(0, 1) depending on if this is a
Expand All @@ -176,7 +178,7 @@ fn ziggurat<R: Rng+?Sized, P, Z>(
} else {
bits.closed_open01_fixed()
};
let i = (bits & 0xff) as usize;
let i = ((bits >> 3) & 0xff) as usize;
Copy link
Owner

@dhardy dhardy Dec 8, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were there three unused bits? I thought you used them all?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are 11 spare bits, and we only need 8


let x = u * x_tab[i];

Expand Down
146 changes: 131 additions & 15 deletions src/distributions/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ pub struct RangeInt<X> {

macro_rules! range_int_impl {
($ty:ty, $signed:ident, $unsigned:ident,
$i_large:ident, $u_large:ident) => {
$i_large:ident, $u_large:ident, $use_mult:expr) => {
impl SampleRange for $ty {
type T = RangeInt<$ty>;
}
Expand Down Expand Up @@ -159,6 +159,16 @@ macro_rules! range_int_impl {
// small integer. For a u8 it only ever uses 7 bits. This means
// that all but the last 7 bits of `zone` are always 1's (or 15
// in the case of u16). So nothing is lost by truncating `zone`.
//
// An alternative to using a modulus is widening multiply:
// After a widening multiply by `range`, the result is in the
// high word. Then comparing the low word against `zone` makes
// sure our distribution is uniform.
//
// FIXME: This method is not used for i128/u128. It will
// probably help a lot for performance, because a 128-bit
// modulus is terribly slow. It means hand-coding a big-integer
// widening multiply.

let unsigned_max: $u_large = ::core::$u_large::MAX;

Expand Down Expand Up @@ -192,8 +202,15 @@ macro_rules! range_int_impl {
let zone = self.zone as $signed as $i_large as $u_large;
loop {
let v: $u_large = Rand::rand(rng, Uniform);
if v <= zone {
return self.low.wrapping_add((v % range) as $ty);
if $use_mult {
let (high, low) = v.wmul(range);
if low <= zone {
return self.low.wrapping_add(high as $ty);
}
} else {
if v <= zone {
return self.low.wrapping_add((v % range) as $ty);
}
}
}
} else {
Expand All @@ -205,20 +222,119 @@ macro_rules! range_int_impl {
}
}

range_int_impl! { i8, i8, u8, i32, u32 }
range_int_impl! { i16, i16, u16, i32, u32 }
range_int_impl! { i32, i32, u32, i32, u32 }
range_int_impl! { i64, i64, u64, i64, u64 }
range_int_impl! { i8, i8, u8, i32, u32, true }
range_int_impl! { i16, i16, u16, i32, u32, true }
range_int_impl! { i32, i32, u32, i32, u32, true }
range_int_impl! { i64, i64, u64, i64, u64, true }
#[cfg(feature = "i128_support")]
range_int_impl! { i128, i128, u128, u128, u128 }
range_int_impl! { isize, isize, usize, isize, usize }
range_int_impl! { u8, i8, u8, i32, u32 }
range_int_impl! { u16, i16, u16, i32, u32 }
range_int_impl! { u32, i32, u32, i32, u32 }
range_int_impl! { u64, i64, u64, i64, u64 }
range_int_impl! { i128, i128, u128, u128, u128, false }
range_int_impl! { isize, isize, usize, isize, usize, true }
range_int_impl! { u8, i8, u8, i32, u32, true }
range_int_impl! { u16, i16, u16, i32, u32, true }
range_int_impl! { u32, i32, u32, i32, u32, true }
range_int_impl! { u64, i64, u64, i64, u64, true }
range_int_impl! { usize, isize, usize, isize, usize, true }
#[cfg(feature = "i128_support")]
range_int_impl! { u128, u128, u128, i128, u128 }
range_int_impl! { usize, isize, usize, isize, usize }
range_int_impl! { u128, u128, u128, i128, u128, false }


/// Helper trait for a widening multiply: computes the full double-width
/// product of two words and returns it as a `(high, low)` pair of words.
trait WideningMultiply<RHS = Self> {
    type Output;

    fn wmul(self, x: RHS) -> Self::Output;
}

macro_rules! wmul_impl {
    ($ty:ty, $wide:ident, $shift:expr) => {
        impl WideningMultiply for $ty {
            type Output = ($ty, $ty);

            // Perform the multiply in the double-width type, then split
            // the product into its high and low halves.
            #[inline(always)]
            fn wmul(self, x: $ty) -> Self::Output {
                let product = (self as $wide) * (x as $wide);
                let hi = (product >> $shift) as $ty;
                let lo = product as $ty;
                (hi, lo)
            }
        }
    }
}

wmul_impl! { u8, u16, 8 }
wmul_impl! { u16, u32, 16 }
wmul_impl! { u32, u64, 32 }

#[cfg(feature = "i128_support")]
wmul_impl! { u64, u128, 64 }

#[cfg(not(feature = "i128_support"))]
impl WideningMultiply for u64 {
    type Output = (u64, u64);

    // This code is a translation of the __mulddi3 function in LLVM's
    // compiler-rt. It is an optimised variant of the common method
    // `(a + b) * (c + d) = ac + ad + bc + bd`.
    //
    // For some reason LLVM can optimise the C version very well, but keeps
    // shuffling registers in this Rust translation.
    #[inline(always)]
    fn wmul(self, b: u64) -> Self::Output {
        // Mask selecting the lower 32 bits of a word.
        const LOWER_MASK: u64 = !0u64 >> 32;
        // Accumulate the four 32x32 -> 64-bit partial products, carrying
        // overflow out of the low word into `high` via `t`.
        let mut low = (self & LOWER_MASK).wrapping_mul(b & LOWER_MASK);
        let mut t = low >> 32;
        low &= LOWER_MASK;
        t += (self >> 32).wrapping_mul(b & LOWER_MASK);
        low += (t & LOWER_MASK) << 32;
        let mut high = (t >> 32) as i64;
        t = low >> 32;
        low &= LOWER_MASK;
        t += (b >> 32).wrapping_mul(self & LOWER_MASK);
        low += (t & LOWER_MASK) << 32;
        high += (t >> 32) as i64;
        high += (self >> 32).wrapping_mul(b >> 32) as i64;

        // The 128-bit product as (high word, low word).
        (high as u64, low)
    }
}


macro_rules! wmul_impl_usize {
    ($ty:ty) => {
        impl WideningMultiply for usize {
            type Output = (usize, usize);

            // Forward to the fixed-width integer matching the pointer
            // width, then cast the halves back to `usize`.
            #[inline(always)]
            fn wmul(self, other: usize) -> Self::Output {
                let (hi, lo) = (self as $ty).wmul(other as $ty);
                (hi as usize, lo as usize)
            }
        }
    }
}

#[cfg(target_pointer_width = "32")]
wmul_impl_usize! { u32 }
#[cfg(target_pointer_width = "64")]
wmul_impl_usize! { u64 }

// Stub `WideningMultiply` impls for the 128-bit types. These exist only so
// the `if $use_mult` branch in `range_int_impl` type-checks; that branch is
// never taken for i128/u128 (`$use_mult` is `false`), so `wmul` panics if
// it is ever reached. See the FIXME in `range_int_impl` about hand-coding a
// big-integer widening multiply.
#[cfg(feature = "i128_support")]
macro_rules! wmul_impl_128 {
    ($ty:ty) => {
        impl WideningMultiply for $ty {
            type Output = ($ty, $ty);

            #[inline(always)]
            fn wmul(self, _: $ty) -> Self::Output {
                // No 256-bit intermediate type is available to widen into.
                unimplemented!();
            }
        }
    }
}

#[cfg(feature = "i128_support")]
wmul_impl_128! { i128 }
#[cfg(feature = "i128_support")]
wmul_impl_128! { u128 }



/// Implementation of `RangeImpl` for float types.
#[derive(Clone, Copy, Debug)]
Expand Down
6 changes: 5 additions & 1 deletion src/distributions/uniform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,11 @@ impl Distribution<u128> for Uniform {
impl Distribution<bool> for Uniform {
#[inline]
fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> bool {
rng.next_u32() & 1 == 1
// We can compare against an arbitrary bit of a u32 to get a bool.
// Because the least significant bits of a lower-quality RNG can have
// simple patterns, we compare against the most significant bit. This is
// easiest done using a sign test.
(rng.next_u32() as i32) < 0
}
}

Expand Down
19 changes: 12 additions & 7 deletions src/sequences/weighted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,17 +141,22 @@ mod tests {
#[test]
fn test_weighted_choice() {
// this makes assumptions about the internal implementation of
// WeightedChoice, specifically: it doesn't reorder the items,
// it doesn't do weird things to the RNG (so 0 maps to 0, 1 to
// 1, internally; modulo a modulo operation).
// WeightedChoice. It may fail when the implementation in
// `distributions::range::RangeInt` changes.

macro_rules! t {
($items:expr, $expected:expr) => {{
let items = $items;
let mut total_weight = 0;
for item in &items { total_weight += item.weight; }

let wc = WeightedChoice::new(items);
let expected = $expected;

let mut rng = MockAddRng::new(0, 1);
// Use extremely large steps between the random numbers, because
// we test with small ranges and RangeInt is designed to prefer
// the most significant bits.
let mut rng = MockAddRng::new(0, !0 / (total_weight as u64));

for &val in expected.iter() {
assert_eq!(wc.sample(&mut rng), val)
Expand All @@ -166,12 +171,12 @@ mod tests {
Weighted { weight: 2, item: 21},
Weighted { weight: 0, item: 22},
Weighted { weight: 1, item: 23}),
[21,21, 23]);
[21, 21, 23]);

// different weights
t!(vec!(Weighted { weight: 4, item: 30},
Weighted { weight: 3, item: 31}),
[30,30,30,30, 31,31,31]);
[30, 31, 30, 31, 30, 31, 30]);

// check that we're binary searching
// correctly with some vectors of odd
Expand All @@ -189,7 +194,7 @@ mod tests {
Weighted { weight: 1, item: 54},
Weighted { weight: 1, item: 55},
Weighted { weight: 1, item: 56}),
[50, 51, 52, 53, 54, 55, 56]);
[50, 54, 51, 55, 52, 56, 53]);
}

#[test]
Expand Down