From 5741e16fc3d6ba28c8892afc769179699de710a9 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 16 Dec 2017 07:22:20 +0100 Subject: [PATCH 1/2] Rename AsciiWordChar to Alphanumeric --- benches/distributions.rs | 2 +- src/distributions/mod.rs | 2 +- src/distributions/uniform.rs | 14 +++++++------- src/iter.rs | 8 ++++---- src/lib.rs | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/benches/distributions.rs b/benches/distributions.rs index 48fe2c8aa64..a1294c508de 100644 --- a/benches/distributions.rs +++ b/benches/distributions.rs @@ -51,7 +51,7 @@ distr!(distr_uniform_i64, i64, Uniform); distr!(distr_uniform_i128, i128, Uniform); distr!(distr_uniform_bool, bool, Uniform); -distr!(distr_uniform_ascii_char, char, AsciiWordChar); +distr!(distr_uniform_alphanumeric, char, Alphanumeric); distr!(distr_uniform01_float32, f32, Uniform01); distr!(distr_closed01_float32, f32, Closed01); diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs index 6db1a5af15c..076ab52c6cb 100644 --- a/src/distributions/mod.rs +++ b/src/distributions/mod.rs @@ -18,7 +18,7 @@ use Rng; pub use self::default::Default; -pub use self::uniform::{Uniform, Uniform01, Open01, Closed01, Codepoint, AsciiWordChar}; +pub use self::uniform::{Uniform, Uniform01, Open01, Closed01, Codepoint, Alphanumeric}; pub use self::range::Range; #[cfg(feature="std")] diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs index afda3ddbc14..2f1fd22ef56 100644 --- a/src/distributions/uniform.rs +++ b/src/distributions/uniform.rs @@ -14,7 +14,7 @@ use core::char; use core::mem; use Rng; -use distributions::Distribution; +use distributions::{Distribution, Range}; use utils::FloatConversions; // ----- Sampling distributions ----- @@ -45,7 +45,7 @@ pub struct Codepoint; /// Sample a `char`, uniformly distributed over ASCII letters and numbers: /// a-z, A-Z and 0-9.
#[derive(Debug)] -pub struct AsciiWordChar; +pub struct Alphanumeric; // ----- actual implementations ----- @@ -220,7 +220,7 @@ impl Distribution for Codepoint { } } -impl Distribution for AsciiWordChar { +impl Distribution for Alphanumeric { fn sample(&self, rng: &mut R) -> char { const RANGE: u32 = 26 + 26 + 10; const GEN_ASCII_STR_CHARSET: &'static [u8] = @@ -228,7 +228,7 @@ impl Distribution for AsciiWordChar { abcdefghijklmnopqrstuvwxyz\ 0123456789"; loop { - let var = rng.next_u32() & 0x3F; + let var = rng.next_u32() >> 26; if var < RANGE { return GEN_ASCII_STR_CHARSET[var as usize] as char } @@ -241,7 +241,7 @@ impl Distribution for AsciiWordChar { mod tests { use {Sample, thread_rng, iter}; use distributions::{Uniform, Uniform01, Open01, Closed01, - Codepoint, AsciiWordChar}; + Codepoint, Alphanumeric}; #[test] fn test_integers() { @@ -269,10 +269,10 @@ mod tests { let mut rng = ::test::rng(); let _ = rng.sample(Codepoint); - let c = rng.sample(AsciiWordChar); + let c = rng.sample(Alphanumeric); assert!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); - let word: String = iter(&mut rng).take(5).map(|rng| rng.sample(AsciiWordChar)).collect(); + let word: String = iter(&mut rng).take(5).map(|rng| rng.sample(Alphanumeric)).collect(); assert_eq!(word.len(), 5); } diff --git a/src/iter.rs b/src/iter.rs index 1d44d2c9d0f..176a110aa45 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -37,13 +37,13 @@ pub struct Iter<'a, R: Rng+?Sized+'a> { /// /// ``` /// use rand::{thread_rng, Rng, Sample, iter}; -/// use rand::distributions::{Uniform, AsciiWordChar}; +/// use rand::distributions::{Uniform, Alphanumeric}; /// /// let mut rng = thread_rng(); /// let x: Vec = iter(&mut rng).take(10).map(|rng| rng.sample(Uniform)).collect(); /// println!("{:?}", x); /// -/// let w: String = iter(&mut rng).take(6).map(|rng| rng.sample(AsciiWordChar)).collect(); +/// let w: String = iter(&mut rng).take(6).map(|rng| rng.sample(Alphanumeric)).collect(); /// 
println!("{}", w); /// ``` pub fn iter<'a, R: Rng+?Sized+'a>(rng: &'a mut R) -> Iter<'a, R> { @@ -160,7 +160,7 @@ impl<'a, R:?Sized+'a, U, F> Iterator for FlatMap<'a, R, U, F> #[cfg(test)] mod tests { use {Rng, Sample, thread_rng, iter}; - use distributions::{Uniform, AsciiWordChar}; + use distributions::{Uniform, Alphanumeric}; #[test] fn test_iter() { @@ -181,7 +181,7 @@ mod tests { fn test_dyn_dispatch() { let r: &mut Rng = &mut thread_rng(); - let x: String = iter(r).take(10).map(|rng| rng.sample(AsciiWordChar)).collect(); + let x: String = iter(r).take(10).map(|rng| rng.sample(Alphanumeric)).collect(); assert_eq!(x.len(), 10); } } diff --git a/src/lib.rs b/src/lib.rs index fcc0e18078d..f05d8062740 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -435,10 +435,10 @@ pub trait Sample: Rng { /// /// ```rust /// use rand::{thread_rng, Sample}; - /// use rand::distributions::AsciiWordChar; + /// use rand::distributions::Alphanumeric; /// /// let mut rng = thread_rng(); - /// let x: String = rng.iter().map(|rng| rng.sample(AsciiWordChar)).take(6).collect(); + /// let x: String = rng.iter().map(|rng| rng.sample(Alphanumeric)).take(6).collect(); /// ``` fn iter<'a>(&'a mut self) -> iter::Iter<'a, Self> { iter(self) From a0deb78d6753d43f1a4ff01078e776554b345927 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 16 Dec 2017 07:37:41 +0100 Subject: [PATCH 2/2] Optimize Codepoint distribution --- benches/distributions.rs | 1 + src/distributions/range.rs | 4 ++++ src/distributions/uniform.rs | 10 ++++------ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/benches/distributions.rs b/benches/distributions.rs index a1294c508de..bc0435968ca 100644 --- a/benches/distributions.rs +++ b/benches/distributions.rs @@ -52,6 +52,7 @@ distr!(distr_uniform_i128, i128, Uniform); distr!(distr_uniform_bool, bool, Uniform); distr!(distr_uniform_alphanumeric, char, Alphanumeric); +distr!(distr_uniform_codepoint, char, Codepoint); distr!(distr_uniform01_float32, f32, Uniform01); 
distr!(distr_closed01_float32, f32, Closed01); diff --git a/src/distributions/range.rs b/src/distributions/range.rs index 9120028de8d..fa43fd17ee5 100644 --- a/src/distributions/range.rs +++ b/src/distributions/range.rs @@ -179,10 +179,14 @@ macro_rules! range_int_impl { type X = $ty; + #[inline] // if the range is constant, this helps LLVM to do the + // calculations at compile-time. fn new(low: Self::X, high: Self::X) -> Self { RangeImpl::new_inclusive(low, high - 1) } + #[inline] // if the range is constant, this helps LLVM to do the + // calculations at compile-time. fn new_inclusive(low: Self::X, high: Self::X) -> Self { // For a closed range the number of possible numbers we should // generate is `range = (high - low + 1)`. It is not possible to diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs index 2f1fd22ef56..4b3f6cfc2c6 100644 --- a/src/distributions/uniform.rs +++ b/src/distributions/uniform.rs @@ -206,14 +206,12 @@ float_impls! { f64, Rng::next_u64 } impl Distribution for Codepoint { fn sample(&self, rng: &mut R) -> char { - // a char is 21 bits - const CHAR_MASK: u32 = 0x001f_ffff; + let range = Range::new(0u32, 0x11_0000); loop { - // Rejection sampling. About 0.2% of numbers with at most - // 21-bits are invalid codepoints (surrogates), so this - // will succeed first go almost every time. - match char::from_u32(rng.next_u32() & CHAR_MASK) { + match char::from_u32(range.sample(rng)) { Some(c) => return c, + // About 0.2% of numbers in the range 0..0x110000 are invalid + // codepoints (surrogates). None => {} } }