From d32bd1a226b29e06180bf0d39488d392a5be1ad4 Mon Sep 17 00:00:00 2001 From: Orion Yeung <11580988+orionyeung001@users.noreply.github.com> Date: Sat, 3 Aug 2024 12:54:57 -0500 Subject: [PATCH 1/5] refactor(perf): Multinomial samples from Binomial lint: fix lints --- src/distribution/multinomial.rs | 87 ++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 11 deletions(-) diff --git a/src/distribution/multinomial.rs b/src/distribution/multinomial.rs index 0794d352..ef6baa19 100644 --- a/src/distribution/multinomial.rs +++ b/src/distribution/multinomial.rs @@ -2,6 +2,7 @@ use crate::distribution::Discrete; use crate::function::factorial; use crate::statistics::*; use nalgebra::{DVector, Dim, Dyn, OMatrix, OVector}; +use std::cmp::Ordering; /// Implements the /// [Multinomial](https://en.wikipedia.org/wiki/Multinomial_distribution) @@ -99,6 +100,16 @@ where if p.len() < 2 { return Err(MultinomialError::NotEnoughProbabilities); } + // sort decreasing, place NAN at front + p.as_mut_slice().sort_unstable_by(|a, b| { + if a.is_nan() { + Ordering::Less + } else if b.is_nan() { + Ordering::Greater + } else { + b.partial_cmp(a).unwrap() + } + }); let mut sum = 0.0; for &val in &p { @@ -168,7 +179,7 @@ where nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, { fn sample(&self, rng: &mut R) -> OVector { - sample_generic(self, rng) + sample_generic(self.p.as_slice(), self.n, self.p.shape_generic().0, rng) } } @@ -180,26 +191,40 @@ where nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, { fn sample(&self, rng: &mut R) -> OVector { - sample_generic(self, rng) + sample_generic(self.p.as_slice(), self.n, self.p.shape_generic().0, rng) } } #[cfg(feature = "rand")] -fn sample_generic(dist: &Multinomial, rng: &mut R) -> OVector +fn sample_generic(p: &[f64], n: u64, dim: D, rng: &mut R) -> OVector where D: Dim, nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, R: ::rand::Rng + ?Sized, - T: ::num_traits::Num + ::nalgebra::Scalar + ::std::ops::AddAssign, + T: ::num_traits::Num + + ::nalgebra::Scalar + + num_traits::AsPrimitive + + num_traits::FromPrimitive, + super::Binomial: rand::distributions::Distribution, { - use nalgebra::Const; - - let p_cdf = super::categorical::prob_mass_to_cdf(dist.p().as_slice()); - let mut res = OVector::zeros_generic(dist.p.shape_generic().0, Const::<1>); - for _ in 0..dist.n { - let i = super::categorical::sample_unchecked(rng, &p_cdf); - res[i] += T::one(); + use rand::distributions::Distribution; + let mut res = OVector::zeros_generic(dim, nalgebra::U1); + let mut probs_not_taken = 1.0; + let mut samples_left = n; + for (p, s) in p[..p.len() - 1].iter().zip(res.iter_mut()) { + if !(0.0..=1.0).contains(&probs_not_taken) { + break; + } + let p_binom = p / probs_not_taken; + *s = super::Binomial::new(p_binom, samples_left) + .expect("probability already on [0,1]") + .sample(rng); + samples_left -= s.as_(); + probs_not_taken -= p; + } + if let Some(x) = res.as_mut_slice().last_mut() { + *x = T::from_u64(samples_left).unwrap(); } res } @@ -576,4 +601,44 @@ mod tests { // let n = Multinomial::new(&[0.3, 0.7], 10).unwrap(); // n.ln_pmf(&[1, 3]); // } + + #[cfg(feature = "rand")] + #[test] + fn test_almost_zero_sample() { + use ::rand::{distributions::Distribution, prelude::thread_rng}; + let n = 10; + let weights = vec![0.0, 0.0, 0.0, 0.000000001]; + let multinomial = Multinomial::new(weights, n).unwrap(); + let sample: OVector = multinomial.sample(&mut thread_rng()); + assert_relative_eq!(sample[3], n as f64); + } + + #[test] + #[cfg(feature = "rand")] + fn test_uniform_samples() { + use crate::statistics::Statistics; + use ::rand::{distributions::Distribution, prelude::thread_rng}; + let n: f64 = 1000.0; + let k: usize = 20; + let weights = vec![1.0; k]; + let multinomial = Multinomial::new(weights, n as u64).unwrap(); + let samples: OVector = multinomial.sample(&mut thread_rng()); + eprintln!("{samples}"); + samples.iter().for_each(|&s| { + assert_abs_diff_eq!( + s, + n / k as f64, + epsilon = 3.0 * multinomial.variance().unwrap()[(0, 0)].sqrt(), + ) + }); + assert_abs_diff_eq!( + samples.iter().population_variance(), + multinomial.variance().unwrap()[(0, 0)], + epsilon = n.sqrt() + ); + assert_eq!( + samples.into_iter().map(|&x| x as u64).sum::(), + n as u64 + ); + } } From 0ef7b102a9b4a1fc14cfba285e1d6a7443eb72e8 Mon Sep 17 00:00:00 2001 From: Orion Yeung <11580988+orionyeung001@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:59:42 -0500 Subject: [PATCH 2/5] test: mirror testing boiler where possible for multinomial testing --- src/distribution/internal.rs | 28 ++- src/distribution/multinomial.rs | 380 +++++++++++++++++++++++--------- 2 files changed, 297 insertions(+), 111 deletions(-) diff --git a/src/distribution/internal.rs b/src/distribution/internal.rs index 9075669e..5f9aa5af 100644 --- a/src/distribution/internal.rs +++ b/src/distribution/internal.rs @@ -50,7 +50,7 @@ pub mod test { #[macro_export] macro_rules! testing_boiler { ($($arg_name:ident: $arg_ty:ty),+; $dist:ty; $dist_err:ty) => { - fn make_param_text($($arg_name: $arg_ty),+) -> String { + fn make_param_text($($arg_name: &$arg_ty),+) -> String { // "" let mut param_text = String::new(); @@ -75,12 +75,13 @@ pub mod test { /// Creates and returns a distribution with the given parameters, /// panicking if `::new` fails. fn create_ok($($arg_name: $arg_ty),+) -> $dist { + let param_text = make_param_text($(&$arg_name),+); match <$dist>::new($($arg_name),+) { Ok(d) => d, Err(e) => panic!( "{}::new was expected to succeed, but failed for {} with error: '{}'", stringify!($dist), - make_param_text($($arg_name),+), + param_text, e ) } @@ -90,12 +91,13 @@ pub mod test { /// panicking if `::new` succeeds. #[allow(dead_code)] fn create_err($($arg_name: $arg_ty),+) -> $dist_err { + let param_text = make_param_text($(&$arg_name),+); match <$dist>::new($($arg_name),+) { Err(e) => e, Ok(d) => panic!( "{}::new was expected to fail, but succeeded for {} with result: {:?}", stringify!($dist), - make_param_text($($arg_name),+), + param_text, d ) } @@ -124,13 +126,14 @@ pub mod test { F: Fn($dist) -> T, T: ::core::cmp::PartialEq + ::core::fmt::Debug { + let param_text = make_param_text($(&$arg_name),+); let x = create_and_get($($arg_name),+, get_fn); if x != expected { panic!( "Expected {:?}, got {:?} for {}", expected, x, - make_param_text($($arg_name),+) + param_text ); } } @@ -142,10 +145,12 @@ pub mod test { /// /// Panics if `::new` fails. #[allow(dead_code)] - fn test_relative($($arg_name: $arg_ty),+, expected: f64, get_fn: F) + fn test_relative($($arg_name: $arg_ty),+, expected: T, get_fn: F) where - F: Fn($dist) -> f64, + F: Fn($dist) -> T, + T: ::std::fmt::Debug + ::approx::RelativeEq { + let param_text = make_param_text($(&$arg_name),+); let x = create_and_get($($arg_name),+, get_fn); let max_relative = $crate::consts::ACC; @@ -155,7 +160,7 @@ pub mod test { x, expected, max_relative, - make_param_text($($arg_name),+) + param_text ); } } @@ -171,6 +176,7 @@ pub mod test { where F: Fn($dist) -> f64, { + let param_text = make_param_text($(&$arg_name),+); let x = create_and_get($($arg_name),+, get_fn); // abs_diff_eq! cannot handle infinities, so we manually accept them here @@ -184,7 +190,7 @@ pub mod test { x, expected, acc, - make_param_text($($arg_name),+) + param_text ); } } @@ -196,6 +202,7 @@ pub mod test { #[allow(dead_code)] fn test_create_err($($arg_name: $arg_ty),+, expected: $dist_err) { + let param_text = make_param_text($(&$arg_name),+); let err = create_err($($arg_name),+); if err != expected { panic!( @@ -203,7 +210,7 @@ pub mod test { stringify!($dist), expected, err, - make_param_text($($arg_name),+) + param_text ) } } @@ -231,13 +238,14 @@ pub mod test { F: Fn($dist) -> Option, T: ::core::fmt::Debug, { + let param_text = make_param_text($(&$arg_name),+); let x = create_and_get($($arg_name),+, get_fn); if let Some(inner) = x { panic!( "Expected None, got {:?} for {}", inner, - make_param_text($($arg_name),+) + param_text ) } } diff --git a/src/distribution/multinomial.rs b/src/distribution/multinomial.rs index ef6baa19..3d9fcfe4 100644 --- a/src/distribution/multinomial.rs +++ b/src/distribution/multinomial.rs @@ -391,121 +391,85 @@ mod tests { distribution::{Discrete, Multinomial, MultinomialError}, statistics::{MeanN, VarianceN}, }; - use nalgebra::{dmatrix, dvector, vector, DimMin, Dyn, OVector}; - use std::fmt::{Debug, Display}; + use nalgebra::{dmatrix, dvector, matrix, vector, Dyn, OVector}; - fn try_create(p: OVector, n: u64) -> Multinomial - where - D: DimMin, - nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, - { - let mvn = Multinomial::new_from_nalgebra(p, n); - assert!(mvn.is_ok()); - mvn.unwrap() - } - fn bad_create_case(p: OVector, n: u64) -> MultinomialError - where - D: DimMin, - nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, - { - let dd = Multinomial::new_from_nalgebra(p, n); - assert!(dd.is_err()); - dd.unwrap_err() - } - - fn test_almost(p: OVector, n: u64, expected: T, acc: f64, eval: F) - where - T: Debug + Display + approx::RelativeEq, - F: FnOnce(Multinomial) -> T, - D: DimMin, - nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, - { - let dd = try_create(p, n); - let x = eval(dd); - assert_relative_eq!(expected, x, epsilon = acc); - } + use super::boiler::*; #[test] fn test_create() { assert_relative_eq!( - *try_create(vector![1.0, 1.0, 1.0], 4).p(), + *create_ok(vector![1.0, 1.0, 1.0], 4).p(), vector![1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0] ); - try_create(dvector![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], 4); + create_ok(vector![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], 4); } #[test] fn test_bad_create() { assert_eq!( - bad_create_case(vector![0.5], 4), + create_err(vector![0.5], 4), MultinomialError::NotEnoughProbabilities, ); assert_eq!( - bad_create_case(vector![-1.0, 2.0], 4), + create_err(vector![-1.0, 2.0], 4), MultinomialError::ProbabilityInvalid, ); assert_eq!( - bad_create_case(vector![0.0, 0.0], 4), + create_err(vector![0.0, 0.0], 4), MultinomialError::ProbabilitySumZero, ); assert_eq!( - bad_create_case(vector![1.0, f64::NAN], 4), + create_err(vector![1.0, f64::NAN], 4), MultinomialError::ProbabilityInvalid, ); } #[test] fn test_mean() { - let mean = |x: Multinomial<_>| x.mean().unwrap(); - test_almost(dvector![0.3, 0.7], 5, dvector![1.5, 3.5], 1e-12, mean); - test_almost( + + test_relative(vector![0.3, 0.7], 5, dvector![1.5, 3.5], |x: Multinomial<_>| x.mean().unwrap()); + test_relative( dvector![0.1, 0.3, 0.6], 10, dvector![1.0, 3.0, 6.0], - 1e-12, - mean, - ); - test_almost( - dvector![1.0, 3.0, 6.0], + |x: Multinomial<_>| x.mean().unwrap(), + ); + test_relative( + vector![1.0, 3.0, 6.0], 10, dvector![1.0, 3.0, 6.0], - 1e-12, - mean, - ); - test_almost( - dvector![0.15, 0.35, 0.3, 0.2], + |x: Multinomial<_>| x.mean().unwrap() + ); + test_relative( + vector![0.15, 0.35, 0.3, 0.2], 20, dvector![3.0, 7.0, 6.0, 4.0], - 1e-12, - mean, - ); + |x: Multinomial<_>| x.mean().unwrap() + ); } #[test] fn test_variance() { - let variance = |x: Multinomial<_>| x.variance().unwrap(); - test_almost( - dvector![0.3, 0.7], + test_relative( + vector![0.3, 0.7], 5, - dmatrix![1.05, -1.05; + matrix![1.05, -1.05; -1.05, 1.05], - 1e-15, - variance, + |x: Multinomial<_>| x.variance().unwrap(), ); - test_almost( - dvector![0.1, 0.3, 0.6], + test_relative( + vector![0.1, 0.3, 0.6], 10, - dmatrix![0.9, -0.3, -0.6; + matrix![0.9, -0.3, -0.6; -0.3, 2.1, -1.8; -0.6, -1.8, 2.4; ], - 1e-15, - variance, + |x: Multinomial<_>| x.variance().unwrap(), ); - test_almost( + test_relative( dvector![0.15, 0.35, 0.3, 0.2], 20, dmatrix![2.55, -1.05, -0.90, -0.60; @@ -513,59 +477,46 @@ mod tests { -0.90, -2.10, 4.20, -1.20; -0.60, -1.40, -1.20, 3.20; ], - 1e-15, - variance, + |x: Multinomial<_>| x.variance().unwrap(), ); } - // // #[test] - // // fn test_skewness() { - // // let skewness = |x: Multinomial| x.skewness().unwrap(); - // // test_almost(&[0.3, 0.7], 5, &[0.390360029179413, -0.390360029179413], 1e-15, skewness); - // // test_almost(&[0.1, 0.3, 0.6], 10, &[0.843274042711568, 0.276026223736942, -0.12909944487358], 1e-15, skewness); - // // test_almost(&[0.15, 0.35, 0.3, 0.2], 20, &[0.438357003759605, 0.140642169281549, 0.195180014589707, 0.335410196624968], 1e-15, skewness); - // // } + // #[test] + // fn test_skewness() { + // test_relative(vector![0.3, 0.7], 5, vector![0.390360029179413, -0.390360029179413], |x: Multinomial<_>| x.skewness().unwrap()); + // test_relative(dvector![0.1, 0.3, 0.6], 10, dvector![0.843274042711568, 0.276026223736942, -0.12909944487358], |x: Multinomial<_>| x.skewness().unwrap()); + // test_relative(vector![0.15, 0.35, 0.3, 0.2], 20, vector![0.438357003759605, 0.140642169281549, 0.195180014589707, 0.335410196624968], |x: Multinomial<_>| x.skewness().unwrap()); + // } #[test] fn test_pmf() { - let pmf = |arg: OVector| move |x: Multinomial<_>| x.pmf(&arg); - test_almost( - dvector![0.3, 0.7], + test_relative( + vector![0.3, 0.7], 10, 0.121060821, - 1e-15, - pmf(dvector![1, 9]), + move |x: Multinomial<_>| x.pmf(&vector![1, 9]), ); - test_almost( - dvector![0.1, 0.3, 0.6], + test_relative( + vector![0.1, 0.3, 0.6], 10, 0.105815808, - 1e-15, - pmf(dvector![1, 3, 6]), + move |x: Multinomial<_>| x.pmf(&vector![1, 3, 6]), ); - test_almost( + test_relative( dvector![0.15, 0.35, 0.3, 0.2], 10, 0.000145152, - 1e-15, - pmf(dvector![1, 1, 1, 7]), + move |x: Multinomial<_>| x.pmf(&dvector![1, 1, 1, 7]), ); } #[test] - fn test_error_is_sync_send() { - fn assert_sync_send() {} - assert_sync_send::(); + #[should_panic] + fn test_pmf_x_wrong_length() { + let pmf = |arg: OVector| move |x: Multinomial<_>| x.pmf(&arg); + test_relative(dvector![0.3, 0.7], 10, f64::NAN, pmf(dvector![1])); } - // #[test] - // #[should_panic] - // fn test_pmf_x_wrong_length() { - // let pmf = |arg: &[u64]| move |x: Multinomial| x.pmf(arg); - // let n = Multinomial::new(&[0.3, 0.7], 10).unwrap(); - // n.pmf(&[1]); - // } - // #[test] // #[should_panic] // fn test_pmf_x_wrong_sum() { @@ -613,9 +564,10 @@ mod tests { assert_relative_eq!(sample[3], n as f64); } - #[test] #[cfg(feature = "rand")] - fn test_uniform_samples() { + #[test] + #[ignore = "stochastic tests will not always pass, need a solid rerun strategy"] + fn test_stochastic_uniform_samples() { use crate::statistics::Statistics; use ::rand::{distributions::Distribution, prelude::thread_rng}; let n: f64 = 1000.0; @@ -624,21 +576,247 @@ mod tests { let multinomial = Multinomial::new(weights, n as u64).unwrap(); let samples: OVector = multinomial.sample(&mut thread_rng()); eprintln!("{samples}"); + samples.iter().for_each(|&s| { assert_abs_diff_eq!( s, n / k as f64, - epsilon = 3.0 * multinomial.variance().unwrap()[(0, 0)].sqrt(), + epsilon = 2.0 * multinomial.variance().unwrap()[(0, 0)].sqrt(), ) }); + assert_abs_diff_eq!( samples.iter().population_variance(), multinomial.variance().unwrap()[(0, 0)], epsilon = n.sqrt() ); + assert_eq!( samples.into_iter().map(|&x| x as u64).sum::(), n as u64 ); } } + +#[cfg(test)] +mod boiler { + use super::{Multinomial, MultinomialError}; + use nalgebra::OVector; + + pub fn make_param_text(p: &OVector, n: u64) -> String + where + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + // "" + format!("n: {n}, p: {p}") + } + + /// Creates and returns a distribution with the given parameters, + /// panicking if `::new` fails. + pub fn create_ok(p: OVector, n: u64) -> Multinomial + where + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let param_text = make_param_text(&p, n); + match Multinomial::new_from_nalgebra(p, n) { + Ok(d) => d, + Err(e) => panic!( + "{}::new was expected to succeed, but failed for {} with error: '{}'", + stringify!(Multinomial), + param_text, + e + ), + } + } + + /// Returns the error when creating a distribution with the given parameters, + /// panicking if `::new` succeeds. + pub fn create_err(p: OVector, n: u64) -> MultinomialError + where + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let param_text = make_param_text(&p, n); + match Multinomial::new_from_nalgebra(p, n) { + Err(e) => e, + Ok(d) => panic!( + "{}::new was expected to fail, but succeeded for {} with result: {:?}", + stringify!(Multinomial), + param_text, + d + ), + } + } + + /// Creates a distribution with the given parameters, calls the `get_fn` + /// function with the new distribution and returns the result of `get_fn`. + /// + /// Panics if ctor fails. + pub fn create_and_get(p: OVector, n: u64, get_fn: F) -> T + where + F: Fn(Multinomial) -> T, + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let n = create_ok(p, n); + get_fn(n) + } + + /// Creates a distribution with the given parameters, calls the `get_fn` + /// function with the new distribution and compares the result of `get_fn` + /// to `expected` exactly. + /// + /// Panics if `::new` fails. + #[allow(dead_code)] + pub fn test_exact(p: OVector, n: u64, expected: T, get_fn: F) + where + F: Fn(Multinomial) -> T, + T: ::core::cmp::PartialEq + ::core::fmt::Debug, + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let param_text = make_param_text(&p, n); + let x = create_and_get(p, n, get_fn); + if x != expected { + panic!("Expected {:?}, got {:?} for {}", expected, x, param_text); + } + } + + /// Gets a value for the given parameters by calling `create_and_get` + /// and compares it to `expected`. + /// + /// Allows relative error of up to [`crate::consts::ACC`]. + /// + /// Panics if `::new` fails. + #[allow(dead_code)] + pub fn test_relative(p: OVector, n: u64, expected: T, get_fn: F) + where + F: Fn(Multinomial) -> U, + T: ::std::fmt::Debug + ::approx::RelativeEq, + U: ::std::fmt::Debug, + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let param_text = make_param_text(&p, n); + let x = create_and_get(p, n, get_fn); + let max_relative = crate::consts::ACC; + + if !::approx::relative_eq!(expected, x, max_relative = max_relative) { + panic!( + "Expected {:?} to be almost equal to {:?} (max. relative error of {:?}), but wasn't for {}", + x, + expected, + max_relative, + param_text + ); + } + } + + /// Gets a value for the given parameters by calling `create_and_get` + /// and compares it to `expected`. + /// + /// Allows absolute error of up to `acc`. + /// + /// Panics if `::new` fails. + #[allow(dead_code)] + pub fn test_absolute(p: OVector, n: u64, expected: T, acc: f64, get_fn: F) + where + F: Fn(Multinomial) -> U, + T: ::std::fmt::Display + ::approx::RelativeEq + num_traits::Float, + U: ::std::fmt::Display, + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let param_text = make_param_text(&p, n); + let x = create_and_get(p, n, get_fn); + + // abs_diff_eq! cannot handle infinities, so we manually accept them here + if expected.is_infinite() { + return; + } + + if ::approx::abs_diff_ne!(expected, x, epsilon = acc) { + panic!( + "Expected {} to be almost equal to {} (max. absolute error of {:e}), but wasn't for {}", + x, + expected, + acc, + param_text + ); + } + } + + /// Purposely fails creating a distribution with the given + /// parameters and compares the returned error to `expected`. + /// + /// Panics if `::new` succeeds. + #[allow(dead_code)] + pub fn test_create_err(p: OVector, n: u64, expected: MultinomialError) + where + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let param_text = make_param_text(&p, n); + let err = create_err(p, n); + if err != expected { + panic!( + "{}::new was expected to fail with error {expected}, but failed with error {err} for {param_text}", + stringify!(Multinomial), + ) + } + } + + /// Gets a value for the given parameters by calling `create_and_get` + /// and asserts that it is [`NAN`]. + /// + /// Panics if `::new` fails. + #[allow(dead_code)] + pub fn test_is_nan(p: OVector, n: u64, get_fn: F) + where + F: Fn(Multinomial) -> f64, + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let x = create_and_get(p, n, get_fn); + assert!(x.is_nan()); + } + + /// Gets a value for the given parameters by calling `create_and_get` + /// and asserts that it is [`None`]. + /// + /// Panics if `::new` fails. + #[allow(dead_code)] + pub fn test_none(p: OVector, n: u64, get_fn: F) + where + F: Fn(Multinomial) -> Option, + T: ::core::fmt::Debug, + D: nalgebra::Dim, + nalgebra::DefaultAllocator: + nalgebra::allocator::Allocator + nalgebra::allocator::Allocator, + { + let param_text = make_param_text(&p, n); + let x = create_and_get(p, n, get_fn); + + if let Some(inner) = x { + panic!("Expected None, got {:?} for {}", inner, param_text) + } + } + + /// Asserts that associated error type is Send and Sync + #[test] + pub fn test_error_is_sync_send() { + pub fn assert_sync_send() {} + assert_sync_send::(); + } +} From b159ac0fa0893c1fe18c01c3b17e499934af028f Mon Sep 17 00:00:00 2001 From: Orion Yeung <11580988+orionyeung001@users.noreply.github.com> Date: Wed, 11 Sep 2024 13:48:56 -0500 Subject: [PATCH 3/5] fix: correct sampling for multinomial, rely on Binomial --- src/distribution/multinomial.rs | 51 ++++++++++++++++----------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/src/distribution/multinomial.rs b/src/distribution/multinomial.rs index 3d9fcfe4..805852bf 100644 --- a/src/distribution/multinomial.rs +++ b/src/distribution/multinomial.rs @@ -2,7 +2,6 @@ use crate::distribution::Discrete; use crate::function::factorial; use crate::statistics::*; use nalgebra::{DVector, Dim, Dyn, OMatrix, OVector}; -use std::cmp::Ordering; /// Implements the /// [Multinomial](https://en.wikipedia.org/wiki/Multinomial_distribution) @@ -100,16 +99,6 @@ where if p.len() < 2 { return Err(MultinomialError::NotEnoughProbabilities); } - // sort decreasing, place NAN at front - p.as_mut_slice().sort_unstable_by(|a, b| { - if a.is_nan() { - Ordering::Less - } else if b.is_nan() { - Ordering::Greater - } else { - b.partial_cmp(a).unwrap() - } - }); let mut sum = 0.0; for &val in &p { @@ -212,19 +201,29 @@ where let mut res = OVector::zeros_generic(dim, nalgebra::U1); let mut probs_not_taken = 1.0; let mut samples_left = n; - for (p, s) in p[..p.len() - 1].iter().zip(res.iter_mut()) { - if !(0.0..=1.0).contains(&probs_not_taken) { + + let mut p_sorted_inds: Vec<_> = (0..p.len()).collect(); + + // unwrap because NAN elements not allowed from this struct's `new` + p_sorted_inds.sort_unstable_by(|&i, &j| p[j].partial_cmp(&p[i]).unwrap()); + + for ind in p_sorted_inds.into_iter().take(p.len() - 1) { + let pi = p[ind]; + if pi == 0.0 { + continue; + } + if !(0.0..=1.0).contains(&probs_not_taken) || samples_left == 0 { break; } - let p_binom = p / probs_not_taken; - *s = super::Binomial::new(p_binom, samples_left) - .expect("probability already on [0,1]") + let p_binom = pi / probs_not_taken; + res[ind] = super::Binomial::new(p_binom, samples_left) + .unwrap() .sample(rng); - samples_left -= s.as_(); - probs_not_taken -= p; + samples_left -= res[ind].as_(); + probs_not_taken -= pi; } - if let Some(x) = res.as_mut_slice().last_mut() { - *x = T::from_u64(samples_left).unwrap(); + if samples_left > 0 { + *res.as_mut_slice().last_mut().unwrap() = T::from_u64(samples_left).unwrap(); } res } @@ -268,11 +267,11 @@ where /// where `n` is the number of trials, `p_i` is the `i`th probability, /// and `k` is the total number of probabilities fn variance(&self) -> Option> { - let mut cov = OMatrix::from_diagonal(&self.p.map(|x| x * (1.0 - x))); - let mut offdiag = |x: usize, y: usize| { - let elt = -self.p[x] * self.p[y]; + let mut cov = OMatrix::from_diagonal(&self.p.map(|p| p * (1.0 - p))); + let mut offdiag = |i: usize, j: usize| { + let elt = -self.p[i] * self.p[j]; // cov[(x, y)] = elt; - cov[(y, x)] = elt; + cov[(j, i)] = elt; }; for i in 0..self.p.len() { @@ -558,7 +557,7 @@ mod tests { fn test_almost_zero_sample() { use ::rand::{distributions::Distribution, prelude::thread_rng}; let n = 10; - let weights = vec![0.0, 0.0, 0.0, 0.000000001]; + let weights = vec![0.0, 0.0, 0.0, 0.1]; let multinomial = Multinomial::new(weights, n).unwrap(); let sample: OVector = multinomial.sample(&mut thread_rng()); assert_relative_eq!(sample[3], n as f64); @@ -566,7 +565,7 @@ mod tests { #[cfg(feature = "rand")] #[test] - #[ignore = "stochastic tests will not always pass, need a solid rerun strategy"] + #[ignore = "this test is designed to assess approximately normal results within 2σ"] fn test_stochastic_uniform_samples() { use crate::statistics::Statistics; use ::rand::{distributions::Distribution, prelude::thread_rng}; From 7b4180c928fcf467b9c3ea61a585c52ce1deb7aa Mon Sep 17 00:00:00 2001 From: Orion Yeung <11580988+orionyeung001@users.noreply.github.com> Date: Fri, 13 Sep 2024 19:21:51 -0500 Subject: [PATCH 4/5] test: stochastic test for multinomial - need another pair of eyes I'm still unsure if I'm sampling incorrectly or am using bad verification technique, should rely on a common GoF test instead of my homebrew statistics --- src/distribution/multinomial.rs | 58 +++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/src/distribution/multinomial.rs b/src/distribution/multinomial.rs index 805852bf..587f7f78 100644 --- a/src/distribution/multinomial.rs +++ b/src/distribution/multinomial.rs @@ -567,33 +567,51 @@ mod tests { #[test] #[ignore = "this test is designed to assess approximately normal results within 2σ"] fn test_stochastic_uniform_samples() { - use crate::statistics::Statistics; use ::rand::{distributions::Distribution, prelude::thread_rng}; + use crate::statistics::Statistics; + + // describe multinomial such that each binomial variable is viable normal approximation + let k: f64 = 60.0; let n: f64 = 1000.0; - let k: usize = 20; - let weights = vec![1.0; k]; + let weights = vec![1.0; k as usize]; let multinomial = Multinomial::new(weights, n as u64).unwrap(); - let samples: OVector = multinomial.sample(&mut thread_rng()); - eprintln!("{samples}"); - samples.iter().for_each(|&s| { + // obtain sample statistics for multiple draws from this multinomial distribution + // during iteration, verify that each event is ~ Binom(n, 1/k) under normal approx + let n_trials = 20; + let stats_across_multinom_events = std::iter::repeat_with(|| { + let samples: OVector = multinomial.sample(&mut thread_rng()); + samples.iter().enumerate().for_each(|(i, &s)| { + assert_abs_diff_eq!(s, n / k, epsilon = multinomial.variance().unwrap()[(i, i)],) + }); + samples + }) + .take(n_trials) + .map(|sample| (sample.mean(), sample.population_variance())); + + println!("{:#?}", stats_across_multinom_events.clone().collect::>()); + + // claim: for X from a given trial, Var[X] ~ χ^2(k-1) + // the variance across this multinomial sample should be against the true mean + // Var[X] = sum (X_i - bar{X})^2 = sum (X_i - n/k)^2 + // alternatively, variance is linear, so we should also have + // Var[X] = k Var[X_i] = k npq = k n (k-1)/k^2 = n (k-1)/k as these are iid Binom(n, 1/k) + // + // since parameters of the binomial variable are around np ~ 20, each binomial variable is approximately normal + // + // therefore, population variance should be a sum of squares of normal variables from their mean. + // i.e. population variances of these multinomial samples should be a scaling of χ^2 squared variables + // with k-1 dof + // + // normal approximation of χ^2(k-1) should be valid for k = 50, so assert against 2σ_normal + for (_mu, var) in stats_across_multinom_events { assert_abs_diff_eq!( - s, - n / k as f64, - epsilon = 2.0 * multinomial.variance().unwrap()[(0, 0)].sqrt(), + var, + k - 1.0, + epsilon = ((2.0*k - 2.0)/n_trials as f64).sqrt() ) - }); - - assert_abs_diff_eq!( - samples.iter().population_variance(), - multinomial.variance().unwrap()[(0, 0)], - epsilon = n.sqrt() - ); + } - assert_eq!( - samples.into_iter().map(|&x| x as u64).sum::(), - n as u64 - ); } } From 66b8d7e642e8106d6b359b1fe1a7f71535b886bf Mon Sep 17 00:00:00 2001 From: Orion Yeung <11580988+orionyeung001@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:44:47 -0500 Subject: [PATCH 5/5] fix: remove unneeded trait bounds also adds a comment demonstrating success of unwrap --- src/distribution/multinomial.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/distribution/multinomial.rs b/src/distribution/multinomial.rs index 587f7f78..bad51b66 100644 --- a/src/distribution/multinomial.rs +++ b/src/distribution/multinomial.rs @@ -188,11 +188,10 @@ where fn sample_generic(p: &[f64], n: u64, dim: D, rng: &mut R) -> OVector where D: Dim, - nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, nalgebra::DefaultAllocator: nalgebra::allocator::Allocator, R: ::rand::Rng + ?Sized, - T: ::num_traits::Num - + ::nalgebra::Scalar + T: nalgebra::Scalar + + num_traits::Zero + num_traits::AsPrimitive + num_traits::FromPrimitive, super::Binomial: rand::distributions::Distribution, @@ -203,8 +202,7 @@ where let mut samples_left = n; let mut p_sorted_inds: Vec<_> = (0..p.len()).collect(); - - // unwrap because NAN elements not allowed from this struct's `new` + // unwrap succeeds as NAN elements not allowed from this struct's `new` p_sorted_inds.sort_unstable_by(|&i, &j| p[j].partial_cmp(&p[i]).unwrap()); for ind in p_sorted_inds.into_iter().take(p.len() - 1) { @@ -215,6 +213,11 @@ where if !(0.0..=1.0).contains(&probs_not_taken) || samples_left == 0 { break; } + // since $p_j \le p_i \forall j < i$ and $\vec{p}$ is normalized, then + // $1 - sum(p_j, j, 0, i-1) = sum(p_j, j, i, n) = p_i + sum(p_j, j, i+1, n) > p_i$ + // this guarantees that logically p_binom on [0,1] + // TODO: demonstrate that this behavior also behaves well with floating point + // the logical reasoning provides that `unwrap` of Binomial::new will typically succeed let p_binom = pi / probs_not_taken; res[ind] = super::Binomial::new(p_binom, samples_left) .unwrap()