diff --git a/Cargo.toml b/Cargo.toml index 9823299f..4901c1f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,3 +25,4 @@ num-traits = "0.2.14" [dev-dependencies] criterion = "0.3.3" +anyhow = "1.0" diff --git a/prep_data.sh b/prep_data.sh new file mode 100755 index 00000000..93ba2842 --- /dev/null +++ b/prep_data.sh @@ -0,0 +1,33 @@ +#! /bin/bash + +process_file() { + # Define input and output file names + SOURCE=$1 + FILENAME=$2 + curl -fsSL ${SOURCE}/$FILENAME > $FILENAME + + # Extract line numbers for Certified Values and Data from the header + INFO=$(grep "Certified Values:" $FILENAME) + CERTIFIED_VALUES_START=$(echo $INFO | awk '{print $4}') + CERTIFIED_VALUES_END=$(echo $INFO | awk '{print $6}') + + INFO=$(grep "Data :" $FILENAME) + DATA_START=$(echo $INFO | awk '{print $4}') + DATA_END=$(echo $INFO | awk '{print $6}') + + # Extract and reformat sections + # Certified values + sed -n -i \ + -e "${CERTIFIED_VALUES_START},${CERTIFIED_VALUES_END}p" \ + -e "${DATA_START},${DATA_END}p" \ + $FILENAME + # sed -n -i -e "${CERTIFIED_VALUES_START},${CERTIFIED_VALUES_END}s/\(exact\)//p" $FILENAME + +} + +URL='https://www.itl.nist.gov/div898/strd/univ/data' +for file in Lottery.dat Lew.dat Mavro.dat Michelso.dat NumAcc1.dat NumAcc2.dat NumAcc3.dat +do + process_file $URL $file +done + diff --git a/tests/nist_tests.rs b/tests/nist_tests.rs index d14f4cde..e437a079 100644 --- a/tests/nist_tests.rs +++ b/tests/nist_tests.rs @@ -1,153 +1,122 @@ // #![cfg(test)] -use statrs::assert_almost_eq; +use anyhow::Result; +use approx::assert_relative_eq; use statrs::statistics::Statistics; + use std::io::{BufRead, BufReader}; +use std::path::PathBuf; use std::{env, fs}; -#[cfg(test)] -const NIST_DATA_DIR_ENV: &str = "STATRS_NIST_DATA_DIR"; - -fn load_data(pathname: String) -> Vec { - let f = fs::File::open(pathname).unwrap(); - let mut reader = BufReader::new(f); +struct TestCase { + certified: CertifiedValues, + values: Vec, +} - let mut buf = String::new(); - let mut data: Vec = vec![]; - while reader.read_line(&mut buf).unwrap() > 0 { - data.push(buf.trim().parse::().unwrap()); - buf.clear(); +impl std::fmt::Debug for TestCase { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "TestCase({:?}, [...]", self.certified) } - data } -#[test] -#[ignore = "NIST tests should not run from typical `cargo test` calls"] -fn nist_test_mean() { - let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); - let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt")); - assert_almost_eq!((&data).mean(), 518.958715596330, 1e-12); - - data = load_data(dbg!(path_dir.clone() + "lew.txt")); - assert_almost_eq!((&data).mean(), -177.435000000000, 1e-13); - - data = load_data(dbg!(path_dir.clone() + "mavro.txt")); - assert_almost_eq!((&data).mean(), 2.00185600000000, 1e-15); - - data = load_data(dbg!(path_dir.clone() + "michaelso.txt")); - assert_almost_eq!((&data).mean(), 299.852400000000, 1e-13); - - data = load_data(dbg!(path_dir.clone() + "numacc1.txt")); - assert_eq!((&data).mean(), 10000002.0); - - data = load_data(dbg!(path_dir.clone() + "numacc2.txt")); - assert_almost_eq!((&data).mean(), 1.2, 1e-15); - - data = load_data(dbg!(path_dir.clone() + "numacc3.txt")); - assert_eq!((&data).mean(), 1000000.2); +#[derive(Debug)] +struct CertifiedValues { + mean: f64, + std_dev: f64, + corr: f64, +} - data = load_data(dbg!(path_dir.clone() + "numacc4.txt")); - assert_almost_eq!((&data).mean(), 10000000.2, 1e-8); +impl std::fmt::Display for CertifiedValues { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "μ={:.3e}, σ={:.3e}, r={:.3e}", + self.mean, self.std_dev, self.corr + ) + } } +#[cfg(test)] +const NIST_DATA_DIR_ENV: &str = "STATRS_NIST_DATA_DIR"; +#[cfg(test)] +const FILENAMES: [&str; 7] = [ + "Lottery.dat", + "Lew.dat", + "Mavro.dat", + "Michelso.dat", + "NumAcc1.dat", + "NumAcc2.dat", + "NumAcc3.dat", +]; + #[test] #[ignore = "NIST tests should not run from typical `cargo test` calls"] -fn nist_test_std_dev() { - let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); - let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt")); - assert_almost_eq!((&data).std_dev(), 291.699727470969, 1e-13); - - data = load_data(dbg!(path_dir.clone() + "lew.txt")); - assert_almost_eq!((&data).std_dev(), 277.332168044316, 1e-12); - - data = load_data(dbg!(path_dir.clone() + "mavro.txt")); - assert_almost_eq!((&data).std_dev(), 0.000429123454003053, 1e-15); - - data = load_data(dbg!(path_dir.clone() + "michaelso.txt")); - assert_almost_eq!((&data).std_dev(), 0.0790105478190518, 1e-13); - - data = load_data(dbg!(path_dir.clone() + "numacc1.txt")); - assert_eq!((&data).std_dev(), 1.0); - - data = load_data(dbg!(path_dir.clone() + "numacc2.txt")); - assert_almost_eq!((&data).std_dev(), 0.1, 1e-16); - - data = load_data(dbg!(path_dir.clone() + "numacc3.txt")); - assert_almost_eq!((&data).std_dev(), 0.1, 1e-10); - - data = load_data(dbg!(path_dir.clone() + "numacc4.txt")); - assert_almost_eq!((&data).std_dev(), 0.1, 1e-9); +fn nist_strd_univariate_mean() { + let path_prefix = env::var(NIST_DATA_DIR_ENV).unwrap_or_else(|e| panic!("{}", e)); + + for fname in FILENAMES { + let case = parse_file([&path_prefix, fname].iter().collect::()) + .unwrap_or_else(|e| panic!("failed parsing file {} with {:?}", fname, e)); + assert_relative_eq!( + case.values.iter().mean(), + case.certified.mean, + epsilon = 1e-12 + ); + } } #[test] -#[ignore = "NIST tests should not run from typical `cargo test` calls"] -fn nist_test_covariance_consistent_with_variance() { - let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); - let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt")); - assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); - - data = load_data(dbg!(path_dir.clone() + "lew.txt")); - assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); - - data = load_data(dbg!(path_dir.clone() + "mavro.txt")); - assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); +#[ignore] +fn nist_strd_univariate_std_dev() { + let path_prefix = + env::var(NIST_DATA_DIR_ENV).expect(&format!("env {} not set", NIST_DATA_DIR_ENV)); + + for fname in FILENAMES { + let case = parse_file([&path_prefix, fname].iter().collect::()) + .unwrap_or_else(|e| panic!("failed parsing file {} with {:?}", fname, e)); + assert_relative_eq!( + case.values.iter().std_dev(), + case.certified.std_dev, + epsilon = 1e-10 + ); + } +} - data = load_data(dbg!(path_dir.clone() + "michaelso.txt")); - assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); +fn parse_certified_value(line: String) -> Result { + line.chars() + .skip_while(|&c| c != ':') + .skip(1) // skip through ':' delimiter + .skip_while(|&c| c.is_whitespace()) // effectively `String` trim + .take_while(|&c| matches!(c, '0'..='9' | '-' | '.')) + .collect::() + .parse::() + .map_err(|e| e.into()) +} - data = load_data(dbg!(path_dir.clone() + "numacc1.txt")); - assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); +fn parse_file(path: impl AsRef) -> anyhow::Result { + let f = fs::File::open(path)?; + let reader = BufReader::new(f); + let mut lines = reader.lines(); + + let mean = parse_certified_value(lines.next().expect("file should not be exhausted")?)?; + let std_dev = parse_certified_value(lines.next().expect("file should not be exhausted")?)?; + let corr = parse_certified_value(lines.next().expect("file should not be exhausted")?)?; + + Ok(TestCase { + certified: CertifiedValues { + mean, + std_dev, + corr, + }, + values: lines + .map_while(|line| line.ok()?.trim().parse().ok()) + .collect(), + }) } #[test] #[ignore = "NIST tests should not run from typical `cargo test` calls"] -fn nist_test_pop_covar_consistent_with_pop_var() { - let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); - let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt")); - assert_almost_eq!( - (&data).population_variance(), - (&data).population_covariance(&data), - 1e-10, - ); - - data = load_data(dbg!(path_dir.clone() + "lew.txt")); - assert_almost_eq!( - (&data).population_variance(), - (&data).population_covariance(&data), - 1e-10, - ); - - data = load_data(dbg!(path_dir.clone() + "mavro.txt")); - assert_almost_eq!( - (&data).population_variance(), - (&data).population_covariance(&data), - 1e-10, - ); - - data = load_data(dbg!(path_dir.clone() + "michaelso.txt")); - assert_almost_eq!( - (&data).population_variance(), - (&data).population_covariance(&data), - 1e-10, - ); - - data = load_data(dbg!(path_dir.clone() + "numacc1.txt")); - assert_almost_eq!( - (&data).population_variance(), - (&data).population_covariance(&data), - 1e-10, - ); -} +fn nist_test_covariance_consistent_with_variance() {} #[test] #[ignore = "NIST tests should not run from typical `cargo test` calls"] -fn nist_test_covariance_is_symmetric() { - let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); - let data_a = &load_data(dbg!(path_dir.clone() + "lottery.txt"))[0..200]; - let data_b = &load_data(dbg!(path_dir.clone() + "lew.txt"))[0..200]; - assert_almost_eq!(data_a.covariance(data_b), data_b.covariance(data_a), 1e-10); - assert_almost_eq!( - data_a.population_covariance(data_b), - data_b.population_covariance(data_a), - 1e-11, - ); -} +fn nist_test_covariance_is_symmetric() {}