-
Notifications
You must be signed in to change notification settings - Fork 85
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test: first pass NIST strd univariate test
shell script needs curl, grep, and sed
- Loading branch information
1 parent
6443762
commit 2bc3a93
Showing
3 changed files
with
130 additions
and
127 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,3 +25,4 @@ num-traits = "0.2.14" | |
|
||
[dev-dependencies] | ||
criterion = "0.3.3" | ||
anyhow = "1.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#! /bin/bash | ||
|
||
process_file() { | ||
# Define input and output file names | ||
SOURCE=$1 | ||
FILENAME=$2 | ||
curl -fsSL ${SOURCE}/$FILENAME > $FILENAME | ||
|
||
# Extract line numbers for Certified Values and Data from the header | ||
INFO=$(grep "Certified Values:" $FILENAME) | ||
CERTIFIED_VALUES_START=$(echo $INFO | awk '{print $4}') | ||
CERTIFIED_VALUES_END=$(echo $INFO | awk '{print $6}') | ||
|
||
INFO=$(grep "Data :" $FILENAME) | ||
DATA_START=$(echo $INFO | awk '{print $4}') | ||
DATA_END=$(echo $INFO | awk '{print $6}') | ||
|
||
# Extract and reformat sections | ||
# Certified values | ||
sed -n -i \ | ||
-e "${CERTIFIED_VALUES_START},${CERTIFIED_VALUES_END}p" \ | ||
-e "${DATA_START},${DATA_END}p" \ | ||
$FILENAME | ||
# sed -n -i -e "${CERTIFIED_VALUES_START},${CERTIFIED_VALUES_END}s/\(exact\)//p" $FILENAME | ||
|
||
} | ||
|
||
URL='https://www.itl.nist.gov/div898/strd/univ/data' | ||
for file in Lottery.dat Lew.dat Mavro.dat Michelso.dat NumAcc1.dat NumAcc2.dat NumAcc3.dat | ||
do | ||
process_file $URL $file | ||
done | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,153 +1,122 @@ | ||
// #![cfg(test)] | ||
use statrs::assert_almost_eq; | ||
use anyhow::Result; | ||
use approx::assert_relative_eq; | ||
use statrs::statistics::Statistics; | ||
|
||
use std::io::{BufRead, BufReader}; | ||
use std::path::PathBuf; | ||
use std::{env, fs}; | ||
|
||
#[cfg(test)] | ||
const NIST_DATA_DIR_ENV: &str = "STATRS_NIST_DATA_DIR"; | ||
|
||
fn load_data(pathname: String) -> Vec<f64> { | ||
let f = fs::File::open(pathname).unwrap(); | ||
let mut reader = BufReader::new(f); | ||
struct TestCase { | ||
certified: CertifiedValues, | ||
values: Vec<f64>, | ||
} | ||
|
||
let mut buf = String::new(); | ||
let mut data: Vec<f64> = vec![]; | ||
while reader.read_line(&mut buf).unwrap() > 0 { | ||
data.push(buf.trim().parse::<f64>().unwrap()); | ||
buf.clear(); | ||
impl std::fmt::Debug for TestCase { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
write!(f, "TestCase({:?}, [...]", self.certified) | ||
} | ||
data | ||
} | ||
|
||
#[test] | ||
#[ignore = "NIST tests should not run from typical `cargo test` calls"] | ||
fn nist_test_mean() { | ||
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); | ||
let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt")); | ||
assert_almost_eq!((&data).mean(), 518.958715596330, 1e-12); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "lew.txt")); | ||
assert_almost_eq!((&data).mean(), -177.435000000000, 1e-13); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "mavro.txt")); | ||
assert_almost_eq!((&data).mean(), 2.00185600000000, 1e-15); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "michaelso.txt")); | ||
assert_almost_eq!((&data).mean(), 299.852400000000, 1e-13); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc1.txt")); | ||
assert_eq!((&data).mean(), 10000002.0); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc2.txt")); | ||
assert_almost_eq!((&data).mean(), 1.2, 1e-15); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc3.txt")); | ||
assert_eq!((&data).mean(), 1000000.2); | ||
#[derive(Debug)] | ||
struct CertifiedValues { | ||
mean: f64, | ||
std_dev: f64, | ||
corr: f64, | ||
} | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc4.txt")); | ||
assert_almost_eq!((&data).mean(), 10000000.2, 1e-8); | ||
impl std::fmt::Display for CertifiedValues { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
write!( | ||
f, | ||
"μ={:.3e}, σ={:.3e}, r={:.3e}", | ||
self.mean, self.std_dev, self.corr | ||
) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
const NIST_DATA_DIR_ENV: &str = "STATRS_NIST_DATA_DIR"; | ||
#[cfg(test)] | ||
const FILENAMES: [&str; 7] = [ | ||
"Lottery.dat", | ||
"Lew.dat", | ||
"Mavro.dat", | ||
"Michelso.dat", | ||
"NumAcc1.dat", | ||
"NumAcc2.dat", | ||
"NumAcc3.dat", | ||
]; | ||
|
||
#[test] | ||
#[ignore = "NIST tests should not run from typical `cargo test` calls"] | ||
fn nist_test_std_dev() { | ||
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); | ||
let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt")); | ||
assert_almost_eq!((&data).std_dev(), 291.699727470969, 1e-13); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "lew.txt")); | ||
assert_almost_eq!((&data).std_dev(), 277.332168044316, 1e-12); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "mavro.txt")); | ||
assert_almost_eq!((&data).std_dev(), 0.000429123454003053, 1e-15); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "michaelso.txt")); | ||
assert_almost_eq!((&data).std_dev(), 0.0790105478190518, 1e-13); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc1.txt")); | ||
assert_eq!((&data).std_dev(), 1.0); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc2.txt")); | ||
assert_almost_eq!((&data).std_dev(), 0.1, 1e-16); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc3.txt")); | ||
assert_almost_eq!((&data).std_dev(), 0.1, 1e-10); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc4.txt")); | ||
assert_almost_eq!((&data).std_dev(), 0.1, 1e-9); | ||
fn nist_strd_univariate_mean() { | ||
let path_prefix = env::var(NIST_DATA_DIR_ENV).unwrap_or_else(|e| panic!("{}", e)); | ||
|
||
for fname in FILENAMES { | ||
let case = parse_file([&path_prefix, fname].iter().collect::<PathBuf>()) | ||
.unwrap_or_else(|e| panic!("failed parsing file {} with {:?}", fname, e)); | ||
assert_relative_eq!( | ||
case.values.iter().mean(), | ||
case.certified.mean, | ||
epsilon = 1e-12 | ||
); | ||
} | ||
} | ||
|
||
#[test] | ||
#[ignore = "NIST tests should not run from typical `cargo test` calls"] | ||
fn nist_test_covariance_consistent_with_variance() { | ||
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); | ||
let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt")); | ||
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "lew.txt")); | ||
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "mavro.txt")); | ||
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); | ||
#[ignore] | ||
fn nist_strd_univariate_std_dev() { | ||
let path_prefix = | ||
env::var(NIST_DATA_DIR_ENV).expect(&format!("env {} not set", NIST_DATA_DIR_ENV)); | ||
|
||
for fname in FILENAMES { | ||
let case = parse_file([&path_prefix, fname].iter().collect::<PathBuf>()) | ||
.unwrap_or_else(|e| panic!("failed parsing file {} with {:?}", fname, e)); | ||
assert_relative_eq!( | ||
case.values.iter().std_dev(), | ||
case.certified.std_dev, | ||
epsilon = 1e-10 | ||
); | ||
} | ||
} | ||
|
||
data = load_data(dbg!(path_dir.clone() + "michaelso.txt")); | ||
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); | ||
fn parse_certified_value(line: String) -> Result<f64> { | ||
line.chars() | ||
.skip_while(|&c| c != ':') | ||
.skip(1) // skip through ':' delimiter | ||
.skip_while(|&c| c.is_whitespace()) // effectively `String` trim | ||
.take_while(|&c| matches!(c, '0'..='9' | '-' | '.')) | ||
.collect::<String>() | ||
.parse::<f64>() | ||
.map_err(|e| e.into()) | ||
} | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc1.txt")); | ||
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10); | ||
fn parse_file(path: impl AsRef<std::path::Path>) -> anyhow::Result<TestCase> { | ||
let f = fs::File::open(path)?; | ||
let reader = BufReader::new(f); | ||
let mut lines = reader.lines(); | ||
|
||
let mean = parse_certified_value(lines.next().expect("file should not be exhausted")?)?; | ||
let std_dev = parse_certified_value(lines.next().expect("file should not be exhausted")?)?; | ||
let corr = parse_certified_value(lines.next().expect("file should not be exhausted")?)?; | ||
|
||
Ok(TestCase { | ||
certified: CertifiedValues { | ||
mean, | ||
std_dev, | ||
corr, | ||
}, | ||
values: lines | ||
.map_while(|line| line.ok()?.trim().parse().ok()) | ||
.collect(), | ||
}) | ||
} | ||
|
||
#[test] | ||
#[ignore = "NIST tests should not run from typical `cargo test` calls"] | ||
fn nist_test_pop_covar_consistent_with_pop_var() { | ||
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); | ||
let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt")); | ||
assert_almost_eq!( | ||
(&data).population_variance(), | ||
(&data).population_covariance(&data), | ||
1e-10, | ||
); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "lew.txt")); | ||
assert_almost_eq!( | ||
(&data).population_variance(), | ||
(&data).population_covariance(&data), | ||
1e-10, | ||
); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "mavro.txt")); | ||
assert_almost_eq!( | ||
(&data).population_variance(), | ||
(&data).population_covariance(&data), | ||
1e-10, | ||
); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "michaelso.txt")); | ||
assert_almost_eq!( | ||
(&data).population_variance(), | ||
(&data).population_covariance(&data), | ||
1e-10, | ||
); | ||
|
||
data = load_data(dbg!(path_dir.clone() + "numacc1.txt")); | ||
assert_almost_eq!( | ||
(&data).population_variance(), | ||
(&data).population_covariance(&data), | ||
1e-10, | ||
); | ||
} | ||
fn nist_test_covariance_consistent_with_variance() {} | ||
|
||
#[test] | ||
#[ignore = "NIST tests should not run from typical `cargo test` calls"] | ||
fn nist_test_covariance_is_symmetric() { | ||
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap(); | ||
let data_a = &load_data(dbg!(path_dir.clone() + "lottery.txt"))[0..200]; | ||
let data_b = &load_data(dbg!(path_dir.clone() + "lew.txt"))[0..200]; | ||
assert_almost_eq!(data_a.covariance(data_b), data_b.covariance(data_a), 1e-10); | ||
assert_almost_eq!( | ||
data_a.population_covariance(data_b), | ||
data_b.population_covariance(data_a), | ||
1e-11, | ||
); | ||
} | ||
fn nist_test_covariance_is_symmetric() {} |