Skip to content

Commit

Permalink
test: first pass NIST strd univariate test
Browse files Browse the repository at this point in the history
shell script needs curl, grep, and sed
  • Loading branch information
YeungOnion committed Jun 23, 2024
1 parent 6443762 commit 2bc3a93
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 127 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ num-traits = "0.2.14"

[dev-dependencies]
criterion = "0.3.3"
anyhow = "1.0"
33 changes: 33 additions & 0 deletions prep_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#! /bin/bash

process_file() {
# Define input and output file names
SOURCE=$1
FILENAME=$2
curl -fsSL ${SOURCE}/$FILENAME > $FILENAME

# Extract line numbers for Certified Values and Data from the header
INFO=$(grep "Certified Values:" $FILENAME)
CERTIFIED_VALUES_START=$(echo $INFO | awk '{print $4}')
CERTIFIED_VALUES_END=$(echo $INFO | awk '{print $6}')

INFO=$(grep "Data :" $FILENAME)
DATA_START=$(echo $INFO | awk '{print $4}')
DATA_END=$(echo $INFO | awk '{print $6}')

# Extract and reformat sections
# Certified values
sed -n -i \
-e "${CERTIFIED_VALUES_START},${CERTIFIED_VALUES_END}p" \
-e "${DATA_START},${DATA_END}p" \
$FILENAME
# sed -n -i -e "${CERTIFIED_VALUES_START},${CERTIFIED_VALUES_END}s/\(exact\)//p" $FILENAME

}

URL='https://www.itl.nist.gov/div898/strd/univ/data'
for file in Lottery.dat Lew.dat Mavro.dat Michelso.dat NumAcc1.dat NumAcc2.dat NumAcc3.dat
do
process_file $URL $file
done

223 changes: 96 additions & 127 deletions tests/nist_tests.rs
Original file line number Diff line number Diff line change
@@ -1,153 +1,122 @@
// #![cfg(test)]
use statrs::assert_almost_eq;
use anyhow::Result;
use approx::assert_relative_eq;
use statrs::statistics::Statistics;

use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::{env, fs};

#[cfg(test)]
const NIST_DATA_DIR_ENV: &str = "STATRS_NIST_DATA_DIR";

fn load_data(pathname: String) -> Vec<f64> {
let f = fs::File::open(pathname).unwrap();
let mut reader = BufReader::new(f);
struct TestCase {
certified: CertifiedValues,
values: Vec<f64>,
}

let mut buf = String::new();
let mut data: Vec<f64> = vec![];
while reader.read_line(&mut buf).unwrap() > 0 {
data.push(buf.trim().parse::<f64>().unwrap());
buf.clear();
impl std::fmt::Debug for TestCase {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "TestCase({:?}, [...]", self.certified)
}
data
}

#[test]
#[ignore = "NIST tests should not run from typical `cargo test` calls"]
fn nist_test_mean() {
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap();
let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt"));
assert_almost_eq!((&data).mean(), 518.958715596330, 1e-12);

data = load_data(dbg!(path_dir.clone() + "lew.txt"));
assert_almost_eq!((&data).mean(), -177.435000000000, 1e-13);

data = load_data(dbg!(path_dir.clone() + "mavro.txt"));
assert_almost_eq!((&data).mean(), 2.00185600000000, 1e-15);

data = load_data(dbg!(path_dir.clone() + "michaelso.txt"));
assert_almost_eq!((&data).mean(), 299.852400000000, 1e-13);

data = load_data(dbg!(path_dir.clone() + "numacc1.txt"));
assert_eq!((&data).mean(), 10000002.0);

data = load_data(dbg!(path_dir.clone() + "numacc2.txt"));
assert_almost_eq!((&data).mean(), 1.2, 1e-15);

data = load_data(dbg!(path_dir.clone() + "numacc3.txt"));
assert_eq!((&data).mean(), 1000000.2);
#[derive(Debug)]
struct CertifiedValues {
mean: f64,
std_dev: f64,
corr: f64,
}

data = load_data(dbg!(path_dir.clone() + "numacc4.txt"));
assert_almost_eq!((&data).mean(), 10000000.2, 1e-8);
impl std::fmt::Display for CertifiedValues {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"μ={:.3e}, σ={:.3e}, r={:.3e}",
self.mean, self.std_dev, self.corr
)
}
}

#[cfg(test)]
const NIST_DATA_DIR_ENV: &str = "STATRS_NIST_DATA_DIR";
#[cfg(test)]
const FILENAMES: [&str; 7] = [
"Lottery.dat",
"Lew.dat",
"Mavro.dat",
"Michelso.dat",
"NumAcc1.dat",
"NumAcc2.dat",
"NumAcc3.dat",
];

#[test]
#[ignore = "NIST tests should not run from typical `cargo test` calls"]
fn nist_test_std_dev() {
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap();
let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt"));
assert_almost_eq!((&data).std_dev(), 291.699727470969, 1e-13);

data = load_data(dbg!(path_dir.clone() + "lew.txt"));
assert_almost_eq!((&data).std_dev(), 277.332168044316, 1e-12);

data = load_data(dbg!(path_dir.clone() + "mavro.txt"));
assert_almost_eq!((&data).std_dev(), 0.000429123454003053, 1e-15);

data = load_data(dbg!(path_dir.clone() + "michaelso.txt"));
assert_almost_eq!((&data).std_dev(), 0.0790105478190518, 1e-13);

data = load_data(dbg!(path_dir.clone() + "numacc1.txt"));
assert_eq!((&data).std_dev(), 1.0);

data = load_data(dbg!(path_dir.clone() + "numacc2.txt"));
assert_almost_eq!((&data).std_dev(), 0.1, 1e-16);

data = load_data(dbg!(path_dir.clone() + "numacc3.txt"));
assert_almost_eq!((&data).std_dev(), 0.1, 1e-10);

data = load_data(dbg!(path_dir.clone() + "numacc4.txt"));
assert_almost_eq!((&data).std_dev(), 0.1, 1e-9);
fn nist_strd_univariate_mean() {
let path_prefix = env::var(NIST_DATA_DIR_ENV).unwrap_or_else(|e| panic!("{}", e));

for fname in FILENAMES {
let case = parse_file([&path_prefix, fname].iter().collect::<PathBuf>())
.unwrap_or_else(|e| panic!("failed parsing file {} with {:?}", fname, e));
assert_relative_eq!(
case.values.iter().mean(),
case.certified.mean,
epsilon = 1e-12
);
}
}

#[test]
#[ignore = "NIST tests should not run from typical `cargo test` calls"]
fn nist_test_covariance_consistent_with_variance() {
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap();
let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt"));
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);

data = load_data(dbg!(path_dir.clone() + "lew.txt"));
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);

data = load_data(dbg!(path_dir.clone() + "mavro.txt"));
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);
#[ignore]
fn nist_strd_univariate_std_dev() {
let path_prefix =
env::var(NIST_DATA_DIR_ENV).expect(&format!("env {} not set", NIST_DATA_DIR_ENV));

for fname in FILENAMES {
let case = parse_file([&path_prefix, fname].iter().collect::<PathBuf>())
.unwrap_or_else(|e| panic!("failed parsing file {} with {:?}", fname, e));
assert_relative_eq!(
case.values.iter().std_dev(),
case.certified.std_dev,
epsilon = 1e-10
);
}
}

data = load_data(dbg!(path_dir.clone() + "michaelso.txt"));
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);
fn parse_certified_value(line: String) -> Result<f64> {
line.chars()
.skip_while(|&c| c != ':')
.skip(1) // skip through ':' delimiter
.skip_while(|&c| c.is_whitespace()) // effectively `String` trim
.take_while(|&c| matches!(c, '0'..='9' | '-' | '.'))
.collect::<String>()
.parse::<f64>()
.map_err(|e| e.into())
}

data = load_data(dbg!(path_dir.clone() + "numacc1.txt"));
assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);
fn parse_file(path: impl AsRef<std::path::Path>) -> anyhow::Result<TestCase> {
let f = fs::File::open(path)?;
let reader = BufReader::new(f);
let mut lines = reader.lines();

let mean = parse_certified_value(lines.next().expect("file should not be exhausted")?)?;
let std_dev = parse_certified_value(lines.next().expect("file should not be exhausted")?)?;
let corr = parse_certified_value(lines.next().expect("file should not be exhausted")?)?;

Ok(TestCase {
certified: CertifiedValues {
mean,
std_dev,
corr,
},
values: lines
.map_while(|line| line.ok()?.trim().parse().ok())
.collect(),
})
}

#[test]
#[ignore = "NIST tests should not run from typical `cargo test` calls"]
fn nist_test_pop_covar_consistent_with_pop_var() {
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap();
let mut data = load_data(dbg!(path_dir.clone() + "lottery.txt"));
assert_almost_eq!(
(&data).population_variance(),
(&data).population_covariance(&data),
1e-10,
);

data = load_data(dbg!(path_dir.clone() + "lew.txt"));
assert_almost_eq!(
(&data).population_variance(),
(&data).population_covariance(&data),
1e-10,
);

data = load_data(dbg!(path_dir.clone() + "mavro.txt"));
assert_almost_eq!(
(&data).population_variance(),
(&data).population_covariance(&data),
1e-10,
);

data = load_data(dbg!(path_dir.clone() + "michaelso.txt"));
assert_almost_eq!(
(&data).population_variance(),
(&data).population_covariance(&data),
1e-10,
);

data = load_data(dbg!(path_dir.clone() + "numacc1.txt"));
assert_almost_eq!(
(&data).population_variance(),
(&data).population_covariance(&data),
1e-10,
);
}
fn nist_test_covariance_consistent_with_variance() {}

#[test]
#[ignore = "NIST tests should not run from typical `cargo test` calls"]
fn nist_test_covariance_is_symmetric() {
let path_dir = env::var(NIST_DATA_DIR_ENV).unwrap();
let data_a = &load_data(dbg!(path_dir.clone() + "lottery.txt"))[0..200];
let data_b = &load_data(dbg!(path_dir.clone() + "lew.txt"))[0..200];
assert_almost_eq!(data_a.covariance(data_b), data_b.covariance(data_a), 1e-10);
assert_almost_eq!(
data_a.population_covariance(data_b),
data_b.population_covariance(data_a),
1e-11,
);
}
fn nist_test_covariance_is_symmetric() {}

0 comments on commit 2bc3a93

Please sign in to comment.