Commit

batch error logging
hussain-jafari committed Feb 10, 2025
1 parent a9d9e55 commit 14fda9d
Showing 3 changed files with 21 additions and 10 deletions.
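The core of the change: hard assert statements in the shared test utilities are wrapped in pytest-check's check context manager, so one run of the release tests records every failed check and reports them together ("batch error logging") instead of aborting at the first failure. A minimal sketch of the pattern, with made-up data and a hypothetical test name (requires the pytest-check plugin):

# Illustrative only: how pytest-check batches assertion failures.
from pytest_check import check

def test_soft_assertions() -> None:  # hypothetical test, not part of this commit
    noised = {"age": 31, "zip": "00000"}
    expected = {"age": 30, "zip": "98109"}
    for column, value in expected.items():
        # A failing block is recorded but does not stop the test,
        # so both bad columns appear in one failure report.
        with check:
            assert noised[column] == value, f"unexpected value in {column}"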
1 change: 1 addition & 0 deletions pyproject.toml
@@ -23,5 +23,6 @@ exclude = [
 [[tool.mypy.overrides]]
 module = [
     "scipy.*",
+    "pytest_check",
 ]
 ignore_missing_imports = true
2 changes: 1 addition & 1 deletion tests/integration/release/test_runner.py
@@ -19,7 +19,7 @@
 )
 def test_release_tests(pytest_args: list[str]) -> None:
     os.chdir(Path(__file__).parent) # need this to access cli options from conftest.py
-    base_cmd = ["pytest", "--release", "test_release.py"]
+    base_cmd = ["pytest", "--release", "test_release.py", "--check-max-tb=1000"]
     cmd = base_cmd + pytest_args
     result = subprocess.run(cmd, capture_output=True, text=True)
     assert result.returncode == 0
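pytest-check caps how many full tracebacks it prints for failed checks within a single test, so batching the checks in the utilities below would otherwise truncate most of the diagnostic output; passing --check-max-tb=1000 raises that cap for the release run. A hedged sketch of what the runner effectively executes (the -v flag here is illustrative; extra arguments arrive via the parametrized pytest_args):

# Sketch of the command built by test_release_tests; run from tests/integration/release.
import subprocess

base_cmd = ["pytest", "--release", "test_release.py", "--check-max-tb=1000"]
result = subprocess.run(base_cmd + ["-v"], capture_output=True, text=True)
print(result.stdout)  # the failure section lists every soft-check failure per test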
28 changes: 19 additions & 9 deletions tests/utilities.py
@@ -8,6 +8,7 @@
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
+from pytest_check import check
 from vivarium_testing_utils import FuzzyChecker

 from pseudopeople.configuration import Keys, get_configuration
@@ -34,7 +35,8 @@ def run_column_noising_tests(

         # Check that originally missing data remained missing
         originally_missing_idx = check_original.index[check_original[col.name].isna()]
-        assert check_noised.loc[originally_missing_idx, col.name].isna().all()
+        with check:
+            assert check_noised.loc[originally_missing_idx, col.name].isna().all()

         # Check for noising where applicable
         to_compare_idx = shared_idx.difference(originally_missing_idx)
@@ -43,7 +45,8 @@
             check_original.loc[to_compare_idx, col.name].values
             != check_noised.loc[to_compare_idx, col.name].values
         )
-        assert different_check.any()
+        with check:
+            assert different_check.any()

         noise_level = different_check.sum()

@@ -64,7 +67,8 @@
             == check_noised.loc[to_compare_idx, col.name].values
         )

-        assert same_check.all()
+        with check:
+            assert same_check.all()


 def run_omit_row_or_do_not_respond_tests(
@@ -87,15 +91,20 @@
     ]:
         # Census and household surveys have do_not_respond and omit_row.
         # For all other datasets they are mutually exclusive
-        assert len(noise_types) == 2
+        with check:
+            assert len(noise_types) == 2
     else:
-        assert len(noise_types) < 2
+        with check:
+            assert len(noise_types) < 2
     if not noise_types: # Check that there are no missing indexes
-        assert noised_data.index.symmetric_difference(original_data.index).empty
+        with check:
+            assert noised_data.index.symmetric_difference(original_data.index).empty
     else: # Check that there are some omissions
         # TODO: assert levels are as expected
-        assert noised_data.index.difference(original_data.index).empty
-        assert not original_data.index.difference(noised_data.index).empty
+        with check:
+            assert noised_data.index.difference(original_data.index).empty
+        with check:
+            assert not original_data.index.difference(noised_data.index).empty


 def validate_column_noise_level(
@@ -158,7 +167,8 @@ def validate_column_noise_level(
             [1 - p for p in token_probability]
         )
     else:
-        assert isinstance(tokens_per_string, pd.Series)
+        with check:
+            assert isinstance(tokens_per_string, pd.Series)
         avg_probability_any_token_noised = (
             1 - (1 - token_probability) ** tokens_per_string
         ).mean()
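One note on the shape of the edits above: each assert gets its own with check: block rather than sharing one, because the context manager only absorbs the first exception raised inside it; a second assert in the same block would never execute once the first fails. A small sketch of the distinction, with made-up frames and a hypothetical test name:

# Made-up data; shows why paired asserts above sit in separate with-check blocks.
import pandas as pd
from pytest_check import check

def test_index_checks() -> None:  # hypothetical test, not from the repository
    original = pd.DataFrame(index=[0, 1, 2, 3])
    noised = pd.DataFrame(index=[1, 2, 9])  # 9 is spurious; 0 and 3 were dropped

    with check:
        # Fails (index 9 is not in the original) and is recorded.
        assert noised.index.difference(original.index).empty
    with check:
        # Still runs, because the failure above was contained in its own block.
        assert not original.index.difference(noised.index).empty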
