Skip to content

Commit

Permalink
move test to release folder
Browse files Browse the repository at this point in the history
  • Loading branch information
hussain-jafari committed Feb 19, 2025
1 parent 62ce877 commit b8125b6
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 51 deletions.
22 changes: 20 additions & 2 deletions tests/integration/release/test_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@

from typing import Any

import numpy as np
import pandas as pd
import pytest
from _pytest.fixtures import FixtureRequest
from vivarium_testing_utils import FuzzyChecker

from pseudopeople.dataset import Dataset
from pseudopeople.schema_entities import COLUMNS, DATASET_SCHEMAS
from tests.constants import DATASET_GENERATION_FUNCS
from tests.integration.conftest import IDX_COLS, _get_common_datasets, get_unnoised_data
from tests.utilities import (
initialize_dataset_with_sample,
Expand Down Expand Up @@ -49,6 +48,25 @@ def test_row_noising_omit_row_or_do_not_respond(
run_omit_row_or_do_not_respond_tests(dataset_name, config, original_data, noised_data)


def test_column_dtypes(
    unnoised_dataset: Dataset,
    noised_data: pd.DataFrame,
    dataset_name: str,
    config: dict[str, Any],
) -> None:
    """Check every column of ``noised_data`` against its dtype in ``COLUMNS``.

    Each column name is looked up with ``COLUMNS.get_column`` and the
    resulting ``dtype_name`` is compared with the dtype of the data. Columns
    whose expected dtype is ``object`` are additionally required to hold
    only ``str`` values once nulls are dropped.
    """
    object_dtype = np.dtype(object)
    for column_name in noised_data.columns:
        column = COLUMNS.get_column(column_name)
        declared_dtype = column.dtype_name
        series = noised_data[column.name]
        if declared_dtype == object_dtype:
            # An 'object' dtype is how pandas reports str columns, but it can
            # hold arbitrary Python objects too, so inspect the non-null
            # values themselves.
            value_types = series.dropna().apply(type)
            assert (value_types == str).all(), value_types.unique()
        assert series.dtype == declared_dtype


def test_unnoised_id_cols(dataset_name: str, request: FixtureRequest) -> None:
"""Tests that all datasets retain unnoised simulant_id and household_id
(except for SSA which does not include household_id)
Expand Down
49 changes: 0 additions & 49 deletions tests/integration/test_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,55 +175,6 @@ def test_seed_behavior(
assert not noised_data.equals(noised_data_different_seed)


@pytest.mark.parametrize(
    "dataset_name",
    [
        DATASET_SCHEMAS.census.name,
        DATASET_SCHEMAS.acs.name,
        DATASET_SCHEMAS.cps.name,
        DATASET_SCHEMAS.ssa.name,
        DATASET_SCHEMAS.tax_w2_1099.name,
        DATASET_SCHEMAS.wic.name,
        DATASET_SCHEMAS.tax_1040.name,
    ],
)
@pytest.mark.parametrize(
    "engine",
    [
        "pandas",
        "dask",
    ],
)
def test_column_dtypes(
    dataset_name: str, engine: str, config: dict[str, Any], request: FixtureRequest
) -> None:
    """Check every noised column against its dtype in ``COLUMNS``.

    For the ``dask`` engine the data is produced by calling the dataset's
    entry in ``DATASET_GENERATION_FUNCS`` and computing the result; for any
    other engine the ``noised_sample_data_<dataset_name>`` fixture is used.
    Columns whose expected dtype is ``object`` must hold only ``str`` values
    once nulls are dropped.
    """
    if "TODO" in dataset_name:
        pytest.skip(reason=dataset_name)

    if engine != "dask":
        noised_data = request.getfixturevalue(f"noised_sample_data_{dataset_name}")
    else:
        generate = DATASET_GENERATION_FUNCS[dataset_name]
        lazy_result = generate(
            seed=SEED,
            year=None,
            config=config,
            engine=engine,
        )
        noised_data = lazy_result.compute()

    object_dtype = np.dtype(object)
    for column_name in noised_data.columns:
        column = COLUMNS.get_column(column_name)
        declared_dtype = column.dtype_name
        series = noised_data[column.name]
        if declared_dtype == object_dtype:
            # An 'object' dtype is how pandas reports str columns, but it can
            # hold arbitrary Python objects too, so inspect the non-null
            # values themselves.
            value_types = series.dropna().apply(type)
            assert (value_types == str).all(), value_types.unique()
        assert series.dtype == declared_dtype


@pytest.mark.skip(reason="TODO: Implement duplication row noising")
@pytest.mark.parametrize(
"dataset_name",
Expand Down

0 comments on commit b8125b6

Please sign in to comment.