Skip to content

Commit

Permalink
update for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
hussain-jafari committed Feb 11, 2025
1 parent b4fad39 commit c308b49
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/pseudopeople/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def _reformat_dates_for_noising(self) -> None:
# re-parse the format string for each row
# https://github.com/pandas-dev/pandas/issues/44764
# Year is already guaranteed to be 4-digit: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-timestamp-limits
is_na = data[column].isna()
is_na = pd.to_datetime(data[column].isna())
data_column = data.loc[~is_na, column]
year_string = data_column.dt.year.astype(str)
month_string = _zfill_fast(data_column.dt.month.astype(str), 2)
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/release/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"cps": "current_population_survey",
"census": "decennial_census",
"ssa": "social_security",
"taxes_1040": "taxes_1040",
"tax_1040": "taxes_1040",
"taxes_w2_and_1099": "taxes_w2_and_1099",
"wic": "women_infants_and_children",
}
Expand Down
18 changes: 11 additions & 7 deletions tests/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,17 @@ def validate_column_noise_level(

expected_noise = 1 - not_noised
# Fuzzy checker
validator.fuzzy_assert_proportion(
name=fuzzy_name,
observed_numerator=noise_level,
observed_denominator=len(check_data.loc[check_idx, col.name]),
target_proportion=expected_noise,
name_additional=f"{dataset_name}_{col.name}_{col_noise_type.name}",
)
try:
# Fuzzy checker
validator.fuzzy_assert_proportion(
name=fuzzy_name,
observed_numerator=noise_level,
observed_denominator=len(check_data.loc[check_idx, col.name]),
target_proportion=expected_noise,
name_additional=f"{dataset_name}_{col.name}_{col_noise_type.name}",
)
except:
print(f"{dataset_name} and {col.name} have expected {expected_noise} and actual {noise_level / len(check_data.loc[check_idx, col.name])}")


def initialize_dataset_with_sample(dataset_name: str) -> Dataset:
Expand Down

0 comments on commit c308b49

Please sign in to comment.