From 4594e27646f381469b2ef5b84f945766cbbf577d Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Tue, 9 Jan 2024 11:35:22 -0500 Subject: [PATCH] better docstring and test --- .../infer_feature_attributes.py | 4 ++- .../test_infer_time_series_attributes.py | 29 +++++++++++++++++++ .../feature_attributes/time_series.py | 4 ++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/howso/utilities/feature_attributes/infer_feature_attributes.py b/howso/utilities/feature_attributes/infer_feature_attributes.py index e9c1fda7..1dbebdac 100644 --- a/howso/utilities/feature_attributes/infer_feature_attributes.py +++ b/howso/utilities/feature_attributes/infer_feature_attributes.py @@ -217,7 +217,9 @@ def infer_feature_attributes(data: Union[pd.DataFrame, SQLRelationalDatastorePro If True, the time feature will be treated as universal and future data is excluded while making predictions. If False, the time feature will not be treated as universal and only future data within the same series - is excluded while making predictions. + is excluded while making predictions. It is recommended to set this + value to True (the default behavior) if there is any possibility of + global relevancy of time. time_series_type_default : str, default 'rate' (Optional) Type specifying how time series is generated. 
diff --git a/howso/utilities/feature_attributes/tests/test_infer_time_series_attributes.py b/howso/utilities/feature_attributes/tests/test_infer_time_series_attributes.py index 56b187ca..c55485bb 100644 --- a/howso/utilities/feature_attributes/tests/test_infer_time_series_attributes.py +++ b/howso/utilities/feature_attributes/tests/test_infer_time_series_attributes.py @@ -199,3 +199,32 @@ def test_set_rate_delta_boundaries(): assert 'rate_max' not in features['date']['time_series'] assert 'delta_min' not in features['f3']['time_series'] assert 'delta_max' not in features['f3']['time_series'] + + +@pytest.mark.parametrize( + ("universal_value", "expected"), + [ + (True, True), + (False, False), + (None, None), + ] +) +def test_time_feature_is_universal(universal_value, expected): + """Validates that time_feature_is_universal is working as expected.""" + df = pd.read_csv(data_path) + + # Define time format + time_format = "%Y%m%d" + # Identify id-feature and time-feature + id_feature_name = "ID" + time_feature_name = "date" + + features = infer_feature_attributes( + df, + time_feature_name=time_feature_name, + id_feature_name=id_feature_name, + datetime_feature_formats={time_feature_name: time_format}, + time_feature_is_universal=universal_value, + ) + + assert features[time_feature_name]['time_series'].get("universal") == expected diff --git a/howso/utilities/feature_attributes/time_series.py b/howso/utilities/feature_attributes/time_series.py index 9e072525..671ce91e 100644 --- a/howso/utilities/feature_attributes/time_series.py +++ b/howso/utilities/feature_attributes/time_series.py @@ -428,7 +428,9 @@ def _process( # noqa: C901 If True, the time feature will be treated as universal and future data is excluded while making predictions. If False, the time feature will not be treated as universal and only future data within the same series - is excluded while making predictions. + is excluded while making predictions. 
It is recommended to set this + value to True (the default behavior) if there is any possibility of + global relevancy of time. time_series_type_default : str, default 'rate' (Optional) Type specifying how time series is generated.