Skip to content

Commit

Permalink
18898: Adds 'time_feature_is_universal' to infer_feature_attributes
Browse files Browse the repository at this point in the history
…, MINOR (#98)

Adds a boolean flag 'time_feature_is_universal' to
`infer_feature_attributes`.

If 'time_feature_is_universal' is specified by the user, then that value
is directly copied into the feature attributes of the specified time
feature under `features[time_feature_name]['time_series']['universal']`.

If the value is not specified, then nothing is stored there, and it's
left to the Engine to assume default behavior.
Default value is None.
  • Loading branch information
cademack authored Jan 9, 2024
1 parent f2ac444 commit 34ef786
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,14 @@ def infer_feature_attributes(data: Union[pd.DataFrame, SQLRelationalDatastorePro
This will cause the bounds for the start and end times set
to the same bounds as observed in the original data.
time_feature_is_universal : bool, optional
If True, the time feature will be treated as universal and future data
is excluded while making predictions. If False, the time feature will
not be treated as universal and only future data within the same series
is excluded while making predictions. It is recommended to set this
value to True if there is any possibility of global relevancy of time.
Treating the time feature as universal is the default behavior when
this value is not specified.
time_series_type_default : str, default 'rate'
(Optional) Type specifying how time series is generated.
One of 'rate' or 'delta', default is 'rate'. If 'rate',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,32 @@ def test_set_rate_delta_boundaries():
assert 'rate_max' not in features['date']['time_series']
assert 'delta_min' not in features['f3']['time_series']
assert 'delta_max' not in features['f3']['time_series']


@pytest.mark.parametrize(
    ("universal_value", "expected"),
    [
        (True, True),
        (False, False),
        (None, None),
    ]
)
def test_time_feature_is_universal(universal_value, expected):
    """
    Validate that `time_feature_is_universal` reaches the time feature.

    An explicit True/False must be stored verbatim under
    ``features[time_feature_name]['time_series']['universal']``. When the
    flag is left as None, the 'universal' key must not be written at all.

    Parameters
    ----------
    universal_value : bool or None
        Value passed as `time_feature_is_universal`.
    expected : bool or None
        Value expected under the 'universal' key; None means the key is
        expected to be absent.
    """
    df = pd.read_csv(data_path)

    # Define time format
    time_format = "%Y%m%d"
    # Identify id-feature and time-feature
    id_feature_name = "ID"
    time_feature_name = "date"

    features = infer_feature_attributes(
        df,
        time_feature_name=time_feature_name,
        id_feature_name=id_feature_name,
        datetime_feature_formats={time_feature_name: time_format},
        time_feature_is_universal=universal_value,
    )

    time_series_attributes = features[time_feature_name]['time_series']

    if expected is None:
        # `.get("universal") == None` would also pass if the key were stored
        # with a None value, so check for absence of the key explicitly.
        assert "universal" not in time_series_attributes
    else:
        # Identity comparison rejects truthy/falsy look-alikes such as 1 or 0.
        assert time_series_attributes["universal"] is expected
13 changes: 13 additions & 0 deletions howso/utilities/feature_attributes/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def _process( # noqa: C901
attempt_infer_extended_nominals: bool = False,
nominal_substitution_config: Optional[Dict[str, Dict]] = None,
include_extended_nominal_probabilities: Optional[bool] = False,
time_feature_is_universal: bool = None,
time_series_type_default: Optional[str] = 'rate',
time_series_types_override: Optional[Dict] = None,
orders_of_derivatives: Optional[Dict] = None,
Expand Down Expand Up @@ -423,6 +424,14 @@ def _process( # noqa: C901
(Optional) If true, extended nominal probabilities will be appended
as metadata into the feature object.
time_feature_is_universal : bool, optional
If True, the time feature will be treated as universal and future data
is excluded while making predictions. If False, the time feature will
not be treated as universal and only future data within the same series
is excluded while making predictions. It is recommended to set this
value to True if there is any possibility of global relevancy of time.
Treating the time feature as universal is the default behavior when
this value is not specified.
time_series_type_default : str, default 'rate'
(Optional) Type specifying how time series is generated.
One of 'rate' or 'delta', default is 'rate'. If 'rate',
Expand Down Expand Up @@ -616,6 +625,10 @@ def _process( # noqa: C901

if self.time_feature_name in features:
features[self.time_feature_name]['time_series']['time_feature'] = True

# Assign universal value if specified
if time_feature_is_universal is not None:
features[self.time_feature_name]['time_series']['universal'] = time_feature_is_universal
# Force time_feature to be `continuous`
features[self.time_feature_name]['type'] = "continuous"
# Set time_series as 'delta' so that lag and delta are computed
Expand Down

0 comments on commit 34ef786

Please sign in to comment.