Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

19016: Adds auto ablation to Howso Engine, MAJOR #109

Merged
merged 33 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
d92229b
Add to client.
jdbeel Jan 18, 2024
f5928dd
Add to core.
jdbeel Jan 18, 2024
d0abd0c
Add to Trainee.
jdbeel Jan 18, 2024
fff8f92
Add get_auto_ablate_params
jdbeel Jan 22, 2024
9f58b81
Automated requirements/license generation [3.8]
Jan 22, 2024
acde26a
Automated requirements/license generation [3.9]
Jan 22, 2024
696d0d6
Automated requirements/license generation [3.10]
Jan 22, 2024
feb1493
Automated requirements/license generation [3.11]
Jan 22, 2024
ceca2ee
Automated requirements/license generation [licenses]
Jan 22, 2024
ca76082
Foiled by copy pasting
jdbeel Jan 23, 2024
c46651e
Merge branch '19016-add-auto-ablation' of github.com:howsoai/howso-en…
jdbeel Jan 23, 2024
f9c0a0c
Automated requirements/license generation [3.8]
Jan 23, 2024
9ed3fc2
Automated requirements/license generation [3.9]
Jan 23, 2024
02567a1
Automated requirements/license generation [3.10]
Jan 23, 2024
882fa51
Automated requirements/license generation [3.11]
Jan 23, 2024
2622820
Automated requirements/license generation [licenses]
Jan 23, 2024
7f52d11
Change docstring.
jdbeel Jan 23, 2024
a2f45a2
Merge branch '19016-add-auto-ablation' of github.com:howsoai/howso-en…
jdbeel Jan 23, 2024
2ea46d9
Address copy paste errors.
jdbeel Jan 23, 2024
01c7813
Merge remote-tracking branch 'origin/main' into 19016-add-auto-ablation
jdbeel Jan 23, 2024
58d0329
Add ablate flag.
jdbeel Jan 25, 2024
a3aa139
Remove ablate argument to train
jdbeel Jan 25, 2024
e6b65f1
Address comments, sort parameters, declare kw-only.
jdbeel Jan 25, 2024
173640f
Address more comments.
jdbeel Jan 25, 2024
2771861
Docstrings and comments.
jdbeel Jan 25, 2024
cdbcc4e
Change analyze -> auto ablation
jdbeel Jan 25, 2024
88ae2ec
Remove ablatement_params
jdbeel Jan 25, 2024
6d64134
Remove ablatement_params.
jdbeel Jan 25, 2024
2d337da
ablate_params -> ablation_params
jdbeel Jan 26, 2024
81758b9
auto_ablate -> autoAblation
jdbeel Jan 26, 2024
e6782da
Add influence_weight_entropy to react_into_features.
jdbeel Jan 26, 2024
35be0ef
Auto ablate -> auto ablation
jdbeel Jan 26, 2024
d991eae
Merge remote-tracking branch 'origin/main' into 19016-add-auto-ablation
jdbeel Jan 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
670 changes: 670 additions & 0 deletions LICENSE-3RD-PARTY.txt

Large diffs are not rendered by default.

30 changes: 28 additions & 2 deletions howso/client/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def set_random_seed(self, trainee_id, seed):
@abstractmethod
def train(
self, trainee_id, cases, features=None, *,
ablatement_params=None,
accumulate_weight_feature=None,
batch_size=None,
derived_features=None,
Expand Down Expand Up @@ -287,11 +286,12 @@ def react_into_features(
distance_contribution: Union[bool, str] = False,
familiarity_conviction_addition: Union[bool, str] = False,
familiarity_conviction_removal: Union[bool, str] = False,
features=None,
influence_weight_entropy: Union[bool, str] = False,
p_value_of_addition: Union[bool, str] = False,
p_value_of_removal: Union[bool, str] = False,
similarity_conviction: Union[bool, str] = False,
use_case_weights: Union[bool, str] = False,
features=None,
weight_feature=None
):
"""Calculate conviction and other data for the specified feature(s)."""
Expand Down Expand Up @@ -408,6 +408,32 @@ def analyze(
def auto_analyze(self, trainee_id):
"""Auto-analyze the trainee model."""

@abstractmethod
def set_auto_ablation_params(
    self,
    trainee_id,
    auto_ablation_enabled=False,
    *,
    auto_ablation_weight_feature=".case_weight",
    conviction_lower_threshold=None,
    conviction_upper_threshold=None,
    exact_prediction_features=None,
    influence_weight_entropy_threshold=0.6,
    minimum_model_size=1_000,
    relative_prediction_threshold_map=None,
    residual_prediction_features=None,
    tolerance_prediction_threshold_map=None,
    **kwargs
):
    """
    Set trainee parameters for auto ablation.

    Abstract interface only; concrete clients provide the behavior.

    Parameters
    ----------
    trainee_id : str
        The ID of the Trainee to set auto ablation parameters for.
    auto_ablation_enabled : bool, default False
        Whether auto ablation is enabled for the Trainee.
    auto_ablation_weight_feature : str, default ".case_weight"
        The weight feature accumulated to when cases are ablated.
    conviction_lower_threshold : float, optional
        Lower conviction threshold used by the ablation criteria.
    conviction_upper_threshold : float, optional
        Upper conviction threshold used by the ablation criteria.
    exact_prediction_features : list of str, optional
        Features for which an exact prediction match ablates a case.
    influence_weight_entropy_threshold : float, default 0.6
        The influence weight entropy quantile threshold.
    minimum_model_size : int, default 1,000
        Minimum number of cases at which the model should auto-ablate.
    relative_prediction_threshold_map : dict, optional
        Per-feature relative prediction thresholds.
    residual_prediction_features : list of str, optional
        Features for which the feature residual is the ablation threshold.
    tolerance_prediction_threshold_map : dict, optional
        Per-feature (MIN, MAX) prediction tolerances.
    **kwargs
        Additional parameters that are not officially supported.
    """

@abstractmethod
def get_auto_ablation_params(self, trainee_id):
    """
    Get the auto ablation parameters of a trainee.

    These are the parameters set by :meth:`set_auto_ablation_params`.
    """

@abstractmethod
def set_auto_analyze_params(
self,
Expand Down
115 changes: 93 additions & 22 deletions howso/direct/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1182,7 +1182,6 @@ def train( # noqa: C901
cases: Union[List[List[object]], DataFrame],
features: Optional[Iterable[str]] = None,
*,
ablatement_params: Optional[Dict[str, List[object]]] = None,
accumulate_weight_feature: Optional[str] = None,
batch_size: Optional[int] = None,
derived_features: Optional[Iterable[str]] = None,
Expand Down Expand Up @@ -1211,20 +1210,6 @@ def train( # noqa: C901
cases DataFrame.
c. You want to re-order the columns that are trained.
ablatement_params : dict of str to list of object, optional
Where keys are a feature name and values are threshold_type where
threshold_type is one of:
- ['exact']: Don't train if prediction matches exactly
- ['tolerance', MIN, MAX]: Don't train if ``prediction
>= (case value - MIN) & prediction <= (case value + MAX)``
- ['relative', PERCENT]: Don't train if
``abs(prediction - case value) / prediction <= PERCENT``
- ['residual']: Don't train if
``abs(prediction - case value) <= feature residual``
>>> {'species': ['exact'], 'sepal_length': ['tolerance', 0.1, 0.25]}
accumulate_weight_feature : str, optional
Name of feature into which to accumulate neighbors'
influences as weight for ablated cases. If unspecified, will not
Expand Down Expand Up @@ -1327,7 +1312,6 @@ def train( # noqa: C901
end = progress.current_tick + batch_size
response = self.howso.train(
trainee_id,
ablatement_params=ablatement_params,
accumulate_weight_feature=accumulate_weight_feature,
derived_features=derived_features,
features=features,
Expand Down Expand Up @@ -3427,15 +3411,16 @@ def react_into_features(
self,
trainee_id: str,
*,
features: Optional[Iterable[str]] = None,
distance_contribution: Optional[Union[str, bool]] = False,
familiarity_conviction_addition: Optional[Union[str, bool]] = False,
familiarity_conviction_removal: Optional[Union[str, bool]] = False,
features: Optional[Iterable[str]] = None,
influence_weight_entropy: Union[bool, str] = False,
p_value_of_addition: Optional[Union[str, bool]] = False,
p_value_of_removal: Optional[Union[str, bool]] = False,
similarity_conviction: Optional[Union[str, bool]] = False,
distance_contribution: Optional[Union[str, bool]] = False,
use_case_weights: bool = False,
weight_feature: Optional[str] = None,
use_case_weights: bool = False
):
"""
Calculate and cache conviction and other statistics.
Expand All @@ -3454,6 +3439,10 @@ def react_into_features(
The name of the feature to store conviction of removal
values. If set to True the values will be stored to the feature
'familiarity_conviction_removal'.
influence_weight_entropy : bool or str, default False
The name of the feature to store influence weight entropy values in.
If set to True, the values will be stored in the feature
'influence_weight_entropy'.
p_value_of_addition : bool or str, default False
The name of the feature to store p value of addition
values. If set to True the values will be stored to the feature
Expand Down Expand Up @@ -3486,6 +3475,7 @@ def react_into_features(
features=features,
familiarity_conviction_addition=familiarity_conviction_addition,
familiarity_conviction_removal=familiarity_conviction_removal,
influence_weight_entropy=influence_weight_entropy,
p_value_of_addition=p_value_of_addition,
p_value_of_removal=p_value_of_removal,
similarity_conviction=similarity_conviction,
Expand Down Expand Up @@ -5060,9 +5050,8 @@ def set_auto_analyze_params( # noqa: C901
if kwargs:
warn_params = ', '.join(kwargs)
warnings.warn(
f'The following auto analyze parameter(s) "{warn_params}" '
'are not officially supported by analyze and may or may not '
'have an effect.', UserWarning)
f'The following auto ablation parameter(s) "{warn_params}" '
'are not officially supported or may not have an effect.', UserWarning)

self.howso.auto_analyze_params(
trainee_id=trainee_id,
Expand All @@ -5074,6 +5063,88 @@ def set_auto_analyze_params( # noqa: C901
**kwargs
)
self._auto_persist_trainee(trainee_id)

def get_auto_ablation_params(self, trainee_id: str):
    """
    Retrieve the parameters set by :meth:`set_auto_ablation_params`.

    Parameters
    ----------
    trainee_id : str
        The ID of the Trainee to get auto ablation parameters for.

    Returns
    -------
    object
        Whatever ``self.howso.get_auto_ablation_params`` returns for the
        trainee.
    """
    # The trainee is resolved first, before the parameters are requested.
    self._auto_resolve_trainee(trainee_id)
    ablation_params = self.howso.get_auto_ablation_params(trainee_id)
    return ablation_params

def set_auto_ablation_params(
    self,
    trainee_id: str,
    auto_ablation_enabled: bool = False,
    *,
    auto_ablation_weight_feature: str = ".case_weight",
    conviction_lower_threshold: Optional[float] = None,
    conviction_upper_threshold: Optional[float] = None,
    exact_prediction_features: Optional[List[str]] = None,
    influence_weight_entropy_threshold: float = 0.6,
    minimum_model_size: int = 1_000,
    relative_prediction_threshold_map: Optional[Dict[str, float]] = None,
    residual_prediction_features: Optional[List[str]] = None,
    tolerance_prediction_threshold_map: Optional[Dict[str, Tuple[float, float]]] = None,
    **kwargs
):
    """
    Set trainee parameters for auto ablation.

    .. note::
        Auto-ablation is experimental and the API may change without deprecation.

    Parameters
    ----------
    trainee_id : str
        The ID of the Trainee to set auto ablation parameters for.
    auto_ablation_enabled : bool, default False
        When True, the :meth:`train` method will ablate cases that meet the set criteria.
    auto_ablation_weight_feature : str, default ".case_weight"
        The weight feature that should be accumulated to when cases are ablated.
    conviction_lower_threshold : float, optional
        The conviction value above which cases will be ablated.
    conviction_upper_threshold : float, optional
        The conviction value below which cases will be ablated.
    exact_prediction_features : list of str, optional
        For each of the features specified, will ablate a case if the prediction matches exactly.
    influence_weight_entropy_threshold : float, default 0.6
        The influence weight entropy quantile that a case must be beneath in order to be trained.
    minimum_model_size : int, default 1,000
        The threshold of the minimum number of cases at which the model should auto-ablate.
    relative_prediction_threshold_map : dict of str to float, optional
        For each of the features specified, will ablate a case if
        ``abs(prediction - case value) / prediction <= relative threshold``.
    residual_prediction_features : list of str, optional
        For each of the features specified, will ablate a case if
        ``abs(prediction - case value) <= feature residual``.
    tolerance_prediction_threshold_map : dict of str to tuple of float, optional
        For each of the features specified, will ablate a case if
        ``prediction >= (case value - MIN)`` and ``prediction <= (case value + MAX)``.
    **kwargs
        Additional parameters that are not officially supported. They are
        forwarded with the supported ones and trigger a ``UserWarning``.
    """
    params = dict(
        auto_ablation_enabled=auto_ablation_enabled,
        auto_ablation_weight_feature=auto_ablation_weight_feature,
        minimum_model_size=minimum_model_size,
        influence_weight_entropy_threshold=influence_weight_entropy_threshold,
        exact_prediction_features=exact_prediction_features,
        residual_prediction_features=residual_prediction_features,
        tolerance_prediction_threshold_map=tolerance_prediction_threshold_map,
        relative_prediction_threshold_map=relative_prediction_threshold_map,
        conviction_lower_threshold=conviction_lower_threshold,
        conviction_upper_threshold=conviction_upper_threshold,
    )
    params.update(kwargs)
    if kwargs:
        warn_params = ", ".join(kwargs)
        warnings.warn(
            f'The following parameter(s) "{warn_params}" are '
            'not officially supported by auto ablation and may or may not have an effect.',
            UserWarning
        )
    self._auto_resolve_trainee(trainee_id)
    self.howso.set_auto_ablation_params(trainee_id, **params)
    # Persist after changing trainee parameters, as set_auto_analyze_params
    # does, so the new settings are not lost when auto-persistence is in use.
    self._auto_persist_trainee(trainee_id)

def optimize(self, *args, **kwargs):
"""
Expand Down
99 changes: 88 additions & 11 deletions howso/direct/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,81 @@ def get_num_training_cases(self, trainee_id: str) -> Dict:
"""
return self._execute("get_num_training_cases", {"trainee": trainee_id})

def get_auto_ablation_params(self, trainee_id: str):
    """
    Get the auto ablation parameters of a trainee.

    These are the parameters set by :meth:`set_auto_ablation_params`.

    Parameters
    ----------
    trainee_id : str
        The ID of the Trainee to get auto ablation parameters for.

    Returns
    -------
    object
        The result of executing the ``get_auto_ablation_params`` label.
    """
    payload = {"trainee": trainee_id}
    return self._execute("get_auto_ablation_params", payload)

def set_auto_ablation_params(
    self,
    trainee_id: str,
    auto_ablation_enabled: bool = False,
    *,
    auto_ablation_weight_feature: str = ".case_weight",
    conviction_lower_threshold: Optional[float] = None,
    conviction_upper_threshold: Optional[float] = None,
    exact_prediction_features: Optional[List[str]] = None,
    influence_weight_entropy_threshold: float = 0.6,
    minimum_model_size: int = 1_000,
    relative_prediction_threshold_map: Optional[Dict[str, float]] = None,
    residual_prediction_features: Optional[List[str]] = None,
    tolerance_prediction_threshold_map: Optional[Dict[str, Tuple[float, float]]] = None,
    **kwargs
):
    """
    Set trainee parameters for auto ablation.

    .. note::
        Auto-ablation is experimental and the API may change without deprecation.

    Parameters
    ----------
    trainee_id : str
        The ID of the Trainee to set auto ablation parameters for.
    auto_ablation_enabled : bool, default False
        When True, the :meth:`train` method will ablate cases that meet the set criteria.
    auto_ablation_weight_feature : str, default ".case_weight"
        The weight feature that should be accumulated to when cases are ablated.
    conviction_lower_threshold : float, optional
        The conviction value above which cases will be ablated.
    conviction_upper_threshold : float, optional
        The conviction value below which cases will be ablated.
    exact_prediction_features : list of str, optional
        For each of the features specified, will ablate a case if the prediction matches exactly.
    influence_weight_entropy_threshold : float, default 0.6
        The influence weight entropy quantile that a case must be beneath in order to be trained.
    minimum_model_size : int, default 1,000
        The threshold for the minimum number of cases at which the model should auto-ablate.
    relative_prediction_threshold_map : dict of str to float, optional
        For each of the features specified, will ablate a case if
        ``abs(prediction - case value) / prediction <= relative threshold``.
    residual_prediction_features : list of str, optional
        For each of the features specified, will ablate a case if
        ``abs(prediction - case value) <= feature residual``.
    tolerance_prediction_threshold_map : dict of str to tuple of float, optional
        For each of the features specified, will ablate a case if
        ``prediction >= (case value - MIN)`` and ``prediction <= (case value + MAX)``.
    **kwargs
        Additional parameters that are not officially supported. They are
        included in the executed payload as-is.

    Returns
    -------
    object
        The result of executing the ``set_auto_ablation_params`` label.
    """
    # Extra keyword arguments used to be dropped here. They are now forwarded.
    # The named entries come after ``**kwargs`` so a stray key such as
    # "trainee" can never override an explicit argument.
    payload = {
        **kwargs,
        "trainee": trainee_id,
        "auto_ablation_enabled": auto_ablation_enabled,
        "auto_ablation_weight_feature": auto_ablation_weight_feature,
        "minimum_model_size": minimum_model_size,
        "influence_weight_entropy_threshold": influence_weight_entropy_threshold,
        "exact_prediction_features": exact_prediction_features,
        "residual_prediction_features": residual_prediction_features,
        "tolerance_prediction_threshold_map": tolerance_prediction_threshold_map,
        "relative_prediction_threshold_map": relative_prediction_threshold_map,
        "conviction_lower_threshold": conviction_lower_threshold,
        "conviction_upper_threshold": conviction_upper_threshold,
    }
    return self._execute("set_auto_ablation_params", payload)

def auto_analyze_params(
self,
trainee_id: str,
Expand Down Expand Up @@ -1050,7 +1125,6 @@ def train(
input_cases: List[List[Any]],
features: Optional[Iterable[str]] = None,
*,
ablatement_params: Optional[Dict[str, List[Any]]] = None,
accumulate_weight_feature: Optional[str] = None,
derived_features: Optional[Iterable[str]] = None,
input_is_substituted: bool = False,
Expand All @@ -1069,8 +1143,6 @@ def train(
One or more cases to train into the model.
features : iterable of str, optional
An iterable of feature names corresponding to the input cases.
ablatement_params : dict of str to list of object, optional
Parameters describing how to ablate cases.
accumulate_weight_feature : str, optional
Name of feature into which to accumulate neighbors'
influences as weight for ablated cases. If unspecified, will not
Expand Down Expand Up @@ -1099,13 +1171,12 @@ def train(
return self._execute("train", {
"trainee": trainee_id,
"input_cases": input_cases,
"features": features,
"accumulate_weight_feature": accumulate_weight_feature,
"derived_features": derived_features,
"session": session,
"ablatement_params": ablatement_params,
"series": series,
"features": features,
"input_is_substituted": input_is_substituted,
"accumulate_weight_feature": accumulate_weight_feature,
"series": series,
"session": session,
"train_weights_only": train_weights_only,
})

Expand Down Expand Up @@ -1804,15 +1875,16 @@ def react_into_features(
self,
trainee_id: str,
*,
features: Optional[Iterable[str]] = None,
distance_contribution: bool = False,
familiarity_conviction_addition: bool = False,
familiarity_conviction_removal: bool = False,
features: Optional[Iterable[str]] = None,
influence_weight_entropy: Union[bool, str] = False,
p_value_of_addition: bool = False,
p_value_of_removal: bool = False,
similarity_conviction: bool = False,
distance_contribution: bool = False,
use_case_weights: bool = False,
weight_feature: Optional[str] = None,
use_case_weights: bool = False
) -> None:
"""
Calculate and cache conviction and other statistics.
Expand All @@ -1831,6 +1903,10 @@ def react_into_features(
The name of the feature to store conviction of removal
values. If set to True the values will be stored to the feature
'familiarity_conviction_removal'.
influence_weight_entropy : bool or str, default False
The name of the feature to store influence weight entropy values in.
If set to True, the values will be stored in the feature
'influence_weight_entropy'.
p_value_of_addition : bool or str, default False
The name of the feature to store p value of addition
values. If set to True the values will be stored to the feature
Expand Down Expand Up @@ -1859,6 +1935,7 @@ def react_into_features(
"features": features,
"familiarity_conviction_addition": familiarity_conviction_addition,
"familiarity_conviction_removal": familiarity_conviction_removal,
"influence_weight_entropy": influence_weight_entropy,
"p_value_of_addition": p_value_of_addition,
"p_value_of_removal": p_value_of_removal,
"similarity_conviction": similarity_conviction,
Expand Down
Loading
Loading