21929: Updates auto ablation parameter names & defaults, bumps Amalgam version, MAJOR (#326)
apbassett authored Nov 21, 2024
1 parent 65a3c62 commit 9d8599a
Showing 14 changed files with 470 additions and 453 deletions.
14 changes: 7 additions & 7 deletions LICENSE-3RD-PARTY.txt
@@ -1,10 +1,10 @@
Faker
30.8.2
33.0.0
MIT License
joke2k
https://github.com/joke2k/faker
Faker is a Python package that generates fake data for you.
/home/runner/.pyenv/versions/3.12.1/lib/python3.12/site-packages/Faker-30.8.2.dist-info/LICENSE.txt
/home/runner/.pyenv/versions/3.12.1/lib/python3.12/site-packages/Faker-33.0.0.dist-info/LICENSE.txt
Copyright (c) 2012 Daniele Faraglia

Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -27,12 +27,12 @@ THE SOFTWARE.


PyJWT
2.9.0
2.10.0
MIT License
Jose Padilla
Jose Padilla <[email protected]>
https://github.com/jpadilla/pyjwt
JSON Web Token implementation in Python
/home/runner/.pyenv/versions/3.12.1/lib/python3.12/site-packages/PyJWT-2.9.0.dist-info/LICENSE
/home/runner/.pyenv/versions/3.12.1/lib/python3.12/site-packages/PyJWT-2.10.0.dist-info/LICENSE
The MIT License (MIT)

Copyright (c) 2015-2022 José Padilla
@@ -1664,12 +1664,12 @@ License: LGPL-2.1-or-later


packaging
24.1
24.2
Apache Software License; BSD License
Donald Stufft <[email protected]>
https://github.com/pypa/packaging
Core utilities for Python packages
/home/runner/.pyenv/versions/3.12.1/lib/python3.12/site-packages/packaging-24.1.dist-info/LICENSE
/home/runner/.pyenv/versions/3.12.1/lib/python3.12/site-packages/packaging-24.2.dist-info/LICENSE
This software is made available under the terms of *either* of the licenses
found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
under the terms of *both* these licenses.
22 changes: 13 additions & 9 deletions howso/client/base.py
@@ -4030,14 +4030,15 @@ def set_auto_ablation_params(
*,
ablated_cases_distribution_batch_size: int = 100,
abs_threshold_map: AblationThresholdMap = None,
auto_ablation_influence_weight_entropy_threshold: float = 0.15,
auto_ablation_weight_feature: str = ".case_weight",
batch_size: int = 2_000,
conviction_lower_threshold: t.Optional[float] = None,
conviction_upper_threshold: t.Optional[float] = None,
delta_threshold_map: AblationThresholdMap = None,
exact_prediction_features: t.Optional[Collection[str]] = None,
influence_weight_entropy_threshold: float = 0.6,
minimum_num_cases: int = 1_000,
min_num_cases: int = 1_000,
reduce_data_influence_weight_entropy_threshold: float = 0.6,
rel_threshold_map: AblationThresholdMap = None,
relative_prediction_threshold_map: t.Optional[Mapping[str, float]] = None,
residual_prediction_features: t.Optional[Collection[str]] = None,
@@ -4052,7 +4053,7 @@ def set_auto_ablation_params(
have their API changed without deprecation.
.. seealso::
The params ``influence_weight_entropy_threshold`` and ``auto_ablation_weight_feature`` that are
The params ``reduce_data_influence_weight_entropy_threshold`` and ``auto_ablation_weight_feature`` that are
set using this endpoint are used as defaults by :meth:`reduce_data`.
Parameters
@@ -4063,15 +4064,15 @@ def set_auto_ablation_params(
When True, the :meth:`train` method will ablate cases that meet the set criteria.
ablated_cases_distribution_batch_size: int, default 100
Number of cases in a batch to distribute ablated cases' influence weights.
auto_ablation_influence_weight_entropy_threshold : float, default 0.15
The influence weight entropy quantile that a case must be beneath in order to be trained.
auto_ablation_weight_feature : str, default ".case_weight"
The weight feature that should be accumulated to when cases are ablated.
batch_size: number, default 2,000
Number of cases in a batch to consider for ablation prior to training and
to recompute influence weight entropy.
minimum_num_cases : int, default 1,000
min_num_cases : int, default 1,000
The threshold of the minimum number of cases at which the model should auto-ablate.
influence_weight_entropy_threshold : float, default 0.6
The influence weight entropy quantile that a case must be beneath in order to be trained.
exact_prediction_features : Optional[List[str]], optional
For each of the features specified, will ablate a case if the prediction matches exactly.
residual_prediction_features : Optional[List[str]], optional
@@ -4080,6 +4081,8 @@
tolerance_prediction_threshold_map : Optional[dict[str, tuple[float, float]]], optional
For each of the features specified, will ablate a case if the prediction >= (case value - MIN)
and the prediction <= (case value + MAX).
reduce_data_influence_weight_entropy_threshold: float, default 0.6
The influence weight entropy quantile that a case must be above in order to not be removed.
relative_prediction_threshold_map : Optional[dict[str, float]], optional
For each of the features specified, will ablate a case if
abs(prediction - case value) / prediction <= relative threshold
@@ -4111,14 +4114,15 @@
ablated_cases_distribution_batch_size=ablated_cases_distribution_batch_size,
abs_threshold_map=abs_threshold_map,
auto_ablation_enabled=auto_ablation_enabled,
auto_ablation_influence_weight_entropy_threshold=auto_ablation_influence_weight_entropy_threshold,
auto_ablation_weight_feature=auto_ablation_weight_feature,
batch_size=batch_size,
conviction_lower_threshold=conviction_lower_threshold,
conviction_upper_threshold=conviction_upper_threshold,
delta_threshold_map=delta_threshold_map,
exact_prediction_features=exact_prediction_features,
influence_weight_entropy_threshold=influence_weight_entropy_threshold,
minimum_num_cases=minimum_num_cases,
min_num_cases=min_num_cases,
reduce_data_influence_weight_entropy_threshold=reduce_data_influence_weight_entropy_threshold,
rel_threshold_map=rel_threshold_map,
relative_prediction_threshold_map=relative_prediction_threshold_map,
residual_prediction_features=residual_prediction_features,
@@ -4173,7 +4177,7 @@ def reduce_data(
which defaults to ".case_weight".
influence_weight_entropy_threshold : float, optional
The quantile of influence weight entropy above which cases will be removed. This defaults
to the value of ``influence_weight_entropy_threshold`` from :meth:`set_auto_ablation_params`,
to the value of ``reduce_data_influence_weight_entropy_threshold`` from :meth:`set_auto_ablation_params`,
which defaults to 0.6.
skip_auto_analyze : bool, default False
Whether to skip auto-analyzing as cases are removed.
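
As a usage sketch only (not part of this diff), a client-level call with the renamed parameters might look like the following; `client` stands in for a concrete AbstractHowsoClient implementation and `trainee_id` for an existing trainee, both assumed here:

# Sketch: `client` is assumed to be a concrete AbstractHowsoClient
# implementation and `trainee_id` an existing trainee's identifier.
client.set_auto_ablation_params(
    trainee_id=trainee_id,
    auto_ablation_enabled=True,
    # New in this commit (default 0.15): the entropy quantile a case must be
    # beneath in order to be trained.
    auto_ablation_influence_weight_entropy_threshold=0.15,
    # Renamed from `minimum_num_cases`.
    min_num_cases=1_000,
    # Renamed from `influence_weight_entropy_threshold`; used as the default
    # threshold by `reduce_data`.
    reduce_data_influence_weight_entropy_threshold=0.6,
)
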
2 changes: 1 addition & 1 deletion howso/engine/tests/test_engine.py
@@ -349,7 +349,7 @@ def test_reduce_data(self, trainee):
"""Test `reduce_data`."""
pre_reduction_cases = trainee.get_cases()

trainee.set_auto_ablation_params(minimum_num_cases=50)
trainee.set_auto_ablation_params(min_num_cases=50)
trainee.reduce_data(influence_weight_entropy_threshold=0.5)

post_reduction_cases = trainee.get_cases(features=[".case_weight"])
25 changes: 15 additions & 10 deletions howso/engine/trainee.py
@@ -747,14 +747,15 @@ def set_auto_ablation_params(
*,
ablated_cases_distribution_batch_size: int = 100,
abs_threshold_map: AblationThresholdMap = None,
auto_ablation_influence_weight_entropy_threshold: float = 0.15,
auto_ablation_weight_feature: str = ".case_weight",
batch_size: int = 2_000,
conviction_lower_threshold: t.Optional[float] = None,
conviction_upper_threshold: t.Optional[float] = None,
delta_threshold_map: AblationThresholdMap = None,
exact_prediction_features: t.Optional[Collection[str]] = None,
influence_weight_entropy_threshold: float = 0.6,
minimum_num_cases: int = 1_000,
min_num_cases: int = 1_000,
reduce_data_influence_weight_entropy_threshold: float = 0.6,
rel_threshold_map: AblationThresholdMap = None,
relative_prediction_threshold_map: t.Optional[Mapping[str, float]] = None,
residual_prediction_features: t.Optional[Collection[str]] = None,
@@ -769,7 +770,7 @@ def set_auto_ablation_params(
have their API changed without deprecation.
.. seealso::
The params ``influence_weight_entropy_threshold`` and ``auto_ablation_weight_feature`` that are
The params ``reduce_data_influence_weight_entropy_threshold`` and ``auto_ablation_weight_feature`` that are
set using this endpoint are used as defaults by :meth:`reduce_data`.
Parameters
@@ -778,15 +779,15 @@ def set_auto_ablation_params(
When True, the :meth:`train` method will ablate cases that meet the set criteria.
ablated_cases_distribution_batch_size: int, default 100
Number of cases in a batch to distribute ablated cases' influence weights.
auto_ablation_influence_weight_entropy_threshold : float, default 0.15
The influence weight entropy quantile that a case must be beneath in order to be trained.
auto_ablation_weight_feature : str, default ".case_weight"
The weight feature that should be accumulated to when cases are ablated.
batch_size: number, default 2,000
Number of cases in a batch to consider for ablation prior to training and
to recompute influence weight entropy.
minimum_num_cases : int, default 1,000
min_num_cases : int, default 1,000
The threshold of the minimum number of cases at which the model should auto-ablate.
influence_weight_entropy_threshold : float, default 0.6
The influence weight entropy quantile that a case must be beneath in order to be trained.
exact_prediction_features : Collection of str, optional
For each of the features specified, will ablate a case if the prediction matches exactly.
residual_prediction_features : Collection of str, optional
@@ -795,6 +796,8 @@
tolerance_prediction_threshold_map : map of str to tuple of float, optional
For each of the features specified, will ablate a case if the prediction >= (case value - MIN)
and the prediction <= (case value + MAX).
reduce_data_influence_weight_entropy_threshold: float, default 0.6
The influence weight entropy quantile that a case must be above in order to not be removed.
relative_prediction_threshold_map : map of str to float, optional
For each of the features specified, will ablate a case if
abs(prediction - case value) / prediction <= relative threshold
@@ -824,16 +827,18 @@ def set_auto_ablation_params(
if isinstance(self.client, AbstractHowsoClient):
self.client.set_auto_ablation_params(
trainee_id=self.id,
ablated_cases_distribution_batch_size=ablated_cases_distribution_batch_size,abs_threshold_map=abs_threshold_map,
ablated_cases_distribution_batch_size=ablated_cases_distribution_batch_size,
abs_threshold_map=abs_threshold_map,
auto_ablation_enabled=auto_ablation_enabled,
auto_ablation_influence_weight_entropy_threshold=auto_ablation_influence_weight_entropy_threshold,
auto_ablation_weight_feature=auto_ablation_weight_feature,
batch_size=batch_size,
conviction_lower_threshold=conviction_lower_threshold,
conviction_upper_threshold=conviction_upper_threshold,
delta_threshold_map=delta_threshold_map,
exact_prediction_features=exact_prediction_features,
influence_weight_entropy_threshold=influence_weight_entropy_threshold,
minimum_num_cases=minimum_num_cases,
min_num_cases=min_num_cases,
reduce_data_influence_weight_entropy_threshold=reduce_data_influence_weight_entropy_threshold,
rel_threshold_map=rel_threshold_map,
relative_prediction_threshold_map=relative_prediction_threshold_map,
residual_prediction_features=residual_prediction_features,
@@ -879,7 +884,7 @@ def reduce_data(
which defaults to ".case_weight".
influence_weight_entropy_threshold : float, optional
The quantile of influence weight entropy above which cases will be removed. This defaults
to the value of ``influence_weight_entropy_threshold`` from :meth:`set_auto_ablation_params`,
to the value of ``reduce_data_influence_weight_entropy_threshold`` from :meth:`set_auto_ablation_params`,
which defaults to 0.6.
skip_auto_analyze : bool, default False
Whether to skip auto-analyzing as cases are removed.
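
Mirroring the updated test above, a Trainee-level sketch of the new flow (assuming an already-trained `trainee` instance, which is outside this diff):

# Sketch assuming `trainee` is an existing, already-trained Trainee.
trainee.set_auto_ablation_params(
    min_num_cases=1_000,  # renamed from `minimum_num_cases`
    reduce_data_influence_weight_entropy_threshold=0.6,
)

# When `influence_weight_entropy_threshold` is omitted, reduce_data falls back
# to the `reduce_data_influence_weight_entropy_threshold` set above.
trainee.reduce_data(influence_weight_entropy_threshold=0.5)
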
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
"rich>=12.5.1",
"semantic-version~=2.0",
"typing-extensions~=4.9",
"amalgam-lang==14.0.0", # Use exact since Engine is exact in version.json
"amalgam-lang==15.0.1", # Use exact since Engine is exact in version.json
]

[project.optional-dependencies]
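
Because the amalgam-lang pin is exact, a quick runtime check of the installed version can use the standard library; this snippet is illustrative only and not part of the commit:

# Illustrative check that the exact-pinned amalgam-lang version is installed.
from importlib.metadata import version

assert version("amalgam-lang") == "15.0.1"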