[python-package] add scikit-learn-style API for early stopping #5808
base: master
Changes from 25 commits
```diff
@@ -44,7 +44,7 @@
     dt_DataTable,
     pd_DataFrame,
 )
-from .engine import train
+from .engine import _make_n_folds, train

 if TYPE_CHECKING:
     from .compat import _sklearn_Tags
```
```diff
@@ -507,7 +507,10 @@ def __init__(
     random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
     n_jobs: Optional[int] = None,
     importance_type: str = "split",
-    **kwargs: Any,
+    early_stopping: bool = False,
+    n_iter_no_change: int = 10,
+    validation_fraction: Optional[float] = 0.1,
+    **kwargs,
```
**Review comment (jameslamb):** Why was the `**kwargs: Any` type hint removed? If it was just an accident, please put it back to reduce the size of the diff.
```diff
 ):
     r"""Construct a gradient boosting model.
```
```diff
@@ -587,6 +590,16 @@ def __init__(
         The type of feature importance to be filled into ``feature_importances_``.
         If 'split', result contains numbers of times the feature is used in a model.
         If 'gain', result contains total gains of splits which use the feature.
+    early_stopping : bool, optional (default=False)
+        Whether to enable early stopping. If set to True, training will stop if the
+        validation score does not improve for a specified number of rounds
+        (controlled by ``n_iter_no_change``).
+    n_iter_no_change : int, optional (default=10)
+        If early stopping is enabled, this parameter specifies the number of iterations
+        with no improvement after which training will be stopped.
+    validation_fraction : float or None, optional (default=0.1)
```
**Review comment (jameslamb):** There aren't any tests covering this new behavior. I don't know the exact code paths off the top of my head and would appreciate it if you could investigate, but I think it should be possible to test this by checking the size of the datasets added to `valid_sets`. If that's not observable through the public API, try to use mocking/patching to observe it instead of adding any additional properties to the Booster / estimators' public API. Comment in-thread here if you have questions or need help with that.

**Reply (ClaudioSalvatoreArcidiacono):** Thanks for your comment. I have added a couple of tests for that, using patching. Let me know if you think there is more that can be improved in those tests.
```diff
+        Proportion of training data to set aside as
+        validation data for early stopping. If None, early stopping is done on
+        the training data. Only used if early stopping is performed.
     **kwargs
         Other parameters for the model.
         Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters.
```
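The stopping rule described by the new docstring entries can be summarized with a small standalone sketch. This illustrates the general `n_iter_no_change` idea only; it is not LightGBM's actual implementation, and `should_stop` is a hypothetical helper:

```python
def should_stop(scores, n_iter_no_change=10):
    """Stop when the best score has not improved for n_iter_no_change rounds.

    `scores` holds one validation score per boosting iteration (higher is better).
    """
    if len(scores) <= n_iter_no_change:
        return False
    best_before = max(scores[:-n_iter_no_change])
    # no score in the trailing window beats the earlier best -> stop
    return max(scores[-n_iter_no_change:]) <= best_before

# A score curve that plateaus triggers stopping once the plateau
# is n_iter_no_change rounds long.
plateau = [0.5, 0.6, 0.7] + [0.7] * 10
print(should_stop(plateau))             # True: no improvement in the last 10 rounds
print(should_stop([0.1, 0.2, 0.3], 2))  # False: still improving
```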
```diff
@@ -651,6 +664,9 @@ def __init__(
     self.random_state = random_state
     self.n_jobs = n_jobs
     self.importance_type = importance_type
+    self.early_stopping = early_stopping
+    self.n_iter_no_change = n_iter_no_change
+    self.validation_fraction = validation_fraction
     self._Booster: Optional[Booster] = None
     self._evals_result: _EvalResultDict = {}
     self._best_score: _LGBM_BoosterBestScoreType = {}
```
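The plain assignments above follow scikit-learn's convention that `__init__` stores every constructor argument unmodified, which is what makes `get_params`/`set_params` (and therefore cloning and grid search) work. A minimal standalone illustration of that convention, using a hypothetical `MiniEstimator` rather than LightGBM code:

```python
import inspect

class MiniEstimator:
    def __init__(self, early_stopping=False, n_iter_no_change=10, validation_fraction=0.1):
        # scikit-learn convention: store each constructor argument as-is,
        # with no validation or transformation in __init__
        self.early_stopping = early_stopping
        self.n_iter_no_change = n_iter_no_change
        self.validation_fraction = validation_fraction

    def get_params(self):
        # parameters are discoverable straight from the __init__ signature
        names = [p for p in inspect.signature(type(self).__init__).parameters if p != "self"]
        return {name: getattr(self, name) for name in names}

print(MiniEstimator(early_stopping=True).get_params())
```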
```diff
@@ -816,11 +832,19 @@ def _process_params(self, stage: str) -> Dict[str, Any]:
     params.pop("importance_type", None)
     params.pop("n_estimators", None)
     params.pop("class_weight", None)
+    params.pop("validation_fraction", None)
+    params.pop("early_stopping", None)
+    params.pop("n_iter_no_change", None)

     if isinstance(params["random_state"], np.random.RandomState):
         params["random_state"] = params["random_state"].randint(np.iinfo(np.int32).max)
     elif isinstance(params["random_state"], np.random.Generator):
         params["random_state"] = int(params["random_state"].integers(np.iinfo(np.int32).max))

+    params = _choose_param_value("early_stopping_round", params, self.n_iter_no_change)
+    if self.early_stopping is not True:
+        params["early_stopping_round"] = None
```
**Review comment (jameslamb):** This looks to me like it might turn off early stopping that was enabled other ways (like passing `early_stopping_rounds`).
```diff
     if self._n_classes > 2:
         for alias in _ConfigAliases.get("num_class"):
             params.pop(alias, None)
```
```diff
@@ -1006,6 +1030,27 @@ def fit(

                 valid_sets.append(valid_set)

+        elif self.early_stopping is True:
+            if self.validation_fraction is not None:
+                n_splits = max(int(np.ceil(1 / self.validation_fraction)), 2)
+                stratified = isinstance(self, LGBMClassifier)
```
**Review comment (ClaudioSalvatoreArcidiacono):** I am not a huge fan of how the validation set is created from the train set using `_make_n_folds`: if `1 / validation_fraction` is not an integer, the actual validation set size will not match the validation fraction specified by the user. For example, a validation fraction of 0.4 gives `n_splits = 3` here, which results in an actual fraction of 1/3 instead of 0.4. Using something like `train_test_split` from scikit-learn would solve the issue for the classification and regression cases, but for ranking tasks our best option is `GroupShuffleSplit`, which will inevitably suffer from the same issue. I considered a couple of options to solve this; I would lean towards option 2, but it will make the MR bigger. @jameslamb I would like to hear your opinion on it; do you perhaps already have something else in mind?
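The rounding mismatch the comment describes can be checked directly by mirroring the `n_splits` computation from the diff in a standalone sketch (`effective_validation_fraction` is a hypothetical helper, not LightGBM code):

```python
import math

def effective_validation_fraction(validation_fraction):
    # mirrors n_splits = max(int(np.ceil(1 / validation_fraction)), 2) from the diff;
    # one fold out of n_splits becomes the validation set, so the actual
    # held-out fraction is 1 / n_splits
    n_splits = max(math.ceil(1 / validation_fraction), 2)
    return n_splits, 1 / n_splits

for requested in (0.1, 0.25, 0.3, 0.4):
    n_splits, actual = effective_validation_fraction(requested)
    print(f"requested={requested} n_splits={n_splits} actual={actual:.3f}")
```

Whenever `1 / validation_fraction` is not an integer (0.3 and 0.4 above), the requested and actual fractions diverge; for fractions like 0.1 and 0.25 they agree exactly.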
```diff
+                cvfolds = _make_n_folds(
+                    full_data=train_set,
+                    folds=None,
+                    nfold=n_splits,
+                    params=params,
+                    seed=self.random_state,
+                    stratified=stratified,
+                    shuffle=True,
+                )
+                train_idx, val_idx = next(cvfolds)
+                valid_set = train_set.subset(sorted(val_idx))
+                train_set = train_set.subset(sorted(train_idx))
+            else:
+                valid_set = train_set
+            valid_set = valid_set.construct()
+            valid_sets = [valid_set]
```
```diff

         if isinstance(init_model, LGBMModel):
             init_model = init_model.booster_
```
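One alternative to the `_make_n_folds` approach discussed above is an exact-size holdout split. A minimal sketch of that idea with NumPy (`holdout_indices` is a hypothetical helper, not part of this PR):

```python
import numpy as np

def holdout_indices(n_samples, validation_fraction, seed=0):
    """Split row indices so the validation set has round(n * fraction) rows exactly."""
    rng = np.random.default_rng(seed)
    permuted = rng.permutation(n_samples)
    n_valid = max(1, round(n_samples * validation_fraction))
    # return sorted indices, matching how Dataset.subset() is called in the diff above
    return np.sort(permuted[n_valid:]), np.sort(permuted[:n_valid])

train_idx, valid_idx = holdout_indices(100, 0.4)
print(len(train_idx), len(valid_idx))  # 60 40
```

Unlike the `1 / n_splits` fold-based split, this honours fractions like 0.4 exactly, though it does not handle stratification or query groups for ranking.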
```diff
@@ -278,6 +278,112 @@ def test_binary_classification_with_custom_objective():
     assert ret < 0.05


+def test_auto_early_stopping_binary_classification():
+    X, y = load_breast_cancer(return_X_y=True)
+    n_estimators = 200
+    gbm = lgb.LGBMClassifier(n_estimators=n_estimators, random_state=42, verbose=-1, early_stopping=True, num_leaves=5)
+    gbm.fit(X, y)
+    assert gbm._Booster.params["early_stopping_round"] == 10
+    assert gbm._Booster.num_trees() < n_estimators
+    assert gbm.best_iteration_ < n_estimators
+
+
+def test_auto_early_stopping_compatibility_with_histgradientboostingclassifier():
+    X, y = load_breast_cancer(return_X_y=True)
+    n_estimators = 200
+    n_iter_no_change = 5
+    gbm = lgb.LGBMClassifier(
+        n_estimators=n_estimators,
+        random_state=42,
+        verbose=-1,
+        early_stopping=True,
+        num_leaves=5,
+        n_iter_no_change=n_iter_no_change,
+    )
+    gbm.fit(X, y)
+    assert gbm._Booster.params["early_stopping_round"] == n_iter_no_change
+    assert gbm._Booster.num_trees() < n_estimators
+    assert gbm.best_iteration_ < n_estimators
+
+
+def test_auto_early_stopping_categorical_features_set_during_fit(rng_fixed_seed):
+    pd = pytest.importorskip("pandas")
+    X = pd.DataFrame(
+        {
+            "A": pd.Categorical(
+                rng_fixed_seed.permutation(["z", "y", "x", "w", "v"] * 60), ordered=True
+            ),  # str and ordered categorical
+            "B": rng_fixed_seed.permutation([1, 2, 3] * 100),  # int
+            "C": rng_fixed_seed.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60),  # float
+            "D": rng_fixed_seed.permutation([True, False] * 150),  # bool
+        }
+    )
+    cat_cols_actual = ["A", "B", "C", "D"]
+    y = rng_fixed_seed.permutation([0, 1] * 150)
+    n_estimators = 5
+    gbm = lgb.LGBMClassifier(n_estimators=n_estimators, random_state=42, verbose=-1, early_stopping=True, num_leaves=5)
+    gbm.fit(X, y, categorical_feature=cat_cols_actual)
+    assert gbm._Booster.params["early_stopping_round"] == 10
+    assert gbm._Booster.num_trees() < 5
+    assert gbm.best_iteration_ < 5
+
+
+def test_early_stopping_is_deactivated_by_default_regression():
+    X, y = make_synthetic_regression(n_samples=10_001)
+    n_estimators = 5
+    gbm = lgb.LGBMRegressor(n_estimators=n_estimators, random_state=42, verbose=-1)
+    gbm.fit(X, y)
+
+    # Check that early stopping did not kick in
+    assert gbm._Booster.params.get("early_stopping_round") is None
+    assert gbm._Booster.num_trees() == n_estimators
+
+
+def test_early_stopping_is_deactivated_by_default_classification():
+    X, y = load_breast_cancer(return_X_y=True)
+    n_estimators = 5
+    gbm = lgb.LGBMClassifier(n_estimators=n_estimators, random_state=42, verbose=-1)
+    gbm.fit(X, y)
+
+    # Check that early stopping did not kick in
+    assert gbm._Booster.params.get("early_stopping_round") is None
+    assert gbm._Booster.num_trees() == n_estimators
+
+
+def test_early_stopping_is_deactivated_by_default_lambdarank():
+    rank_example_dir = Path(__file__).absolute().parents[2] / "examples" / "lambdarank"
+    X_train, y_train = load_svmlight_file(str(rank_example_dir / "rank.train"))
+    q_train = np.loadtxt(str(rank_example_dir / "rank.train.query"))
+    n_estimators = 5
+    gbm = lgb.LGBMRanker(n_estimators=n_estimators, random_state=42, verbose=-1)
+    gbm.fit(X_train, y_train, group=q_train)  # Assuming 10 samples in one group
```
**Review comment (jameslamb):** How does this code comment relate to the code? If it's left over from some earlier debugging, please remove it.
```diff
+
+    # Check that early stopping did not kick in
+    assert gbm._Booster.params.get("early_stopping_round") is None
+    assert gbm._Booster.num_trees() == n_estimators
+
+
+@pytest.mark.skipif(
+    getenv("TASK", "") == "cuda", reason="Skip due to differences in implementation details of CUDA version"
+)
+def test_auto_early_stopping_lambdarank():
+    rank_example_dir = Path(__file__).absolute().parents[2] / "examples" / "lambdarank"
+    X_train, y_train = load_svmlight_file(str(rank_example_dir / "rank.train"))
+    q_train = np.loadtxt(str(rank_example_dir / "rank.train.query"))
+    n_estimators = 5
+    gbm = lgb.LGBMRanker(n_estimators=n_estimators, random_state=42, early_stopping=True, num_leaves=5)
+    gbm.fit(
+        X_train,
+        y_train,
+        group=q_train,
+        eval_at=[1, 3],
+        callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))],
```
**Review comment (jameslamb):** Why is modifying the learning rate necessary for this test? If it is unnecessary and just copied from somewhere else, please remove it.
```diff
+    )
+    assert gbm._Booster.params["early_stopping_round"] == 10
+    assert gbm._Booster.num_trees() < n_estimators
+    assert gbm.best_iteration_ < n_estimators
+
+
 def test_dart():
     X, y = make_synthetic_regression()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
```
```diff
@@ -1168,6 +1274,7 @@ def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_
     "verbose": -1,
     "seed": 123,
     "early_stopping_rounds": 5,
+    "early_stopping": True,
```
**Review comment (jameslamb):** Was this necessary to make this test pass, or is it just being done for convenience? If it's necessary, could you explain why? If it was just for convenience, please introduce a new test after this one that tests what you want to test. This PR should only be adding functionality without breaking any existing user code. I'd be more confident that that's true if the tests showed only additions, with no modifications to existing tests.

**Reply (ClaudioSalvatoreArcidiacono):** Yes, this was a necessary change in order to make the test pass. With the code in its current state, passing `early_stopping_rounds` alone is no longer enough to enable early stopping; the new `early_stopping` parameter must also be set to True.

**Review comment (jameslamb):** Ok, that's a problem, and exactly why I have been so strict in asking that we only see new tests added in this PR. I think that also answers my questions from #5808 (comment). If the current state of this branch were merged, with this mix of behaviors, it would turn off early stopping for all existing code that enables it with `early_stopping_rounds`. That's a very big user-facing breaking change, and one I do not support. Please, let's keep the scope of this PR limited to what I've written in the title: "add scikit-learn-style API for early stopping". I think this is also coming from you trying to tie the new parameters to the existing validation-set handling in python-package/lightgbm/engine.py (lines 275 to 288 at 4feee28); I don't think we should do that. Then the only thing that has to be figured out is how many rounds to train without improvement before stopping. I gave a proposal for doing that here: #5808 (comment).

**Review comment (jameslamb):** @ClaudioSalvatoreArcidiacono, I'd like to propose that we take a different approach to getting this PR done. Could I take over this PR and push to it, to get it into a state that I'd feel comfortable merging? This has been taking a really large amount of both your and my time and energy, and I think this approach would get us to a resolution faster. I would only push new commits and merge commits (no force-pushing), so you could see everything I changed and revert any of it. Can we do that?

**Reply (ClaudioSalvatoreArcidiacono):** Hey @jameslamb, that statement is not correct: if the current state of this branch were merged, it would not enable early stopping automatically. The only non-backward-compatible change introduced at this stage is that passing `early_stopping_rounds` is no longer sufficient to enable early stopping (if the `early_stopping` parameter is left at False). I am definitely open to changing that. Regarding your last suggestion: I am extremely thankful for the effort you have invested in this PR, and I am sorry to hear it is taking more effort than you anticipated. I am putting my best intentions into this PR and definitely do not want to cause any harm to the library or waste any of your precious time unnecessarily. I would like to give this PR one last attempt, addressing all of your comments and making sure everything is spot on; I will tag you once I think the PR is ready for a last review. If you think that after this last attempt the gap to be filled is still too big, I will step aside and happily let you take over. Are you fine with that? :)

**Reply (ClaudioSalvatoreArcidiacono):** @jameslamb, do you think we should create an extra validation set even if a validation set has already been provided in `eval_set`? I think that if a validation set has been provided there, we should use it rather than split off another one.
```diff
     }  # early stop should be supported via global LightGBM parameter
     params_fit = {"X": X_train, "y": y_train}
```
**Review comment (jameslamb):** I think we should make these keyword-only arguments, as scikit-learn does: https://github.com/scikit-learn/scikit-learn/blob/6cccd99aee3483eb0f7562afdd3179ccccab0b1d/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py#L1689. Could you please try that (in these estimators and the Dask ones)? I don't want to do that for other existing parameters, to prevent breaking existing user code, but since these are new parameters, it's safe to be stricter.
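The suggestion amounts to placing the new parameters after a bare `*` in the signature. A minimal standalone illustration (hypothetical `Example` class, not the actual estimator):

```python
from typing import Optional

class Example:
    def __init__(
        self,
        importance_type: str = "split",
        *,  # parameters after the bare * can only be passed by keyword
        early_stopping: bool = False,
        n_iter_no_change: int = 10,
        validation_fraction: Optional[float] = 0.1,
    ):
        self.importance_type = importance_type
        self.early_stopping = early_stopping
        self.n_iter_no_change = n_iter_no_change
        self.validation_fraction = validation_fraction

Example(early_stopping=True)  # OK: passed by keyword
try:
    Example("split", True)  # passing early_stopping positionally
except TypeError as exc:
    print(exc)  # rejected: too many positional arguments
```

Being strict here from the start means positional calls can never creep into user code, so the parameters can be reordered or extended later without breakage.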