From 95113b30e0b6cbd2ea1e7d6b48c36aae3e1491cb Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Sun, 31 Oct 2021 01:21:44 +0300
Subject: [PATCH 1/4] respect objective aliases

---
 python-package/lightgbm/sklearn.py        | 32 ++++++++++----------
 tests/python_package_test/test_sklearn.py | 36 +++++++++++++++++++----
 2 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index ac3957543894..41a003c0054f 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -578,21 +578,32 @@ def fit(self, X, y,
             feature_name='auto', categorical_feature='auto',
             callbacks=None, init_model=None):
         """Docstring is set after definition, using a template."""
+        params = self.get_params()
+
+        params.pop('objective', None)
+        for alias in _ConfigAliases.get('objective'):
+            if alias in params:
+                self._objective = params.pop(alias)
+                _log_warning(f"Found `{alias}` in params. Will use it instead of argument")
         if self._objective is None:
             if isinstance(self, LGBMRegressor):
                 self._objective = "regression"
             elif isinstance(self, LGBMClassifier):
-                self._objective = "binary"
+                if self._n_classes > 2:
+                    self._objective = "multiclass"
+                else:
+                    self._objective = "binary"
             elif isinstance(self, LGBMRanker):
                 self._objective = "lambdarank"
             else:
                 raise ValueError("Unknown LGBMModel type.")
         if callable(self._objective):
             self._fobj = _ObjectiveFunctionWrapper(self._objective)
+            params['objective'] = 'None'  # objective = nullptr for unknown objective
         else:
             self._fobj = None
+            params['objective'] = self._objective
 
-        params = self.get_params()
         # user can set verbose with kwargs, it has higher priority
         if self.silent != "warn":
             _log_warning("'silent' argument is deprecated and will be removed in a future release of LightGBM. "
@@ -603,13 +614,13 @@ def fit(self, X, y,
         if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
             params['verbose'] = -1
         params.pop('silent', None)
+
         params.pop('importance_type', None)
         params.pop('n_estimators', None)
         params.pop('class_weight', None)
+
         if isinstance(params['random_state'], np.random.RandomState):
             params['random_state'] = params['random_state'].randint(np.iinfo(np.int32).max)
-        for alias in _ConfigAliases.get('objective'):
-            params.pop(alias, None)
         if self._n_classes is not None and self._n_classes > 2:
             for alias in _ConfigAliases.get('num_class'):
                 params.pop(alias, None)
@@ -621,20 +632,15 @@ def fit(self, X, y,
                 _log_warning(f"Found '{alias}' in params. Will use it instead of 'eval_at' argument")
                 eval_at = params.pop(alias)
         params['eval_at'] = eval_at
-        params['objective'] = self._objective
-        if self._fobj:
-            params['objective'] = 'None'  # objective = nullptr for unknown objective
 
         # Do not modify original args in fit function
         # Refer to https://github.com/microsoft/LightGBM/pull/2619
         eval_metric_list = copy.deepcopy(eval_metric)
         if not isinstance(eval_metric_list, list):
             eval_metric_list = [eval_metric_list]
-
         # Separate built-in from callable evaluation metrics
         eval_metrics_callable = [_EvalFunctionWrapper(f) for f in eval_metric_list if callable(f)]
         eval_metrics_builtin = [m for m in eval_metric_list if isinstance(m, str)]
-
         # register default metric for consistency with callable eval_metric case
         original_metric = self._objective if isinstance(self._objective, str) else None
         if original_metric is None:
@@ -645,10 +651,8 @@ def fit(self, X, y,
                 original_metric = "multi_logloss" if self._n_classes > 2 else "binary_logloss"
             elif isinstance(self, LGBMRanker):
                 original_metric = "ndcg"
-
         # overwrite default metric by explicitly set metric
         params = _choose_param_value("metric", params, original_metric)
-
         # concatenate metric from params (or default if not provided in params) and eval_metric
         params['metric'] = [params['metric']] if isinstance(params['metric'], (str, type(None))) else params['metric']
         params['metric'] = [e for e in eval_metrics_builtin if e not in params['metric']] + params['metric']
@@ -930,12 +934,6 @@ def fit(self, X, y,
         self._classes = self._le.classes_
         self._n_classes = len(self._classes)
 
-        if self._n_classes > 2:
-            # Switch to using a multiclass objective in the underlying LGBM instance
-            ova_aliases = {"multiclassova", "multiclass_ova", "ova", "ovr"}
-            if self._objective not in ova_aliases and not callable(self._objective):
-                self._objective = "multiclass"
-
         if not callable(eval_metric):
             if isinstance(eval_metric, (str, type(None))):
                 eval_metric = [eval_metric]
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 152757c79634..ca69c978bc8c 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -157,13 +157,41 @@ def test_eval_at_aliases():
     X_test, y_test = load_svmlight_file(str(rank_example_dir / 'rank.test'))
     q_train = np.loadtxt(str(rank_example_dir / 'rank.train.query'))
     q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query'))
-    for alias in ('eval_at', 'ndcg_eval_at', 'ndcg_at', 'map_eval_at', 'map_at'):
+    for alias in lgb.basic._ConfigAliases.get('eval_at'):
         gbm = lgb.LGBMRanker(n_estimators=5, **{alias: [1, 2, 3, 9]})
         with pytest.warns(UserWarning, match=f"Found '{alias}' in params. Will use it instead of 'eval_at' argument"):
             gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], eval_group=[q_test])
         assert list(gbm.evals_result_['valid_0'].keys()) == ['ndcg@1', 'ndcg@2', 'ndcg@3', 'ndcg@9']
 
 
+@pytest.mark.parametrize("custom_objective", [True, False])
+def test_objective_aliases(custom_objective):
+    X, y = load_boston(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+    if custom_objective:
+        obj = custom_dummy_obj
+        metric_name = 'l2'  # default one
+    else:
+        obj = 'mape'
+        metric_name = 'mape'
+    evals = []
+    for alias in lgb.basic._ConfigAliases.get('objective'):
+        gbm = lgb.LGBMRegressor(n_estimators=5, **{alias: obj})
+        if alias != 'objective':
+            with pytest.warns(UserWarning, match=f"Found `{alias}` in params. Will use it instead of argument"):
+                gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+        else:
+            gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+        assert list(gbm.evals_result_['valid_0'].keys()) == [metric_name]
+        evals.append(gbm.evals_result_['valid_0'][metric_name])
+    evals_t = np.array(evals).T
+    for i in range(evals_t.shape[0]):
+        np.testing.assert_array_equal(evals_t[i], evals_t[i][0])
+    # check that really dummy objective was used and estimator didn't learn anything
+    if custom_objective:
+        np.testing.assert_array_equal(evals_t, evals_t[0][0])
+
+
 def test_regression_with_custom_objective():
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
@@ -910,10 +938,8 @@ def test_metrics():
     assert 'multi_logloss' in gbm.evals_result_['training']
     assert 'multi_error' in gbm.evals_result_['training']
 
-    # invalid objective is replaced with default multiclass one
-    # and invalid binary metric is replaced with multiclass alternative
-    gbm = lgb.LGBMClassifier(objective='invalid_obj',
-                             **params).fit(eval_metric='binary_error', **params_fit)
+    # invalid binary metric is replaced with multiclass alternative
+    gbm = lgb.LGBMClassifier(**params).fit(eval_metric='binary_error', **params_fit)
     assert gbm.objective_ == 'multiclass'
     assert len(gbm.evals_result_['training']) == 2
     assert 'multi_logloss' in gbm.evals_result_['training']
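The heart of PATCH 1/4 is the alias-resolution loop added at the top of fit(): the canonical 'objective' key is dropped from the params dict first (it already arrives via the constructor argument), and any remaining alias found in params then wins over that argument, with a warning. Below is a condensed, self-contained sketch of that pattern, not the library code itself: OBJECTIVE_ALIASES is a hard-coded stand-in for lightgbm.basic._ConfigAliases.get('objective'), resolve_objective is a hypothetical helper, and the warning text uses the wording finalized in PATCH 4/4.

import warnings

# Stand-in for _ConfigAliases.get('objective'); the real alias set comes from
# LightGBM's config machinery, this one is hard-coded for illustration only.
OBJECTIVE_ALIASES = {'objective', 'objective_type', 'app', 'application'}

def resolve_objective(objective, params):
    # Copy so the caller's dict is not mutated (the sklearn wrapper works on
    # the dict returned by self.get_params(), which is already a copy).
    params = dict(params)
    # The canonical key never triggers the warning: it is popped up front,
    # mirroring `params.pop('objective', None)` in the patch.
    params.pop('objective', None)
    for alias in OBJECTIVE_ALIASES:
        if alias in params:
            # An alias found in params overrides the argument.
            objective = params.pop(alias)
            warnings.warn(f"Found '{alias}' in params. Will use it instead of 'objective' argument")
    return objective, params

objective, params = resolve_objective('regression', {'application': 'mape', 'n_jobs': 2})
assert objective == 'mape' and params == {'n_jobs': 2}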
From 252f36cacee530269ecf25d631f6d5c0ef45b0ac Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Mon, 1 Nov 2021 16:03:51 +0300
Subject: [PATCH 2/4] Update test_sklearn.py

---
 tests/python_package_test/test_sklearn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index ca69c978bc8c..58e928722195 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -186,10 +186,10 @@ def test_objective_aliases(custom_objective):
         evals.append(gbm.evals_result_['valid_0'][metric_name])
     evals_t = np.array(evals).T
     for i in range(evals_t.shape[0]):
-        np.testing.assert_array_equal(evals_t[i], evals_t[i][0])
+        np.testing.assert_allclose(evals_t[i], evals_t[i][0])
     # check that really dummy objective was used and estimator didn't learn anything
     if custom_objective:
-        np.testing.assert_array_equal(evals_t, evals_t[0][0])
+        np.testing.assert_allclose(evals_t, evals_t[0][0])
 
 
 def test_regression_with_custom_objective():

From 771cb5859c704f5e2ed7a58d8abb5a2744604b68 Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Wed, 3 Nov 2021 16:51:49 +0300
Subject: [PATCH 3/4] revert removal of blank lines

---
 python-package/lightgbm/sklearn.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 41a003c0054f..eea6c933f36f 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -638,9 +638,11 @@ def fit(self, X, y,
         eval_metric_list = copy.deepcopy(eval_metric)
         if not isinstance(eval_metric_list, list):
             eval_metric_list = [eval_metric_list]
+
         # Separate built-in from callable evaluation metrics
         eval_metrics_callable = [_EvalFunctionWrapper(f) for f in eval_metric_list if callable(f)]
         eval_metrics_builtin = [m for m in eval_metric_list if isinstance(m, str)]
+
         # register default metric for consistency with callable eval_metric case
         original_metric = self._objective if isinstance(self._objective, str) else None
         if original_metric is None:
@@ -651,8 +653,10 @@ def fit(self, X, y,
                 original_metric = "multi_logloss" if self._n_classes > 2 else "binary_logloss"
             elif isinstance(self, LGBMRanker):
                 original_metric = "ndcg"
+
         # overwrite default metric by explicitly set metric
         params = _choose_param_value("metric", params, original_metric)
+
         # concatenate metric from params (or default if not provided in params) and eval_metric
         params['metric'] = [params['metric']] if isinstance(params['metric'], (str, type(None))) else params['metric']
         params['metric'] = [e for e in eval_metrics_builtin if e not in params['metric']] + params['metric']

From 2d0392ef6c5e5a8670dcc22684c63afe9faaa2ba Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Wed, 3 Nov 2021 17:01:16 +0300
Subject: [PATCH 4/4] add argument name which is being overwritten in warning
 message

---
 python-package/lightgbm/sklearn.py        | 2 +-
 tests/python_package_test/test_sklearn.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index eea6c933f36f..fefc69bfe468 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -584,7 +584,7 @@ def fit(self, X, y,
         for alias in _ConfigAliases.get('objective'):
             if alias in params:
                 self._objective = params.pop(alias)
-                _log_warning(f"Found `{alias}` in params. Will use it instead of argument")
+                _log_warning(f"Found '{alias}' in params. Will use it instead of 'objective' argument")
         if self._objective is None:
             if isinstance(self, LGBMRegressor):
                 self._objective = "regression"
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 58e928722195..f9c6ecf4be6b 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -178,7 +178,7 @@ def test_objective_aliases(custom_objective):
     for alias in lgb.basic._ConfigAliases.get('objective'):
         gbm = lgb.LGBMRegressor(n_estimators=5, **{alias: obj})
         if alias != 'objective':
-            with pytest.warns(UserWarning, match=f"Found `{alias}` in params. Will use it instead of argument"):
+            with pytest.warns(UserWarning, match=f"Found '{alias}' in params. Will use it instead of 'objective' argument"):
                 gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)])
         else:
             gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)])
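Taken together, the series means the sklearn wrapper respects objective aliases passed as keyword arguments instead of silently dropping them, and the default multiclass objective for LGBMClassifier is now selected in fit() alongside the other per-estimator defaults. A minimal end-to-end sketch of the resulting behavior, assuming a LightGBM build that includes these four patches; the random data is purely illustrative:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(42)
X = rng.random((100, 4))
y = rng.random(100)

# 'application' is a documented alias of 'objective'. With this series applied,
# fit() pops it from the params, keeps it as the model's objective, and warns:
# "Found 'application' in params. Will use it instead of 'objective' argument"
gbm = lgb.LGBMRegressor(n_estimators=5, application='mape')
gbm.fit(X, y)
assert gbm.objective_ == 'mape'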