[python][sklearn] respect objective aliases #4758

Merged · 4 commits · Nov 10, 2021
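For context, here is a minimal sketch of the behavior this PR enables (the dataset and parameter values are illustrative, not from the PR): an objective passed through one of its aliases, such as 'application', is now picked up by the sklearn wrapper instead of being overridden by the default objective.

import lightgbm as lgb
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, random_state=42)

# 'application' is one of LightGBM's aliases for 'objective'. With this PR,
# fit() emits a UserWarning saying the alias will be used instead of the
# 'objective' argument, and the model actually trains with 'mape'.
model = lgb.LGBMRegressor(n_estimators=5, application='mape')
model.fit(X, y)
print(model.objective_)  # 'mape'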
Changes from all commits
python-package/lightgbm/sklearn.py — 28 changes: 15 additions & 13 deletions
@@ -578,21 +578,32 @@ def fit(self, X, y,
             feature_name='auto', categorical_feature='auto',
             callbacks=None, init_model=None):
         """Docstring is set after definition, using a template."""
+        params = self.get_params()
+
+        params.pop('objective', None)
+        for alias in _ConfigAliases.get('objective'):
+            if alias in params:
+                self._objective = params.pop(alias)
+                _log_warning(f"Found '{alias}' in params. Will use it instead of 'objective' argument")
         if self._objective is None:
             if isinstance(self, LGBMRegressor):
                 self._objective = "regression"
             elif isinstance(self, LGBMClassifier):
-                self._objective = "binary"
+                if self._n_classes > 2:
+                    self._objective = "multiclass"
+                else:
+                    self._objective = "binary"
             elif isinstance(self, LGBMRanker):
                 self._objective = "lambdarank"
             else:
                 raise ValueError("Unknown LGBMModel type.")
         if callable(self._objective):
             self._fobj = _ObjectiveFunctionWrapper(self._objective)
+            params['objective'] = 'None'  # objective = nullptr for unknown objective
         else:
             self._fobj = None
+            params['objective'] = self._objective
 
-        params = self.get_params()
         # user can set verbose with kwargs, it has higher priority
         if self.silent != "warn":
             _log_warning("'silent' argument is deprecated and will be removed in a future release of LightGBM. "
@@ -603,13 +614,13 @@ def fit(self, X, y,
         if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
             params['verbose'] = -1
         params.pop('silent', None)
+
         params.pop('importance_type', None)
         params.pop('n_estimators', None)
         params.pop('class_weight', None)
+
         if isinstance(params['random_state'], np.random.RandomState):
             params['random_state'] = params['random_state'].randint(np.iinfo(np.int32).max)
-        for alias in _ConfigAliases.get('objective'):
-            params.pop(alias, None)
         if self._n_classes is not None and self._n_classes > 2:
             for alias in _ConfigAliases.get('num_class'):
                 params.pop(alias, None)
@@ -621,9 +632,6 @@ def fit(self, X, y,
                     _log_warning(f"Found '{alias}' in params. Will use it instead of 'eval_at' argument")
                     eval_at = params.pop(alias)
             params['eval_at'] = eval_at
-        params['objective'] = self._objective
-        if self._fobj:
-            params['objective'] = 'None'  # objective = nullptr for unknown objective
 
         # Do not modify original args in fit function
         # Refer to https://github.com/microsoft/LightGBM/pull/2619
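The three lines removed above used to set params['objective'] late in fit(); that logic now sits next to the alias resolution at the top of the method. When the objective is callable, the wrapper passes the string 'None' to the backend and routes gradients through _ObjectiveFunctionWrapper. A hedged sketch of the custom-objective signature the sklearn API expects, here reproducing plain L2 loss:

import numpy as np

def l2_objective(y_true, y_pred):
    """Custom sklearn-API objective: return per-sample (gradient, hessian)."""
    grad = y_pred - y_true        # d/dy_pred of 0.5 * (y_pred - y_true) ** 2
    hess = np.ones_like(y_pred)   # second derivative is the constant 1
    return grad, hess

# After this PR, a callable works through any objective alias as well, e.g.
# lgb.LGBMRegressor(application=l2_objective).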
@@ -930,12 +938,6 @@ def fit(self, X, y,
         self._classes = self._le.classes_
         self._n_classes = len(self._classes)
 
-        if self._n_classes > 2:
-            # Switch to using a multiclass objective in the underlying LGBM instance
-            ova_aliases = {"multiclassova", "multiclass_ova", "ova", "ovr"}
-            if self._objective not in ova_aliases and not callable(self._objective):
-                self._objective = "multiclass"
-
         if not callable(eval_metric):
             if isinstance(eval_metric, (str, type(None))):
                 eval_metric = [eval_metric]
tests/python_package_test/test_sklearn.py — 36 changes: 31 additions & 5 deletions
@@ -157,13 +157,41 @@ def test_eval_at_aliases():
     X_test, y_test = load_svmlight_file(str(rank_example_dir / 'rank.test'))
     q_train = np.loadtxt(str(rank_example_dir / 'rank.train.query'))
     q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query'))
-    for alias in ('eval_at', 'ndcg_eval_at', 'ndcg_at', 'map_eval_at', 'map_at'):
+    for alias in lgb.basic._ConfigAliases.get('eval_at'):
         gbm = lgb.LGBMRanker(n_estimators=5, **{alias: [1, 2, 3, 9]})
         with pytest.warns(UserWarning, match=f"Found '{alias}' in params. Will use it instead of 'eval_at' argument"):
             gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)], eval_group=[q_test])
         assert list(gbm.evals_result_['valid_0'].keys()) == ['ndcg@1', 'ndcg@2', 'ndcg@3', 'ndcg@9']
 
 
+@pytest.mark.parametrize("custom_objective", [True, False])
+def test_objective_aliases(custom_objective):
+    X, y = load_boston(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+    if custom_objective:
+        obj = custom_dummy_obj
+        metric_name = 'l2'  # default one
+    else:
+        obj = 'mape'
+        metric_name = 'mape'
+    evals = []
+    for alias in lgb.basic._ConfigAliases.get('objective'):
+        gbm = lgb.LGBMRegressor(n_estimators=5, **{alias: obj})
+        if alias != 'objective':
+            with pytest.warns(UserWarning, match=f"Found '{alias}' in params. Will use it instead of 'objective' argument"):
+                gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+        else:
+            gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+        assert list(gbm.evals_result_['valid_0'].keys()) == [metric_name]
+        evals.append(gbm.evals_result_['valid_0'][metric_name])
+    evals_t = np.array(evals).T
+    for i in range(evals_t.shape[0]):
+        np.testing.assert_allclose(evals_t[i], evals_t[i][0])
+    # check that the dummy objective was really used and the estimator didn't learn anything
+    if custom_objective:
+        np.testing.assert_allclose(evals_t, evals_t[0][0])
+
+
 def test_regression_with_custom_objective():
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
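The custom_dummy_obj helper used in the new test is defined elsewhere in test_sklearn.py and does not appear in this diff. A plausible sketch — an assumption, not the file's actual definition — is an objective with constant gradient and hessian, so no split has positive gain, the booster never learns, and the evaluation results stay flat across rounds, which is exactly what the final assertion checks:

import numpy as np

def custom_dummy_obj(y_true, y_pred):
    # Assumed shape of the helper: constant gradient and hessian mean every
    # candidate split has zero gain, so predictions (and metrics) never change.
    return np.ones_like(y_true), np.ones_like(y_true)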
@@ -910,10 +938,8 @@ def test_metrics():
     assert 'multi_logloss' in gbm.evals_result_['training']
     assert 'multi_error' in gbm.evals_result_['training']
 
-    # invalid objective is replaced with default multiclass one
-    # and invalid binary metric is replaced with multiclass alternative
-    gbm = lgb.LGBMClassifier(objective='invalid_obj',
-                             **params).fit(eval_metric='binary_error', **params_fit)
Comment on lines -913 to -916

@StrikerRUS (Collaborator, Author) commented on Nov 1, 2021:

    I believe this behavior is controversial; I'd rather remove such silent replacement of the objective function and raise an error instead.

A collaborator replied:

    I agree. We should explicitly tell the user that what they intended to use is not valid, or at least give a warning.

+    # invalid binary metric is replaced with multiclass alternative
+    gbm = lgb.LGBMClassifier(**params).fit(eval_metric='binary_error', **params_fit)
     assert gbm.objective_ == 'multiclass'
     assert len(gbm.evals_result_['training']) == 2
     assert 'multi_logloss' in gbm.evals_result_['training']
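Following up on the review thread above, here is a hypothetical sketch of the stricter behavior the reviewers lean toward: raising an error for an unknown objective rather than silently substituting one. This is not part of this PR, and the function name and alias set are illustrative:

def check_objective(objective, valid_objectives):
    """Fail fast on an unknown objective instead of silently replacing it."""
    if objective is None or callable(objective):
        return objective  # defaults and custom callables pass through
    if objective not in valid_objectives:
        raise ValueError(f"Unknown objective '{objective}'.")
    return objective

# e.g. check_objective('invalid_obj', {'multiclass', 'multiclassova', 'ova', 'ovr'})
# would raise ValueError instead of falling back to 'multiclass'.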