From 4072e9f793db14e2cdc950f6c6be65c5b912084a Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Wed, 1 Dec 2021 02:32:41 +0300
Subject: [PATCH] [python][sklearn] remove `verbose` argument from `fit()`
 method (#4832)

---
 python-package/lightgbm/dask.py            |  6 +--
 python-package/lightgbm/sklearn.py         | 35 +++----------
 tests/python_package_test/test_dask.py     |  3 +-
 tests/python_package_test/test_plotting.py |  6 +--
 tests/python_package_test/test_sklearn.py  | 57 ++++++++++------------
 5 files changed, 39 insertions(+), 68 deletions(-)

diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py
index 247accb481d4..4c5ec8e99ad9 100644
--- a/python-package/lightgbm/dask.py
+++ b/python-package/lightgbm/dask.py
@@ -1205,7 +1205,7 @@ def fit(
                  + _base_doc[_base_doc.find('eval_metric :'):])
 
     _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
-                 + _base_doc[_base_doc.find('verbose :'):])
+                 + _base_doc[_base_doc.find('feature_name :'):])
 
     # DaskLGBMClassifier support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
@@ -1388,7 +1388,7 @@ def fit(
                  + _base_doc[_base_doc.find('eval_metric :'):])
 
     _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
-                 + _base_doc[_base_doc.find('verbose :'):])
+                 + _base_doc[_base_doc.find('feature_name :'):])
 
     # DaskLGBMRegressor support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
@@ -1554,7 +1554,7 @@ def fit(
     _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
                  + "eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))\n"
                  + f"{' ':8}The evaluation positions of the specified metric.\n"
-                 + f"{' ':4}{_base_doc[_base_doc.find('verbose :'):]}")
+                 + f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}")
 
     # DaskLGBMRanker support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 7b332999509a..e760ea21e6bb 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -258,17 +258,6 @@ def __call__(self, preds, dataset):
         If there's more than one, will check all of them. But the training data is ignored anyway.
         To check only the first metric, set the ``first_metric_only`` parameter to ``True``
         in additional parameters ``**kwargs`` of the model constructor.
-    verbose : bool or int, optional (default=True)
-        Requires at least one evaluation data.
-        If True, the eval metric on the eval set is printed at each boosting stage.
-        If int, the eval metric on the eval set is printed at every ``verbose`` boosting stage.
-        The last boosting stage or the boosting stage found by using ``early_stopping_rounds`` is also printed.
-
-        .. rubric:: Example
-
-        With ``verbose`` = 4 and at least one item in ``eval_set``,
-        an evaluation metric is printed every 4 (instead of 1) boosting stages.
-
     feature_name : list of str, or 'auto', optional (default='auto')
         Feature names.
         If 'auto' and data is pandas DataFrame, data columns names are used.
@@ -597,7 +586,7 @@ def fit(self, X, y,
             sample_weight=None, init_score=None, group=None,
             eval_set=None, eval_names=None, eval_sample_weight=None,
             eval_class_weight=None, eval_init_score=None, eval_group=None,
-            eval_metric=None, early_stopping_rounds=None, verbose='warn',
+            eval_metric=None, early_stopping_rounds=None,
             feature_name='auto', categorical_feature='auto',
             callbacks=None, init_model=None):
         """Docstring is set after definition, using a template."""
@@ -752,16 +741,6 @@ def _get_meta_data(collection, name, i):
         else:
             callbacks = copy.copy(callbacks)  # don't use deepcopy here to allow non-serializable objects
 
-        if verbose != 'warn':
-            _log_warning("'verbose' argument is deprecated and will be removed in a future release of LightGBM. "
-                         "Pass 'log_evaluation()' callback via 'callbacks' argument instead.")
-        else:
-            if callbacks:  # assume user has already specified log_evaluation callback
-                verbose = False
-            else:
-                verbose = True
-            callbacks.append(log_evaluation(int(verbose)))
-
         evals_result = {}
         callbacks.append(record_evaluation(evals_result))
 
@@ -931,13 +910,13 @@ def fit(self, X, y,
             sample_weight=None, init_score=None,
             eval_set=None, eval_names=None, eval_sample_weight=None,
             eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
-            verbose='warn', feature_name='auto', categorical_feature='auto',
+            feature_name='auto', categorical_feature='auto',
             callbacks=None, init_model=None):
         """Docstring is inherited from the LGBMModel."""
         super().fit(X, y, sample_weight=sample_weight, init_score=init_score,
                     eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
                     eval_init_score=eval_init_score, eval_metric=eval_metric,
-                    early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name,
+                    early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
                     categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
         return self
 
@@ -957,7 +936,7 @@ def fit(self, X, y,
             sample_weight=None, init_score=None,
             eval_set=None, eval_names=None, eval_sample_weight=None,
             eval_class_weight=None, eval_init_score=None, eval_metric=None,
-            early_stopping_rounds=None, verbose='warn',
+            early_stopping_rounds=None,
             feature_name='auto', categorical_feature='auto',
             callbacks=None, init_model=None):
         """Docstring is inherited from the LGBMModel."""
@@ -1004,7 +983,7 @@ def fit(self, X, y,
                     eval_names=eval_names, eval_sample_weight=eval_sample_weight,
                     eval_class_weight=eval_class_weight, eval_init_score=eval_init_score,
                     eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds,
-                    verbose=verbose, feature_name=feature_name, categorical_feature=categorical_feature,
+                    feature_name=feature_name, categorical_feature=categorical_feature,
                     callbacks=callbacks, init_model=init_model)
         return self
 
@@ -1079,7 +1058,7 @@ def fit(self, X, y,
             sample_weight=None, init_score=None, group=None,
             eval_set=None, eval_names=None, eval_sample_weight=None,
             eval_init_score=None, eval_group=None, eval_metric=None,
-            eval_at=(1, 2, 3, 4, 5), early_stopping_rounds=None, verbose='warn',
+            eval_at=(1, 2, 3, 4, 5), early_stopping_rounds=None,
             feature_name='auto', categorical_feature='auto',
             callbacks=None, init_model=None):
         """Docstring is inherited from the LGBMModel."""
@@ -1103,7 +1082,7 @@ def fit(self, X, y,
         super().fit(X, y, sample_weight=sample_weight, init_score=init_score, group=group,
                     eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
                     eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric,
-                    early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name,
+                    early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
                     categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
         return self
 
diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 13d31e514625..1b35758ef1ba 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -900,8 +900,7 @@ def test_eval_set_no_early_stopping(task, output, eval_sizes, eval_names_prefix,
         'eval_names': eval_names,
         'eval_sample_weight': eval_sample_weight,
         'eval_init_score': eval_init_score,
-        'eval_metric': eval_metrics,
-        'verbose': True
+        'eval_metric': eval_metrics
     }
     if task == 'ranking':
         fit_params.update(
diff --git a/tests/python_package_test/test_plotting.py b/tests/python_package_test/test_plotting.py
index 9fb7a2897343..05bcb6da778b 100644
--- a/tests/python_package_test/test_plotting.py
+++ b/tests/python_package_test/test_plotting.py
@@ -143,7 +143,7 @@ def test_plot_split_value_histogram(params, breast_cancer_split, train_data):
 def test_plot_tree(breast_cancer_split):
     X_train, _, y_train, _ = breast_cancer_split
     gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1)
-    gbm.fit(X_train, y_train, verbose=False)
+    gbm.fit(X_train, y_train)
 
     with pytest.raises(IndexError):
         lgb.plot_tree(gbm, tree_index=83)
@@ -161,7 +161,7 @@ def test_create_tree_digraph(breast_cancer_split):
     constraints = [-1, 1] * int(X_train.shape[1] / 2)
     gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1,
                              monotone_constraints=constraints)
-    gbm.fit(X_train, y_train, verbose=False)
+    gbm.fit(X_train, y_train)
 
     with pytest.raises(IndexError):
         lgb.create_tree_digraph(gbm, tree_index=83)
@@ -265,7 +265,7 @@ def test_plot_metrics(params, breast_cancer_split, train_data):
         lgb.plot_metric(evals_result1)
 
     gbm2 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, verbose=-1)
-    gbm2.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
+    gbm2.fit(X_train, y_train, eval_set=[(X_test, y_test)])
     ax4 = lgb.plot_metric(gbm2, title=None, xlabel=None, ylabel=None)
     assert isinstance(ax4, matplotlib.axes.Axes)
     assert ax4.get_title() == ''
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index de4ff34a634a..abdf6e6045a7 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -92,7 +92,7 @@ def test_binary():
     X, y = load_breast_cancer(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1)
-    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
+    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
     ret = log_loss(y_test, gbm.predict_proba(X_test))
     assert ret < 0.12
     assert gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1] == pytest.approx(ret)
@@ -102,7 +102,7 @@ def test_regression():
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1)
-    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
+    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 7
     assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)
@@ -112,7 +112,7 @@ def test_multiclass():
     X, y = load_digits(n_class=10, return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1)
-    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
+    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
     ret = multi_error(y_test, gbm.predict(X_test))
     assert ret < 0.05
     ret = multi_logloss(y_test, gbm.predict_proba(X_test))
@@ -128,7 +128,7 @@ def test_lambdarank():
     q_test = np.loadtxt(str(rank_example_dir / 'rank.test.query'))
     gbm = lgb.LGBMRanker(n_estimators=50)
     gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
-            eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
+            eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10,
             callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
     assert gbm.best_iteration_ <= 24
     assert gbm.best_score_['valid_0']['ndcg@1'] > 0.5674
@@ -143,7 +143,7 @@ def test_xendcg():
     q_test = np.loadtxt(str(xendcg_example_dir / 'rank.test.query'))
     gbm = lgb.LGBMRanker(n_estimators=50, objective='rank_xendcg', random_state=5, n_jobs=1)
     gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
-            eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
+            eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10,
             eval_metric='ndcg',
             callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
     assert gbm.best_iteration_ <= 24
@@ -196,7 +196,7 @@ def test_regression_with_custom_objective():
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=50, verbose=-1, objective=objective_ls)
-    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
+    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 7.0
     assert gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1] == pytest.approx(ret)
@@ -206,7 +206,7 @@ def test_binary_classification_with_custom_objective():
     X, y = load_digits(n_class=2, return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMClassifier(n_estimators=50, verbose=-1, objective=logregobj)
-    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
+    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5)
     # prediction result is actually not transformed (is raw) due to custom objective
     y_pred_raw = gbm.predict_proba(X_test)
     assert not np.all(y_pred_raw >= 0)
@@ -285,8 +285,7 @@ def test_grid_search():
     grid_params = dict(boosting_type=['rf', 'gbdt'],
                        n_estimators=[4, 6],
                        reg_alpha=[0.01, 0.005])
-    fit_params = dict(verbose=False,
-                      eval_set=[(X_val, y_val)],
+    fit_params = dict(eval_set=[(X_val, y_val)],
                       eval_metric=constant_metric,
                       early_stopping_rounds=2)
     grid = GridSearchCV(estimator=lgb.LGBMClassifier(**params), param_grid=grid_params,
@@ -317,8 +316,7 @@ def test_random_search():
     param_dist = dict(boosting_type=['rf', 'gbdt'],
                       n_estimators=[np.random.randint(low=3, high=10) for i in range(n_iter)],
                       reg_alpha=[np.random.uniform(low=0.01, high=0.06) for i in range(n_iter)])
-    fit_params = dict(verbose=False,
-                      eval_set=[(X_val, y_val)],
+    fit_params = dict(eval_set=[(X_val, y_val)],
                       eval_metric=constant_metric,
                       early_stopping_rounds=2)
     rand = RandomizedSearchCV(estimator=lgb.LGBMClassifier(**params),
@@ -422,7 +420,7 @@ def test_regressor_chain():
 def test_clone_and_property():
     X, y = load_boston(return_X_y=True)
     gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
-    gbm.fit(X, y, verbose=False)
+    gbm.fit(X, y)
 
     gbm_clone = clone(gbm)
     assert isinstance(gbm.booster_, lgb.Booster)
@@ -430,7 +428,7 @@ def test_clone_and_property():
 
     X, y = load_digits(n_class=2, return_X_y=True)
     clf = lgb.LGBMClassifier(n_estimators=10, verbose=-1)
-    clf.fit(X, y, verbose=False)
+    clf.fit(X, y)
     assert sorted(clf.classes_) == [0, 1]
     assert clf.n_classes_ == 2
     assert isinstance(clf.booster_, lgb.Booster)
@@ -443,7 +441,7 @@ def test_joblib():
     gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
                             verbose=-1, importance_type='split')
     gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)],
-            eval_metric=mse, early_stopping_rounds=5, verbose=False,
+            eval_metric=mse, early_stopping_rounds=5,
             callbacks=[lgb.reset_parameter(learning_rate=list(np.arange(1, 0, -0.1)))])
 
     joblib.dump(gbm, 'lgb.pkl')  # test model with custom functions
@@ -695,7 +693,7 @@ def test_evaluate_train_set():
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
-    gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=False)
+    gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)])
     assert len(gbm.evals_result_) == 2
     assert 'training' in gbm.evals_result_
     assert len(gbm.evals_result_['training']) == 1
@@ -708,7 +706,7 @@ def test_evaluate_train_set():
 def test_metrics():
     X, y = load_boston(return_X_y=True)
     params = {'n_estimators': 2, 'verbose': -1}
-    params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
+    params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
 
     # no custom objective, no custom metric
     # default metric
@@ -750,8 +748,7 @@ def test_metrics():
     params_classification = {'n_estimators': 2, 'verbose': -1,
                              'objective': 'binary', 'metric': 'binary_logloss'}
     params_fit_classification = {'X': X_classification, 'y': y_classification,
-                                 'eval_set': (X_classification, y_classification),
-                                 'verbose': False}
+                                 'eval_set': (X_classification, y_classification)}
     gbm = lgb.LGBMClassifier(**params_classification).fit(eval_metric=['fair', 'error'],
                                                           **params_fit_classification)
     assert len(gbm.evals_result_['training']) == 3
@@ -930,7 +927,7 @@ def test_metrics():
     assert 'error' in gbm.evals_result_['training']
 
     X, y = load_digits(n_class=3, return_X_y=True)
-    params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
+    params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
 
     # default metric and invalid binary metric is replaced with multiclass alternative
     gbm = lgb.LGBMClassifier(**params).fit(eval_metric='binary_error', **params_fit)
@@ -955,7 +952,7 @@ def test_metrics():
     assert 'multi_error' in gbm.evals_result_['training']
 
     X, y = load_digits(n_class=2, return_X_y=True)
-    params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
+    params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
 
     # default metric and invalid multiclass metric is replaced with binary alternative
     gbm = lgb.LGBMClassifier(**params).fit(eval_metric='multi_error', **params_fit)
@@ -975,7 +972,7 @@ def test_multiple_eval_metrics():
     X, y = load_breast_cancer(return_X_y=True)
     params = {'n_estimators': 2, 'verbose': -1,
              'objective': 'binary', 'metric': 'binary_logloss'}
-    params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
+    params_fit = {'X': X, 'y': y, 'eval_set': (X, y)}
 
     # Verify that can receive a list of metrics, only callable
     gbm = lgb.LGBMClassifier(**params).fit(eval_metric=[constant_metric, decreasing_metric], **params_fit)
@@ -1016,7 +1013,7 @@ def test_inf_handle():
     weight = np.full(nrows, 1e10)
     params = {'n_estimators': 20, 'verbose': -1}
     params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
-                  'verbose': False, 'early_stopping_rounds': 5}
+                  'early_stopping_rounds': 5}
     gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
     np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.inf)
 
@@ -1029,7 +1026,7 @@ def test_nan_handle():
     weight = np.zeros(nrows)
     params = {'n_estimators': 20, 'verbose': -1}
     params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
-                  'verbose': False, 'early_stopping_rounds': 5}
+                  'early_stopping_rounds': 5}
     gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
     np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.nan)
 
@@ -1066,8 +1063,7 @@ def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_
               'seed': 123}
     params_fit = {'X': X_train,
                   'y': y_train,
-                  'early_stopping_rounds': 5,
-                  'verbose': False}
+                  'early_stopping_rounds': 5}
 
     iter_valid1_l1 = 3
     iter_valid1_l2 = 18
@@ -1146,8 +1142,7 @@ def test_class_weight():
     gbm.fit(X_train, y_train,
             eval_set=[(X_train, y_train), (X_test, y_test), (X_test, y_test), (X_test, y_test),
                       (X_test, y_test)],
-            eval_class_weight=['balanced', None, 'balanced', {1: 10, 4: 20}, {5: 30, 2: 40}],
-            verbose=False)
+            eval_class_weight=['balanced', None, 'balanced', {1: 10, 4: 20}, {5: 30, 2: 40}])
     for eval_set1, eval_set2 in itertools.combinations(gbm.evals_result_.keys(), 2):
         for metric in gbm.evals_result_[eval_set1]:
             np.testing.assert_raises(AssertionError,
@@ -1158,8 +1153,7 @@ def test_class_weight():
     gbm_str.fit(X_train, y_train_str,
                 eval_set=[(X_train, y_train_str), (X_test, y_test_str), (X_test, y_test_str),
                           (X_test, y_test_str), (X_test, y_test_str)],
-                eval_class_weight=['balanced', None, 'balanced', {'1': 10, '4': 20}, {'5': 30, '2': 40}],
-                verbose=False)
+                eval_class_weight=['balanced', None, 'balanced', {'1': 10, '4': 20}, {'5': 30, '2': 40}])
     for eval_set1, eval_set2 in itertools.combinations(gbm_str.evals_result_.keys(), 2):
         for metric in gbm_str.evals_result_[eval_set1]:
             np.testing.assert_raises(AssertionError,
@@ -1175,10 +1169,9 @@ def test_class_weight():
 def test_continue_training_with_model():
     X, y = load_digits(n_class=3, return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
-    init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test),
-                                                      verbose=False)
+    init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test))
     gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test),
-                                                 verbose=False, init_model=init_gbm)
+                                                 init_model=init_gbm)
     assert len(init_gbm.evals_result_['valid_0']['multi_logloss']) == len(gbm.evals_result_['valid_0']['multi_logloss'])
     assert len(init_gbm.evals_result_['valid_0']['multi_logloss']) == 5
     assert gbm.evals_result_['valid_0']['multi_logloss'][-1] < init_gbm.evals_result_['valid_0']['multi_logloss'][-1]
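
Migration note: code that previously passed `verbose` to `fit()` should switch
to the `log_evaluation()` callback, which is exactly what the deprecation shim
removed above used to install on the caller's behalf. A minimal sketch of the
migration (synthetic data; the period value is illustrative):

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(100, 4)
    y = np.random.rand(100)

    model = lgb.LGBMRegressor(n_estimators=20)

    # before this patch: model.fit(X, y, eval_set=[(X, y)], verbose=10)
    # after this patch: install the logging callback explicitly
    model.fit(
        X, y,
        eval_set=[(X, y)],
        callbacks=[lgb.log_evaluation(period=10)],  # print eval results every 10 rounds
    )

Omitting the callback reproduces the old `verbose=False` behavior (no
per-iteration logging). Either way, `fit()` still appends `record_evaluation`
internally, as the sklearn.py hunk above shows, so `model.evals_result_`
remains populated.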