From 4c627cb98822d77c2ec7b6497b27f91511bf2a5d Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sat, 14 Dec 2024 16:42:51 +0900 Subject: [PATCH 01/16] add pred_leaf to reshape if condition --- python-package/lightgbm/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 1db55385af1b..2f6ead08909d 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1248,7 +1248,7 @@ def predict( if pred_leaf: preds = preds.astype(np.int32) is_sparse = isinstance(preds, (list, scipy.sparse.spmatrix)) - if not is_sparse and preds.size != nrow: + if not is_sparse and preds.size != nrow or pred_leaf: if preds.size % nrow == 0: preds = preds.reshape(nrow, -1) else: From 52c6a315f347316f52b1689d2b9cd70289ac16e9 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sat, 14 Dec 2024 16:43:51 +0900 Subject: [PATCH 02/16] add refit with one tree test --- tests/python_package_test/test_engine.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index cb2e893c9612..ef20b78a40c4 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2307,6 +2307,15 @@ def test_refit(): assert err_pred > new_err_pred +def test_refit_with_one_tree(): + X, y = load_breast_cancer(return_X_y=True) + lgb_train = lgb.Dataset(X, label=y) + params={"objective": "binary", "num_trees": 1, "verbosity": -1} + model = lgb.train(params, lgb_train, num_boost_round=1) + model_refit = model.refit(X, y) + assert isinstance(model_refit, lgb.Booster) + + def test_refit_dataset_params(rng): # check refit accepts dataset_params X, y = load_breast_cancer(return_X_y=True) From 7bdbacb54ffc15223bcd6ef6ef288be73658517d Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sat, 14 Dec 2024 17:08:58 +0900 Subject: [PATCH 03/16] make code lint to pass lint test --- tests/python_package_test/test_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index ef20b78a40c4..226727ad6985 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2310,7 +2310,7 @@ def test_refit(): def test_refit_with_one_tree(): X, y = load_breast_cancer(return_X_y=True) lgb_train = lgb.Dataset(X, label=y) - params={"objective": "binary", "num_trees": 1, "verbosity": -1} + params = {"objective": "binary", "num_trees": 1, "verbosity": -1} model = lgb.train(params, lgb_train, num_boost_round=1) model_refit = model.refit(X, y) assert isinstance(model_refit, lgb.Booster) From 088a2b99e5c9dd78cf9ed27711cb0eec2c9dc00e Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 13:39:46 +0900 Subject: [PATCH 04/16] add pred_contrib to condition --- python-package/lightgbm/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 2f6ead08909d..87e9eac58ea9 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1248,7 +1248,7 @@ def predict( if pred_leaf: preds = preds.astype(np.int32) is_sparse = isinstance(preds, (list, scipy.sparse.spmatrix)) - if not is_sparse and preds.size != nrow or pred_leaf: + if not is_sparse and preds.size != nrow or (pred_leaf or pred_contrib): if preds.size % nrow == 0: preds = preds.reshape(nrow, -1) else: From b6bb3c93da3c28dfbdaf11748f6b03015a991155 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 13:41:41 +0900 Subject: [PATCH 05/16] remove num tree parameter --- tests/python_package_test/test_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 226727ad6985..2f6c0e6befd9 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2310,7 +2310,7 @@ def test_refit(): def test_refit_with_one_tree(): X, y = load_breast_cancer(return_X_y=True) lgb_train = lgb.Dataset(X, label=y) - params = {"objective": "binary", "num_trees": 1, "verbosity": -1} + params = {"objective": "binary", "verbosity": -1} model = lgb.train(params, lgb_train, num_boost_round=1) model_refit = model.refit(X, y) assert isinstance(model_refit, lgb.Booster) From ba39a6f3b466c7f0833751ee6cc300da6127d366 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 14:04:40 +0900 Subject: [PATCH 06/16] add explicit tests for pred_leaf shape --- tests/python_package_test/test_engine.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 2f6c0e6befd9..1c8945b8067d 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -15,7 +15,7 @@ import psutil import pytest from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr -from sklearn.datasets import load_svmlight_file, make_blobs, make_multilabel_classification +from sklearn.datasets import load_svmlight_file, make_blobs, make_multilabel_classification, make_regression from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_split @@ -2316,6 +2316,14 @@ def test_refit_with_one_tree(): assert isinstance(model_refit, lgb.Booster) +def test_pred_leaf_output_shape(): + X, y = make_regression(n_samples=10_000, n_features=10) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "regression", "verbosity": -1} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 1) + assert lgb.train(params, dtrain, num_boost_round=2).predict(X, pred_leaf=True).shape == (10_000, 2) + + def test_refit_dataset_params(rng): # check refit accepts dataset_params X, y = load_breast_cancer(return_X_y=True) From 46a0ddc7d01402e4607471605e416e572822927a Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 14:13:42 +0900 Subject: [PATCH 07/16] add regression example --- tests/python_package_test/test_engine.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 1c8945b8067d..5f59eb3ddcd0 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2315,6 +2315,13 @@ def test_refit_with_one_tree(): model_refit = model.refit(X, y) assert isinstance(model_refit, lgb.Booster) + X, y = make_regression(n_samples=10_000, n_features=10) + lgb_train = lgb.Dataset(X, label=y) + params = {"objective": "regression", "verbosity": -1} + model = lgb.train(params, lgb_train, num_boost_round=1) + model_refit = model.refit(X, y) + assert isinstance(model_refit, lgb.Booster) + def test_pred_leaf_output_shape(): X, y = make_regression(n_samples=10_000, n_features=10) From 03e41c61f919bf718d8c07fe416733e59e888611 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 15:02:23 +0900 Subject: [PATCH 08/16] add multiclass test and split tests --- tests/python_package_test/test_engine.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 5f59eb3ddcd0..cffc16d299db 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2307,7 +2307,16 @@ def test_refit(): assert err_pred > new_err_pred -def test_refit_with_one_tree(): +def test_refit_with_one_tree_regression(): + X, y = make_regression(n_samples=10_000, n_features=10) + lgb_train = lgb.Dataset(X, label=y) + params = {"objective": "regression", "verbosity": -1} + model = lgb.train(params, lgb_train, num_boost_round=1) + model_refit = model.refit(X, y) + assert isinstance(model_refit, lgb.Booster) + + +def test_refit_with_one_tree_binary_classification(): X, y = load_breast_cancer(return_X_y=True) lgb_train = lgb.Dataset(X, label=y) params = {"objective": "binary", "verbosity": -1} @@ -2315,9 +2324,11 @@ def test_refit_with_one_tree(): model_refit = model.refit(X, y) assert isinstance(model_refit, lgb.Booster) - X, y = make_regression(n_samples=10_000, n_features=10) - lgb_train = lgb.Dataset(X, label=y) - params = {"objective": "regression", "verbosity": -1} + +def test_refit_with_one_tree_multiclass_classification(): + X, y = load_iris(return_X_y=True) + lgb_train = lgb.Dataset(X, y) + params = {"objective": "multiclass", "num_class": 3, "verbose": -1} model = lgb.train(params, lgb_train, num_boost_round=1) model_refit = model.refit(X, y) assert isinstance(model_refit, lgb.Booster) From c09202ccca71780ace695851933f0ac8783bed03 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 15:25:56 +0900 Subject: [PATCH 09/16] change condition or in bracet --- python-package/lightgbm/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 87e9eac58ea9..13b3610e874c 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1248,7 +1248,7 @@ def predict( if pred_leaf: preds = preds.astype(np.int32) is_sparse = isinstance(preds, (list, scipy.sparse.spmatrix)) - if not is_sparse and preds.size != nrow or (pred_leaf or pred_contrib): + if not is_sparse and (preds.size != nrow or pred_leaf or pred_contrib): if preds.size % nrow == 0: preds = preds.reshape(nrow, -1) else: From d73f1894e7949e4cb7adb086d9d5c1288205cf1d Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 15:46:31 +0900 Subject: [PATCH 10/16] change test location and add additional shape test --- tests/python_package_test/test_engine.py | 73 +++++++++++++++++++++--- 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index cffc16d299db..f08040473d6b 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -15,7 +15,7 @@ import psutil import pytest from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr -from sklearn.datasets import load_svmlight_file, make_blobs, make_multilabel_classification, make_regression +from sklearn.datasets import load_svmlight_file, make_blobs, make_classification, make_multilabel_classification, make_regression from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_split @@ -2334,14 +2334,6 @@ def test_refit_with_one_tree_multiclass_classification(): assert isinstance(model_refit, lgb.Booster) -def test_pred_leaf_output_shape(): - X, y = make_regression(n_samples=10_000, n_features=10) - dtrain = lgb.Dataset(X, label=y) - params = {"objective": "regression", "verbosity": -1} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 1) - assert lgb.train(params, dtrain, num_boost_round=2).predict(X, pred_leaf=True).shape == (10_000, 2) - - def test_refit_dataset_params(rng): # check refit accepts dataset_params X, y = load_breast_cancer(return_X_y=True) @@ -3886,6 +3878,69 @@ def test_predict_stump(rng, use_init_score): np.testing.assert_allclose(preds_all, np.full_like(preds_all, fill_value=y_avg)) +def test_predict_regression_output_shape(): + X, y = make_regression(n_samples=10_000, n_features=10) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "regression", "verbosity": -1} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X).shape == (10_000,) + + +def test_predict_binary_classification_output_shape(): + X, y = make_classification(n_samples=10_000, n_features=10, n_classes=2) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "binary", "verbosity": -1} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X).shape == (10_000,) + + +def test_predict_multiclass_classification_output_shape(): + X, y = make_classification(n_samples=10_000, n_features=10, n_classes=3, n_informative=6) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "multiclass", "verbosity": -1, "num_class": 3} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X).shape == (10_000, 3) + + +def test_predict_leaf_regression_output_shape(): + X, y = make_regression(n_samples=10_000, n_features=10) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "regression", "verbosity": -1} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 1) + + +def test_predict_leaf_binary_classification_output_shape(): + X, y = make_classification(n_samples=10_000, n_features=10, n_classes=2) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "binary", "verbosity": -1} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 1) + + +def test_predict_leaf_multiclass_classification_output_shape(): + X, y = make_classification(n_samples=10_000, n_features=10, n_classes=3, n_informative=6) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "multiclass", "verbosity": -1, "num_class": 3} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 3) + + +def test_predict_contrib_regression_output_shape(): + X, y = make_regression(n_samples=10_000, n_features=10) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "regression", "verbosity": -1} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_contrib=True).shape == (10_000, 11) + + +def test_predict_contrib_binary_classification_output_shape(): + X, y = make_classification(n_samples=10_000, n_features=10, n_classes=2) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "binary", "verbosity": -1} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_contrib=True).shape == (10_000, 11) + + +def test_predict_contrib_multiclass_classification_output_shape(): + X, y = make_classification(n_samples=10_000, n_features=10, n_classes=3, n_informative=6) + dtrain = lgb.Dataset(X, label=y) + params = {"objective": "multiclass", "verbosity": -1, "num_class": 3} + assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_contrib=True).shape == (10_000, 33) + + def test_average_precision_metric(): # test against sklearn average precision metric X, y = load_breast_cancer(return_X_y=True) From eb256bc4f52a062fade0d5f4c2244c5d9979f8af Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 16:31:23 +0900 Subject: [PATCH 11/16] make isort happy --- tests/python_package_test/test_engine.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index f08040473d6b..9ca102c7cb01 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -15,7 +15,13 @@ import psutil import pytest from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr -from sklearn.datasets import load_svmlight_file, make_blobs, make_classification, make_multilabel_classification, make_regression +from sklearn.datasets import ( + load_svmlight_file, + make_blobs, + make_classification, + make_multilabel_classification, + make_regression, +) from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_split From e9101a11bd62d5d40e442f94ea06c29fa352b269 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Sun, 15 Dec 2024 16:34:38 +0900 Subject: [PATCH 12/16] remove whitespace --- tests/python_package_test/test_engine.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 9ca102c7cb01..60534bf061bc 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -16,10 +16,10 @@ import pytest from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr from sklearn.datasets import ( - load_svmlight_file, - make_blobs, - make_classification, - make_multilabel_classification, + load_svmlight_file, + make_blobs, + make_classification, + make_multilabel_classification, make_regression, ) from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score From 6ad5c498c6bacd1919a83266d060c8e50a8af2b6 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Tue, 17 Dec 2024 13:04:23 +0900 Subject: [PATCH 13/16] change smaller example refit test --- tests/python_package_test/test_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 60534bf061bc..499f65964860 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2314,7 +2314,7 @@ def test_refit(): def test_refit_with_one_tree_regression(): - X, y = make_regression(n_samples=10_000, n_features=10) + X, y = make_regression(n_samples=1_000, n_features=2) lgb_train = lgb.Dataset(X, label=y) params = {"objective": "regression", "verbosity": -1} model = lgb.train(params, lgb_train, num_boost_round=1) From 2b4bfd72dfcecf1c218f303bcb3dc050244c011d Mon Sep 17 00:00:00 2001 From: RektPunk Date: Tue, 17 Dec 2024 13:25:24 +0900 Subject: [PATCH 14/16] re-organize tests and remove make regression --- tests/python_package_test/test_engine.py | 86 +++++++++++++----------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 499f65964860..2e346ff804bd 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -15,13 +15,7 @@ import psutil import pytest from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr -from sklearn.datasets import ( - load_svmlight_file, - make_blobs, - make_classification, - make_multilabel_classification, - make_regression, -) +from sklearn.datasets import load_svmlight_file, make_blobs, make_classification, make_multilabel_classification from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_split @@ -2314,7 +2308,7 @@ def test_refit(): def test_refit_with_one_tree_regression(): - X, y = make_regression(n_samples=1_000, n_features=2) + X, y = make_synthetic_regression(n_samples=1_000, n_features=2) lgb_train = lgb.Dataset(X, label=y) params = {"objective": "regression", "verbosity": -1} model = lgb.train(params, lgb_train, num_boost_round=1) @@ -3885,38 +3879,64 @@ def test_predict_stump(rng, use_init_score): def test_predict_regression_output_shape(): - X, y = make_regression(n_samples=10_000, n_features=10) + n_samples = 1_000 + n_features = 4 + X, y = make_synthetic_regression(n_samples=n_samples, n_features=n_features) dtrain = lgb.Dataset(X, label=y) params = {"objective": "regression", "verbosity": -1} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X).shape == (10_000,) + + # 1-round model + bst = lgb.train(params, dtrain, num_boost_round=1) + assert bst.predict(X).shape == (n_samples,) + assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_features + 1) + assert bst.predict(X, pred_leaf=True).shape == (n_samples, 1) + + # 2-round model + bst = lgb.train(params, dtrain, num_boost_round=2) + assert bst.predict(X).shape == (n_samples,) + assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_features + 1) + assert bst.predict(X, pred_leaf=True).shape == (n_samples, 2) def test_predict_binary_classification_output_shape(): - X, y = make_classification(n_samples=10_000, n_features=10, n_classes=2) + n_samples = 1_000 + n_features = 4 + X, y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=2) dtrain = lgb.Dataset(X, label=y) params = {"objective": "binary", "verbosity": -1} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X).shape == (10_000,) + # 1-round model + bst = lgb.train(params, dtrain, num_boost_round=1) + assert bst.predict(X).shape == (n_samples,) + assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_features + 1) + assert bst.predict(X, pred_leaf=True).shape == (n_samples, 1) -def test_predict_multiclass_classification_output_shape(): - X, y = make_classification(n_samples=10_000, n_features=10, n_classes=3, n_informative=6) - dtrain = lgb.Dataset(X, label=y) - params = {"objective": "multiclass", "verbosity": -1, "num_class": 3} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X).shape == (10_000, 3) + # 2-round model + bst = lgb.train(params, dtrain, num_boost_round=2) + assert bst.predict(X).shape == (n_samples,) + assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_features + 1) + assert bst.predict(X, pred_leaf=True).shape == (n_samples, 2) -def test_predict_leaf_regression_output_shape(): - X, y = make_regression(n_samples=10_000, n_features=10) +def test_predict_multiclass_classification_output_shape(): + n_samples = 1_000 + n_features = 10 + n_classes = 3 + X, y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes, n_informative=6) dtrain = lgb.Dataset(X, label=y) - params = {"objective": "regression", "verbosity": -1} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 1) + params = {"objective": "multiclass", "verbosity": -1, "num_class": n_classes} + # 1-round model + bst = lgb.train(params, dtrain, num_boost_round=1) + assert bst.predict(X).shape == (n_samples, n_classes) + assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_classes * (n_features + 1)) + assert bst.predict(X, pred_leaf=True).shape == (n_samples, n_classes) -def test_predict_leaf_binary_classification_output_shape(): - X, y = make_classification(n_samples=10_000, n_features=10, n_classes=2) - dtrain = lgb.Dataset(X, label=y) - params = {"objective": "binary", "verbosity": -1} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 1) + # 2-round model + bst = lgb.train(params, dtrain, num_boost_round=2) + assert bst.predict(X).shape == (n_samples, n_classes) + assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_classes * (n_features + 1)) + assert bst.predict(X, pred_leaf=True).shape == (n_samples, n_classes * 2) def test_predict_leaf_multiclass_classification_output_shape(): @@ -3926,20 +3946,6 @@ def test_predict_leaf_multiclass_classification_output_shape(): assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 3) -def test_predict_contrib_regression_output_shape(): - X, y = make_regression(n_samples=10_000, n_features=10) - dtrain = lgb.Dataset(X, label=y) - params = {"objective": "regression", "verbosity": -1} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_contrib=True).shape == (10_000, 11) - - -def test_predict_contrib_binary_classification_output_shape(): - X, y = make_classification(n_samples=10_000, n_features=10, n_classes=2) - dtrain = lgb.Dataset(X, label=y) - params = {"objective": "binary", "verbosity": -1} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_contrib=True).shape == (10_000, 11) - - def test_predict_contrib_multiclass_classification_output_shape(): X, y = make_classification(n_samples=10_000, n_features=10, n_classes=3, n_informative=6) dtrain = lgb.Dataset(X, label=y) From 75489a5b6671d56fd568572f3a5dea5bacedc607 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Tue, 17 Dec 2024 13:26:25 +0900 Subject: [PATCH 15/16] remove meanless space --- tests/python_package_test/test_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 2e346ff804bd..cde22c9bc2e0 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -3890,7 +3890,7 @@ def test_predict_regression_output_shape(): assert bst.predict(X).shape == (n_samples,) assert bst.predict(X, pred_contrib=True).shape == (n_samples, n_features + 1) assert bst.predict(X, pred_leaf=True).shape == (n_samples, 1) - + # 2-round model bst = lgb.train(params, dtrain, num_boost_round=2) assert bst.predict(X).shape == (n_samples,) From 64574990ae928fcd59de1312cbd8db66255be072 Mon Sep 17 00:00:00 2001 From: RektPunk Date: Tue, 17 Dec 2024 13:30:10 +0900 Subject: [PATCH 16/16] remove multiclass tests --- tests/python_package_test/test_engine.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index cde22c9bc2e0..54363f28ca09 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -3939,20 +3939,6 @@ def test_predict_multiclass_classification_output_shape(): assert bst.predict(X, pred_leaf=True).shape == (n_samples, n_classes * 2) -def test_predict_leaf_multiclass_classification_output_shape(): - X, y = make_classification(n_samples=10_000, n_features=10, n_classes=3, n_informative=6) - dtrain = lgb.Dataset(X, label=y) - params = {"objective": "multiclass", "verbosity": -1, "num_class": 3} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_leaf=True).shape == (10_000, 3) - - -def test_predict_contrib_multiclass_classification_output_shape(): - X, y = make_classification(n_samples=10_000, n_features=10, n_classes=3, n_informative=6) - dtrain = lgb.Dataset(X, label=y) - params = {"objective": "multiclass", "verbosity": -1, "num_class": 3} - assert lgb.train(params, dtrain, num_boost_round=1).predict(X, pred_contrib=True).shape == (10_000, 33) - - def test_average_precision_metric(): # test against sklearn average precision metric X, y = load_breast_cancer(return_X_y=True)