From 1090a93b39e16f49621aa6824cd09d4390c3678a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?=
Date: Sat, 14 Dec 2024 23:45:10 -0600
Subject: [PATCH] [python-package] do not copy column-major numpy arrays when predicting (#6751)

---
 python-package/lightgbm/basic.py         |  7 ++-----
 tests/python_package_test/test_engine.py | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 1db55385af1b..0f2e3697f6ec 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -1291,10 +1291,7 @@ def __inner_predict_np2d(
         predict_type: int,
         preds: Optional[np.ndarray],
     ) -> Tuple[np.ndarray, int]:
-        if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
-        else:  # change non-float data to float data, need to copy
-            data = np.array(mat.reshape(mat.size), dtype=np.float32)
+        data, layout = _np2d_to_np1d(mat)
         ptr_data, type_ptr_data, _ = _c_float_array(data)
         n_preds = self.__get_num_preds(
             start_iteration=start_iteration,
@@ -1314,7 +1311,7 @@ def __inner_predict_np2d(
                 ctypes.c_int(type_ptr_data),
                 ctypes.c_int32(mat.shape[0]),
                 ctypes.c_int32(mat.shape[1]),
-                ctypes.c_int(_C_API_IS_ROW_MAJOR),
+                ctypes.c_int(layout),
                 ctypes.c_int(predict_type),
                 ctypes.c_int(start_iteration),
                 ctypes.c_int(num_iteration),
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index cb2e893c9612..05afddb77c77 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -4611,3 +4611,18 @@ def test_bagging_by_query_in_lambdarank():
     ndcg_score_no_bagging_by_query = gbm_no_bagging_by_query.best_score["valid_0"]["ndcg@5"]
     assert ndcg_score_bagging_by_query >= ndcg_score - 0.1
     assert ndcg_score_no_bagging_by_query >= ndcg_score - 0.1
+
+
+def test_equal_predict_from_row_major_and_col_major_data():
+    X_row, y = make_synthetic_regression()
+    assert X_row.flags["C_CONTIGUOUS"] and not X_row.flags["F_CONTIGUOUS"]
+    ds = lgb.Dataset(X_row, y)
+    params = {"num_leaves": 8, "verbose": -1}
+    bst = lgb.train(params, ds, num_boost_round=5)
+    preds_row = bst.predict(X_row)
+
+    X_col = np.asfortranarray(X_row)
+    assert X_col.flags["F_CONTIGUOUS"] and not X_col.flags["C_CONTIGUOUS"]
+    preds_col = bst.predict(X_col)
+
+    np.testing.assert_allclose(preds_row, preds_col)