Flatten 2-D numpy input with an explicit dtype and memory order.

_np2d_to_np1d now chooses the dtype (float32/float64 kept, anything else
becomes float32) and the order ("F" if the input is F-contiguous, else "C")
up front, converts with np.asarray, and ravels in that same order.
__init_from_np2d reports column-major only for F-contiguous input; row-major
and non-contiguous input are both flattened and reported as row-major.
The column-major test drops an unused prediction.

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 6d52f670c0fd..36d5a92ede23 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -189,11 +189,18 @@ def _get_sample_count(total_nrow: int, params: str) -> int:
 
 
 def _np2d_to_np1d(mat: np.ndarray) -> np.ndarray:
-    data = mat.ravel(order="A")  # keeps memory layout
-    if data.dtype not in (np.float32, np.float64):
-        # change non-float data to float data, need to copy
-        data = data.astype(np.float32)
-    return data
+    if mat.dtype in (np.float32, np.float64):
+        dtype = mat.dtype
+    else:
+        dtype = np.float32
+    if mat.flags["F_CONTIGUOUS"]:
+        order = "F"
+    else:
+        order = "C"
+    # ensure dtype and order, copies if either do not match
+    data = np.asarray(mat, dtype=dtype, order=order)
+    # flatten array without copying
+    return data.ravel(order=order)
 
 
 class _MissingType(Enum):
@@ -2307,10 +2314,12 @@ def __init_from_np2d(
 
         self._handle = ctypes.c_void_p()
         data = _np2d_to_np1d(mat)
-        if mat.flags["C_CONTIGUOUS"]:
-            layout = _C_API_IS_ROW_MAJOR
-        else:
+        if mat.flags["F_CONTIGUOUS"]:
             layout = _C_API_IS_COL_MAJOR
+        else:
+            # the array was row major or not contiguous (slice)
+            # in any case we flatten as row-major
+            layout = _C_API_IS_ROW_MAJOR
 
         ptr_data, type_ptr_data, _ = _c_float_array(data)
         _safe_call(
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 07735daeec86..fa50868a20e0 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -4621,7 +4621,6 @@ def test_train_with_column_major_dataset():
     assert X_row.flags["C_CONTIGUOUS"]
     ds_row = lgb.Dataset(X_row, y)
     bst_row = lgb.train(params, ds_row, num_boost_round=rounds)
-    pred_row = bst_row.predict(X_row)
     # check that we didn't get a trivial model
     dumped_row = bst_row.dump_model()
     assert len(dumped_row["tree_info"]) == rounds