diff --git a/.ci/test-python-latest.sh b/.ci/test-python-latest.sh
new file mode 100755
index 000000000000..08fc8558ef3e
--- /dev/null
+++ b/.ci/test-python-latest.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+set -e -E -u -o pipefail
+
+# latest versions of lightgbm's dependencies,
+# including pre-releases and nightlies
+#
+# ref: https://github.com/pydata/xarray/blob/31111b3afe44fd6f7dac363264e94186cc5168d2/.github/workflows/upstream-dev-ci.yaml
+echo "installing testing dependencies"
+python -m pip install \
+    cloudpickle \
+    psutil \
+    pytest
+echo "done installing testing dependencies"
+
+echo "installing lightgbm's dependencies"
+python -m pip install \
+    --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
+    --prefer-binary \
+    --pre \
+    --upgrade \
+    'numpy>=2.0.0.dev0' \
+    'matplotlib>=3.10.0.dev0' \
+    'pandas>=3.0.0.dev0' \
+    'scikit-learn>=1.6.dev0' \
+    'scipy>=1.15.0.dev0'
+
+python -m pip install \
+    --extra-index-url https://pypi.fury.io/arrow-nightlies/ \
+    --prefer-binary \
+    --pre \
+    --upgrade \
+    'pyarrow>=17.0.0.dev0'
+
+python -m pip install \
+    'cffi>=1.15.1'
+
+echo "done installing lightgbm's dependencies"
+
+echo "installing lightgbm"
+pip install --no-deps dist/*.whl
+echo "done installing lightgbm"
+
+echo "installed package versions:"
+pip freeze
+
+echo ""
+echo "running tests"
+pytest tests/c_api_test/
+pytest tests/python_package_test/
diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml
index 73fbc78a2ea1..83149a078cf6 100644
--- a/.github/workflows/python_package.yml
+++ b/.github/workflows/python_package.yml
@@ -75,6 +75,33 @@ jobs:
           export PATH=${CONDA}/bin:${PATH}
           $GITHUB_WORKSPACE/.ci/setup.sh || exit 1
           $GITHUB_WORKSPACE/.ci/test.sh || exit 1
+  test-latest-versions:
+    name: Python - latest versions (ubuntu-latest)
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 5
+          submodules: true
+      - name: Create wheel
+        run: |
+          docker run \
+            --rm \
+            --env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
+            -v $(pwd):/opt/lgb-build \
+            -w /opt/lgb-build \
+            lightgbm/vsts-agent:manylinux_2_28_x86_64 \
+            /bin/bash -c 'PATH=/opt/miniforge/bin:$PATH sh ./build-python.sh bdist_wheel --nomp'
+      - name: Test compatibility
+        run: |
+          docker run \
+            --rm \
+            -v $(pwd):/opt/lgb-build \
+            -w /opt/lgb-build \
+            python:3.11 \
+            /bin/bash ./.ci/test-python-latest.sh
   test-oldest-versions:
     name: Python - oldest supported versions (ubuntu-latest)
     runs-on: ubuntu-latest
@@ -89,6 +116,7 @@ jobs:
         run: |
           docker run \
             --rm \
+            --env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
            -v $(pwd):/opt/lgb-build \
            -w /opt/lgb-build \
            lightgbm/vsts-agent:manylinux_2_28_x86_64 \
@@ -104,7 +132,7 @@ jobs:
   all-python-package-jobs-successful:
     if: always()
     runs-on: ubuntu-latest
-    needs: [test, test-oldest-versions]
+    needs: [test, test-latest-versions, test-oldest-versions]
     steps:
       - name: Note that all tests succeeded
         uses: re-actors/alls-green@v1.2.2
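The new CI script above installs nightly wheels of numpy, pandas, pyarrow, scikit-learn, scipy, and matplotlib from the scientific-python and arrow-nightlies indexes, then installs the freshly built LightGBM wheel with `--no-deps` so pip cannot resolve its way back to stable releases. If you wanted to confirm inside the container that the pre-releases actually won, a minimal sketch follows (not part of this patch; it assumes the `packaging` distribution is importable, which the script above does not guarantee):

```python
# Hypothetical post-install check: verify that the forward-pinned dependencies
# resolved to pre-releases (nightly builds carry ".dev" version segments).
from importlib.metadata import version

from packaging.version import Version  # assumption: packaging is installed

for dist in ["numpy", "pandas", "pyarrow", "scikit-learn", "scipy"]:
    v = Version(version(dist))
    print(f"{dist}=={v} (pre-release: {v.is_prerelease})")
```

The `pip freeze` in the script serves the same purpose for human readers of the CI logs; a check like this sketch would only matter if you wanted a machine-verifiable guarantee that nightlies, not stable releases, were installed.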
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 5bfb8dcbbb58..373c9911303a 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -356,10 +356,10 @@ def _list_to_1d_numpy(
         array = data.ravel()
         return _cast_numpy_array_to_dtype(array, dtype)
     elif _is_1d_list(data):
-        return np.array(data, dtype=dtype, copy=False)
+        return np.asarray(data, dtype=dtype)
     elif isinstance(data, pd_Series):
         _check_for_bad_pandas_dtypes(data.to_frame().dtypes)
-        return np.array(data, dtype=dtype, copy=False)  # SparseArray should be supported as well
+        return np.asarray(data, dtype=dtype)  # SparseArray should be supported as well
     else:
         raise TypeError(
             f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
         )
@@ -728,7 +728,7 @@ def _convert_from_sliced_object(data: np.ndarray) -> np.ndarray:
 def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray]:
     """Get pointer of float numpy array / list."""
     if _is_1d_list(data):
-        data = np.array(data, copy=False)
+        data = np.asarray(data)
     if _is_numpy_1d_array(data):
         data = _convert_from_sliced_object(data)
         assert data.flags.c_contiguous
@@ -749,7 +749,7 @@ def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray]:
 def _c_int_array(data: np.ndarray) -> Tuple[_ctypes_int_ptr, int, np.ndarray]:
     """Get pointer of int numpy array / list."""
     if _is_1d_list(data):
-        data = np.array(data, copy=False)
+        data = np.asarray(data)
     if _is_numpy_1d_array(data):
         data = _convert_from_sliced_object(data)
         assert data.flags.c_contiguous
@@ -1270,7 +1270,7 @@ def __inner_predict_np2d(
         preds: Optional[np.ndarray],
     ) -> Tuple[np.ndarray, int]:
         if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
+            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
         else:  # change non-float data to float data, need to copy
             data = np.array(mat.reshape(mat.size), dtype=np.float32)
         ptr_data, type_ptr_data, _ = _c_float_array(data)
@@ -2285,9 +2285,9 @@ def __init_from_np2d(
 
         self._handle = ctypes.c_void_p()
         if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
+            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
         else:  # change non-float data to float data, need to copy
-            data = np.array(mat.reshape(mat.size), dtype=np.float32)
+            data = np.asarray(mat.reshape(mat.size), dtype=np.float32)
 
         ptr_data, type_ptr_data, _ = _c_float_array(data)
         _safe_call(
@@ -2332,7 +2332,7 @@ def __init_from_list_np2d(
             nrow[i] = mat.shape[0]
 
             if mat.dtype == np.float32 or mat.dtype == np.float64:
-                mats[i] = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
+                mats[i] = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
             else:  # change non-float data to float data, need to copy
                 mats[i] = np.array(mat.reshape(mat.size), dtype=np.float32)
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 5f93824458d4..9325cf50c203 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -512,7 +512,7 @@ def _make_n_folds(
     if hasattr(folds, "split"):
         group_info = full_data.get_group()
         if group_info is not None:
-            group_info = np.array(group_info, dtype=np.int32, copy=False)
+            group_info = np.asarray(group_info, dtype=np.int32)
             flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
         else:
             flatted_group = np.zeros(num_data, dtype=np.int32)
@@ -526,7 +526,7 @@ def _make_n_folds(
             if not SKLEARN_INSTALLED:
                 raise LightGBMError("scikit-learn is required for ranking cv")
             # ranking task, split according to groups
-            group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False)
+            group_info = np.asarray(full_data.get_group(), dtype=np.int32)
             flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
             group_kfold = _LGBMGroupKFold(n_splits=nfold)
             folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group)
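Every `np.array(..., copy=False)` in the two library modules above becomes `np.asarray(...)`. This is the NumPy 2.0 compatibility fix at the heart of the PR: under NumPy 2.0 (NEP 56), `copy=False` changed meaning from "avoid a copy if possible" to "never copy, and raise if a copy is required", while `np.asarray` keeps the old copy-only-when-needed semantics on both major versions. A standalone illustration (not taken from the patch):

```python
import numpy as np

values = [1.0, 2.5, 3.0]  # a Python list can never be wrapped zero-copy

# Works on NumPy 1.x and 2.x alike: copies only when it must.
as_array = np.asarray(values, dtype=np.float64)

try:
    np.array(values, dtype=np.float64, copy=False)
except ValueError:
    # Raised on NumPy >= 2.0: "Unable to avoid copy while creating
    # an array as requested."
    print("copy=False now means 'never copy'")
```

When the input is already an ndarray of the requested dtype, `np.asarray` returns it unchanged, so the hot paths above keep their zero-copy behavior on both NumPy 1.x and 2.x.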
diff --git a/tests/c_api_test/test_.py b/tests/c_api_test/test_.py
index 0abd40ecec31..77fb7f6e8ead 100644
--- a/tests/c_api_test/test_.py
+++ b/tests/c_api_test/test_.py
@@ -125,7 +125,7 @@ def load_from_mat(filename, reference):
     mat = np.loadtxt(str(filename), dtype=np.float64)
     label = mat[:, 0].astype(np.float32)
     mat = mat[:, 1:]
-    data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
+    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
     handle = ctypes.c_void_p()
     ref = None
     if reference is not None:
@@ -203,7 +203,7 @@ def test_booster():
     mat = data[:, 1:]
     preb = np.empty(mat.shape[0], dtype=np.float64)
     num_preb = ctypes.c_int64(0)
-    data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
+    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
     LIB.LGBM_BoosterPredictForMat(
         booster2,
         data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py
index ab871048a949..3a7e0f8d4fce 100644
--- a/tests/python_package_test/test_arrow.py
+++ b/tests/python_package_test/test_arrow.py
@@ -20,6 +20,10 @@
 else:
     import pyarrow as pa  # type: ignore
 
+    assert (
+        lgb.compat.PYARROW_INSTALLED is True
+    ), "'pyarrow' and its dependencies must be installed to run the arrow tests"
+
 # ----------------------------------------------------------------------------------------------- #
 #                                            UTILITIES                                             #
 # ----------------------------------------------------------------------------------------------- #
diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index 14a621a1604f..f3367c59f911 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -777,7 +777,10 @@ def test_custom_objective_safety(rng):
 def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name, rng):
     pd = pytest.importorskip("pandas")
     X = rng.uniform(size=(10, 2)).astype(dtype)
-    df = pd.DataFrame(X)
+    # copy=False is necessary because starting with pandas 3.0, pd.DataFrame() creates
+    # a copy of the input numpy array by default
+    # ref: https://github.com/pandas-dev/pandas/issues/58913
+    df = pd.DataFrame(X, copy=False)
     built_data = lgb.basic._data_from_pandas(
         data=df, feature_name=feature_name, categorical_feature="auto", pandas_categorical=None
     )[0]
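The `test_basic.py` change works around the pandas behavior change tracked in pandas-dev/pandas#58913: starting with pandas 3.0, `pd.DataFrame(ndarray)` copies its input by default, which would make this zero-copy test fail even though LightGBM itself copies nothing. A hedged sketch of the difference follows (the `np.shares_memory` probe mirrors what the test asserts, but the snippet is illustrative, not part of the patch):

```python
import numpy as np
import pandas as pd

X = np.random.default_rng(0).uniform(size=(10, 2))

df_default = pd.DataFrame(X)              # pandas >= 3.0: copies X by default
df_no_copy = pd.DataFrame(X, copy=False)  # explicitly reuse X's buffer

# Expected on pandas 3.x (older versions did not copy ndarray input by default):
print(np.shares_memory(X, df_default.to_numpy()))  # False
print(np.shares_memory(X, df_no_copy.to_numpy()))  # True
```

Passing `copy=False` keeps the test exercising what it was written for: that a single-float-dtype DataFrame reaches the C API without an intermediate copy.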