[python-package] Add support for NumPy 2.0, test against nightly versions of dependencies (fixes #6454) (#6467)
jameslamb authored Jun 13, 2024
1 parent 6392682 commit 1e7ebc5
Showing 7 changed files with 99 additions and 14 deletions.
50 changes: 50 additions & 0 deletions .ci/test-python-latest.sh
@@ -0,0 +1,50 @@
#!/bin/bash

set -e -E -u -o pipefail

# latest versions of lightgbm's dependencies,
# including pre-releases and nightlies
#
# ref: https://github.com/pydata/xarray/blob/31111b3afe44fd6f7dac363264e94186cc5168d2/.github/workflows/upstream-dev-ci.yaml
echo "installing testing dependencies"
python -m pip install \
    cloudpickle \
    psutil \
    pytest
echo "done installing testing dependencies"

echo "installing lightgbm's dependencies"
python -m pip install \
    --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
    --prefer-binary \
    --pre \
    --upgrade \
    'numpy>=2.0.0.dev0' \
    'matplotlib>=3.10.0.dev0' \
    'pandas>=3.0.0.dev0' \
    'scikit-learn>=1.6.dev0' \
    'scipy>=1.15.0.dev0'

python -m pip install \
    --extra-index-url https://pypi.fury.io/arrow-nightlies/ \
    --prefer-binary \
    --pre \
    --upgrade \
    'pyarrow>=17.0.0.dev0'

python -m pip install \
    'cffi>=1.15.1'

echo "done installing lightgbm's dependencies"

echo "installing lightgbm"
pip install --no-deps dist/*.whl
echo "done installing lightgbm"

echo "installed package versions:"
pip freeze

echo ""
echo "running tests"
pytest tests/c_api_test/
pytest tests/python_package_test/
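A quick way to confirm the extra indexes actually supplied pre-release wheels (a minimal sketch, not part of the commit; it assumes it runs in the environment the script just set up):

import numpy as np

# if the default PyPI index had won, this would be a stable 1.x release
major = int(np.__version__.split(".")[0])
assert major >= 2, f"expected a NumPy 2.x nightly, got {np.__version__}"
print(f"testing against numpy=={np.__version__}")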
30 changes: 29 additions & 1 deletion .github/workflows/python_package.yml
@@ -75,6 +75,33 @@ jobs:
          export PATH=${CONDA}/bin:${PATH}
          $GITHUB_WORKSPACE/.ci/setup.sh || exit 1
          $GITHUB_WORKSPACE/.ci/test.sh || exit 1
+  test-latest-versions:
+    name: Python - latest versions (ubuntu-latest)
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 5
+          submodules: true
+      - name: Create wheel
+        run: |
+          docker run \
+            --rm \
+            --env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
+            -v $(pwd):/opt/lgb-build \
+            -w /opt/lgb-build \
+            lightgbm/vsts-agent:manylinux_2_28_x86_64 \
+            /bin/bash -c 'PATH=/opt/miniforge/bin:$PATH sh ./build-python.sh bdist_wheel --nomp'
+      - name: Test compatibility
+        run: |
+          docker run \
+            --rm \
+            -v $(pwd):/opt/lgb-build \
+            -w /opt/lgb-build \
+            python:3.11 \
+            /bin/bash ./.ci/test-python-latest.sh
  test-oldest-versions:
    name: Python - oldest supported versions (ubuntu-latest)
    runs-on: ubuntu-latest
@@ -89,6 +116,7 @@
        run: |
          docker run \
            --rm \
+            --env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
            -v $(pwd):/opt/lgb-build \
            -w /opt/lgb-build \
            lightgbm/vsts-agent:manylinux_2_28_x86_64 \
@@ -104,7 +132,7 @@
  all-python-package-jobs-successful:
    if: always()
    runs-on: ubuntu-latest
-    needs: [test, test-oldest-versions]
+    needs: [test, test-latest-versions, test-oldest-versions]
    steps:
      - name: Note that all tests succeeded
        uses: re-actors/[email protected]
16 changes: 8 additions & 8 deletions python-package/lightgbm/basic.py
@@ -356,10 +356,10 @@ def _list_to_1d_numpy(
        array = data.ravel()
        return _cast_numpy_array_to_dtype(array, dtype)
    elif _is_1d_list(data):
-        return np.array(data, dtype=dtype, copy=False)
+        return np.asarray(data, dtype=dtype)
    elif isinstance(data, pd_Series):
        _check_for_bad_pandas_dtypes(data.to_frame().dtypes)
-        return np.array(data, dtype=dtype, copy=False)  # SparseArray should be supported as well
+        return np.asarray(data, dtype=dtype)  # SparseArray should be supported as well
    else:
        raise TypeError(
            f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
@@ -728,7 +728,7 @@ def _convert_from_sliced_object(data: np.ndarray) -> np.ndarray:
def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray]:
    """Get pointer of float numpy array / list."""
    if _is_1d_list(data):
-        data = np.array(data, copy=False)
+        data = np.asarray(data)
    if _is_numpy_1d_array(data):
        data = _convert_from_sliced_object(data)
        assert data.flags.c_contiguous
@@ -749,7 +749,7 @@ def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray]:
def _c_int_array(data: np.ndarray) -> Tuple[_ctypes_int_ptr, int, np.ndarray]:
    """Get pointer of int numpy array / list."""
    if _is_1d_list(data):
-        data = np.array(data, copy=False)
+        data = np.asarray(data)
    if _is_numpy_1d_array(data):
        data = _convert_from_sliced_object(data)
        assert data.flags.c_contiguous
@@ -1270,7 +1270,7 @@ def __inner_predict_np2d(
        preds: Optional[np.ndarray],
    ) -> Tuple[np.ndarray, int]:
        if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
+            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
        else:  # change non-float data to float data, need to copy
            data = np.array(mat.reshape(mat.size), dtype=np.float32)
        ptr_data, type_ptr_data, _ = _c_float_array(data)
@@ -2285,9 +2285,9 @@ def __init_from_np2d(

        self._handle = ctypes.c_void_p()
        if mat.dtype == np.float32 or mat.dtype == np.float64:
-            data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
+            data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
        else:  # change non-float data to float data, need to copy
-            data = np.array(mat.reshape(mat.size), dtype=np.float32)
+            data = np.asarray(mat.reshape(mat.size), dtype=np.float32)

        ptr_data, type_ptr_data, _ = _c_float_array(data)
        _safe_call(
@@ -2332,7 +2332,7 @@ def __init_from_list_np2d(
            nrow[i] = mat.shape[0]

            if mat.dtype == np.float32 or mat.dtype == np.float64:
-                mats[i] = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
+                mats[i] = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
            else:  # change non-float data to float data, need to copy
                mats[i] = np.array(mat.reshape(mat.size), dtype=np.float32)
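The pattern behind every np.array(..., copy=False) -> np.asarray(...) replacement above: NumPy 2.0 changed copy=False from "copy only if unavoidable" to "never copy, raise if a copy would be required", while np.asarray keeps the old copy-on-demand behavior on both major versions. A minimal sketch of the difference (not part of the commit):

import numpy as np

values = [1.0, 2.0, 3.0]  # a plain Python list can never be wrapped without copying

# NumPy 1.x: copy=False meant "avoid a copy when possible", so this succeeded.
# NumPy 2.0: copy=False means "never copy", so this raises ValueError.
try:
    arr = np.array(values, dtype=np.float64, copy=False)
except ValueError:
    # np.asarray preserves the old semantics: copy only when required
    arr = np.asarray(values, dtype=np.float64)

assert arr.dtype == np.float64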
4 changes: 2 additions & 2 deletions python-package/lightgbm/engine.py
@@ -512,7 +512,7 @@ def _make_n_folds(
    if hasattr(folds, "split"):
        group_info = full_data.get_group()
        if group_info is not None:
-            group_info = np.array(group_info, dtype=np.int32, copy=False)
+            group_info = np.asarray(group_info, dtype=np.int32)
            flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
        else:
            flatted_group = np.zeros(num_data, dtype=np.int32)
@@ -526,7 +526,7 @@
            if not SKLEARN_INSTALLED:
                raise LightGBMError("scikit-learn is required for ranking cv")
            # ranking task, split according to groups
-            group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False)
+            group_info = np.asarray(full_data.get_group(), dtype=np.int32)
            flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
            group_kfold = _LGBMGroupKFold(n_splits=nfold)
            folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group)
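For context on the unchanged surrounding logic: get_group() returns per-query group sizes, and np.repeat expands them into one group id per row so that _LGBMGroupKFold can keep each query's rows in the same fold. A small illustration with a hypothetical two-query dataset (not part of the commit):

import numpy as np

# hypothetical ranking data: query 0 contributes 2 rows, query 1 contributes 3
group_info = np.asarray([2, 3], dtype=np.int32)

# one group id per data row, as _make_n_folds builds flatted_group
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
print(flatted_group)  # [0 0 1 1 1]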
4 changes: 2 additions & 2 deletions tests/c_api_test/test_.py
@@ -125,7 +125,7 @@ def load_from_mat(filename, reference):
    mat = np.loadtxt(str(filename), dtype=np.float64)
    label = mat[:, 0].astype(np.float32)
    mat = mat[:, 1:]
-    data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
+    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
    handle = ctypes.c_void_p()
    ref = None
    if reference is not None:
@@ -203,7 +203,7 @@ def test_booster():
    mat = data[:, 1:]
    preb = np.empty(mat.shape[0], dtype=np.float64)
    num_preb = ctypes.c_int64(0)
-    data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
+    data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
    LIB.LGBM_BoosterPredictForMat(
        booster2,
        data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
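Both call sites flatten the matrix before handing a pointer to the C API. reshape on a C-contiguous array returns a view, and np.asarray with a matching dtype returns its input unchanged, so the whole path stays copy-free; a short demonstration (not part of the commit):

import numpy as np

mat = np.arange(6, dtype=np.float64).reshape(2, 3)  # C-contiguous matrix

flat = np.asarray(mat.reshape(mat.size), dtype=np.float64)
assert np.shares_memory(mat, flat)  # reshape of a contiguous array is a view
assert flat.flags.c_contiguous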
4 changes: 4 additions & 0 deletions tests/python_package_test/test_arrow.py
@@ -20,6 +20,10 @@
else:
    import pyarrow as pa  # type: ignore

+assert (
+    lgb.compat.PYARROW_INSTALLED is True
+), "'pyarrow' and its dependencies must be installed to run the arrow tests"
+
# ----------------------------------------------------------------------------------------------- #
#                                            UTILITIES                                             #
# ----------------------------------------------------------------------------------------------- #
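The new assertion makes the arrow tests fail loudly, rather than silently skip, if pyarrow is missing from the latest-versions environment. PYARROW_INSTALLED comes from lightgbm's compat module; a hypothetical sketch of the guarded-import pattern it relies on (illustrative, not the module's exact code):

try:
    import pyarrow as pa

    PYARROW_INSTALLED = True
except ImportError:
    PYARROW_INSTALLED = False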
5 changes: 4 additions & 1 deletion tests/python_package_test/test_basic.py
@@ -777,7 +777,10 @@ def test_custom_objective_safety(rng):
def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name, rng):
    pd = pytest.importorskip("pandas")
    X = rng.uniform(size=(10, 2)).astype(dtype)
-    df = pd.DataFrame(X)
+    # copy=False is necessary because starting with pandas 3.0, pd.DataFrame() creates
+    # a copy of the input numpy array by default
+    # ref: https://github.com/pandas-dev/pandas/issues/58913
+    df = pd.DataFrame(X, copy=False)
    built_data = lgb.basic._data_from_pandas(
        data=df, feature_name=feature_name, categorical_feature="auto", pandas_categorical=None
    )[0]
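The test's intent is that a single-dtype float DataFrame hands LightGBM the original NumPy buffer rather than a copy. A minimal sketch of what copy=False preserves, assuming pandas keeps zero-copy construction for a homogeneous float block (not part of the commit):

import numpy as np
import pandas as pd

X = np.random.default_rng(42).uniform(size=(10, 2)).astype(np.float64)

df = pd.DataFrame(X, copy=False)  # wrap X instead of copying it
assert np.shares_memory(X, df.values)  # the DataFrame still aliases X's buffer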
