Commit e0cda88

[python-package] remove uses of deprecated NumPy random number generation APIs, require 'numpy>=1.17.0' (#6468)

jameslamb authored Jun 4, 2024
1 parent ebac9e8 commit e0cda88
Showing 10 changed files with 223 additions and 218 deletions.
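
The same pattern is applied across all of these files: the legacy np.random.* functions draw from a hidden global RandomState, while NumPy 1.17+ provides the explicit Generator API via np.random.default_rng(). A minimal before/after sketch of the substitutions used below (illustrative, not taken verbatim from the diff):

import numpy as np

# legacy API: seeding and drawing both mutate one hidden global RandomState
np.random.seed(0)
a = np.random.rand(500, 10)         # uniform floats in [0, 1)
b = np.random.randint(2, size=500)  # ints in [0, 2)

# Generator API: random state is an explicit, local object
rng = np.random.default_rng(seed=0)
a = rng.uniform(size=(500, 10))
b = rng.integers(low=0, high=2, size=500)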
2 changes: 1 addition & 1 deletion .gitignore
@@ -405,7 +405,7 @@ python-package/lightgbm/VERSION.txt
 
 # R build artefacts
 **/autom4te.cache/
-conftest*
+R-package/conftest*
 R-package/config.status
 !R-package/data/agaricus.test.rda
 !R-package/data/agaricus.train.rda
14 changes: 9 additions & 5 deletions docs/Python-Intro.rst
@@ -59,8 +59,9 @@ Many of the examples in this page use functionality from ``numpy``. To run the e
 
 .. code:: python
 
-    data = np.random.rand(500, 10)  # 500 entities, each contains 10 features
-    label = np.random.randint(2, size=500)  # binary target
+    rng = np.random.default_rng()
+    data = rng.uniform(size=(500, 10))  # 500 entities, each contains 10 features
+    label = rng.integers(low=0, high=2, size=(500, ))  # binary target
     train_data = lgb.Dataset(data, label=label)
 
 **To load a scipy.sparse.csr\_matrix array into Dataset:**
@@ -139,15 +140,17 @@ It doesn't need to convert to one-hot encoding, and is much faster than one-hot
 
 .. code:: python
 
-    w = np.random.rand(500, )
+    rng = np.random.default_rng()
+    w = rng.uniform(size=(500, ))
     train_data = lgb.Dataset(data, label=label, weight=w)
 
 or
 
 .. code:: python
 
     train_data = lgb.Dataset(data, label=label)
-    w = np.random.rand(500, )
+    rng = np.random.default_rng()
+    w = rng.uniform(size=(500, ))
     train_data.set_weight(w)
 
 And you can use ``Dataset.set_init_score()`` to set initial score, and ``Dataset.set_group()`` to set group/query data for ranking tasks.
@@ -249,7 +252,8 @@ A model that has been trained or loaded can perform predictions on datasets:
 
 .. code:: python
 
     # 7 entities, each contains 10 features
-    data = np.random.rand(7, 10)
+    rng = np.random.default_rng()
+    data = rng.uniform(size=(7, 10))
     ypred = bst.predict(data)
 
 If early stopping is enabled during training, you can get predictions from the best iteration with ``bst.best_iteration``:
9 changes: 4 additions & 5 deletions examples/python-guide/logistic_regression.py
@@ -22,15 +22,15 @@
 #################
 # Simulate some binary data with a single categorical and
 # single continuous predictor
-np.random.seed(0)
+rng = np.random.default_rng(seed=0)
 N = 1000
 X = pd.DataFrame({"continuous": range(N), "categorical": np.repeat([0, 1, 2, 3, 4], N / 5)})
 CATEGORICAL_EFFECTS = [-1, -1, -2, -2, 2]
 LINEAR_TERM = np.array(
     [-0.5 + 0.01 * X["continuous"][k] + CATEGORICAL_EFFECTS[X["categorical"][k]] for k in range(X.shape[0])]
-) + np.random.normal(0, 1, X.shape[0])
+) + rng.normal(loc=0, scale=1, size=X.shape[0])
 TRUE_PROB = expit(LINEAR_TERM)
-Y = np.random.binomial(1, TRUE_PROB, size=N)
+Y = rng.binomial(n=1, p=TRUE_PROB, size=N)
 DATA = {
     "X": X,
     "probability_labels": TRUE_PROB,
@@ -65,10 +65,9 @@ def experiment(objective, label_type, data):
     result : dict
         Experiment summary stats.
     """
-    np.random.seed(0)
     nrounds = 5
     lgb_data = data[f"lgb_with_{label_type}_labels"]
-    params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1}
+    params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1, "seed": 123}
     time_zero = time.time()
     gbm = lgb.train(params, lgb_data, num_boost_round=nrounds)
     y_fitted = gbm.predict(data["X"])
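
Note that the np.random.seed(0) removed from experiment() has no one-for-one replacement: a Generator's state is local to the object, so reproducibility of training itself now comes from LightGBM's own "seed" parameter. A sketch of that guarantee, assuming a single machine and a fixed thread count (shapes and values are illustrative; bit-for-bit repeatability under varying threading may additionally need LightGBM's "deterministic" parameter):

import lightgbm as lgb
import numpy as np


def fit_once():
    rng = np.random.default_rng(seed=0)  # data generation seeded locally
    X = rng.uniform(size=(200, 3))
    y = rng.integers(low=0, high=2, size=200)
    # training randomness is controlled by LightGBM's own seed, not global NumPy state
    params = {"objective": "binary", "verbose": -1, "seed": 123}
    return lgb.train(params, lgb.Dataset(X, label=y), num_boost_round=3)


# two independent runs produce identical models
assert fit_once().model_to_string() == fit_once().model_to_string()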
12 changes: 0 additions & 12 deletions python-package/lightgbm/compat.py
@@ -37,18 +37,6 @@ def __init__(self, *args: Any, **kwargs: Any):
 
 concat = None
 
-"""numpy"""
-try:
-    from numpy.random import Generator as np_random_Generator
-except ImportError:
-
-    class np_random_Generator:  # type: ignore
-        """Dummy class for np.random.Generator."""
-
-        def __init__(self, *args: Any, **kwargs: Any):
-            pass
-
-
 """matplotlib"""
 try:
     import matplotlib  # noqa: F401
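
With numpy>=1.17.0 now a hard requirement (see the pyproject.toml change below), np.random.Generator is always importable, so the try/except shim above is unnecessary and call sites can reference the class directly. A quick illustration of the assumption this relies on:

import numpy as np

# Generator was introduced in NumPy 1.17, the new minimum version,
# so this import path can be relied on unconditionally
assert isinstance(np.random.default_rng(), np.random.Generator)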
5 changes: 2 additions & 3 deletions python-package/lightgbm/sklearn.py
@@ -41,7 +41,6 @@
     _LGBMModelBase,
     _LGBMRegressorBase,
     dt_DataTable,
-    np_random_Generator,
     pd_DataFrame,
 )
 from .engine import train
@@ -476,7 +475,7 @@ def __init__(
         colsample_bytree: float = 1.0,
         reg_alpha: float = 0.0,
         reg_lambda: float = 0.0,
-        random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None,
+        random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
         n_jobs: Optional[int] = None,
         importance_type: str = "split",
         **kwargs: Any,
@@ -739,7 +738,7 @@ def _process_params(self, stage: str) -> Dict[str, Any]:
 
         if isinstance(params["random_state"], np.random.RandomState):
             params["random_state"] = params["random_state"].randint(np.iinfo(np.int32).max)
-        elif isinstance(params["random_state"], np_random_Generator):
+        elif isinstance(params["random_state"], np.random.Generator):
             params["random_state"] = int(params["random_state"].integers(np.iinfo(np.int32).max))
         if self._n_classes > 2:
             for alias in _ConfigAliases.get("num_class"):
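
For the scikit-learn interface, random_state continues to accept an int, a RandomState, or a Generator (each reduced to an int seed internally); the commit only changes how the Generator type is referenced. A minimal usage sketch (data shapes are illustrative):

import lightgbm as lgb
import numpy as np

rng = np.random.default_rng(seed=42)
X = rng.uniform(size=(100, 5))
y = rng.integers(low=0, high=2, size=100)

# random_state may be an int, a RandomState, or a Generator
clf = lgb.LGBMClassifier(random_state=np.random.default_rng(seed=42), n_estimators=5)
clf.fit(X, y)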
4 changes: 3 additions & 1 deletion python-package/pyproject.toml
@@ -19,7 +19,7 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Artificial Intelligence"
 ]
 dependencies = [
-    "numpy",
+    "numpy>=1.17.0",
     "scipy"
 ]
 description = "LightGBM Python Package"
@@ -156,6 +156,8 @@ select = [
     "E",
     # pyflakes
     "F",
+    # NumPy-specific rules
+    "NPY",
     # pylint
     "PL",
     # flake8-return: unnecessary assignment before return
12 changes: 12 additions & 0 deletions tests/python_package_test/conftest.py
@@ -0,0 +1,12 @@
+import numpy as np
+import pytest
+
+
+@pytest.fixture(scope="function")
+def rng():
+    return np.random.default_rng()
+
+
+@pytest.fixture(scope="function")
+def rng_fixed_seed():
+    return np.random.default_rng(seed=42)
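
A hypothetical test consuming these fixtures, to show the intended usage (the test body is illustrative, not part of this commit):

import lightgbm as lgb


def test_train_on_random_data(rng_fixed_seed):
    X = rng_fixed_seed.uniform(size=(100, 5))
    y = rng_fixed_seed.integers(low=0, high=2, size=100)
    bst = lgb.train({"objective": "binary", "verbose": -1}, lgb.Dataset(X, label=y), num_boost_round=2)
    assert bst.num_trees() == 2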