From c633c6c2afc42816b9f1e3d522ec1eb02ca4e11e Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 10 Apr 2020 06:53:21 +0300 Subject: [PATCH] [python] Re-enable scikit-learn 0.22+ support (#2949) * Revert "specify the last supported version of scikit-learn (#2637)" This reverts commit d100277649ecbd40ed7b9b35ecf7a0df86ddc10a. * ban scikit-learn 0.22.0 and skip broken test * fix updated test * fix lint test * Revert "fix lint test" This reverts commit 8b4db0805fe7a9e7f7eb0be3eac231f85026d196. --- .ci/test.sh | 2 +- .ci/test_windows.ps1 | 2 +- docker/dockerfile-python | 2 +- docker/gpu/dockerfile.gpu | 4 ++-- docs/GPU-Tutorial.rst | 4 ++-- docs/Python-API.rst | 4 ---- docs/Python-Intro.rst | 4 ++-- examples/python-guide/README.md | 2 +- python-package/lightgbm/compat.py | 19 +++++++++++++------ python-package/lightgbm/sklearn.py | 10 ++++------ python-package/setup.py | 2 +- tests/python_package_test/test_sklearn.py | 5 +++++ 12 files changed, 33 insertions(+), 27 deletions(-) diff --git a/.ci/test.sh b/.ci/test.sh index 84b7d82cad38..eb360731b398 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -74,7 +74,7 @@ if [[ $TASK == "r-package" ]]; then exit 0 fi -conda install -q -y -n $CONDA_ENV joblib matplotlib numpy pandas psutil pytest python-graphviz "scikit-learn<=0.21.3" scipy +conda install -q -y -n $CONDA_ENV joblib matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then # fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." (OpenMP library conflict due to conda's MKL) diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 1d6624fa344c..82849577152d 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -17,7 +17,7 @@ conda init powershell conda activate conda config --set always_yes yes --set changeps1 no conda update -q -y conda -conda create -q -y -n $env:CONDA_ENV python=$env:PYTHON_VERSION joblib matplotlib numpy pandas psutil pytest python-graphviz "scikit-learn<=0.21.3" scipy ; Check-Output $? +conda create -q -y -n $env:CONDA_ENV python=$env:PYTHON_VERSION joblib matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy ; Check-Output $? conda activate $env:CONDA_ENV if ($env:TASK -eq "regular") { diff --git a/docker/dockerfile-python b/docker/dockerfile-python index 4029a097fac4..b157b41117ba 100644 --- a/docker/dockerfile-python +++ b/docker/dockerfile-python @@ -18,7 +18,7 @@ RUN apt-get update && \ export PATH="$CONDA_DIR/bin:$PATH" && \ conda config --set always_yes yes --set changeps1 no && \ # lightgbm - conda install -q -y numpy scipy "scikit-learn<=0.21.3" pandas && \ + conda install -q -y numpy scipy scikit-learn pandas && \ git clone --recursive --branch stable --depth 1 https://github.com/Microsoft/LightGBM && \ cd LightGBM/python-package && python setup.py install && \ # clean diff --git a/docker/gpu/dockerfile.gpu b/docker/gpu/dockerfile.gpu index 2060b39974bf..c4801d6e462f 100644 --- a/docker/gpu/dockerfile.gpu +++ b/docker/gpu/dockerfile.gpu @@ -75,8 +75,8 @@ RUN echo "export PATH=$CONDA_DIR/bin:"'$PATH' > /etc/profile.d/conda.sh && \ rm ~/miniconda.sh RUN conda config --set always_yes yes --set changeps1 no && \ - conda create -y -q -n py2 python=2.7 mkl numpy scipy "scikit-learn<=0.21.3" jupyter notebook ipython pandas matplotlib && \ - conda create -y -q -n py3 python=3.6 mkl numpy scipy "scikit-learn<=0.21.3" jupyter notebook ipython pandas matplotlib + conda create -y -q -n py2 python=2.7 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib && \ + conda create -y -q -n py3 python=3.6 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib ################################################################################################################# # LightGBM diff --git a/docs/GPU-Tutorial.rst b/docs/GPU-Tutorial.rst index dca95c1e2031..d8da7ec83385 100644 --- a/docs/GPU-Tutorial.rst +++ b/docs/GPU-Tutorial.rst @@ -1,4 +1,4 @@ -LightGBM GPU Tutorial +LightGBM GPU Tutorial ===================== The purpose of this document is to give you a quick step-by-step tutorial on GPU training. @@ -78,7 +78,7 @@ If you want to use the Python interface of LightGBM, you can install it now (alo :: sudo apt-get -y install python-pip - sudo -H pip install setuptools numpy scipy "scikit-learn<=0.21.3" -U + sudo -H pip install setuptools numpy scipy scikit-learn -U cd python-package/ sudo python setup.py install --precompile cd .. diff --git a/docs/Python-API.rst b/docs/Python-API.rst index e87a3523223b..de6b1ec6f2b9 100644 --- a/docs/Python-API.rst +++ b/docs/Python-API.rst @@ -24,10 +24,6 @@ Training API Scikit-learn API ---------------- -.. warning:: - - The last supported version of scikit-learn is ``0.21.3``. Our estimators are incompatible with newer versions. - .. autosummary:: :toctree: pythonapi/ diff --git a/docs/Python-Intro.rst b/docs/Python-Intro.rst index 8293733809c5..1c69af8d0c53 100644 --- a/docs/Python-Intro.rst +++ b/docs/Python-Intro.rst @@ -15,11 +15,11 @@ Install ------- Install Python-package dependencies, -``setuptools``, ``wheel``, ``numpy`` and ``scipy`` are required, ``scikit-learn<=0.21.3`` is required for sklearn interface and recommended: +``setuptools``, ``wheel``, ``numpy`` and ``scipy`` are required, ``scikit-learn`` is required for sklearn interface and recommended: :: - pip install setuptools wheel numpy scipy "scikit-learn<=0.21.3" -U + pip install setuptools wheel numpy scipy scikit-learn -U Refer to `Python-package`_ folder for the installation guide. diff --git a/examples/python-guide/README.md b/examples/python-guide/README.md index 8ff716344d33..aba3c9f51d7a 100644 --- a/examples/python-guide/README.md +++ b/examples/python-guide/README.md @@ -8,7 +8,7 @@ You should install LightGBM [Python-package](https://github.com/microsoft/LightG You also need scikit-learn, pandas, matplotlib (only for plot example), and scipy (only for logistic regression example) to run the examples, but they are not required for the package itself. You can install them with pip: ``` -pip install "scikit-learn<=0.21.3" pandas matplotlib scipy -U +pip install scikit-learn pandas matplotlib scipy -U ``` Now you can run examples in this folder, for example: diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 358478d7305f..5d951a56800a 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -116,16 +116,24 @@ class DataTable(object): from sklearn.preprocessing import LabelEncoder from sklearn.utils.class_weight import compute_sample_weight from sklearn.utils.multiclass import check_classification_targets - from sklearn.utils.validation import (assert_all_finite, check_X_y, - check_array, check_consistent_length) + from sklearn.utils.validation import assert_all_finite, check_X_y, check_array try: from sklearn.model_selection import StratifiedKFold, GroupKFold from sklearn.exceptions import NotFittedError except ImportError: from sklearn.cross_validation import StratifiedKFold, GroupKFold from sklearn.utils.validation import NotFittedError + try: + from sklearn.utils.validation import _check_sample_weight + except ImportError: + from sklearn.utils.validation import check_consistent_length + + # dummy function to support older version of scikit-learn + def _check_sample_weight(sample_weight, X, dtype=None): + check_consistent_length(sample_weight, X) + return sample_weight + SKLEARN_INSTALLED = True - from sklearn import __version__ as SKLEARN_VERSION _LGBMModelBase = BaseEstimator _LGBMRegressorBase = RegressorMixin _LGBMClassifierBase = ClassifierMixin @@ -135,13 +143,12 @@ class DataTable(object): _LGBMGroupKFold = GroupKFold _LGBMCheckXY = check_X_y _LGBMCheckArray = check_array - _LGBMCheckConsistentLength = check_consistent_length + _LGBMCheckSampleWeight = _check_sample_weight _LGBMAssertAllFinite = assert_all_finite _LGBMCheckClassificationTargets = check_classification_targets _LGBMComputeSampleWeight = compute_sample_weight except ImportError: SKLEARN_INSTALLED = False - SKLEARN_VERSION = '0.0.0' _LGBMModelBase = object _LGBMClassifierBase = object _LGBMRegressorBase = object @@ -151,7 +158,7 @@ class DataTable(object): _LGBMGroupKFold = None _LGBMCheckXY = None _LGBMCheckArray = None - _LGBMCheckConsistentLength = None + _LGBMCheckSampleWeight = None _LGBMAssertAllFinite = None _LGBMCheckClassificationTargets = None _LGBMComputeSampleWeight = None diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index e6d5b33a651f..2731bb120a9a 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -7,9 +7,9 @@ import numpy as np from .basic import Dataset, LightGBMError, _ConfigAliases -from .compat import (SKLEARN_INSTALLED, SKLEARN_VERSION, _LGBMClassifierBase, +from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase, - _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength, + _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight, _LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight, argc_, range_, zip_, string_type, DataFrame, DataTable) from .engine import train @@ -298,9 +298,6 @@ def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1, """ if not SKLEARN_INSTALLED: raise LightGBMError('Scikit-learn is required for this module') - elif SKLEARN_VERSION > '0.21.3': - raise RuntimeError("The last supported version of scikit-learn is 0.21.3.\n" - "Found version: {0}.".format(SKLEARN_VERSION)) self.boosting_type = boosting_type self.objective = objective @@ -547,7 +544,8 @@ def fit(self, X, y, if not isinstance(X, (DataFrame, DataTable)): _X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2) - _LGBMCheckConsistentLength(_X, _y, sample_weight) + if sample_weight is not None: + sample_weight = _LGBMCheckSampleWeight(sample_weight, _X) else: _X, _y = X, y diff --git a/python-package/setup.py b/python-package/setup.py index f04eaa405028..d308403d09ff 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -276,7 +276,7 @@ def run(self): install_requires=[ 'numpy', 'scipy', - 'scikit-learn<=0.21.3' + 'scikit-learn!=0.22.0' ], maintainer='Guolin Ke', maintainer_email='guolin.ke@microsoft.com', diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 747508f0d5ce..1fb44d0dc12c 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -293,6 +293,11 @@ def test_sklearn_integration(self): check_name = check.func.__name__ if hasattr(check, 'func') else check.__name__ if check_name == 'check_estimators_nan_inf': continue # skip test because LightGBM deals with nan + elif check_name == "check_no_attributes_set_in_init": + # skip test because scikit-learn incorrectly asserts that + # private attributes cannot be set in __init__ + # (see https://github.com/microsoft/LightGBM/issues/2628) + continue try: check(name, estimator) except SkipTest as message: