From 6b6dfa340829347389da00b227e8edf1045007d5 Mon Sep 17 00:00:00 2001 From: IdoKendo Date: Wed, 18 Jan 2023 14:33:39 +0200 Subject: [PATCH 1/4] [python-package] Fix mypy errors for fit() incompatible signature --- python-package/lightgbm/dask.py | 57 ++++++++++++++++++++++++------ python-package/lightgbm/sklearn.py | 12 +++++++ 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index 6823756a60db..2d92e820c9bf 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -6,18 +6,19 @@ It is based on dask-lightgbm, which was based on dask-xgboost. """ +from pathlib import Path import socket from collections import defaultdict, namedtuple from copy import deepcopy from enum import Enum, auto from functools import partial -from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, Union from urllib.parse import urlparse import numpy as np import scipy.sparse as ss -from .basic import LightGBMError, _choose_param_value, _ConfigAliases, _log_info, _log_warning +from .basic import Booster, LightGBMError, _choose_param_value, _ConfigAliases, _log_info, _log_warning from .compat import (DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED, Client, LGBMNotFittedError, concat, dask_Array, dask_array_from_delayed, dask_bag_from_delayed, dask_DataFrame, dask_Series, default_client, delayed, pd_DataFrame, pd_Series, wait) @@ -1166,14 +1167,20 @@ def fit( X: _DaskMatrixLike, y: _DaskCollection, sample_weight: Optional[_DaskVectorLike] = None, - init_score: Optional[_DaskCollection] = None, + init_score: Optional[_DaskVectorLike] = None, + group: Optional[_DaskVectorLike] = None, eval_set: Optional[List[Tuple[_DaskMatrixLike, _DaskCollection]]] = None, eval_names: Optional[List[str]] = None, eval_sample_weight: Optional[List[_DaskVectorLike]] = None, - eval_class_weight: Optional[List[Union[dict, str]]] = None, - eval_init_score: Optional[List[_DaskCollection]] = None, + eval_class_weight: Optional[List] = None, + eval_init_score: Optional[List[_DaskVectorLike]] = None, + eval_group: Optional[List[_DaskVectorLike]] = None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, - **kwargs: Any + eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), + feature_name: str = 'auto', + categorical_feature: str = 'auto', + callbacks: Optional[List[Callable]] = None, + init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None ) -> "DaskLGBMClassifier": """Docstring is inherited from the lightgbm.LGBMClassifier.fit.""" return self._lgb_dask_fit( @@ -1182,13 +1189,19 @@ def fit( y=y, sample_weight=sample_weight, init_score=init_score, + group=group, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, + eval_group=eval_group, eval_metric=eval_metric, - **kwargs + eval_at=eval_at, + feature_name=feature_name, + categorical_feature=categorical_feature, + callbacks=callbacks, + init_model=init_model, ) _base_doc = _lgbmmodel_doc_fit.format( @@ -1339,12 +1352,19 @@ def fit( y: _DaskCollection, sample_weight: Optional[_DaskVectorLike] = None, init_score: Optional[_DaskVectorLike] = None, + group: Optional[_DaskVectorLike] = None, eval_set: Optional[List[Tuple[_DaskMatrixLike, _DaskCollection]]] = None, eval_names: Optional[List[str]] = None, eval_sample_weight: Optional[List[_DaskVectorLike]] = None, + eval_class_weight: Optional[List] = None, eval_init_score: Optional[List[_DaskVectorLike]] = None, + eval_group: Optional[List[_DaskVectorLike]] = None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, - **kwargs: Any + eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), + feature_name: str = 'auto', + categorical_feature: str = 'auto', + callbacks: Optional[List[Callable]] = None, + init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None ) -> "DaskLGBMRegressor": """Docstring is inherited from the lightgbm.LGBMRegressor.fit.""" return self._lgb_dask_fit( @@ -1353,12 +1373,19 @@ def fit( y=y, sample_weight=sample_weight, init_score=init_score, + group=group, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, + eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, + eval_group=eval_group, eval_metric=eval_metric, - **kwargs + eval_at=eval_at, + feature_name=feature_name, + categorical_feature=categorical_feature, + callbacks=callbacks, + init_model=init_model, ) _base_doc = _lgbmmodel_doc_fit.format( @@ -1496,11 +1523,15 @@ def fit( eval_set: Optional[List[Tuple[_DaskMatrixLike, _DaskCollection]]] = None, eval_names: Optional[List[str]] = None, eval_sample_weight: Optional[List[_DaskVectorLike]] = None, + eval_class_weight: Optional[List] = None, eval_init_score: Optional[List[_DaskVectorLike]] = None, eval_group: Optional[List[_DaskVectorLike]] = None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), - **kwargs: Any + feature_name: str = 'auto', + categorical_feature: str = 'auto', + callbacks: Optional[List[Callable]] = None, + init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None ) -> "DaskLGBMRanker": """Docstring is inherited from the lightgbm.LGBMRanker.fit.""" return self._lgb_dask_fit( @@ -1513,11 +1544,15 @@ def fit( eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, + eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric, eval_at=eval_at, - **kwargs + feature_name=feature_name, + categorical_feature=categorical_feature, + callbacks=callbacks, + init_model=init_model, ) _base_doc = _lgbmmodel_doc_fit.format( diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 87a4c1f8e81b..60d2ffabeae6 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -704,6 +704,7 @@ def fit( eval_init_score=None, eval_group=None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, + eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), feature_name='auto', categorical_feature='auto', callbacks=None, @@ -990,11 +991,15 @@ def fit( y, sample_weight=None, init_score=None, + group=None, eval_set=None, eval_names: Optional[List[str]] = None, eval_sample_weight=None, + eval_class_weight=None, eval_init_score=None, + eval_group=None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, + eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), feature_name='auto', categorical_feature='auto', callbacks=None, @@ -1006,10 +1011,13 @@ def fit( y, sample_weight=sample_weight, init_score=init_score, + group=group, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, + eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, + eval_group=eval_group, eval_metric=eval_metric, feature_name=feature_name, categorical_feature=categorical_feature, @@ -1036,12 +1044,15 @@ def fit( y, sample_weight=None, init_score=None, + group=None, eval_set=None, eval_names: Optional[List[str]] = None, eval_sample_weight=None, eval_class_weight=None, eval_init_score=None, + eval_group=None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, + eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), feature_name='auto', categorical_feature='auto', callbacks=None, @@ -1201,6 +1212,7 @@ def fit( eval_set=None, eval_names: Optional[List[str]] = None, eval_sample_weight=None, + eval_class_weight=None, eval_init_score=None, eval_group=None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, From ddffe86939b59ed714ebb9e9317abb8be358ca4e Mon Sep 17 00:00:00 2001 From: IdoKendo Date: Wed, 18 Jan 2023 14:59:16 +0200 Subject: [PATCH 2/4] [python-package] fix import order --- python-package/lightgbm/dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index 2d92e820c9bf..5920b70ae280 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -6,12 +6,12 @@ It is based on dask-lightgbm, which was based on dask-xgboost. """ -from pathlib import Path import socket from collections import defaultdict, namedtuple from copy import deepcopy from enum import Enum, auto from functools import partial +from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, Union from urllib.parse import urlparse From 4b4e0ee9c666b522d032952a3fcff3b4b64f6ada Mon Sep 17 00:00:00 2001 From: IdoKendo Date: Tue, 24 Jan 2023 15:39:53 +0200 Subject: [PATCH 3/4] [python-package] fix fit() pr comments --- python-package/lightgbm/dask.py | 16 ++++++---------- python-package/lightgbm/sklearn.py | 5 +---- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index 5920b70ae280..c1b9df9cd609 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -1167,8 +1167,7 @@ def fit( X: _DaskMatrixLike, y: _DaskCollection, sample_weight: Optional[_DaskVectorLike] = None, - init_score: Optional[_DaskVectorLike] = None, - group: Optional[_DaskVectorLike] = None, + init_score: Optional[_DaskCollection] = None, eval_set: Optional[List[Tuple[_DaskMatrixLike, _DaskCollection]]] = None, eval_names: Optional[List[str]] = None, eval_sample_weight: Optional[List[_DaskVectorLike]] = None, @@ -1176,11 +1175,11 @@ def fit( eval_init_score: Optional[List[_DaskVectorLike]] = None, eval_group: Optional[List[_DaskVectorLike]] = None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, - eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), feature_name: str = 'auto', categorical_feature: str = 'auto', callbacks: Optional[List[Callable]] = None, - init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None + init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None, + **kwargs: Any ) -> "DaskLGBMClassifier": """Docstring is inherited from the lightgbm.LGBMClassifier.fit.""" return self._lgb_dask_fit( @@ -1189,7 +1188,6 @@ def fit( y=y, sample_weight=sample_weight, init_score=init_score, - group=group, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, @@ -1197,11 +1195,11 @@ def fit( eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric, - eval_at=eval_at, feature_name=feature_name, categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model, + **kwargs ) _base_doc = _lgbmmodel_doc_fit.format( @@ -1530,8 +1528,7 @@ def fit( eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), feature_name: str = 'auto', categorical_feature: str = 'auto', - callbacks: Optional[List[Callable]] = None, - init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None + **kwargs: Any ) -> "DaskLGBMRanker": """Docstring is inherited from the lightgbm.LGBMRanker.fit.""" return self._lgb_dask_fit( @@ -1551,8 +1548,7 @@ def fit( eval_at=eval_at, feature_name=feature_name, categorical_feature=categorical_feature, - callbacks=callbacks, - init_model=init_model, + **kwargs ) _base_doc = _lgbmmodel_doc_fit.format( diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 60d2ffabeae6..679f70c940d4 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -1019,6 +1019,7 @@ def fit( eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric, + eval_at=eval_at, feature_name=feature_name, categorical_feature=categorical_feature, callbacks=callbacks, @@ -1044,15 +1045,12 @@ def fit( y, sample_weight=None, init_score=None, - group=None, eval_set=None, eval_names: Optional[List[str]] = None, eval_sample_weight=None, eval_class_weight=None, eval_init_score=None, - eval_group=None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, - eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), feature_name='auto', categorical_feature='auto', callbacks=None, @@ -1212,7 +1210,6 @@ def fit( eval_set=None, eval_names: Optional[List[str]] = None, eval_sample_weight=None, - eval_class_weight=None, eval_init_score=None, eval_group=None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, From b73c45041f54bd32da8231c7e14fb1921af0e68f Mon Sep 17 00:00:00 2001 From: IdoKendo Date: Thu, 26 Jan 2023 09:41:49 +0200 Subject: [PATCH 4/4] [python-package] ignore override for fit() method --- python-package/lightgbm/dask.py | 49 ++++++------------------------ python-package/lightgbm/sklearn.py | 15 ++------- 2 files changed, 12 insertions(+), 52 deletions(-) diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index c1b9df9cd609..c1381dd34447 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -11,14 +11,13 @@ from copy import deepcopy from enum import Enum, auto from functools import partial -from pathlib import Path -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union from urllib.parse import urlparse import numpy as np import scipy.sparse as ss -from .basic import Booster, LightGBMError, _choose_param_value, _ConfigAliases, _log_info, _log_warning +from .basic import LightGBMError, _choose_param_value, _ConfigAliases, _log_info, _log_warning from .compat import (DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED, Client, LGBMNotFittedError, concat, dask_Array, dask_array_from_delayed, dask_bag_from_delayed, dask_DataFrame, dask_Series, default_client, delayed, pd_DataFrame, pd_Series, wait) @@ -1162,7 +1161,7 @@ def __init__( def __getstate__(self) -> Dict[Any, Any]: return self._lgb_dask_getstate() - def fit( + def fit( # type: ignore[override] self, X: _DaskMatrixLike, y: _DaskCollection, @@ -1171,14 +1170,9 @@ def fit( eval_set: Optional[List[Tuple[_DaskMatrixLike, _DaskCollection]]] = None, eval_names: Optional[List[str]] = None, eval_sample_weight: Optional[List[_DaskVectorLike]] = None, - eval_class_weight: Optional[List] = None, - eval_init_score: Optional[List[_DaskVectorLike]] = None, - eval_group: Optional[List[_DaskVectorLike]] = None, + eval_class_weight: Optional[List[Union[dict, str]]] = None, + eval_init_score: Optional[List[_DaskCollection]] = None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, - feature_name: str = 'auto', - categorical_feature: str = 'auto', - callbacks: Optional[List[Callable]] = None, - init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None, **kwargs: Any ) -> "DaskLGBMClassifier": """Docstring is inherited from the lightgbm.LGBMClassifier.fit.""" @@ -1193,12 +1187,7 @@ def fit( eval_sample_weight=eval_sample_weight, eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, - eval_group=eval_group, eval_metric=eval_metric, - feature_name=feature_name, - categorical_feature=categorical_feature, - callbacks=callbacks, - init_model=init_model, **kwargs ) @@ -1344,25 +1333,18 @@ def __init__( def __getstate__(self) -> Dict[Any, Any]: return self._lgb_dask_getstate() - def fit( + def fit( # type: ignore[override] self, X: _DaskMatrixLike, y: _DaskCollection, sample_weight: Optional[_DaskVectorLike] = None, init_score: Optional[_DaskVectorLike] = None, - group: Optional[_DaskVectorLike] = None, eval_set: Optional[List[Tuple[_DaskMatrixLike, _DaskCollection]]] = None, eval_names: Optional[List[str]] = None, eval_sample_weight: Optional[List[_DaskVectorLike]] = None, - eval_class_weight: Optional[List] = None, eval_init_score: Optional[List[_DaskVectorLike]] = None, - eval_group: Optional[List[_DaskVectorLike]] = None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, - eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), - feature_name: str = 'auto', - categorical_feature: str = 'auto', - callbacks: Optional[List[Callable]] = None, - init_model: Optional[Union[str, Path, Booster, "LGBMModel"]] = None + **kwargs: Any ) -> "DaskLGBMRegressor": """Docstring is inherited from the lightgbm.LGBMRegressor.fit.""" return self._lgb_dask_fit( @@ -1371,19 +1353,12 @@ def fit( y=y, sample_weight=sample_weight, init_score=init_score, - group=group, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, - eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, - eval_group=eval_group, eval_metric=eval_metric, - eval_at=eval_at, - feature_name=feature_name, - categorical_feature=categorical_feature, - callbacks=callbacks, - init_model=init_model, + **kwargs ) _base_doc = _lgbmmodel_doc_fit.format( @@ -1511,7 +1486,7 @@ def __init__( def __getstate__(self) -> Dict[Any, Any]: return self._lgb_dask_getstate() - def fit( + def fit( # type: ignore[override] self, X: _DaskMatrixLike, y: _DaskCollection, @@ -1521,13 +1496,10 @@ def fit( eval_set: Optional[List[Tuple[_DaskMatrixLike, _DaskCollection]]] = None, eval_names: Optional[List[str]] = None, eval_sample_weight: Optional[List[_DaskVectorLike]] = None, - eval_class_weight: Optional[List] = None, eval_init_score: Optional[List[_DaskVectorLike]] = None, eval_group: Optional[List[_DaskVectorLike]] = None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), - feature_name: str = 'auto', - categorical_feature: str = 'auto', **kwargs: Any ) -> "DaskLGBMRanker": """Docstring is inherited from the lightgbm.LGBMRanker.fit.""" @@ -1541,13 +1513,10 @@ def fit( eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, - eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric, eval_at=eval_at, - feature_name=feature_name, - categorical_feature=categorical_feature, **kwargs ) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 679f70c940d4..61790627381a 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -704,7 +704,6 @@ def fit( eval_init_score=None, eval_group=None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, - eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), feature_name='auto', categorical_feature='auto', callbacks=None, @@ -985,21 +984,17 @@ def feature_name_(self): class LGBMRegressor(_LGBMRegressorBase, LGBMModel): """LightGBM regressor.""" - def fit( + def fit( # type: ignore[override] self, X, y, sample_weight=None, init_score=None, - group=None, eval_set=None, eval_names: Optional[List[str]] = None, eval_sample_weight=None, - eval_class_weight=None, eval_init_score=None, - eval_group=None, eval_metric: Optional[_LGBM_ScikitEvalMetricType] = None, - eval_at: Union[List[int], Tuple[int, ...]] = (1, 2, 3, 4, 5), feature_name='auto', categorical_feature='auto', callbacks=None, @@ -1011,15 +1006,11 @@ def fit( y, sample_weight=sample_weight, init_score=init_score, - group=group, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, - eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, - eval_group=eval_group, eval_metric=eval_metric, - eval_at=eval_at, feature_name=feature_name, categorical_feature=categorical_feature, callbacks=callbacks, @@ -1039,7 +1030,7 @@ def fit( class LGBMClassifier(_LGBMClassifierBase, LGBMModel): """LightGBM classifier.""" - def fit( + def fit( # type: ignore[override] self, X, y, @@ -1200,7 +1191,7 @@ class LGBMRanker(LGBMModel): Please use this class mainly for training and applying ranking models in common sklearnish way. """ - def fit( + def fit( # type: ignore[override] self, X, y,