From b5468707a7a6a4bc18033f6c6a058fbdb4a736f0 Mon Sep 17 00:00:00 2001
From: StrikerRUS
Date: Wed, 2 Oct 2019 03:58:48 +0300
Subject: [PATCH 1/2] handle aliases centralized

---
 python-package/lightgbm/basic.py    | 65 ++++++++++++++++++++++++-----
 python-package/lightgbm/callback.py | 13 +++---
 python-package/lightgbm/engine.py   | 20 +++++----
 python-package/lightgbm/sklearn.py  | 12 ++++--
 4 files changed, 81 insertions(+), 29 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 26270543fa10..71492c47a588 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -196,6 +196,48 @@ class LightGBMError(Exception):
                      "feature_penalty": C_API_DTYPE_FLOAT64,
                      "monotone_constraints": C_API_DTYPE_INT8}
 
+CONFIG_ALIASES = {"boosting": {"boosting",
+                               "boosting_type",
+                               "boost"},
+                  "categorical_feature": {"categorical_feature",
+                                          "cat_feature",
+                                          "categorical_column",
+                                          "cat_column"},
+                  "early_stopping_round": {"early_stopping_round",
+                                           "early_stopping_rounds",
+                                           "early_stopping",
+                                           "n_iter_no_change"},
+                  "eval_at": {"eval_at",
+                              "ndcg_eval_at",
+                              "ndcg_at",
+                              "map_eval_at",
+                              "map_at"},
+                  "header": {"header",
+                             "has_header"},
+                  "machines": {"machines",
+                               "workers",
+                               "nodes"},
+                  "metric": {"metric",
+                             "metrics",
+                             "metric_types"},
+                  "num_class": {"num_class",
+                                "num_classes"},
+                  "num_iterations": {"num_iterations",
+                                     "num_iteration",
+                                     "n_iter",
+                                     "num_tree",
+                                     "num_trees",
+                                     "num_round",
+                                     "num_rounds",
+                                     "num_boost_round",
+                                     "n_estimators"},
+                  "objective": {"objective",
+                                "objective_type",
+                                "app",
+                                "application"},
+                  "verbosity": {"verbosity",
+                                "verbose"}}
+
 
 def convert_from_sliced_object(data):
     """Fix the memory of multi-dimensional sliced object."""
@@ -741,8 +783,7 @@ def _set_init_score_by_predictor(self, predictor, data, used_indices=None):
         data_has_header = False
         if isinstance(data, string_type):
             # check data has header or not
-            if self.params.get("has_header", False) or self.params.get("header", False):
-                data_has_header = True
+            data_has_header = any(self.params.get(alias, False) for alias in CONFIG_ALIASES["header"])
         init_score = predictor.predict(data,
                                        raw_score=True,
                                        data_has_header=data_has_header,
@@ -793,7 +834,7 @@ def _lazy_init(self, data, label=None, reference=None,
                                   'Please use {0} argument of the Dataset constructor to pass this parameter.'
                                   .format(key))
         # user can set verbose with params, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
+        if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and silent:
             params["verbose"] = -1
         # get categorical features
         if categorical_feature is not None:
@@ -810,10 +851,10 @@ def _lazy_init(self, data, label=None, reference=None,
                     raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
                                     .format(type(name).__name__, name))
             if categorical_indices:
-                if "categorical_feature" in params or "categorical_column" in params:
-                    warnings.warn('categorical_feature in param dict is overridden.')
-                params.pop("categorical_feature", None)
-                params.pop("categorical_column", None)
+                for cat_alias in CONFIG_ALIASES["categorical_feature"]:
+                    if cat_alias in params:
+                        warnings.warn('{} in param dict is overridden.'.format(cat_alias))
+                        params.pop(cat_alias, None)
                 params['categorical_column'] = sorted(categorical_indices)
 
         params_str = param_dict_to_str(params)
@@ -1259,7 +1300,9 @@ def _set_predictor(self, predictor):
         """
         if predictor is self._predictor:
             return self
-        if self.data is not None or (self.used_indices is not None and self.reference is not None and self.reference.data is not None):
+        if self.data is not None or (self.used_indices is not None
+                                     and self.reference is not None
+                                     and self.reference.data is not None):
             self._predictor = predictor
             return self._free_handle()
         else:
@@ -1634,7 +1677,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
         self.best_score = {}
         params = {} if params is None else copy.deepcopy(params)
         # user can set verbose with params, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
+        if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and silent:
             params["verbose"] = -1
         if train_set is not None:
             # Training task
@@ -1643,7 +1686,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
                                 .format(type(train_set).__name__))
             params_str = param_dict_to_str(params)
             # set network if necessary
-            for alias in ["machines", "workers", "nodes"]:
+            for alias in CONFIG_ALIASES["machines"]:
                 if alias in params:
                     machines = params[alias]
                     if isinstance(machines, string_type):
@@ -1863,7 +1906,7 @@ def reset_parameter(self, params):
         self : Booster
             Booster with new parameters.
         """
-        if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')):
+        if any(metric_alias in params for metric_alias in CONFIG_ALIASES["metric"]):
            self.__need_reload_eval_info = True
         params_str = param_dict_to_str(params)
         if params_str:
diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index 7cb53791b2b7..f67dcdab5476 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -7,6 +7,7 @@
 import warnings
 from operator import gt, lt
 
+from .basic import CONFIG_ALIASES
 from .compat import range_
 
 
@@ -130,9 +131,9 @@ def reset_parameter(**kwargs):
     def _callback(env):
         new_parameters = {}
         for key, value in kwargs.items():
-            if key in ['num_class', 'num_classes',
-                       'boosting', 'boost', 'boosting_type',
-                       'metric', 'metrics', 'metric_types']:
+            if key in (CONFIG_ALIASES["num_class"]
+                       | CONFIG_ALIASES["boosting"]
+                       | CONFIG_ALIASES["metric"]):
                 raise RuntimeError("Cannot reset {} during training".format(repr(key)))
             if isinstance(value, list):
                 if len(value) != env.end_iteration - env.begin_iteration:
@@ -184,10 +185,8 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
     first_metric = ['']
 
     def _init(env):
-        enabled[0] = not any((boost_alias in env.params
-                              and env.params[boost_alias] == 'dart') for boost_alias in ('boosting',
-                                                                                         'boosting_type',
-                                                                                         'boost'))
+        enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
+                             in CONFIG_ALIASES["boosting"])
         if not enabled[0]:
             warnings.warn('Early stopping is not available in dart mode')
             return
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index a3f0189e571b..612f5ce1e08c 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -11,7 +11,7 @@
 import numpy as np
 
 from . import callback
-from .basic import Booster, Dataset, LightGBMError, _InnerPredictor
+from .basic import Booster, Dataset, LightGBMError, _InnerPredictor, CONFIG_ALIASES
 from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold,
                      string_type, integer_types, range_, zip_)
 
@@ -140,14 +140,15 @@ def train(params, train_set, num_boost_round=100,
     # create predictor first
     params = copy.deepcopy(params)
     if fobj is not None:
+        for obj_alias in CONFIG_ALIASES["objective"]:
+            params.pop(obj_alias, None)
         params['objective'] = 'none'
-    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
-                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
+    for alias in CONFIG_ALIASES["num_iterations"]:
         if alias in params:
             num_boost_round = params.pop(alias)
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             break
-    for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
+    for alias in CONFIG_ALIASES["early_stopping_round"]:
         if alias in params:
             early_stopping_rounds = params.pop(alias)
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -314,7 +315,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
             flatted_group = np.zeros(num_data, dtype=np.int32)
         folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
     else:
-        if 'objective' in params and params['objective'] == 'lambdarank':
+        if any(params.get(obj_alias, "") == "lambdarank" for obj_alias in CONFIG_ALIASES["objective"]):
             if not SKLEARN_INSTALLED:
                 raise LightGBMError('Scikit-learn is required for lambdarank cv.')
             # lambdarank task, split according to groups
@@ -495,14 +496,15 @@ def cv(params, train_set, num_boost_round=100,
 
     params = copy.deepcopy(params)
     if fobj is not None:
+        for obj_alias in CONFIG_ALIASES["objective"]:
+            params.pop(obj_alias, None)
         params['objective'] = 'none'
-    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
-                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
+    for alias in CONFIG_ALIASES["num_iterations"]:
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             num_boost_round = params.pop(alias)
             break
-    for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
+    for alias in CONFIG_ALIASES["early_stopping_round"]:
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             early_stopping_rounds = params.pop(alias)
@@ -523,6 +525,8 @@ def cv(params, train_set, num_boost_round=100,
              .set_categorical_feature(categorical_feature)
 
     if metrics is not None:
+        for metric_alias in CONFIG_ALIASES["metric"]:
+            params.pop(metric_alias, None)
         params['metric'] = metrics
 
     results = collections.defaultdict(list)
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index a9bd76e70178..f02ba3a77728 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from .basic import Dataset, LightGBMError
+from .basic import Dataset, LightGBMError, CONFIG_ALIASES
 from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, LGBMNotFittedError,
                      _LGBMLabelEncoder, _LGBMModelBase, _LGBMRegressorBase, _LGBMCheckXY,
                      _LGBMCheckArray, _LGBMCheckConsistentLength,
@@ -489,15 +489,21 @@ def fit(self, X, y,
         evals_result = {}
         params = self.get_params()
         # user can set verbose with kwargs, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and self.silent:
+        if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and self.silent:
             params['verbose'] = -1
         params.pop('silent', None)
         params.pop('importance_type', None)
         params.pop('n_estimators', None)
         params.pop('class_weight', None)
+        for alias in CONFIG_ALIASES['objective']:
+            params.pop(alias, None)
         if self._n_classes is not None and self._n_classes > 2:
+            for alias in CONFIG_ALIASES['num_class']:
+                params.pop(alias, None)
             params['num_class'] = self._n_classes
         if hasattr(self, '_eval_at'):
+            for alias in CONFIG_ALIASES['eval_at']:
+                params.pop(alias, None)
             params['eval_at'] = self._eval_at
         params['objective'] = self._objective
         if self._fobj:
@@ -518,7 +524,7 @@ def fit(self, X, y,
         elif isinstance(self, LGBMRanker):
             original_metric = "ndcg"
         # overwrite default metric by explicitly set metric
-        for metric_alias in ['metric', 'metrics', 'metric_types']:
+        for metric_alias in CONFIG_ALIASES["metric"]:
             if metric_alias in params:
                 original_metric = params.pop(metric_alias)
         # concatenate metric from params (or default if not provided in params) and eval_metric

From 4ceb43f6395638fd6f562b2014d4ffcd2a86172f Mon Sep 17 00:00:00 2001
From: StrikerRUS
Date: Fri, 18 Oct 2019 21:58:24 +0300
Subject: [PATCH 2/2] convert aliases dict to class

---
 python-package/lightgbm/basic.py    | 105 +++++++++++++++-------
 python-package/lightgbm/callback.py |   8 +--
 python-package/lightgbm/engine.py   |  18 ++---
 python-package/lightgbm/sklearn.py  |  12 ++--
 4 files changed, 75 insertions(+), 68 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 71492c47a588..4742e814b76b 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -170,6 +170,57 @@ class LightGBMError(Exception):
     pass
 
 
+class _ConfigAliases(object):
+    aliases = {"boosting": {"boosting",
+                            "boosting_type",
+                            "boost"},
+               "categorical_feature": {"categorical_feature",
+                                       "cat_feature",
+                                       "categorical_column",
+                                       "cat_column"},
+               "early_stopping_round": {"early_stopping_round",
+                                        "early_stopping_rounds",
+                                        "early_stopping",
+                                        "n_iter_no_change"},
+               "eval_at": {"eval_at",
+                           "ndcg_eval_at",
+                           "ndcg_at",
+                           "map_eval_at",
+                           "map_at"},
+               "header": {"header",
+                          "has_header"},
+               "machines": {"machines",
+                            "workers",
+                            "nodes"},
+               "metric": {"metric",
+                          "metrics",
+                          "metric_types"},
+               "num_class": {"num_class",
+                             "num_classes"},
+               "num_iterations": {"num_iterations",
+                                  "num_iteration",
+                                  "n_iter",
+                                  "num_tree",
+                                  "num_trees",
+                                  "num_round",
+                                  "num_rounds",
+                                  "num_boost_round",
+                                  "n_estimators"},
+               "objective": {"objective",
+                             "objective_type",
+                             "app",
+                             "application"},
+               "verbosity": {"verbosity",
+                             "verbose"}}
+
+    @classmethod
+    def get(cls, *args):
+        ret = set()
+        for i in args:
+            ret |= cls.aliases.get(i, set())
+        return ret
+
+
 MAX_INT32 = (1 << 31) - 1
 
 """Macro definition of data type in C API of LightGBM"""
@@ -196,48 +247,6 @@ class LightGBMError(Exception):
                      "feature_penalty": C_API_DTYPE_FLOAT64,
                      "monotone_constraints": C_API_DTYPE_INT8}
 
-CONFIG_ALIASES = {"boosting": {"boosting",
-                               "boosting_type",
-                               "boost"},
-                  "categorical_feature": {"categorical_feature",
-                                          "cat_feature",
-                                          "categorical_column",
-                                          "cat_column"},
-                  "early_stopping_round": {"early_stopping_round",
-                                           "early_stopping_rounds",
-                                           "early_stopping",
-                                           "n_iter_no_change"},
-                  "eval_at": {"eval_at",
-                              "ndcg_eval_at",
-                              "ndcg_at",
-                              "map_eval_at",
-                              "map_at"},
-                  "header": {"header",
-                             "has_header"},
-                  "machines": {"machines",
-                               "workers",
-                               "nodes"},
-                  "metric": {"metric",
-                             "metrics",
-                             "metric_types"},
-                  "num_class": {"num_class",
-                                "num_classes"},
-                  "num_iterations": {"num_iterations",
-                                     "num_iteration",
-                                     "n_iter",
-                                     "num_tree",
-                                     "num_trees",
-                                     "num_round",
-                                     "num_rounds",
-                                     "num_boost_round",
-                                     "n_estimators"},
-                  "objective": {"objective",
-                                "objective_type",
-                                "app",
-                                "application"},
-                  "verbosity": {"verbosity",
-                                "verbose"}}
-
 
 def convert_from_sliced_object(data):
     """Fix the memory of multi-dimensional sliced object."""
@@ -783,7 +792,7 @@ def _set_init_score_by_predictor(self, predictor, data, used_indices=None):
         data_has_header = False
         if isinstance(data, string_type):
             # check data has header or not
-            data_has_header = any(self.params.get(alias, False) for alias in CONFIG_ALIASES["header"])
+            data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header"))
         init_score = predictor.predict(data,
                                        raw_score=True,
                                        data_has_header=data_has_header,
@@ -834,7 +843,7 @@ def _lazy_init(self, data, label=None, reference=None,
                                   'Please use {0} argument of the Dataset constructor to pass this parameter.'
                                   .format(key))
         # user can set verbose with params, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
             params["verbose"] = -1
         # get categorical features
         if categorical_feature is not None:
@@ -851,7 +860,7 @@ def _lazy_init(self, data, label=None, reference=None,
                     raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
                                     .format(type(name).__name__, name))
             if categorical_indices:
-                for cat_alias in CONFIG_ALIASES["categorical_feature"]:
+                for cat_alias in _ConfigAliases.get("categorical_feature"):
                     if cat_alias in params:
                         warnings.warn('{} in param dict is overridden.'.format(cat_alias))
                         params.pop(cat_alias, None)
                 params['categorical_column'] = sorted(categorical_indices)
@@ -1677,7 +1686,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
         self.best_score = {}
         params = {} if params is None else copy.deepcopy(params)
         # user can set verbose with params, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
             params["verbose"] = -1
         if train_set is not None:
             # Training task
@@ -1686,7 +1695,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
                                 .format(type(train_set).__name__))
             params_str = param_dict_to_str(params)
             # set network if necessary
-            for alias in CONFIG_ALIASES["machines"]:
+            for alias in _ConfigAliases.get("machines"):
                 if alias in params:
                     machines = params[alias]
                     if isinstance(machines, string_type):
@@ -1906,7 +1915,7 @@ def reset_parameter(self, params):
         self : Booster
             Booster with new parameters.
         """
-        if any(metric_alias in params for metric_alias in CONFIG_ALIASES["metric"]):
+        if any(metric_alias in params for metric_alias in _ConfigAliases.get("metric")):
             self.__need_reload_eval_info = True
         params_str = param_dict_to_str(params)
         if params_str:
diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index f67dcdab5476..b19b8afd5e2a 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -7,7 +7,7 @@
 import warnings
 from operator import gt, lt
 
-from .basic import CONFIG_ALIASES
+from .basic import _ConfigAliases
 from .compat import range_
 
 
@@ -131,9 +131,7 @@ def reset_parameter(**kwargs):
     def _callback(env):
         new_parameters = {}
         for key, value in kwargs.items():
-            if key in (CONFIG_ALIASES["num_class"]
-                       | CONFIG_ALIASES["boosting"]
-                       | CONFIG_ALIASES["metric"]):
+            if key in _ConfigAliases.get("num_class", "boosting", "metric"):
                 raise RuntimeError("Cannot reset {} during training".format(repr(key)))
             if isinstance(value, list):
                 if len(value) != env.end_iteration - env.begin_iteration:
@@ -186,7 +184,7 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
 
     def _init(env):
         enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
-                             in CONFIG_ALIASES["boosting"])
+                             in _ConfigAliases.get("boosting"))
         if not enabled[0]:
             warnings.warn('Early stopping is not available in dart mode')
             return
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 612f5ce1e08c..b9752db516fd 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -11,7 +11,7 @@
 import numpy as np
 
 from . import callback
-from .basic import Booster, Dataset, LightGBMError, _InnerPredictor, CONFIG_ALIASES
+from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor
 from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold,
                      string_type, integer_types, range_, zip_)
 
@@ -140,15 +140,15 @@ def train(params, train_set, num_boost_round=100,
     # create predictor first
     params = copy.deepcopy(params)
     if fobj is not None:
-        for obj_alias in CONFIG_ALIASES["objective"]:
+        for obj_alias in _ConfigAliases.get("objective"):
             params.pop(obj_alias, None)
         params['objective'] = 'none'
-    for alias in CONFIG_ALIASES["num_iterations"]:
+    for alias in _ConfigAliases.get("num_iterations"):
         if alias in params:
             num_boost_round = params.pop(alias)
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             break
-    for alias in CONFIG_ALIASES["early_stopping_round"]:
+    for alias in _ConfigAliases.get("early_stopping_round"):
         if alias in params:
             early_stopping_rounds = params.pop(alias)
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -315,7 +315,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
             flatted_group = np.zeros(num_data, dtype=np.int32)
         folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
     else:
-        if any(params.get(obj_alias, "") == "lambdarank" for obj_alias in CONFIG_ALIASES["objective"]):
+        if any(params.get(obj_alias, "") == "lambdarank" for obj_alias in _ConfigAliases.get("objective")):
             if not SKLEARN_INSTALLED:
                 raise LightGBMError('Scikit-learn is required for lambdarank cv.')
             # lambdarank task, split according to groups
@@ -496,15 +496,15 @@ def cv(params, train_set, num_boost_round=100,
 
     params = copy.deepcopy(params)
     if fobj is not None:
-        for obj_alias in CONFIG_ALIASES["objective"]:
+        for obj_alias in _ConfigAliases.get("objective"):
             params.pop(obj_alias, None)
         params['objective'] = 'none'
-    for alias in CONFIG_ALIASES["num_iterations"]:
+    for alias in _ConfigAliases.get("num_iterations"):
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             num_boost_round = params.pop(alias)
             break
-    for alias in CONFIG_ALIASES["early_stopping_round"]:
+    for alias in _ConfigAliases.get("early_stopping_round"):
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             early_stopping_rounds = params.pop(alias)
@@ -525,7 +525,7 @@ def cv(params, train_set, num_boost_round=100,
              .set_categorical_feature(categorical_feature)
 
     if metrics is not None:
-        for metric_alias in CONFIG_ALIASES["metric"]:
+        for metric_alias in _ConfigAliases.get("metric"):
             params.pop(metric_alias, None)
         params['metric'] = metrics
 
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index f02ba3a77728..bedb4227c0c6 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from .basic import Dataset, LightGBMError, CONFIG_ALIASES
+from .basic import Dataset, LightGBMError, _ConfigAliases
 from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, LGBMNotFittedError,
                      _LGBMLabelEncoder, _LGBMModelBase, _LGBMRegressorBase, _LGBMCheckXY,
                      _LGBMCheckArray, _LGBMCheckConsistentLength,
@@ -489,20 +489,20 @@ def fit(self, X, y,
         evals_result = {}
         params = self.get_params()
         # user can set verbose with kwargs, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and self.silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and self.silent:
             params['verbose'] = -1
         params.pop('silent', None)
         params.pop('importance_type', None)
         params.pop('n_estimators', None)
         params.pop('class_weight', None)
-        for alias in CONFIG_ALIASES['objective']:
+        for alias in _ConfigAliases.get('objective'):
             params.pop(alias, None)
         if self._n_classes is not None and self._n_classes > 2:
-            for alias in CONFIG_ALIASES['num_class']:
+            for alias in _ConfigAliases.get('num_class'):
                 params.pop(alias, None)
             params['num_class'] = self._n_classes
         if hasattr(self, '_eval_at'):
-            for alias in CONFIG_ALIASES['eval_at']:
+            for alias in _ConfigAliases.get('eval_at'):
                 params.pop(alias, None)
             params['eval_at'] = self._eval_at
         params['objective'] = self._objective
@@ -524,7 +524,7 @@ def fit(self, X, y,
         elif isinstance(self, LGBMRanker):
             original_metric = "ndcg"
         # overwrite default metric by explicitly set metric
-        for metric_alias in CONFIG_ALIASES["metric"]:
+        for metric_alias in _ConfigAliases.get("metric"):
             if metric_alias in params:
                 original_metric = params.pop(metric_alias)
         # concatenate metric from params (or default if not provided in params) and eval_metric
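
Below is a minimal, self-contained sketch (not part of the patch) of the alias-lookup pattern the second commit introduces. The alias table here is trimmed to three entries for brevity; the patched basic.py registers the full LightGBM alias set.

# Sketch only: illustrates the _ConfigAliases pattern with a reduced alias table.
class _ConfigAliases(object):
    aliases = {"boosting": {"boosting", "boosting_type", "boost"},
               "metric": {"metric", "metrics", "metric_types"},
               "num_class": {"num_class", "num_classes"}}

    @classmethod
    def get(cls, *args):
        # Return the union of the alias sets for every requested canonical name.
        ret = set()
        for name in args:
            ret |= cls.aliases.get(name, set())
        return ret


# Membership can be checked against several alias groups in one call,
# which is what reset_parameter() in callback.py relies on after the second commit.
print("num_classes" in _ConfigAliases.get("num_class", "boosting", "metric"))  # True

# Every spelling of a parameter can be dropped before it is overridden,
# mirroring what cv() in engine.py does for "metric".
params = {"metrics": "auc", "num_classes": 3}
for metric_alias in _ConfigAliases.get("metric"):
    params.pop(metric_alias, None)
params["metric"] = "binary_logloss"
print(params)  # {'num_classes': 3, 'metric': 'binary_logloss'}

Keeping the alias table in one place means callers no longer hard-code alias lists, and the classmethod accepting several names at once is what lets the callback and engine call sites shrink to a single line each.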