diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 26270543fa10..4742e814b76b 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -170,6 +170,57 @@ class LightGBMError(Exception):
     pass
 
 
+class _ConfigAliases(object):
+    aliases = {"boosting": {"boosting",
+                            "boosting_type",
+                            "boost"},
+               "categorical_feature": {"categorical_feature",
+                                       "cat_feature",
+                                       "categorical_column",
+                                       "cat_column"},
+               "early_stopping_round": {"early_stopping_round",
+                                        "early_stopping_rounds",
+                                        "early_stopping",
+                                        "n_iter_no_change"},
+               "eval_at": {"eval_at",
+                           "ndcg_eval_at",
+                           "ndcg_at",
+                           "map_eval_at",
+                           "map_at"},
+               "header": {"header",
+                          "has_header"},
+               "machines": {"machines",
+                            "workers",
+                            "nodes"},
+               "metric": {"metric",
+                          "metrics",
+                          "metric_types"},
+               "num_class": {"num_class",
+                             "num_classes"},
+               "num_iterations": {"num_iterations",
+                                  "num_iteration",
+                                  "n_iter",
+                                  "num_tree",
+                                  "num_trees",
+                                  "num_round",
+                                  "num_rounds",
+                                  "num_boost_round",
+                                  "n_estimators"},
+               "objective": {"objective",
+                             "objective_type",
+                             "app",
+                             "application"},
+               "verbosity": {"verbosity",
+                             "verbose"}}
+
+    @classmethod
+    def get(cls, *args):
+        ret = set()
+        for i in args:
+            ret |= cls.aliases.get(i, set())
+        return ret
+
+
 MAX_INT32 = (1 << 31) - 1
 
 """Macro definition of data type in C API of LightGBM"""
@@ -741,8 +792,7 @@ def _set_init_score_by_predictor(self, predictor, data, used_indices=None):
         data_has_header = False
         if isinstance(data, string_type):
             # check data has header or not
-            if self.params.get("has_header", False) or self.params.get("header", False):
-                data_has_header = True
+            data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header"))
         init_score = predictor.predict(data,
                                        raw_score=True,
                                        data_has_header=data_has_header,
@@ -793,7 +843,7 @@ def _lazy_init(self, data, label=None, reference=None,
                               'Please use {0} argument of the Dataset constructor to pass this parameter.'
                               .format(key))
         # user can set verbose with params, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
             params["verbose"] = -1
         # get categorical features
         if categorical_feature is not None:
@@ -810,10 +860,10 @@ def _lazy_init(self, data, label=None, reference=None,
                     raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
                                     .format(type(name).__name__, name))
             if categorical_indices:
-                if "categorical_feature" in params or "categorical_column" in params:
-                    warnings.warn('categorical_feature in param dict is overridden.')
-                params.pop("categorical_feature", None)
-                params.pop("categorical_column", None)
+                for cat_alias in _ConfigAliases.get("categorical_feature"):
+                    if cat_alias in params:
+                        warnings.warn('{} in param dict is overridden.'.format(cat_alias))
+                        params.pop(cat_alias, None)
                 params['categorical_column'] = sorted(categorical_indices)
 
         params_str = param_dict_to_str(params)
@@ -1259,7 +1309,9 @@ def _set_predictor(self, predictor):
         """
         if predictor is self._predictor:
             return self
-        if self.data is not None or (self.used_indices is not None and self.reference is not None and self.reference.data is not None):
+        if self.data is not None or (self.used_indices is not None
+                                     and self.reference is not None
+                                     and self.reference.data is not None):
             self._predictor = predictor
             return self._free_handle()
         else:
@@ -1634,7 +1686,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
         self.best_score = {}
         params = {} if params is None else copy.deepcopy(params)
         # user can set verbose with params, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
             params["verbose"] = -1
         if train_set is not None:
             # Training task
@@ -1643,7 +1695,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
                                 .format(type(train_set).__name__))
             params_str = param_dict_to_str(params)
             # set network if necessary
-            for alias in ["machines", "workers", "nodes"]:
+            for alias in _ConfigAliases.get("machines"):
                 if alias in params:
                     machines = params[alias]
                     if isinstance(machines, string_type):
@@ -1863,7 +1915,7 @@ def reset_parameter(self, params):
         self : Booster
             Booster with new parameters.
         """
-        if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')):
+        if any(metric_alias in params for metric_alias in _ConfigAliases.get("metric")):
            self.__need_reload_eval_info = True
         params_str = param_dict_to_str(params)
         if params_str:
diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index 7cb53791b2b7..b19b8afd5e2a 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -7,6 +7,7 @@
 import warnings
 from operator import gt, lt
 
+from .basic import _ConfigAliases
 from .compat import range_
 
 
@@ -130,9 +131,7 @@ def reset_parameter(**kwargs):
     def _callback(env):
         new_parameters = {}
         for key, value in kwargs.items():
-            if key in ['num_class', 'num_classes',
-                       'boosting', 'boost', 'boosting_type',
-                       'metric', 'metrics', 'metric_types']:
+            if key in _ConfigAliases.get("num_class", "boosting", "metric"):
                 raise RuntimeError("Cannot reset {} during training".format(repr(key)))
             if isinstance(value, list):
                 if len(value) != env.end_iteration - env.begin_iteration:
@@ -184,10 +183,8 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
     first_metric = ['']
 
     def _init(env):
-        enabled[0] = not any((boost_alias in env.params
-                              and env.params[boost_alias] == 'dart') for boost_alias in ('boosting',
-                                                                                         'boosting_type',
-                                                                                         'boost'))
+        enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
+                             in _ConfigAliases.get("boosting"))
         if not enabled[0]:
             warnings.warn('Early stopping is not available in dart mode')
             return
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index a3f0189e571b..b9752db516fd 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -11,7 +11,7 @@
 import numpy as np
 
 from . import callback
-from .basic import Booster, Dataset, LightGBMError, _InnerPredictor
+from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor
 from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold,
                      string_type, integer_types, range_, zip_)
 
@@ -140,14 +140,15 @@ def train(params, train_set, num_boost_round=100,
     # create predictor first
     params = copy.deepcopy(params)
     if fobj is not None:
+        for obj_alias in _ConfigAliases.get("objective"):
+            params.pop(obj_alias, None)
         params['objective'] = 'none'
-    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
-                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
+    for alias in _ConfigAliases.get("num_iterations"):
         if alias in params:
             num_boost_round = params.pop(alias)
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             break
-    for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
+    for alias in _ConfigAliases.get("early_stopping_round"):
         if alias in params:
             early_stopping_rounds = params.pop(alias)
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -314,7 +315,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
             flatted_group = np.zeros(num_data, dtype=np.int32)
         folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
     else:
-        if 'objective' in params and params['objective'] == 'lambdarank':
+        if any(params.get(obj_alias, "") == "lambdarank" for obj_alias in _ConfigAliases.get("objective")):
             if not SKLEARN_INSTALLED:
                 raise LightGBMError('Scikit-learn is required for lambdarank cv.')
             # lambdarank task, split according to groups
@@ -495,14 +496,15 @@ def cv(params, train_set, num_boost_round=100,
 
     params = copy.deepcopy(params)
     if fobj is not None:
+        for obj_alias in _ConfigAliases.get("objective"):
+            params.pop(obj_alias, None)
         params['objective'] = 'none'
-    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
-                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
+    for alias in _ConfigAliases.get("num_iterations"):
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             num_boost_round = params.pop(alias)
             break
-    for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
+    for alias in _ConfigAliases.get("early_stopping_round"):
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             early_stopping_rounds = params.pop(alias)
@@ -523,6 +525,8 @@ def cv(params, train_set, num_boost_round=100,
                 .set_categorical_feature(categorical_feature)
 
     if metrics is not None:
+        for metric_alias in _ConfigAliases.get("metric"):
+            params.pop(metric_alias, None)
         params['metric'] = metrics
 
     results = collections.defaultdict(list)
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index a9bd76e70178..bedb4227c0c6 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from .basic import Dataset, LightGBMError
+from .basic import Dataset, LightGBMError, _ConfigAliases
 from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, LGBMNotFittedError,
                      _LGBMLabelEncoder, _LGBMModelBase, _LGBMRegressorBase, _LGBMCheckXY,
                      _LGBMCheckArray, _LGBMCheckConsistentLength,
@@ -489,15 +489,21 @@ def fit(self, X, y,
         evals_result = {}
         params = self.get_params()
         # user can set verbose with kwargs, it has higher priority
-        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and self.silent:
+        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and self.silent:
             params['verbose'] = -1
         params.pop('silent', None)
         params.pop('importance_type', None)
         params.pop('n_estimators', None)
         params.pop('class_weight', None)
+        for alias in _ConfigAliases.get('objective'):
+            params.pop(alias, None)
         if self._n_classes is not None and self._n_classes > 2:
+            for alias in _ConfigAliases.get('num_class'):
+                params.pop(alias, None)
             params['num_class'] = self._n_classes
         if hasattr(self, '_eval_at'):
+            for alias in _ConfigAliases.get('eval_at'):
+                params.pop(alias, None)
             params['eval_at'] = self._eval_at
         params['objective'] = self._objective
         if self._fobj:
@@ -518,7 +524,7 @@ def fit(self, X, y,
         elif isinstance(self, LGBMRanker):
             original_metric = "ndcg"
         # overwrite default metric by explicitly set metric
-        for metric_alias in ['metric', 'metrics', 'metric_types']:
+        for metric_alias in _ConfigAliases.get("metric"):
             if metric_alias in params:
                 original_metric = params.pop(metric_alias)
         # concatenate metric from params (or default if not provided in params) and eval_metric
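
Note on the refactor: every hand-maintained tuple or list of parameter spellings above is replaced by a lookup through the single `_ConfigAliases` table in basic.py. The sketch below is not part of the patch; it is a minimal, self-contained illustration of the idiom, with `_ConfigAliases` trimmed to two entries and an example `params` dict invented for demonstration:

    # Trimmed copy of the class added to basic.py, for illustration only.
    class _ConfigAliases(object):
        aliases = {"num_iterations": {"num_iterations",
                                      "num_boost_round",
                                      "n_estimators"},
                   "metric": {"metric",
                              "metrics",
                              "metric_types"}}

        @classmethod
        def get(cls, *args):
            # Union of the alias sets for every requested canonical name.
            ret = set()
            for i in args:
                ret |= cls.aliases.get(i, set())
            return ret

    # Invented example input.
    params = {"n_estimators": 500, "metrics": "auc"}
    num_boost_round = 100

    # The train()/cv() idiom: the first alias found is popped from `params`
    # and overrides the corresponding keyword argument.
    for alias in _ConfigAliases.get("num_iterations"):
        if alias in params:
            num_boost_round = params.pop(alias)
            break

    print(num_boost_round)  # 500
    print(params)           # {'metrics': 'auc'}

One behavioural nuance worth noting: the old code iterated ordered tuples and lists, whereas `_ConfigAliases.get` returns a set, so if a user supplies two competing aliases at once, which one wins is no longer deterministic.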