[python] handle params aliases centralized #2489

Merged (2 commits) on Oct 22, 2019
74 changes: 63 additions & 11 deletions python-package/lightgbm/basic.py
@@ -170,6 +170,57 @@ class LightGBMError(Exception):
pass


class _ConfigAliases(object):
    aliases = {"boosting": {"boosting",
                            "boosting_type",
                            "boost"},
               "categorical_feature": {"categorical_feature",
                                       "cat_feature",
                                       "categorical_column",
                                       "cat_column"},
               "early_stopping_round": {"early_stopping_round",
                                        "early_stopping_rounds",
                                        "early_stopping",
                                        "n_iter_no_change"},
               "eval_at": {"eval_at",
                           "ndcg_eval_at",
                           "ndcg_at",
                           "map_eval_at",
                           "map_at"},
               "header": {"header",
                          "has_header"},
               "machines": {"machines",
                            "workers",
                            "nodes"},
               "metric": {"metric",
                          "metrics",
                          "metric_types"},
               "num_class": {"num_class",
                             "num_classes"},
               "num_iterations": {"num_iterations",
                                  "num_iteration",
                                  "n_iter",
                                  "num_tree",
                                  "num_trees",
                                  "num_round",
                                  "num_rounds",
                                  "num_boost_round",
                                  "n_estimators"},
               "objective": {"objective",
                             "objective_type",
                             "app",
                             "application"},
               "verbosity": {"verbosity",
                             "verbose"}}

    @classmethod
    def get(cls, *args):
        ret = set()
        for i in args:
            ret |= cls.aliases.get(i, set())
        return ret


MAX_INT32 = (1 << 31) - 1

"""Macro definition of data type in C API of LightGBM"""
@@ -741,8 +792,7 @@ def _set_init_score_by_predictor(self, predictor, data, used_indices=None):
data_has_header = False
if isinstance(data, string_type):
# check data has header or not
if self.params.get("has_header", False) or self.params.get("header", False):
data_has_header = True
data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header"))
init_score = predictor.predict(data,
raw_score=True,
data_has_header=data_has_header,
@@ -793,7 +843,7 @@ def _lazy_init(self, data, label=None, reference=None,
'Please use {0} argument of the Dataset constructor to pass this parameter.'
.format(key))
# user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params["verbose"] = -1
# get categorical features
if categorical_feature is not None:
@@ -810,10 +860,10 @@
raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
.format(type(name).__name__, name))
if categorical_indices:
if "categorical_feature" in params or "categorical_column" in params:
warnings.warn('categorical_feature in param dict is overridden.')
params.pop("categorical_feature", None)
params.pop("categorical_column", None)
for cat_alias in _ConfigAliases.get("categorical_feature"):
if cat_alias in params:
warnings.warn('{} in param dict is overridden.'.format(cat_alias))
params.pop(cat_alias, None)
params['categorical_column'] = sorted(categorical_indices)

params_str = param_dict_to_str(params)
@@ -1259,7 +1309,9 @@ def _set_predictor(self, predictor):
"""
if predictor is self._predictor:
return self
if self.data is not None or (self.used_indices is not None and self.reference is not None and self.reference.data is not None):
if self.data is not None or (self.used_indices is not None
and self.reference is not None
and self.reference.data is not None):
self._predictor = predictor
return self._free_handle()
else:
@@ -1634,7 +1686,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
self.best_score = {}
params = {} if params is None else copy.deepcopy(params)
# user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params["verbose"] = -1
if train_set is not None:
# Training task
@@ -1643,7 +1695,7 @@
.format(type(train_set).__name__))
params_str = param_dict_to_str(params)
# set network if necessary
for alias in ["machines", "workers", "nodes"]:
for alias in _ConfigAliases.get("machines"):
if alias in params:
machines = params[alias]
if isinstance(machines, string_type):
@@ -1863,7 +1915,7 @@ def reset_parameter(self, params):
self : Booster
Booster with new parameters.
"""
if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')):
if any(metric_alias in params for metric_alias in _ConfigAliases.get("metric")):
self.__need_reload_eval_info = True
params_str = param_dict_to_str(params)
if params_str:
11 changes: 4 additions & 7 deletions python-package/lightgbm/callback.py
@@ -7,6 +7,7 @@
import warnings
from operator import gt, lt

from .basic import _ConfigAliases
from .compat import range_


@@ -130,9 +131,7 @@ def reset_parameter(**kwargs):
def _callback(env):
new_parameters = {}
for key, value in kwargs.items():
if key in ['num_class', 'num_classes',
'boosting', 'boost', 'boosting_type',
'metric', 'metrics', 'metric_types']:
if key in _ConfigAliases.get("num_class", "boosting", "metric"):
raise RuntimeError("Cannot reset {} during training".format(repr(key)))
if isinstance(value, list):
if len(value) != env.end_iteration - env.begin_iteration:
@@ -184,10 +183,8 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
first_metric = ['']

def _init(env):
enabled[0] = not any((boost_alias in env.params
and env.params[boost_alias] == 'dart') for boost_alias in ('boosting',
'boosting_type',
'boost'))
enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
in _ConfigAliases.get("boosting"))
if not enabled[0]:
warnings.warn('Early stopping is not available in dart mode')
return
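
The rewritten check reads the boosting setting under any of its aliases and disables early stopping when it resolves to 'dart'. A standalone sketch of the same logic, with `env.params` replaced by a plain hypothetical dict and the helper name invented for illustration:

```python
from lightgbm.basic import _ConfigAliases

def early_stopping_enabled(params):
    # Early stopping is switched off when any boosting alias selects 'dart'.
    return not any(params.get(boost_alias, "") == "dart"
                   for boost_alias in _ConfigAliases.get("boosting"))

early_stopping_enabled({"boosting_type": "dart"})  # False
early_stopping_enabled({"boosting": "gbdt"})       # True
early_stopping_enabled({})                         # True: nothing set, so not 'dart'
```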
20 changes: 12 additions & 8 deletions python-package/lightgbm/engine.py
@@ -11,7 +11,7 @@
import numpy as np

from . import callback
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor
from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor
from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold,
string_type, integer_types, range_, zip_)

@@ -140,14 +140,15 @@ def train(params, train_set, num_boost_round=100,
# create predictor first
params = copy.deepcopy(params)
if fobj is not None:
for obj_alias in _ConfigAliases.get("objective"):
params.pop(obj_alias, None)
params['objective'] = 'none'
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
"num_round", "num_rounds", "num_boost_round", "n_estimators"]:
for alias in _ConfigAliases.get("num_iterations"):
if alias in params:
num_boost_round = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
break
for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params:
early_stopping_rounds = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -314,7 +315,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
flatted_group = np.zeros(num_data, dtype=np.int32)
folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
else:
if 'objective' in params and params['objective'] == 'lambdarank':
if any(params.get(obj_alias, "") == "lambdarank" for obj_alias in _ConfigAliases.get("objective")):
if not SKLEARN_INSTALLED:
raise LightGBMError('Scikit-learn is required for lambdarank cv.')
# lambdarank task, split according to groups
@@ -495,14 +496,15 @@ def cv(params, train_set, num_boost_round=100,

params = copy.deepcopy(params)
if fobj is not None:
for obj_alias in _ConfigAliases.get("objective"):
params.pop(obj_alias, None)
params['objective'] = 'none'
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
"num_round", "num_rounds", "num_boost_round", "n_estimators"]:
for alias in _ConfigAliases.get("num_iterations"):
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
num_boost_round = params.pop(alias)
break
for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
early_stopping_rounds = params.pop(alias)
@@ -523,6 +525,8 @@
.set_categorical_feature(categorical_feature)

if metrics is not None:
for metric_alias in _ConfigAliases.get("metric"):
params.pop(metric_alias, None)
params['metric'] = metrics

results = collections.defaultdict(list)
12 changes: 9 additions & 3 deletions python-package/lightgbm/sklearn.py
@@ -5,7 +5,7 @@

import numpy as np

from .basic import Dataset, LightGBMError
from .basic import Dataset, LightGBMError, _ConfigAliases
from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength,
@@ -489,15 +489,21 @@ def fit(self, X, y,
evals_result = {}
params = self.get_params()
# user can set verbose with kwargs, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and self.silent:
if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and self.silent:
params['verbose'] = -1
params.pop('silent', None)
params.pop('importance_type', None)
params.pop('n_estimators', None)
params.pop('class_weight', None)
for alias in _ConfigAliases.get('objective'):
params.pop(alias, None)
if self._n_classes is not None and self._n_classes > 2:
for alias in _ConfigAliases.get('num_class'):
params.pop(alias, None)
params['num_class'] = self._n_classes
if hasattr(self, '_eval_at'):
for alias in _ConfigAliases.get('eval_at'):
params.pop(alias, None)
params['eval_at'] = self._eval_at
params['objective'] = self._objective
if self._fobj:
@@ -518,7 +524,7 @@
elif isinstance(self, LGBMRanker):
original_metric = "ndcg"
# overwrite default metric by explicitly set metric
for metric_alias in ['metric', 'metrics', 'metric_types']:
for metric_alias in _ConfigAliases.get("metric"):
if metric_alias in params:
original_metric = params.pop(metric_alias)
# concatenate metric from params (or default if not provided in params) and eval_metric