[python] handle params aliases centralized #2489

Merged 2 commits on Oct 22, 2019
Changes from 1 commit
65 changes: 54 additions & 11 deletions python-package/lightgbm/basic.py
@@ -196,6 +196,48 @@ class LightGBMError(Exception):
"feature_penalty": C_API_DTYPE_FLOAT64,
"monotone_constraints": C_API_DTYPE_INT8}

CONFIG_ALIASES = {"boosting": {"boosting",
Collaborator: Can we please turn this into a class? class params

Collaborator (Author): Sorry, not sure that I understood you correctly. Please take a look.

"boosting_type",
"boost"},
"categorical_feature": {"categorical_feature",
"cat_feature",
"categorical_column",
"cat_column"},
"early_stopping_round": {"early_stopping_round",
"early_stopping_rounds",
"early_stopping",
"n_iter_no_change"},
"eval_at": {"eval_at",
"ndcg_eval_at",
"ndcg_at",
"map_eval_at",
"map_at"},
"header": {"header",
"has_header"},
"machines": {"machines",
"workers",
"nodes"},
"metric": {"metric",
"metrics",
"metric_types"},
"num_class": {"num_class",
"num_classes"},
"num_iterations": {"num_iterations",
"num_iteration",
"n_iter",
"num_tree",
"num_trees",
"num_round",
"num_rounds",
"num_boost_round",
"n_estimators"},
"objective": {"objective",
"objective_type",
"app",
"application"},
"verbosity": {"verbosity",
"verbose"}}


def convert_from_sliced_object(data):
"""Fix the memory of multi-dimensional sliced object."""
@@ -741,8 +783,7 @@ def _set_init_score_by_predictor(self, predictor, data, used_indices=None):
data_has_header = False
if isinstance(data, string_type):
# check data has header or not
if self.params.get("has_header", False) or self.params.get("header", False):
data_has_header = True
data_has_header = any(self.params.get(alias, False) for alias in CONFIG_ALIASES["header"])
init_score = predictor.predict(data,
raw_score=True,
data_has_header=data_has_header,
@@ -793,7 +834,7 @@ def _lazy_init(self, data, label=None, reference=None,
'Please use {0} argument of the Dataset constructor to pass this parameter.'
.format(key))
# user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and silent:
params["verbose"] = -1
# get categorical features
if categorical_feature is not None:
@@ -810,10 +851,10 @@ def _lazy_init(self, data, label=None, reference=None,
raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
.format(type(name).__name__, name))
if categorical_indices:
if "categorical_feature" in params or "categorical_column" in params:
warnings.warn('categorical_feature in param dict is overridden.')
params.pop("categorical_feature", None)
params.pop("categorical_column", None)
for cat_alias in CONFIG_ALIASES["categorical_feature"]:
if cat_alias in params:
warnings.warn('{} in param dict is overridden.'.format(cat_alias))
params.pop(cat_alias, None)
params['categorical_column'] = sorted(categorical_indices)

params_str = param_dict_to_str(params)
@@ -1259,7 +1300,9 @@ def _set_predictor(self, predictor):
"""
if predictor is self._predictor:
return self
if self.data is not None or (self.used_indices is not None and self.reference is not None and self.reference.data is not None):
if self.data is not None or (self.used_indices is not None
and self.reference is not None
and self.reference.data is not None):
self._predictor = predictor
return self._free_handle()
else:
@@ -1634,7 +1677,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
self.best_score = {}
params = {} if params is None else copy.deepcopy(params)
# user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and silent:
params["verbose"] = -1
if train_set is not None:
# Training task
@@ -1643,7 +1686,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
.format(type(train_set).__name__))
params_str = param_dict_to_str(params)
# set network if necessary
for alias in ["machines", "workers", "nodes"]:
for alias in CONFIG_ALIASES["machines"]:
if alias in params:
machines = params[alias]
if isinstance(machines, string_type):
@@ -1863,7 +1906,7 @@ def reset_parameter(self, params):
self : Booster
Booster with new parameters.
"""
if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')):
if any(metric_alias in params for metric_alias in CONFIG_ALIASES["metric"]):
self.__need_reload_eval_info = True
params_str = param_dict_to_str(params)
if params_str:
13 changes: 6 additions & 7 deletions python-package/lightgbm/callback.py
@@ -7,6 +7,7 @@
import warnings
from operator import gt, lt

from .basic import CONFIG_ALIASES
from .compat import range_


@@ -130,9 +131,9 @@ def reset_parameter(**kwargs):
def _callback(env):
new_parameters = {}
for key, value in kwargs.items():
if key in ['num_class', 'num_classes',
'boosting', 'boost', 'boosting_type',
'metric', 'metrics', 'metric_types']:
if key in (CONFIG_ALIASES["num_class"]
| CONFIG_ALIASES["boosting"]
| CONFIG_ALIASES["metric"]):
raise RuntimeError("Cannot reset {} during training".format(repr(key)))
if isinstance(value, list):
if len(value) != env.end_iteration - env.begin_iteration:
@@ -184,10 +185,8 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
first_metric = ['']

def _init(env):
enabled[0] = not any((boost_alias in env.params
and env.params[boost_alias] == 'dart') for boost_alias in ('boosting',
'boosting_type',
'boost'))
enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
in CONFIG_ALIASES["boosting"])
if not enabled[0]:
warnings.warn('Early stopping is not available in dart mode')
return
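As a usage note on the callback.py changes above (a rough sketch; argument values are made up): reset_parameter() now rejects any alias of num_class, boosting, or metric, and early_stopping() checks every boosting alias when deciding whether dart mode disables it.

```python
import lightgbm as lgb

# Allowed: learning_rate is not a protected alias, so it can be rescheduled.
cb_ok = lgb.reset_parameter(learning_rate=lambda iteration: 0.1 * (0.99 ** iteration))

# Not allowed: "metrics" is a metric alias, so the callback raises RuntimeError
# once training invokes it.
cb_bad = lgb.reset_parameter(metrics="l2")

# Disabled with a warning if any boosting alias is set to "dart",
# e.g. params = {"boost": "dart"}.
cb_es = lgb.early_stopping(stopping_rounds=5)
```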
20 changes: 12 additions & 8 deletions python-package/lightgbm/engine.py
@@ -11,7 +11,7 @@
import numpy as np

from . import callback
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor, CONFIG_ALIASES
from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold,
string_type, integer_types, range_, zip_)

@@ -140,14 +140,15 @@ def train(params, train_set, num_boost_round=100,
# create predictor first
params = copy.deepcopy(params)
if fobj is not None:
for obj_alias in CONFIG_ALIASES["objective"]:
params.pop(obj_alias, None)
params['objective'] = 'none'
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
"num_round", "num_rounds", "num_boost_round", "n_estimators"]:
for alias in CONFIG_ALIASES["num_iterations"]:
if alias in params:
num_boost_round = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
break
for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
for alias in CONFIG_ALIASES["early_stopping_round"]:
if alias in params:
early_stopping_rounds = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -314,7 +315,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
flatted_group = np.zeros(num_data, dtype=np.int32)
folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
else:
if 'objective' in params and params['objective'] == 'lambdarank':
if any(params.get(obj_alias, "") == "lambdarank" for obj_alias in CONFIG_ALIASES["objective"]):
if not SKLEARN_INSTALLED:
raise LightGBMError('Scikit-learn is required for lambdarank cv.')
# lambdarank task, split according to groups
@@ -495,14 +496,15 @@ def cv(params, train_set, num_boost_round=100,

params = copy.deepcopy(params)
if fobj is not None:
for obj_alias in CONFIG_ALIASES["objective"]:
params.pop(obj_alias, None)
params['objective'] = 'none'
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
"num_round", "num_rounds", "num_boost_round", "n_estimators"]:
for alias in CONFIG_ALIASES["num_iterations"]:
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
num_boost_round = params.pop(alias)
break
for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]:
for alias in CONFIG_ALIASES["early_stopping_round"]:
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
early_stopping_rounds = params.pop(alias)
@@ -523,6 +525,8 @@
.set_categorical_feature(categorical_feature)

if metrics is not None:
for metric_alias in CONFIG_ALIASES["metric"]:
params.pop(metric_alias, None)
params['metric'] = metrics

results = collections.defaultdict(list)
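To illustrate the effect of the alias loops above in train() and cv(): an alias found in params takes precedence over the corresponding keyword argument and emits a warning. A small sketch with made-up data:

```python
import numpy as np
import lightgbm as lgb

X = np.random.rand(200, 5)
y = np.random.rand(200)
dtrain = lgb.Dataset(X, label=y)

# "n_estimators" is a num_iterations alias, so it overrides num_boost_round=100
# (with a warning); "n_iter_no_change" would likewise override early_stopping_rounds.
params = {"objective": "regression", "n_estimators": 20, "verbose": -1}
booster = lgb.train(params, dtrain, num_boost_round=100)
print(booster.current_iteration())  # 20, not 100
```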
12 changes: 9 additions & 3 deletions python-package/lightgbm/sklearn.py
@@ -5,7 +5,7 @@

import numpy as np

from .basic import Dataset, LightGBMError
from .basic import Dataset, LightGBMError, CONFIG_ALIASES
from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength,
@@ -489,15 +489,21 @@ def fit(self, X, y,
evals_result = {}
params = self.get_params()
# user can set verbose with kwargs, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and self.silent:
if not any(verbose_alias in params for verbose_alias in CONFIG_ALIASES["verbosity"]) and self.silent:
params['verbose'] = -1
params.pop('silent', None)
params.pop('importance_type', None)
params.pop('n_estimators', None)
params.pop('class_weight', None)
for alias in CONFIG_ALIASES['objective']:
params.pop(alias, None)
if self._n_classes is not None and self._n_classes > 2:
for alias in CONFIG_ALIASES['num_class']:
params.pop(alias, None)
params['num_class'] = self._n_classes
if hasattr(self, '_eval_at'):
for alias in CONFIG_ALIASES['eval_at']:
params.pop(alias, None)
params['eval_at'] = self._eval_at
params['objective'] = self._objective
if self._fobj:
@@ -518,7 +524,7 @@ def fit(self, X, y,
elif isinstance(self, LGBMRanker):
original_metric = "ndcg"
# overwrite default metric by explicitly set metric
for metric_alias in ['metric', 'metrics', 'metric_types']:
for metric_alias in CONFIG_ALIASES["metric"]:
if metric_alias in params:
original_metric = params.pop(metric_alias)
# concatenate metric from params (or default if not provided in params) and eval_metric
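Finally, a sketch of the intended behaviour of the fit() changes above, assuming typical wrapper defaults (data and values are made up): aliases of objective, num_class, and eval_at passed through **kwargs are dropped in favour of the estimator's own state, while an explicitly passed metric alias still overrides the default metric.

```python
import numpy as np
import lightgbm as lgb

X = np.random.rand(120, 4)
y = np.random.randint(0, 3, size=120)  # three classes

# "app" is an objective alias; fit() now pops every objective alias from the
# params dict and reinstates the estimator's objective, so the multiclass
# objective inferred from y is what actually gets trained.
clf = lgb.LGBMClassifier(app="binary")
clf.fit(X, y)
print(clf.objective_)  # "multiclass", not "binary"
```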