[python] Allow to register custom logger in Python-package (#3820)
* centralize Python-package logging in one place

* continue

* fix test name

* removed unused import

* enhance test

* fix lint

* hotfix test

* workaround for GPU test

* remove custom logger from Dask-package

* replace one log func with flags by multiple funcs
StrikerRUS authored Jan 24, 2021
1 parent ac706e1 commit b7ccdaf
Showing 9 changed files with 203 additions and 53 deletions.
8 changes: 8 additions & 0 deletions docs/Python-API.rst
@@ -55,3 +55,11 @@ Plotting
    plot_metric
    plot_tree
    create_tree_digraph
+
+Utilities
+---------
+
+.. autosummary::
+    :toctree: pythonapi/
+
+    register_logger
3 changes: 2 additions & 1 deletion python-package/lightgbm/__init__.py
@@ -3,7 +3,7 @@
Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
"""
-from .basic import Booster, Dataset
+from .basic import Booster, Dataset, register_logger
from .callback import (early_stopping, print_evaluation, record_evaluation,
reset_parameter)
from .engine import cv, train, CVBooster
@@ -28,6 +28,7 @@
__version__ = version_file.read().strip()

__all__ = ['Dataset', 'Booster', 'CVBooster',
+           'register_logger',
'train', 'cv',
'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker',
'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping',
99 changes: 78 additions & 21 deletions python-package/lightgbm/basic.py
@@ -5,8 +5,10 @@
import json
import os
import warnings
-from tempfile import NamedTemporaryFile
from collections import OrderedDict
+from functools import wraps
+from logging import Logger
+from tempfile import NamedTemporaryFile

import numpy as np
import scipy.sparse
@@ -15,9 +17,64 @@
from .libpath import find_lib_path


+class _DummyLogger:
+    def info(self, msg):
+        print(msg)
+
+    def warning(self, msg):
+        warnings.warn(msg, stacklevel=3)
+
+
+_LOGGER = _DummyLogger()
+
+
+def register_logger(logger):
+    """Register custom logger.
+
+    Parameters
+    ----------
+    logger : logging.Logger
+        Custom logger.
+    """
+    if not isinstance(logger, Logger):
+        raise TypeError("Logger should inherit logging.Logger class")
+    global _LOGGER
+    _LOGGER = logger
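
A minimal usage sketch of the new entry point (the logger name, level, handler and format below are illustrative choices, not part of the package):

import logging

import lightgbm as lgb

logger = logging.getLogger("lightgbm_custom")
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()  # any handler works; a FileHandler is also common
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
logger.addHandler(handler)

lgb.register_logger(logger)
# From here on, warnings from Dataset/Booster and output from the native
# library are routed through logger.warning() / logger.info().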


+def _normalize_native_string(func):
+    """Join log messages from native library which come by chunks."""
+    msg_normalized = []
+
+    @wraps(func)
+    def wrapper(msg):
+        nonlocal msg_normalized
+        if msg.strip() == '':
+            msg = ''.join(msg_normalized)
+            msg_normalized = []
+            return func(msg)
+        else:
+            msg_normalized.append(msg)
+
+    return wrapper
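
The native library delivers one logical message as several chunks and terminates it with a whitespace-only chunk; the decorator buffers chunks until that terminator arrives. A self-contained demonstration, assuming the decorator above is in scope (the chunk values are made up for illustration):

received = []

@_normalize_native_string
def sink(msg):
    received.append(msg)

# Simulate how one message might arrive in pieces, ending with "\n".
for chunk in ["[LightGBM] ", "[Info] ", "Finished loading model", "\n"]:
    sink(chunk)

assert received == ["[LightGBM] [Info] Finished loading model"]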


+def _log_info(msg):
+    _LOGGER.info(msg)
+
+
+def _log_warning(msg):
+    _LOGGER.warning(msg)
+
+
+@_normalize_native_string
+def _log_native(msg):
+    _LOGGER.info(msg)
+
+
def _log_callback(msg):
-    """Redirect logs from native library into Python console."""
-    print("{0:s}".format(msg.decode('utf-8')), end='')
+    """Redirect logs from native library into Python."""
+    _log_native("{0:s}".format(msg.decode('utf-8')))


def _load_lib():
@@ -329,8 +386,8 @@ def convert_from_sliced_object(data):
"""Fix the memory of multi-dimensional sliced object."""
if isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray):
if not data.flags.c_contiguous:
warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended "
"due to it will double the peak memory cost in LightGBM.")
_log_warning("Usage of np.ndarray subset (sliced data) is not recommended "
"due to it will double the peak memory cost in LightGBM.")
return np.copy(data)
return data

@@ -620,7 +677,7 @@ def predict(self, data, start_iteration=0, num_iteration=-1,
preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type)
else:
try:
-                warnings.warn('Converting data to scipy sparse matrix.')
+                _log_warning('Converting data to scipy sparse matrix.')
csr = scipy.sparse.csr_matrix(data)
except BaseException:
raise TypeError('Cannot predict data for type {}'.format(type(data).__name__))
@@ -1103,9 +1160,9 @@ def _lazy_init(self, data, label=None, reference=None,
.co_varnames[:getattr(self.__class__, '_lazy_init').__code__.co_argcount])
for key, _ in params.items():
if key in args_names:
-                warnings.warn('{0} keyword has been found in `params` and will be ignored.\n'
-                              'Please use {0} argument of the Dataset constructor to pass this parameter.'
-                              .format(key))
+                _log_warning('{0} keyword has been found in `params` and will be ignored.\n'
+                             'Please use {0} argument of the Dataset constructor to pass this parameter.'
+                             .format(key))
# user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params["verbose"] = -1
@@ -1126,7 +1183,7 @@ def _lazy_init(self, data, label=None, reference=None,
if categorical_indices:
for cat_alias in _ConfigAliases.get("categorical_feature"):
if cat_alias in params:
-                    warnings.warn('{} in param dict is overridden.'.format(cat_alias))
+                    _log_warning('{} in param dict is overridden.'.format(cat_alias))
params.pop(cat_alias, None)
params['categorical_column'] = sorted(categorical_indices)

@@ -1172,7 +1229,7 @@ def _lazy_init(self, data, label=None, reference=None,
self.set_group(group)
if isinstance(predictor, _InnerPredictor):
if self._predictor is None and init_score is not None:
warnings.warn("The init_score will be overridden by the prediction of init_model.")
_log_warning("The init_score will be overridden by the prediction of init_model.")
self._set_init_score_by_predictor(predictor, data)
elif init_score is not None:
self.set_init_score(init_score)
@@ -1314,7 +1371,7 @@ def construct(self):
if self.reference is not None:
reference_params = self.reference.get_params()
if self.get_params() != reference_params:
-                    warnings.warn('Overriding the parameters from Reference Dataset.')
+                    _log_warning('Overriding the parameters from Reference Dataset.')
self._update_params(reference_params)
if self.used_indices is None:
# create valid
@@ -1583,11 +1640,11 @@ def set_categorical_feature(self, categorical_feature):
self.categorical_feature = categorical_feature
return self._free_handle()
elif categorical_feature == 'auto':
-            warnings.warn('Using categorical_feature in Dataset.')
+            _log_warning('Using categorical_feature in Dataset.')
return self
else:
-            warnings.warn('categorical_feature in Dataset is overridden.\n'
-                          'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
+            _log_warning('categorical_feature in Dataset is overridden.\n'
+                         'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
self.categorical_feature = categorical_feature
return self._free_handle()
else:
@@ -1840,8 +1897,8 @@ def get_data(self):
elif isinstance(self.data, DataTable):
self.data = self.data[self.used_indices, :]
else:
warnings.warn("Cannot subset {} type of raw data.\n"
"Returning original raw data".format(type(self.data).__name__))
_log_warning("Cannot subset {} type of raw data.\n"
"Returning original raw data".format(type(self.data).__name__))
self.need_slice = False
if self.data is None:
raise LightGBMError("Cannot call `get_data` after freed raw data, "
@@ -2011,10 +2068,10 @@ def add_features_from(self, other):
old_self_data_type)
err_msg += ("Set free_raw_data=False when construct Dataset to avoid this"
if was_none else "Freeing raw data")
-            warnings.warn(err_msg)
+            _log_warning(err_msg)
self.feature_name = self.get_feature_name()
warnings.warn("Reseting categorical features.\n"
"You can set new categorical features via ``set_categorical_feature`` method")
_log_warning("Reseting categorical features.\n"
"You can set new categorical features via ``set_categorical_feature`` method")
self.categorical_feature = "auto"
self.pandas_categorical = None
return self
@@ -2834,7 +2891,7 @@ def model_from_string(self, model_str, verbose=True):
self.handle,
ctypes.byref(out_num_class)))
if verbose:
-            print('Finished loading model, total used %d iterations' % int(out_num_iterations.value))
+            _log_info('Finished loading model, total used %d iterations' % int(out_num_iterations.value))
self.__num_class = out_num_class.value
self.pandas_categorical = _load_pandas_categorical(model_str=model_str)
return self
18 changes: 8 additions & 10 deletions python-package/lightgbm/callback.py
@@ -1,10 +1,9 @@
# coding: utf-8
"""Callbacks library."""
import collections
-import warnings
from operator import gt, lt

-from .basic import _ConfigAliases
+from .basic import _ConfigAliases, _log_info, _log_warning


class EarlyStopException(Exception):
@@ -67,7 +66,7 @@ def print_evaluation(period=1, show_stdv=True):
def _callback(env):
if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
-            print('[%d]\t%s' % (env.iteration + 1, result))
+            _log_info('[%d]\t%s' % (env.iteration + 1, result))
_callback.order = 10
return _callback

@@ -180,15 +179,14 @@ def _init(env):
enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
in _ConfigAliases.get("boosting"))
if not enabled[0]:
-            warnings.warn('Early stopping is not available in dart mode')
+            _log_warning('Early stopping is not available in dart mode')
return
if not env.evaluation_result_list:
raise ValueError('For early stopping, '
'at least one dataset and eval metric is required for evaluation')

if verbose:
msg = "Training until validation scores don't improve for {} rounds"
print(msg.format(stopping_rounds))
_log_info("Training until validation scores don't improve for {} rounds".format(stopping_rounds))

# split is needed for "<dataset type> <metric>" case (e.g. "train l1")
first_metric[0] = env.evaluation_result_list[0][1].split(" ")[-1]
@@ -205,10 +203,10 @@ def _init(env):
def _final_iteration_check(env, eval_name_splitted, i):
if env.iteration == env.end_iteration - 1:
if verbose:
-                _log_info('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
if first_metric_only:
print("Evaluated only: {}".format(eval_name_splitted[-1]))
_log_info("Evaluated only: {}".format(eval_name_splitted[-1]))
raise EarlyStopException(best_iter[i], best_score_list[i])

def _callback(env):
@@ -232,10 +230,10 @@ def _callback(env):
continue # train data for lgb.cv or sklearn wrapper (underlying lgb.train)
elif env.iteration - best_iter[i] >= stopping_rounds:
if verbose:
-                        print('Early stopping, best iteration is:\n[%d]\t%s' % (
+                        _log_info('Early stopping, best iteration is:\n[%d]\t%s' % (
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
if first_metric_only:
print("Evaluated only: {}".format(eval_name_splitted[-1]))
_log_info("Evaluated only: {}".format(eval_name_splitted[-1]))
raise EarlyStopException(best_iter[i], best_score_list[i])
_final_iteration_check(env, eval_name_splitted, i)
_callback.order = 30
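
The order attribute (10 on print_evaluation above, 30 here) is the key the training loop uses to sequence callbacks each iteration, so evaluation results are logged before early stopping can raise. A simplified sketch of that convention (the real sorting lives in engine.py):

from operator import attrgetter

def _run_callbacks(callbacks, env):
    # Lower order runs first: print_evaluation (10) before early_stopping (30).
    for cb in sorted(callbacks, key=attrgetter('order')):
        cb(env)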
9 changes: 3 additions & 6 deletions python-package/lightgbm/dask.py
@@ -6,7 +6,6 @@
It is based on dask-lightgbm, which was based on dask-xgboost.
"""
-import logging
import socket
from collections import defaultdict
from copy import deepcopy
@@ -22,11 +21,9 @@
from dask import delayed
from dask.distributed import Client, default_client, get_worker, wait

-from .basic import _ConfigAliases, _LIB, _safe_call
+from .basic import _ConfigAliases, _LIB, _log_warning, _safe_call
from .sklearn import LGBMClassifier, LGBMRegressor, LGBMRanker

-logger = logging.getLogger(__name__)


def _find_open_port(worker_ip: str, local_listen_port: int, ports_to_skip: Iterable[int]) -> int:
"""Find an open port.
@@ -257,10 +254,10 @@ def _train(client, data, label, params, model_factory, sample_weight=None, group
'voting_parallel'
}
if tree_learner is None:
-        logger.warning('Parameter tree_learner not set. Using "data" as default')
+        _log_warning('Parameter tree_learner not set. Using "data" as default')
params['tree_learner'] = 'data'
elif tree_learner.lower() not in allowed_tree_learners:
-        logger.warning('Parameter tree_learner set to %s, which is not allowed. Using "data" as default' % tree_learner)
+        _log_warning('Parameter tree_learner set to %s, which is not allowed. Using "data" as default' % tree_learner)
params['tree_learner'] = 'data'

local_listen_port = 12400
11 changes: 5 additions & 6 deletions python-package/lightgbm/engine.py
@@ -2,13 +2,12 @@
"""Library with training routines of LightGBM."""
import collections
import copy
-import warnings
from operator import attrgetter

import numpy as np

from . import callback
-from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor
+from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor, _log_warning
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold


@@ -146,12 +145,12 @@ def train(params, train_set, num_boost_round=100,
for alias in _ConfigAliases.get("num_iterations"):
if alias in params:
num_boost_round = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
_log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
params["num_iterations"] = num_boost_round
for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params:
early_stopping_rounds = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
_log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
params["early_stopping_round"] = early_stopping_rounds
first_metric_only = params.get('first_metric_only', False)

@@ -525,12 +524,12 @@ def cv(params, train_set, num_boost_round=100,
params['objective'] = 'none'
for alias in _ConfigAliases.get("num_iterations"):
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
_log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
num_boost_round = params.pop(alias)
params["num_iterations"] = num_boost_round
for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
_log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
early_stopping_rounds = params.pop(alias)
params["early_stopping_round"] = early_stopping_rounds
first_metric_only = params.get('first_metric_only', False)
6 changes: 2 additions & 4 deletions python-package/lightgbm/plotting.py
@@ -1,12 +1,11 @@
# coding: utf-8
"""Plotting library."""
-import warnings
from copy import deepcopy
from io import BytesIO

import numpy as np

-from .basic import Booster
+from .basic import Booster, _log_warning
from .compat import MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED
from .sklearn import LGBMModel

@@ -326,8 +325,7 @@ def plot_metric(booster, metric=None, dataset_names=None,
num_metric = len(metrics_for_one)
if metric is None:
if num_metric > 1:
msg = "More than one metric available, picking one to plot."
warnings.warn(msg, stacklevel=2)
_log_warning("More than one metric available, picking one to plot.")
metric, results = metrics_for_one.popitem()
else:
if metric not in metrics_for_one:
