From c6199311dece08e6ac08778f60d7fc9c3d796b11 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 27 Aug 2021 22:47:39 +0300 Subject: [PATCH] [python][docs] Refer to string type as `str` and add commas in `list of ...` types (#4557) * Refer to string type as `str` and add commas in `list of ...` types * update `libpath.py` too --- .ci/get_workflow_status.py | 4 +- examples/python-guide/advanced_example.py | 4 +- examples/python-guide/logistic_regression.py | 4 +- examples/python-guide/sklearn_example.py | 4 +- helpers/check_dynamic_dependencies.py | 2 +- helpers/parameter_generator.py | 8 +- python-package/lightgbm/basic.py | 104 +++++++++---------- python-package/lightgbm/dask.py | 4 +- python-package/lightgbm/engine.py | 34 +++--- python-package/lightgbm/libpath.py | 2 +- python-package/lightgbm/plotting.py | 42 ++++---- python-package/lightgbm/sklearn.py | 36 +++---- 12 files changed, 124 insertions(+), 124 deletions(-) diff --git a/.ci/get_workflow_status.py b/.ci/get_workflow_status.py index e693ec637994..0572197c1ec2 100644 --- a/.ci/get_workflow_status.py +++ b/.ci/get_workflow_status.py @@ -22,7 +22,7 @@ def get_runs(trigger_phrase): Parameters ---------- - trigger_phrase : string + trigger_phrase : str Code phrase that triggers workflow. Returns @@ -55,7 +55,7 @@ def get_status(runs): Returns ------- - status : string + status : str The most recent status of workflow. Can be 'success', 'failure' or 'in-progress'. 
""" diff --git a/examples/python-guide/advanced_example.py b/examples/python-guide/advanced_example.py index 2707b9947231..7b4eebe7d013 100644 --- a/examples/python-guide/advanced_example.py +++ b/examples/python-guide/advanced_example.py @@ -147,7 +147,7 @@ def loglikelihood(preds, train_data): # self-defined eval metric -# f(preds: array, train_data: Dataset) -> name: string, eval_result: float, is_higher_better: bool +# f(preds: array, train_data: Dataset) -> name: str, eval_result: float, is_higher_better: bool # binary error # NOTE: when you do customized loss function, the default prediction value is margin # This may make built-in evaluation metric calculate wrong results @@ -171,7 +171,7 @@ def binary_error(preds, train_data): # another self-defined eval metric -# f(preds: array, train_data: Dataset) -> name: string, eval_result: float, is_higher_better: bool +# f(preds: array, train_data: Dataset) -> name: str, eval_result: float, is_higher_better: bool # accuracy # NOTE: when you do customized loss function, the default prediction value is margin # This may make built-in evaluation metric calculate wrong results diff --git a/examples/python-guide/logistic_regression.py b/examples/python-guide/logistic_regression.py index 22b6415d97f1..332b528145f8 100644 --- a/examples/python-guide/logistic_regression.py +++ b/examples/python-guide/logistic_regression.py @@ -57,9 +57,9 @@ def experiment(objective, label_type, data): Parameters ---------- - objective : string 'binary' or 'xentropy' + objective : {'binary', 'xentropy'} Objective function. - label_type : string 'binary' or 'probability' + label_type : {'binary', 'probability'} Type of the label. data : dict Data for training. 
diff --git a/examples/python-guide/sklearn_example.py b/examples/python-guide/sklearn_example.py index 215db6ed7ec5..d5147d57cafc 100644 --- a/examples/python-guide/sklearn_example.py +++ b/examples/python-guide/sklearn_example.py @@ -41,7 +41,7 @@ # self-defined eval metric -# f(y_true: array, y_pred: array) -> name: string, eval_result: float, is_higher_better: bool +# f(y_true: array, y_pred: array) -> name: str, eval_result: float, is_higher_better: bool # Root Mean Squared Logarithmic Error (RMSLE) def rmsle(y_true, y_pred): return 'RMSLE', np.sqrt(np.mean(np.power(np.log1p(y_pred) - np.log1p(y_true), 2))), False @@ -56,7 +56,7 @@ def rmsle(y_true, y_pred): # another self-defined eval metric -# f(y_true: array, y_pred: array) -> name: string, eval_result: float, is_higher_better: bool +# f(y_true: array, y_pred: array) -> name: str, eval_result: float, is_higher_better: bool # Relative Absolute Error (RAE) def rae(y_true, y_pred): return 'RAE', np.sum(np.abs(y_pred - y_true)) / np.sum(np.abs(np.mean(y_true) - y_true)), False diff --git a/helpers/check_dynamic_dependencies.py b/helpers/check_dynamic_dependencies.py index 6bcc408570e6..48418315b2b2 100644 --- a/helpers/check_dynamic_dependencies.py +++ b/helpers/check_dynamic_dependencies.py @@ -13,7 +13,7 @@ def check_dependicies(objdump_string: str) -> None: Parameters ---------- - objdump_string : string + objdump_string : str The dynamic symbol table entries of the file (result of `objdump -T` command). """ GLIBC_version = re.compile(r'0{16}[ \t]+GLIBC_(\d{1,2})[.](\d{1,3})[.]?\d{,3}[ \t]+') diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py index ba39f3d27e2c..ccb6675f37af 100644 --- a/helpers/parameter_generator.py +++ b/helpers/parameter_generator.py @@ -139,7 +139,7 @@ def parse_check( Parameters ---------- - check : string + check : str String representation of the constraint. reverse : bool, optional (default=False) Whether to reverse the sign of the constraint. 
@@ -171,16 +171,16 @@ def set_one_var_from_string( Parameters ---------- - name : string + name : str Name of the parameter. - param_type : string + param_type : str Type of the parameter. checks : list Constraints of the parameter. Returns ------- - ret : string + ret : str Lines of auto config file with getting and checks of one parameter value. """ ret = "" diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 1490fbb7f4db..7c618743375c 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -684,7 +684,7 @@ def __init__(self, model_file=None, booster_handle=None, pred_parameter=None): Parameters ---------- - model_file : string, pathlib.Path or None, optional (default=None) + model_file : str, pathlib.Path or None, optional (default=None) Path to the model file. booster_handle : object or None, optional (default=None) Handle of Booster. @@ -742,9 +742,9 @@ def predict(self, data, start_iteration=0, num_iteration=-1, Parameters ---------- - data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse + data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse Data source for prediction. - When data type is string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). + If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). start_iteration : int, optional (default=0) Start index of the iteration to predict. num_iteration : int, optional (default=-1) @@ -1130,9 +1130,9 @@ def __init__(self, data, label=None, reference=None, Parameters ---------- - data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays + data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays Data source of Dataset. 
- If string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. + If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None) Label of the data. reference : Dataset or None, optional (default=None) @@ -1149,13 +1149,13 @@ def __init__(self, data, label=None, reference=None, Init score for Dataset. silent : bool, optional (default=False) Whether to print messages during construction. - feature_name : list of strings or 'auto', optional (default="auto") + feature_name : list of str, or 'auto', optional (default="auto") Feature names. If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of strings or int, or 'auto', optional (default="auto") + categorical_feature : list of str or int, or 'auto', optional (default="auto") Categorical features. If list of int, interpreted as indices. - If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). + If list of str, interpreted as feature names (need to specify ``feature_name`` as well). If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. All values in categorical features should be less than int32 max value (2147483647). Large values could be memory consuming. Consider using consecutive integers starting from zero. @@ -1774,9 +1774,9 @@ def create_valid(self, data, label=None, weight=None, group=None, Parameters ---------- - data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays + data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays Data source of Dataset. 
- If string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. + If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None) Label of the data. weight : list, numpy 1-D array, pandas Series or None, optional (default=None) @@ -1841,7 +1841,7 @@ def save_binary(self, filename): Parameters ---------- - filename : string or pathlib.Path + filename : str or pathlib.Path Name of the output file. Returns @@ -1892,7 +1892,7 @@ def set_field(self, field_name, data): Parameters ---------- - field_name : string + field_name : str The field name of the information. data : list, numpy 1-D array, pandas Series or None The array of data to be set. @@ -1941,7 +1941,7 @@ def get_field(self, field_name): Parameters ---------- - field_name : string + field_name : str The field name of the information. Returns @@ -1978,7 +1978,7 @@ def set_categorical_feature(self, categorical_feature): Parameters ---------- - categorical_feature : list of int or strings + categorical_feature : list of int or str Names or indices of categorical features. Returns @@ -2056,7 +2056,7 @@ def set_feature_name(self, feature_name): Parameters ---------- - feature_name : list of strings + feature_name : list of str Feature names. Returns @@ -2241,7 +2241,7 @@ def get_data(self): Returns ------- - data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays or None + data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays or None Raw data used in the Dataset construction. 
""" if self.handle is None: @@ -2445,7 +2445,7 @@ def _dump_text(self, filename): Parameters ---------- - filename : string or pathlib.Path + filename : str or pathlib.Path Name of the output file. Returns @@ -2471,9 +2471,9 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None, Parameters for Booster. train_set : Dataset or None, optional (default=None) Training dataset. - model_file : string, pathlib.Path or None, optional (default=None) + model_file : str, pathlib.Path or None, optional (default=None) Path to the model file. - model_str : string or None, optional (default=None) + model_str : str or None, optional (default=None) Model will be loaded from this string. silent : bool, optional (default=False) Whether to print messages during construction. @@ -2650,7 +2650,7 @@ def set_network( Parameters ---------- - machines : list, set or string + machines : list, set or str Names of machines. local_listen_port : int, optional (default=12400) TCP listen port for local machines. @@ -2692,18 +2692,18 @@ def trees_to_dataframe(self): - ``tree_index`` : int64, which tree a node belongs to. 0-based, so a value of ``6``, for example, means "this node is in the 7th tree". - ``node_depth`` : int64, how far a node is from the root of the tree. The root node has a value of ``1``, its direct children are ``2``, etc. - - ``node_index`` : string, unique identifier for a node. - - ``left_child`` : string, ``node_index`` of the child node to the left of a split. ``None`` for leaf nodes. - - ``right_child`` : string, ``node_index`` of the child node to the right of a split. ``None`` for leaf nodes. - - ``parent_index`` : string, ``node_index`` of this node's parent. ``None`` for the root node. - - ``split_feature`` : string, name of the feature used for splitting. ``None`` for leaf nodes. + - ``node_index`` : str, unique identifier for a node. + - ``left_child`` : str, ``node_index`` of the child node to the left of a split. ``None`` for leaf nodes. 
+ - ``right_child`` : str, ``node_index`` of the child node to the right of a split. ``None`` for leaf nodes. + - ``parent_index`` : str, ``node_index`` of this node's parent. ``None`` for the root node. + - ``split_feature`` : str, name of the feature used for splitting. ``None`` for leaf nodes. - ``split_gain`` : float64, gain from adding this split to the tree. ``NaN`` for leaf nodes. - ``threshold`` : float64, value of the feature used to decide which side of the split a record will go down. ``NaN`` for leaf nodes. - - ``decision_type`` : string, logical operator describing how to compare a value to ``threshold``. + - ``decision_type`` : str, logical operator describing how to compare a value to ``threshold``. For example, ``split_feature = "Column_10", threshold = 15, decision_type = "<="`` means that records where ``Column_10 <= 15`` follow the left side of the split, otherwise follows the right side of the split. ``None`` for leaf nodes. - - ``missing_direction`` : string, split direction that missing values should go to. ``None`` for leaf nodes. - - ``missing_type`` : string, describes what types of values are treated as missing. + - ``missing_direction`` : str, split direction that missing values should go to. ``None`` for leaf nodes. + - ``missing_type`` : str, describes what types of values are treated as missing. - ``value`` : float64, predicted value for this leaf node, multiplied by the learning rate. - ``weight`` : float64 or int64, sum of hessian (second-order derivative of objective), summed over observations that fall in this node. - ``count`` : int64, number of records in the training data that fall into this node. @@ -2826,7 +2826,7 @@ def set_train_data_name(self, name): Parameters ---------- - name : string + name : str Name for the training Dataset. Returns @@ -2844,7 +2844,7 @@ def add_valid(self, data, name): ---------- data : Dataset Validation data. - name : string + name : str Name of validation data. 
Returns @@ -3086,7 +3086,7 @@ def eval(self, data, name, feval=None): ---------- data : Dataset Data for the evaluating. - name : string + name : str Name of the data. feval : callable or None, optional (default=None) Customized evaluation function. @@ -3099,7 +3099,7 @@ def eval(self, data, name, feval=None): e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset The evaluation dataset. - eval_name : string + eval_name : str The name of evaluation function (without whitespace). eval_result : float The eval result. @@ -3147,7 +3147,7 @@ def eval_train(self, feval=None): e.g. they are raw margin instead of probability of positive class for binary task in this case. train_data : Dataset The training dataset. - eval_name : string + eval_name : str The name of evaluation function (without whitespace). eval_result : float The eval result. @@ -3180,7 +3180,7 @@ def eval_valid(self, feval=None): e.g. they are raw margin instead of probability of positive class for binary task in this case. valid_data : Dataset The validation dataset. - eval_name : string + eval_name : str The name of evaluation function (without whitespace). eval_result : float The eval result. @@ -3203,7 +3203,7 @@ def save_model(self, filename, num_iteration=None, start_iteration=0, importance Parameters ---------- - filename : string or pathlib.Path + filename : str or pathlib.Path Filename to save Booster. num_iteration : int or None, optional (default=None) Index of the iteration that should be saved. @@ -3211,7 +3211,7 @@ def save_model(self, filename, num_iteration=None, start_iteration=0, importance If <= 0, all iterations are saved. start_iteration : int, optional (default=0) Start index of the iteration that should be saved. - importance_type : string, optional (default="split") + importance_type : str, optional (default="split") What type of feature importance should be saved. 
If "split", result contains numbers of times the feature is used in a model. If "gain", result contains total gains of splits which use the feature. @@ -3260,7 +3260,7 @@ def model_from_string(self, model_str, verbose=True): Parameters ---------- - model_str : string + model_str : str Model will be loaded from this string. verbose : bool, optional (default=True) Whether to print messages while loading model. @@ -3300,14 +3300,14 @@ def model_to_string(self, num_iteration=None, start_iteration=0, importance_type If <= 0, all iterations are saved. start_iteration : int, optional (default=0) Start index of the iteration that should be saved. - importance_type : string, optional (default="split") + importance_type : str, optional (default="split") What type of feature importance should be saved. If "split", result contains numbers of times the feature is used in a model. If "gain", result contains total gains of splits which use the feature. Returns ------- - str_repr : string + str_repr : str String representation of Booster. """ if num_iteration is None: @@ -3353,7 +3353,7 @@ def dump_model(self, num_iteration=None, start_iteration=0, importance_type='spl If <= 0, all iterations are dumped. start_iteration : int, optional (default=0) Start index of the iteration that should be dumped. - importance_type : string, optional (default="split") + importance_type : str, optional (default="split") What type of feature importance should be dumped. If "split", result contains numbers of times the feature is used in a model. If "gain", result contains total gains of splits which use the feature. @@ -3412,9 +3412,9 @@ def predict(self, data, start_iteration=0, num_iteration=None, Parameters ---------- - data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse + data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse Data source for prediction. 
- If string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). + If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). start_iteration : int, optional (default=0) Start index of the iteration to predict. If <= 0, starts from the first iteration. @@ -3440,7 +3440,7 @@ def predict(self, data, start_iteration=0, num_iteration=None, data_has_header : bool, optional (default=False) Whether the data has header. - Used only if data is string. + Used only if data is str. is_reshape : bool, optional (default=True) If True, result is reshaped to [nrow, ncol]. **kwargs @@ -3467,9 +3467,9 @@ def refit(self, data, label, decay_rate=0.9, **kwargs): Parameters ---------- - data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse + data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse Data source for refit. - If string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). + If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). label : list, numpy 1-D array or pandas Series / one-column DataFrame Label for refit. decay_rate : float, optional (default=0.9) @@ -3603,7 +3603,7 @@ def feature_importance(self, importance_type='split', iteration=None): Parameters ---------- - importance_type : string, optional (default="split") + importance_type : str, optional (default="split") How the importance is calculated. If "split", result contains numbers of times the feature is used in a model. If "gain", result contains total gains of splits which use the feature. @@ -3636,20 +3636,20 @@ def get_split_value_histogram(self, feature, bins=None, xgboost_style=False): Parameters ---------- - feature : int or string + feature : int or str The feature name or index the histogram is calculated for. If int, interpreted as index. - If string, interpreted as name. + If str, interpreted as name. .. 
warning:: Categorical features are not supported. - bins : int, string or None, optional (default=None) + bins : int, str or None, optional (default=None) The maximum number of bins. If None, or int and > number of unique split values and ``xgboost_style=True``, the number of bins equals number of unique split values. - If string, it should be one from the list of the supported values by ``numpy.histogram()`` function. + If str, it should be one from the list of the supported values by ``numpy.histogram()`` function. xgboost_style : bool, optional (default=False) Whether the returned result should be in the same form as it is in XGBoost. If False, the returned value is tuple of 2 numpy arrays as it is in ``numpy.histogram()`` function. @@ -3816,12 +3816,12 @@ def attr(self, key): Parameters ---------- - key : string + key : str The name of the attribute. Returns ------- - value : string or None + value : str or None The attribute value. Returns None if attribute does not exist. """ diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index dc79b1ac74a3..e6ee708681df 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -428,12 +428,12 @@ def _train( sum(group) = n_samples. For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. - eval_set : list of (X, y) tuples of Dask data collections or None, optional (default=None) + eval_set : list of (X, y) tuples of Dask data collections, or None, optional (default=None) List of (X, y) tuple pairs to use as validation sets. Note, that not all workers may receive chunks of every eval set within ``eval_set``. 
When the returned lightgbm estimator is not trained using any chunks of a particular eval set, its corresponding component of evals_result_ and best_score_ will be 'not_evaluated'. - eval_names : list of strings or None, optional (default=None) + eval_names : list of str, or None, optional (default=None) Names of eval_set. eval_sample_weight : list of Dask Arrays, Dask Series or None, optional (default=None) Weights for each validation set in eval_set. diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index ab9cfde7551c..fbaac0b6a7c9 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -50,9 +50,9 @@ def train( Data to be trained on. num_boost_round : int, optional (default=100) Number of boosting iterations. - valid_sets : list of Datasets or None, optional (default=None) + valid_sets : list of Datasets, or None, optional (default=None) List of data to be evaluated on during training. - valid_names : list of strings or None, optional (default=None) + valid_names : list of str, or None, optional (default=None) Names of ``valid_sets``. fobj : callable or None, optional (default=None) Customized objective function. @@ -76,7 +76,7 @@ def train( If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i] and you should group grad and hess in this way as well. - feval : callable, list of callable functions or None, optional (default=None) + feval : callable, list of callable functions, or None, optional (default=None) Customized evaluation function. Each evaluation function should accept two parameters: preds, train_data, and return (eval_name, eval_result, is_higher_better) or list of such tuples. @@ -87,7 +87,7 @@ def train( e.g. they are raw margin instead of probability of positive class for binary task in this case. train_data : Dataset The training dataset. - eval_name : string + eval_name : str The name of evaluation function (without whitespaces). 
eval_result : float The eval result. @@ -98,15 +98,15 @@ def train( If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. To ignore the default metric corresponding to the used objective, set the ``metric`` parameter to the string ``"None"`` in ``params``. - init_model : string, pathlib.Path, Booster or None, optional (default=None) + init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. - feature_name : list of strings or 'auto', optional (default="auto") + feature_name : list of str, or 'auto', optional (default="auto") Feature names. If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of strings or int, or 'auto', optional (default="auto") + categorical_feature : list of str or int, or 'auto', optional (default="auto") Categorical features. If list of int, interpreted as indices. - If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). + If list of str, interpreted as feature names (need to specify ``feature_name`` as well). If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. All values in categorical features should be less than int32 max value (2147483647). Large values could be memory consuming. Consider using consecutive integers starting from zero. @@ -156,7 +156,7 @@ def train( When your model is very large and cause the memory error, you can try to set this param to ``True`` to avoid the model conversion performed during the internal call of ``model_to_string``. You can still use _InnerPredictor as ``init_model`` for future continue training. - callbacks : list of callables or None, optional (default=None) + callbacks : list of callables, or None, optional (default=None) List of callback functions that are applied at each iteration. See Callbacks in Python API for more information. 
@@ -447,7 +447,7 @@ def cv(params, train_set, num_boost_round=100, Whether to perform stratified sampling. shuffle : bool, optional (default=True) Whether to shuffle before splitting data. - metrics : string, list of strings or None, optional (default=None) + metrics : str, list of str, or None, optional (default=None) Evaluation metrics to be monitored while CV. If not None, the metric in ``params`` will be overridden. fobj : callable or None, optional (default=None) @@ -472,7 +472,7 @@ def cv(params, train_set, num_boost_round=100, If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i] and you should group grad and hess in this way as well. - feval : callable, list of callable functions or None, optional (default=None) + feval : callable, list of callable functions, or None, optional (default=None) Customized evaluation function. Each evaluation function should accept two parameters: preds, train_data, and return (eval_name, eval_result, is_higher_better) or list of such tuples. @@ -483,7 +483,7 @@ def cv(params, train_set, num_boost_round=100, e.g. they are raw margin instead of probability of positive class for binary task in this case. train_data : Dataset The training dataset. - eval_name : string + eval_name : str The name of evaluation function (without whitespace). eval_result : float The eval result. @@ -494,15 +494,15 @@ def cv(params, train_set, num_boost_round=100, If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. To ignore the default metric corresponding to the used objective, set ``metrics`` to the string ``"None"``. - init_model : string, pathlib.Path, Booster or None, optional (default=None) + init_model : str, pathlib.Path, Booster or None, optional (default=None) Filename of LightGBM model or Booster instance used for continue training. 
- feature_name : list of strings or 'auto', optional (default="auto") + feature_name : list of str, or 'auto', optional (default="auto") Feature names. If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of strings or int, or 'auto', optional (default="auto") + categorical_feature : list of str or int, or 'auto', optional (default="auto") Categorical features. If list of int, interpreted as indices. - If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). + If list of str, interpreted as feature names (need to specify ``feature_name`` as well). If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. All values in categorical features should be less than int32 max value (2147483647). Large values could be memory consuming. Consider using consecutive integers starting from zero. @@ -528,7 +528,7 @@ def cv(params, train_set, num_boost_round=100, Results are not affected by this parameter, and always contain std. seed : int, optional (default=0) Seed used to generate the folds (passed to numpy.random.seed). - callbacks : list of callables or None, optional (default=None) + callbacks : list of callables, or None, optional (default=None) List of callback functions that are applied at each iteration. See Callbacks in Python API for more information. eval_train_metric : bool, optional (default=False) diff --git a/python-package/lightgbm/libpath.py b/python-package/lightgbm/libpath.py index 7ad1c65e1c59..18c184a668e7 100644 --- a/python-package/lightgbm/libpath.py +++ b/python-package/lightgbm/libpath.py @@ -11,7 +11,7 @@ def find_lib_path() -> List[str]: Returns ------- - lib_path: list of strings + lib_path: list of str List of all found library paths to LightGBM. 
""" if environ.get('LIGHTGBM_BUILD_DOC', False): diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 28dff0843a05..38f804de092c 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -56,16 +56,16 @@ def plot_importance( Tuple passed to ``ax.xlim()``. ylim : tuple of 2 elements or None, optional (default=None) Tuple passed to ``ax.ylim()``. - title : string or None, optional (default="Feature importance") + title : str or None, optional (default="Feature importance") Axes title. If None, title is disabled. - xlabel : string or None, optional (default="Feature importance") + xlabel : str or None, optional (default="Feature importance") X-axis title label. If None, title is disabled. - ylabel : string or None, optional (default="Features") + ylabel : str or None, optional (default="Features") Y-axis title label. If None, title is disabled. - importance_type : string, optional (default="split") + importance_type : str, optional (default="split") How the importance is calculated. If "split", result contains numbers of times the feature is used in a model. If "gain", result contains total gains of splits which use the feature. @@ -173,14 +173,14 @@ def plot_split_value_histogram( ---------- booster : Booster or LGBMModel Booster or LGBMModel instance of which feature split value histogram should be plotted. - feature : int or string + feature : int or str The feature name or index the histogram is plotted for. If int, interpreted as index. - If string, interpreted as name. - bins : int, string or None, optional (default=None) + If str, interpreted as name. + bins : int, str or None, optional (default=None) The maximum number of bins. If None, the number of bins equals number of unique split values. - If string, it should be one from the list of the supported values by ``numpy.histogram()`` function. 
+ If str, it should be one from the list of the supported values by ``numpy.histogram()`` function. ax : matplotlib.axes.Axes or None, optional (default=None) Target axes instance. If None, new figure and axes will be created. @@ -190,17 +190,17 @@ def plot_split_value_histogram( Tuple passed to ``ax.xlim()``. ylim : tuple of 2 elements or None, optional (default=None) Tuple passed to ``ax.ylim()``. - title : string or None, optional (default="Split value histogram for feature with @index/name@ @feature@") + title : str or None, optional (default="Split value histogram for feature with @index/name@ @feature@") Axes title. If None, title is disabled. @feature@ placeholder can be used, and it will be replaced with the value of ``feature`` parameter. @index/name@ placeholder can be used, and it will be replaced with ``index`` word in case of ``int`` type ``feature`` parameter - or ``name`` word in case of ``string`` type ``feature`` parameter. - xlabel : string or None, optional (default="Feature split value") + or ``name`` word in case of ``str`` type ``feature`` parameter. + xlabel : str or None, optional (default="Feature split value") X-axis title label. If None, title is disabled. - ylabel : string or None, optional (default="Count") + ylabel : str or None, optional (default="Count") Y-axis title label. If None, title is disabled. figsize : tuple of 2 elements or None, optional (default=None) @@ -288,11 +288,11 @@ def plot_metric( ---------- booster : dict or LGBMModel Dictionary returned from ``lightgbm.train()`` or LGBMModel instance. - metric : string or None, optional (default=None) + metric : str or None, optional (default=None) The metric name to plot. Only one metric supported because different metrics have various scales. If None, first metric picked from dictionary (according to hashcode). 
- dataset_names : list of strings or None, optional (default=None) + dataset_names : list of str, or None, optional (default=None) List of the dataset names which are used to calculate metric to plot. If None, all datasets are used. ax : matplotlib.axes.Axes or None, optional (default=None) @@ -302,13 +302,13 @@ def plot_metric( Tuple passed to ``ax.xlim()``. ylim : tuple of 2 elements or None, optional (default=None) Tuple passed to ``ax.ylim()``. - title : string or None, optional (default="Metric during training") + title : str or None, optional (default="Metric during training") Axes title. If None, title is disabled. - xlabel : string or None, optional (default="Iterations") + xlabel : str or None, optional (default="Iterations") X-axis title label. If None, title is disabled. - ylabel : string or None, optional (default="auto") + ylabel : str or None, optional (default="auto") Y-axis title label. If 'auto', metric name is used. If None, title is disabled. @@ -536,7 +536,7 @@ def create_tree_digraph( Booster or LGBMModel instance to be converted. tree_index : int, optional (default=0) The index of a target tree to convert. - show_info : list of strings or None, optional (default=None) + show_info : list of str, or None, optional (default=None) What information should be shown in nodes. - ``'split_gain'`` : gain from adding this split to the model @@ -548,7 +548,7 @@ def create_tree_digraph( - ``'data_percentage'`` : percentage of training data that fall into this node precision : int or None, optional (default=3) Used to restrict the display of floating point values to a certain precision. - orientation : string, optional (default='horizontal') + orientation : str, optional (default='horizontal') Orientation of the tree. Can be 'horizontal' or 'vertical'. **kwargs @@ -629,7 +629,7 @@ def plot_tree( Figure size. dpi : int or None, optional (default=None) Resolution of the figure. 
- show_info : list of strings or None, optional (default=None) + show_info : list of str, or None, optional (default=None) What information should be shown in nodes. - ``'split_gain'`` : gain from adding this split to the model @@ -641,7 +641,7 @@ def plot_tree( - ``'data_percentage'`` : percentage of training data that fall into this node precision : int or None, optional (default=3) Used to restrict the display of floating point values to a certain precision. - orientation : string, optional (default='horizontal') + orientation : str, optional (default='horizontal') Orientation of the tree. Can be 'horizontal' or 'vertical'. **kwargs diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 565ed8c10c9d..9fd2759a0e06 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -136,7 +136,7 @@ def __init__(self, func): sum(group) = n_samples. For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. - eval_name : string + eval_name : str The name of evaluation function (without whitespace). eval_result : float The eval result. @@ -162,7 +162,7 @@ def __call__(self, preds, dataset): Returns ------- - eval_name : string + eval_name : str The name of evaluation function (without whitespace). eval_result : float The eval result. @@ -206,7 +206,7 @@ def __call__(self, preds, dataset): where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. eval_set : list or None, optional (default=None) A list of (X, y) tuple pairs to use as validation sets. - eval_names : list of strings or None, optional (default=None) + eval_names : list of str, or None, optional (default=None) Names of eval_set. 
eval_sample_weight : {eval_sample_weight_shape} Weights of eval data. @@ -216,8 +216,8 @@ def __call__(self, preds, dataset): Init score of eval data. eval_group : {eval_group_shape} Group data of eval data. - eval_metric : string, callable, list or None, optional (default=None) - If string, it should be a built-in evaluation metric to use. + eval_metric : str, callable, list or None, optional (default=None) + If str, it should be a built-in evaluation metric to use. If callable, it should be a custom evaluation metric, see note below for more details. If list, it can be a list of built-in metrics, a list of custom evaluation metrics, or a mix of both. In either case, the ``metric`` from the model parameters will be evaluated and used as well. @@ -241,22 +241,22 @@ def __call__(self, preds, dataset): With ``verbose`` = 4 and at least one item in ``eval_set``, an evaluation metric is printed every 4 (instead of 1) boosting stages. - feature_name : list of strings or 'auto', optional (default='auto') + feature_name : list of str, or 'auto', optional (default='auto') Feature names. If 'auto' and data is pandas DataFrame, data columns names are used. - categorical_feature : list of strings or int, or 'auto', optional (default='auto') + categorical_feature : list of str or int, or 'auto', optional (default='auto') Categorical features. If list of int, interpreted as indices. - If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). + If list of str, interpreted as feature names (need to specify ``feature_name`` as well). If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. All values in categorical features should be less than int32 max value (2147483647). Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. 
The output cannot be monotonically constrained with respect to a categorical feature. - callbacks : list of callback functions or None, optional (default=None) + callbacks : list of callback functions, or None, optional (default=None) List of callback functions that are applied at each iteration. See Callbacks in Python API for more information. - init_model : string, pathlib.Path, Booster, LGBMModel or None, optional (default=None) + init_model : str, pathlib.Path, Booster, LGBMModel or None, optional (default=None) Filename of LightGBM model, Booster instance or LGBMModel instance used for continue training. Returns @@ -289,7 +289,7 @@ def __call__(self, preds, dataset): sum(group) = n_samples. For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. - eval_name : string + eval_name : str The name of evaluation function (without whitespace). eval_result : float The eval result. @@ -377,7 +377,7 @@ def __init__( Parameters ---------- - boosting_type : string, optional (default='gbdt') + boosting_type : str, optional (default='gbdt') 'gbdt', traditional Gradient Boosting Decision Tree. 'dart', Dropouts meet Multiple Additive Regression Trees. 'goss', Gradient-based One-Side Sampling. @@ -395,7 +395,7 @@ def __init__( Number of boosted trees to fit. subsample_for_bin : int, optional (default=200000) Number of samples for constructing bins. - objective : string, callable or None, optional (default=None) + objective : str, callable or None, optional (default=None) Specify the learning task and the corresponding learning objective or a custom objective function to be used (see note below). Default: 'regression' for LGBMRegressor, 'binary' or 'multiclass' for LGBMClassifier, 'lambdarank' for LGBMRanker. @@ -436,7 +436,7 @@ def __init__( Number of parallel threads. 
silent : bool, optional (default=True) Whether to print messages while running boosting. - importance_type : string, optional (default='split') + importance_type : str, optional (default='split') The type of feature importance to be filled into ``feature_importances_``. If 'split', result contains numbers of times the feature is used in a model. If 'gain', result contains total gains of splits which use the feature. @@ -737,9 +737,9 @@ def _get_meta_data(collection, name, i): sample_weight_shape="array-like of shape = [n_samples] or None, optional (default=None)", init_score_shape="array-like of shape = [n_samples] or None, optional (default=None)", group_shape="array-like or None, optional (default=None)", - eval_sample_weight_shape="list of arrays or None, optional (default=None)", - eval_init_score_shape="list of arrays or None, optional (default=None)", - eval_group_shape="list of arrays or None, optional (default=None)" + eval_sample_weight_shape="list of arrays, or None, optional (default=None)", + eval_init_score_shape="list of arrays, or None, optional (default=None)", + eval_group_shape="list of arrays, or None, optional (default=None)" ) + "\n\n" + _lgbmmodel_doc_custom_eval_note def predict(self, X, raw_score=False, start_iteration=0, num_iteration=None, @@ -796,7 +796,7 @@ def best_iteration_(self): @property def objective_(self): - """:obj:`string` or :obj:`callable`: The concrete objective used while fitting this model.""" + """:obj:`str` or :obj:`callable`: The concrete objective used while fitting this model.""" if self._n_features is None: raise LGBMNotFittedError('No objective found. Need to call fit beforehand.') return self._objective