Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Explainable boosting parameters #6335

Draft
wants to merge 27 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f47f823
First version of the new parameter "tree_interaction_constraints"
Apr 7, 2022
5730198
readme update
Apr 7, 2022
5d69338
First version of the new parameter "tree_interaction_constraints"
Apr 7, 2022
ec9ed61
readme update
Apr 7, 2022
bfac4e1
Merge branch 'master' into microsoft-master
veneres Feb 14, 2024
f5b391e
Merge pull request #2 from veneres/microsoft-master
veneres Feb 14, 2024
d1966c2
Updated readme
veneres Feb 14, 2024
6438f0e
Merge remote-tracking branch 'upstream/master'
veneres Feb 14, 2024
848fd58
Fix missing parenthesis
veneres Feb 14, 2024
d32b7f6
Temporarily remove a new test
veneres Feb 14, 2024
d216823
Merge with private repository edits
veneres Feb 15, 2024
8dabbb2
Merge remote-tracking branch 'upstream/master'
veneres Feb 15, 2024
137bc6d
Resolved lint errors identified by github actions
veneres Feb 15, 2024
9b3fb5e
Fix docs
veneres Feb 15, 2024
997e06b
Fix docs
veneres Feb 15, 2024
64ff80c
Fix docs and linting
veneres Feb 15, 2024
ee8d6e6
Fix docs
veneres Feb 15, 2024
09acfcf
Fix docs
veneres Feb 15, 2024
0d66bea
Boolean guards added for constrained learning
veneres Feb 16, 2024
84287f1
test and small fix added
veneres Feb 16, 2024
61727ca
Merge branch 'microsoft:master' into master
veneres Feb 21, 2024
227ec1b
Param name refactor
veneres Feb 21, 2024
ca3dac5
Interaction constraints test added
veneres Feb 21, 2024
ab04352
Addressed: Unnecessary `list` comprehension (rewrite using `list()`)
veneres Feb 21, 2024
2fc53c0
Merge remote-tracking branch 'upstream/master'
veneres Feb 22, 2024
c0a4591
Skip constraint test on CUDA for the moment
veneres Feb 22, 2024
8165317
Reformat file for ruff check
veneres Feb 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,7 @@ struct Config {
std::vector<std::vector<double>> auc_mu_weights_matrix;
std::vector<std::vector<int>> interaction_constraints_vector;
std::vector<std::vector<int>> tree_interaction_constraints_vector;
static const std::unordered_map<std::string, std::string>& ParameterTypes();
static const std::string DumpAliases();

private:
Expand Down
285 changes: 285 additions & 0 deletions src/io/config_auto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,148 @@ std::string Config::SaveMembersToString() const {
return str_buf.str();
}

/*!
 * \brief Table mapping each canonical parameter name to its accepted aliases.
 *
 * Parameters that have no alias map to an empty list. The table is a
 * function-local static, so it is built once on first use and every call
 * returns a reference to the same object.
 *
 * NOTE(review): "interaction_constraints" is listed below but
 * "tree_interaction_constraints" is not -- confirm whether the new parameter
 * should also get an entry here.
 *
 * \return Const reference to the parameter-name -> aliases table
 */
const std::unordered_map<std::string, std::vector<std::string>>& Config::parameter2aliases() {
  static std::unordered_map<std::string, std::vector<std::string>> map({
    {"config", {"config_file"}},
    {"task", {"task_type"}},
    {"objective", {"objective_type", "app", "application", "loss"}},
    {"boosting", {"boosting_type", "boost"}},
    {"data_sample_strategy", {}},
    {"data", {"train", "train_data", "train_data_file", "data_filename"}},
    {"valid", {"test", "valid_data", "valid_data_file", "test_data", "test_data_file", "valid_filenames"}},
    {"num_iterations", {"num_iteration", "n_iter", "num_tree", "num_trees", "num_round", "num_rounds", "nrounds", "num_boost_round", "n_estimators", "max_iter"}},
    {"learning_rate", {"shrinkage_rate", "eta"}},
    {"num_leaves", {"num_leaf", "max_leaves", "max_leaf", "max_leaf_nodes"}},
    {"tree_learner", {"tree", "tree_type", "tree_learner_type"}},
    {"num_threads", {"num_thread", "nthread", "nthreads", "n_jobs"}},
    {"device_type", {"device"}},
    {"seed", {"random_seed", "random_state"}},
    {"deterministic", {}},
    {"force_col_wise", {}},
    {"force_row_wise", {}},
    {"histogram_pool_size", {"hist_pool_size"}},
    {"max_depth", {}},
    {"min_data_in_leaf", {"min_data_per_leaf", "min_data", "min_child_samples", "min_samples_leaf"}},
    {"min_sum_hessian_in_leaf", {"min_sum_hessian_per_leaf", "min_sum_hessian", "min_hessian", "min_child_weight"}},
    {"bagging_fraction", {"sub_row", "subsample", "bagging"}},
    {"pos_bagging_fraction", {"pos_sub_row", "pos_subsample", "pos_bagging"}},
    {"neg_bagging_fraction", {"neg_sub_row", "neg_subsample", "neg_bagging"}},
    {"bagging_freq", {"subsample_freq"}},
    {"bagging_seed", {"bagging_fraction_seed"}},
    {"feature_fraction", {"sub_feature", "colsample_bytree"}},
    {"feature_fraction_bynode", {"sub_feature_bynode", "colsample_bynode"}},
    {"feature_fraction_seed", {}},
    {"extra_trees", {"extra_tree"}},
    {"extra_seed", {}},
    {"early_stopping_round", {"early_stopping_rounds", "early_stopping", "n_iter_no_change"}},
    {"first_metric_only", {}},
    {"max_delta_step", {"max_tree_output", "max_leaf_output"}},
    {"lambda_l1", {"reg_alpha", "l1_regularization"}},
    {"lambda_l2", {"reg_lambda", "lambda", "l2_regularization"}},
    {"linear_lambda", {}},
    {"min_gain_to_split", {"min_split_gain"}},
    {"drop_rate", {"rate_drop"}},
    {"max_drop", {}},
    {"skip_drop", {}},
    {"xgboost_dart_mode", {}},
    {"uniform_drop", {}},
    {"drop_seed", {}},
    {"top_rate", {}},
    {"other_rate", {}},
    {"min_data_per_group", {}},
    {"max_cat_threshold", {}},
    {"cat_l2", {}},
    {"cat_smooth", {}},
    {"max_cat_to_onehot", {}},
    {"top_k", {"topk"}},
    {"monotone_constraints", {"mc", "monotone_constraint", "monotonic_cst"}},
    {"monotone_constraints_method", {"monotone_constraining_method", "mc_method"}},
    {"monotone_penalty", {"monotone_splits_penalty", "ms_penalty", "mc_penalty"}},
    {"feature_contri", {"feature_contrib", "fc", "fp", "feature_penalty"}},
    {"forcedsplits_filename", {"fs", "forced_splits_filename", "forced_splits_file", "forced_splits"}},
    {"refit_decay_rate", {}},
    {"cegb_tradeoff", {}},
    {"cegb_penalty_split", {}},
    {"cegb_penalty_feature_lazy", {}},
    {"cegb_penalty_feature_coupled", {}},
    {"path_smooth", {}},
    {"interaction_constraints", {}},
    {"verbosity", {"verbose"}},
    {"input_model", {"model_input", "model_in"}},
    {"output_model", {"model_output", "model_out"}},
    {"saved_feature_importance_type", {}},
    {"snapshot_freq", {"save_period"}},
    {"use_quantized_grad", {}},
    {"num_grad_quant_bins", {}},
    {"quant_train_renew_leaf", {}},
    {"stochastic_rounding", {}},
    {"linear_tree", {"linear_trees"}},
    {"max_bin", {"max_bins"}},
    {"max_bin_by_feature", {}},
    {"min_data_in_bin", {}},
    {"bin_construct_sample_cnt", {"subsample_for_bin"}},
    {"data_random_seed", {"data_seed"}},
    {"is_enable_sparse", {"is_sparse", "enable_sparse", "sparse"}},
    {"enable_bundle", {"is_enable_bundle", "bundle"}},
    {"use_missing", {}},
    {"zero_as_missing", {}},
    {"feature_pre_filter", {}},
    {"pre_partition", {"is_pre_partition"}},
    {"two_round", {"two_round_loading", "use_two_round_loading"}},
    {"header", {"has_header"}},
    {"label_column", {"label"}},
    {"weight_column", {"weight"}},
    {"group_column", {"group", "group_id", "query_column", "query", "query_id"}},
    {"ignore_column", {"ignore_feature", "blacklist"}},
    {"categorical_feature", {"cat_feature", "categorical_column", "cat_column", "categorical_features"}},
    {"forcedbins_filename", {}},
    {"save_binary", {"is_save_binary", "is_save_binary_file"}},
    {"precise_float_parser", {}},
    {"parser_config_file", {}},
    {"start_iteration_predict", {}},
    {"num_iteration_predict", {}},
    {"predict_raw_score", {"is_predict_raw_score", "predict_rawscore", "raw_score"}},
    {"predict_leaf_index", {"is_predict_leaf_index", "leaf_index"}},
    {"predict_contrib", {"is_predict_contrib", "contrib"}},
    {"predict_disable_shape_check", {}},
    {"pred_early_stop", {}},
    {"pred_early_stop_freq", {}},
    {"pred_early_stop_margin", {}},
    {"output_result", {"predict_result", "prediction_result", "predict_name", "prediction_name", "pred_name", "name_pred"}},
    {"convert_model_language", {}},
    {"convert_model", {"convert_model_file"}},
    {"objective_seed", {}},
    {"num_class", {"num_classes"}},
    {"is_unbalance", {"unbalance", "unbalanced_sets"}},
    {"scale_pos_weight", {}},
    {"sigmoid", {}},
    {"boost_from_average", {}},
    {"reg_sqrt", {}},
    {"alpha", {}},
    {"fair_c", {}},
    {"poisson_max_delta_step", {}},
    {"tweedie_variance_power", {}},
    {"lambdarank_truncation_level", {}},
    {"lambdarank_norm", {}},
    {"label_gain", {}},
    {"lambdarank_position_bias_regularization", {}},
    {"metric", {"metrics", "metric_types"}},
    {"metric_freq", {"output_freq"}},
    {"is_provide_training_metric", {"training_metric", "is_training_metric", "train_metric"}},
    {"eval_at", {"ndcg_eval_at", "ndcg_at", "map_eval_at", "map_at"}},
    {"multi_error_top_k", {}},
    {"auc_mu_weights", {}},
    {"num_machines", {"num_machine"}},
    {"local_listen_port", {"local_port", "port"}},
    {"time_out", {}},
    {"machine_list_filename", {"machine_list_file", "machine_list", "mlist"}},
    {"machines", {"workers", "nodes"}},
    {"gpu_platform_id", {}},
    {"gpu_device_id", {}},
    {"gpu_use_dp", {}},
    {"num_gpu", {}},
  });
  return map;
}

const std::string Config::DumpAliases() {
std::stringstream str_buf;
str_buf << "{";
Expand Down Expand Up @@ -924,4 +1066,147 @@ const std::string Config::DumpAliases() {
return str_buf.str();
}

/*!
 * \brief Lookup table from parameter name to a textual description of its type.
 *
 * The type strings used are "string", "int", "double", "bool" and their
 * "vector<...>" forms. The table is a function-local static that is
 * constructed on the first call; later calls hand back the same object.
 *
 * \return Const reference to the parameter-name -> type-name table
 */
const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
  static const std::unordered_map<std::string, std::string> param_types = {
    {"config", "string"},
    {"objective", "string"},
    {"boosting", "string"},
    {"data_sample_strategy", "string"},
    {"data", "string"},
    {"valid", "vector<string>"},
    {"num_iterations", "int"},
    {"learning_rate", "double"},
    {"num_leaves", "int"},
    {"tree_learner", "string"},
    {"num_threads", "int"},
    {"device_type", "string"},
    {"seed", "int"},
    {"deterministic", "bool"},
    {"force_col_wise", "bool"},
    {"force_row_wise", "bool"},
    {"histogram_pool_size", "double"},
    {"max_depth", "int"},
    {"min_data_in_leaf", "int"},
    {"min_sum_hessian_in_leaf", "double"},
    {"bagging_fraction", "double"},
    {"pos_bagging_fraction", "double"},
    {"neg_bagging_fraction", "double"},
    {"bagging_freq", "int"},
    {"bagging_seed", "int"},
    {"feature_fraction", "double"},
    {"feature_fraction_bynode", "double"},
    {"feature_fraction_seed", "int"},
    {"extra_trees", "bool"},
    {"extra_seed", "int"},
    {"early_stopping_round", "int"},
    {"first_metric_only", "bool"},
    {"max_delta_step", "double"},
    {"lambda_l1", "double"},
    {"lambda_l2", "double"},
    {"linear_lambda", "double"},
    {"min_gain_to_split", "double"},
    {"drop_rate", "double"},
    {"max_drop", "int"},
    {"skip_drop", "double"},
    {"xgboost_dart_mode", "bool"},
    {"uniform_drop", "bool"},
    {"drop_seed", "int"},
    {"top_rate", "double"},
    {"other_rate", "double"},
    {"min_data_per_group", "int"},
    {"max_cat_threshold", "int"},
    {"cat_l2", "double"},
    {"cat_smooth", "double"},
    {"max_cat_to_onehot", "int"},
    {"top_k", "int"},
    {"monotone_constraints", "vector<int>"},
    {"monotone_constraints_method", "string"},
    {"monotone_penalty", "double"},
    {"feature_contri", "vector<double>"},
    {"forcedsplits_filename", "string"},
    {"refit_decay_rate", "double"},
    {"cegb_tradeoff", "double"},
    {"cegb_penalty_split", "double"},
    {"cegb_penalty_feature_lazy", "vector<double>"},
    {"cegb_penalty_feature_coupled", "vector<double>"},
    {"path_smooth", "double"},
    {"interaction_constraints", "vector<vector<int>>"},
    {"verbosity", "int"},
    {"input_model", "string"},
    {"output_model", "string"},
    {"saved_feature_importance_type", "int"},
    {"snapshot_freq", "int"},
    {"use_quantized_grad", "bool"},
    {"num_grad_quant_bins", "int"},
    {"quant_train_renew_leaf", "bool"},
    {"stochastic_rounding", "bool"},
    {"linear_tree", "bool"},
    {"max_bin", "int"},
    {"max_bin_by_feature", "vector<int>"},
    {"min_data_in_bin", "int"},
    {"bin_construct_sample_cnt", "int"},
    {"data_random_seed", "int"},
    {"is_enable_sparse", "bool"},
    {"enable_bundle", "bool"},
    {"use_missing", "bool"},
    {"zero_as_missing", "bool"},
    {"feature_pre_filter", "bool"},
    {"pre_partition", "bool"},
    {"two_round", "bool"},
    {"header", "bool"},
    {"label_column", "string"},
    {"weight_column", "string"},
    {"group_column", "string"},
    {"ignore_column", "vector<int>"},
    {"categorical_feature", "vector<int>"},
    {"forcedbins_filename", "string"},
    {"save_binary", "bool"},
    {"precise_float_parser", "bool"},
    {"parser_config_file", "string"},
    {"start_iteration_predict", "int"},
    {"num_iteration_predict", "int"},
    {"predict_raw_score", "bool"},
    {"predict_leaf_index", "bool"},
    {"predict_contrib", "bool"},
    {"predict_disable_shape_check", "bool"},
    {"pred_early_stop", "bool"},
    {"pred_early_stop_freq", "int"},
    {"pred_early_stop_margin", "double"},
    {"output_result", "string"},
    {"convert_model_language", "string"},
    {"convert_model", "string"},
    {"objective_seed", "int"},
    {"num_class", "int"},
    {"is_unbalance", "bool"},
    {"scale_pos_weight", "double"},
    {"sigmoid", "double"},
    {"boost_from_average", "bool"},
    {"reg_sqrt", "bool"},
    {"alpha", "double"},
    {"fair_c", "double"},
    {"poisson_max_delta_step", "double"},
    {"tweedie_variance_power", "double"},
    {"lambdarank_truncation_level", "int"},
    {"lambdarank_norm", "bool"},
    {"label_gain", "vector<double>"},
    {"lambdarank_position_bias_regularization", "double"},
    {"metric", "vector<string>"},
    {"metric_freq", "int"},
    {"is_provide_training_metric", "bool"},
    {"eval_at", "vector<int>"},
    {"multi_error_top_k", "int"},
    {"auc_mu_weights", "vector<double>"},
    {"num_machines", "int"},
    {"local_listen_port", "int"},
    {"time_out", "int"},
    {"machine_list_filename", "string"},
    {"machines", "string"},
    {"gpu_platform_id", "int"},
    {"gpu_device_id", "int"},
    {"gpu_use_dp", "bool"},
    {"num_gpu", "int"},
  };
  return param_types;
}

} // namespace LightGBM
21 changes: 0 additions & 21 deletions tests/python_package_test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3714,27 +3714,6 @@ def check_consistency(est, tree_interaction_constraints):
check_consistency(est, tree_interaction_constraints)



def test_linear_trees_num_threads():
    """Train the same linear-tree regression with 2 and then 4 threads and
    assert that both boosters produce matching predictions."""
    np.random.seed(0)
    features = np.arange(0, 1000, 0.1)
    target = 2 * features + np.random.normal(0, 0.1, len(features))
    features = features[:, np.newaxis]
    train_set = lgb.Dataset(features, label=target)
    shared_params = {'verbose': -1,
                     'objective': 'regression',
                     'seed': 0,
                     'linear_tree': True}
    predictions = []
    # Same dataset and settings for both runs; only the thread count changes.
    for thread_count in (2, 4):
        run_params = {**shared_params, 'num_threads': thread_count}
        booster = lgb.train(run_params, train_set, num_boost_round=100)
        predictions.append(booster.predict(features))
    np.testing.assert_allclose(predictions[0], predictions[1])


def test_linear_trees(tmp_path):
# check that setting linear_tree=True fits better than ordinary trees when data has linear relationship
np.random.seed(0)
Expand Down