Skip to content

Commit

Permalink
Add C API function that returns all parameter names with their aliases (
Browse files Browse the repository at this point in the history
#4829)

* add C API function that returns all param names with aliases

* add C API function that returns all param names with aliases

* add R code

* test R code

* remove debug CI

* fix R lint

* refactor

* run CI

* fix R

* fix

* revert CI checks

* revert changes in docs

* Try to make function `const`

Co-authored-by: James Lamb <[email protected]>

* add `const` in cpp file

* address review comments and sync with `master`

Co-authored-by: James Lamb <[email protected]>
  • Loading branch information
StrikerRUS and jameslamb authored Dec 3, 2021
1 parent 946817a commit cf38071
Show file tree
Hide file tree
Showing 11 changed files with 285 additions and 196 deletions.
136 changes: 32 additions & 104 deletions R-package/R/aliases.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,119 +7,47 @@
# [return] A named list, where each key is a parameter relevant to lgb.Dataset and each value is a character
# vector of corresponding aliases.
.DATASET_PARAMETERS <- function() {
return(
list(
"bin_construct_sample_cnt" = c(
"bin_construct_sample_cnt"
, "subsample_for_bin"
)
, "categorical_feature" = c(
"categorical_feature"
, "cat_feature"
, "categorical_column"
, "cat_column"
, "categorical_features"
)
, "data_random_seed" = c(
"data_random_seed"
, "data_seed"
)
, "enable_bundle" = c(
"enable_bundle"
, "is_enable_bundle"
, "bundle"
)
, "feature_pre_filter" = "feature_pre_filter"
, "forcedbins_filename" = "forcedbins_filename"
, "group_column" = c(
"group_column"
, "group"
, "group_id"
, "query_column"
, "query"
, "query_id"
)
, "header" = c(
"header"
, "has_header"
)
, "ignore_column" = c(
"ignore_column"
, "ignore_feature"
, "blacklist"
)
, "is_enable_sparse" = c(
"is_enable_sparse"
, "is_sparse"
, "enable_sparse"
, "sparse"
)
, "label_column" = c(
"label_column"
, "label"
)
, "linear_tree" = c(
"linear_tree"
, "linear_trees"
)
, "max_bin" = c(
"max_bin"
, "max_bins"
)
, "max_bin_by_feature" = "max_bin_by_feature"
, "min_data_in_bin" = "min_data_in_bin"
, "pre_partition" = c(
"pre_partition"
, "is_pre_partition"
)
, "precise_float_parser" = "precise_float_parser"
, "two_round" = c(
"two_round"
, "two_round_loading"
, "use_two_round_loading"
)
, "use_missing" = "use_missing"
, "weight_column" = c(
"weight_column"
, "weight"
)
, "zero_as_missing" = "zero_as_missing"
)
)
all_aliases <- .PARAMETER_ALIASES()
return(all_aliases[c(
"bin_construct_sample_cnt"
, "categorical_feature"
, "data_random_seed"
, "enable_bundle"
, "feature_pre_filter"
, "forcedbins_filename"
, "group_column"
, "header"
, "ignore_column"
, "is_enable_sparse"
, "label_column"
, "linear_tree"
, "max_bin"
, "max_bin_by_feature"
, "min_data_in_bin"
, "pre_partition"
, "precise_float_parser"
, "two_round"
, "use_missing"
, "weight_column"
, "zero_as_missing"
)])
}

# [description] List of respected parameter aliases. Wrapped in a function to take advantage of
# lazy evaluation (so it doesn't matter what order R sources files during installation).
# [return] A named list, where each key is a main LightGBM parameter and each value is a character
# vector of corresponding aliases.
.PARAMETER_ALIASES <- function() {
learning_params <- list(
"boosting" = c(
"boosting"
, "boost"
, "boosting_type"
)
, "early_stopping_round" = c(
"early_stopping_round"
, "early_stopping_rounds"
, "early_stopping"
, "n_iter_no_change"
)
, "num_iterations" = c(
"num_iterations"
, "num_iteration"
, "n_iter"
, "num_tree"
, "num_trees"
, "num_round"
, "num_rounds"
, "nrounds"
, "num_boost_round"
, "n_estimators"
, "max_iter"
params_to_aliases <- jsonlite::fromJSON(
.Call(
LGBM_DumpParamAliases_R
)
)
return(c(learning_params, .DATASET_PARAMETERS()))
for (main_name in names(params_to_aliases)) {
aliases_with_main_name <- c(main_name, unlist(params_to_aliases[[main_name]]))
params_to_aliases[[main_name]] <- aliases_with_main_name
}
return(params_to_aliases)
}

# [description]
Expand Down
21 changes: 21 additions & 0 deletions R-package/src/lightgbm_R.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,26 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle,
R_API_END();
}

/*!
 * \brief Fetch the parameter-aliases JSON through LGBM_DumpParamAliases and hand it to R.
 * \return R character vector (length=1) with aliases JSON
 */
SEXP LGBM_DumpParamAliases_R() {
  SEXP cont_token = PROTECT(R_MakeUnwindCont());
  R_API_BEGIN();
  // first attempt: 1024 * 1024 chars of scratch space
  const int64_t initial_len = 1024 * 1024;
  int64_t required_len = 0;
  std::vector<char> json_buf(initial_len);
  CHECK_CALL(LGBM_DumpParamAliases(initial_len, &required_len, json_buf.data()));
  if (required_len > initial_len) {
    // the reported length exceeds the scratch space: grow to the reported size and ask again
    json_buf.resize(required_len);
    CHECK_CALL(LGBM_DumpParamAliases(required_len, &required_len, json_buf.data()));
  }
  SEXP result = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
  SET_STRING_ELT(result, 0, safe_R_mkChar(json_buf.data(), &cont_token));
  UNPROTECT(2);  // balances the two PROTECT calls above: cont_token and result
  return result;
  R_API_END();
}

// .Call() calls
static const R_CallMethodDef CallEntries[] = {
{"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1},
Expand Down Expand Up @@ -916,6 +936,7 @@ static const R_CallMethodDef CallEntries[] = {
{"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4},
{"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3},
{"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3},
{"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0},
{NULL, NULL, 0}
};

Expand Down
6 changes: 6 additions & 0 deletions R-package/src/lightgbm_R.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,4 +596,10 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R(
SEXP feature_importance_type
);

/*!
* \brief Dump all parameter names with their aliases to JSON
* \note The JSON is an object keyed by parameter name; each value is an array
*       of that parameter's aliases (an empty array when it has none)
* \return R character vector (length=1) with aliases JSON
*/
LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R();

#endif // LIGHTGBM_R_H_
2 changes: 2 additions & 0 deletions R-package/tests/testthat/test_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ context("parameter aliases")
test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
param_aliases <- .PARAMETER_ALIASES()
expect_identical(class(param_aliases), "list")
expect_true(length(param_aliases) > 100L)
expect_true(is.character(names(param_aliases)))
expect_true(is.character(param_aliases[["boosting"]]))
expect_true(is.character(param_aliases[["early_stopping_round"]]))
Expand All @@ -58,6 +59,7 @@ test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where
expect_true(length(names(param_aliases)) == length(param_aliases))
expect_true(all(sapply(param_aliases, is.character)))
expect_true(length(unique(names(param_aliases))) == length(param_aliases))
expect_equal(sort(param_aliases[["task"]]), c("task", "task_type"))
})

test_that("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {
Expand Down
18 changes: 18 additions & 0 deletions helpers/parameter_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
along with parameters description in LightGBM/docs/Parameters.rst file
from the information in LightGBM/include/LightGBM/config.h file.
"""
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -291,6 +292,7 @@ def gen_parameter_code(
keys, infos = get_parameter_infos(config_hpp)
names = get_names(infos)
alias = get_alias(infos)
names_with_aliases = defaultdict(list)
str_to_write = r"""/*!
* Copyright (c) 2018 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
Expand All @@ -306,6 +308,7 @@ def gen_parameter_code(

for pair in alias:
str_to_write += f' {{"{pair[0]}", "{pair[1]}"}},\n'
names_with_aliases[pair[1]].append(pair[0])
str_to_write += " });\n"
str_to_write += " return aliases;\n"
str_to_write += "}\n\n"
Expand Down Expand Up @@ -353,6 +356,21 @@ def gen_parameter_code(
# tails
str_to_write += " return str_buf.str();\n"
str_to_write += "}\n\n"

str_to_write += "const std::string Config::DumpAliases() {\n"
str_to_write += " std::stringstream str_buf;\n"
str_to_write += ' str_buf << "{";\n'
for idx, name in enumerate(names):
if idx > 0:
str_to_write += ', ";\n'
aliases = '\\", \\"'.join([alias for alias in names_with_aliases[name]])
aliases = f'[\\"{aliases}\\"]' if aliases else '[]'
str_to_write += f' str_buf << "\\"{name}\\": {aliases}'
str_to_write += '";\n'
str_to_write += ' str_buf << "}";\n'
str_to_write += " return str_buf.str();\n"
str_to_write += "}\n\n"

str_to_write += "} // namespace LightGBM\n"
with open(config_out_cpp, "w") as config_out_cpp_file:
config_out_cpp_file.write(str_to_write)
Expand Down
11 changes: 11 additions & 0 deletions include/LightGBM/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ typedef void* FastConfigHandle; /*!< \brief Handle of FastConfig. */
*/
LIGHTGBM_C_EXPORT const char* LGBM_GetLastError();

/*!
* \brief Dump all parameter names with their aliases to JSON.
* \note The JSON is an object keyed by parameter name; each value is an array
*       of that parameter's aliases (an empty array when it has none).
* \param buffer_len Length of the pre-allocated string buffer ``out_str``; if ``buffer_len < out_len``,
*                   re-allocate the buffer with at least ``out_len`` and call again
* \param[out] out_len Actual output length
* \param[out] out_str JSON format string of parameters, should pre-allocate memory
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len,
int64_t* out_len,
char* out_str);

/*!
* \brief Register a callback function for log redirecting.
* \param callback The callback function to register
Expand Down
1 change: 1 addition & 0 deletions include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,7 @@ struct Config {
static const std::unordered_set<std::string>& parameter_set();
std::vector<std::vector<double>> auc_mu_weights_matrix;
std::vector<std::vector<int>> interaction_constraints_vector;
static const std::string DumpAliases();

private:
void CheckParamConflict();
Expand Down
Loading

0 comments on commit cf38071

Please sign in to comment.