Skip to content

Commit

Permalink
Merge branch 'master' into arrow-support-training-data
Browse files Browse the repository at this point in the history
  • Loading branch information
borchero committed Sep 15, 2023
2 parents 9164040 + ab1eaa8 commit 9a0a18d
Show file tree
Hide file tree
Showing 25 changed files with 640 additions and 98 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 4.1.0.{build}
version: 4.1.0.99.{build}

image: Visual Studio 2015
platform: x64
Expand Down
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,13 @@ if(UNIX OR MINGW OR CYGWIN)
CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -std=c++11 -pthread -Wextra -Wall -Wno-ignored-attributes -Wno-unknown-pragmas -Wno-return-type"
)
if(MINGW)
  # MinGW builds: turn off the -Wstringop-overflow warning.
  # Background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95353
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
endif()
if(USE_DEBUG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
else()
Expand Down
18 changes: 9 additions & 9 deletions R-package/configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for lightgbm 4.1.0.
# Generated by GNU Autoconf 2.71 for lightgbm 4.1.0.99.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
Expand Down Expand Up @@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='lightgbm'
PACKAGE_TARNAME='lightgbm'
PACKAGE_VERSION='4.1.0'
PACKAGE_STRING='lightgbm 4.1.0'
PACKAGE_VERSION='4.1.0.99'
PACKAGE_STRING='lightgbm 4.1.0.99'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1211,7 +1211,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures lightgbm 4.1.0 to adapt to many kinds of systems.
\`configure' configures lightgbm 4.1.0.99 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
Expand Down Expand Up @@ -1273,7 +1273,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of lightgbm 4.1.0:";;
short | recursive ) echo "Configuration of lightgbm 4.1.0.99:";;
esac
cat <<\_ACEOF
Expand Down Expand Up @@ -1341,7 +1341,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
lightgbm configure 4.1.0
lightgbm configure 4.1.0.99
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
Expand Down Expand Up @@ -1378,7 +1378,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by lightgbm $as_me 4.1.0, which was
It was created by lightgbm $as_me 4.1.0.99, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
Expand Down Expand Up @@ -2454,7 +2454,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by lightgbm $as_me 4.1.0, which was
This file was extended by lightgbm $as_me 4.1.0.99, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -2509,7 +2509,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
lightgbm config.status 4.1.0
lightgbm config.status 4.1.0.99
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"
Expand Down
2 changes: 1 addition & 1 deletion R-package/pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ repo:
user: https://github.com/

development:
mode: release
mode: unreleased

authors:
Yu Shi:
Expand Down
17 changes: 17 additions & 0 deletions R-package/tests/testthat/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,20 @@
.LGB_VERBOSITY <- as.integer(
Sys.getenv("LIGHTGBM_TEST_VERBOSITY", "-1")
)

# [description]
#     Assert that every element of 'x' is also an element of 'y'.
#
#     testthat::expect_in() is not available in the version of {testthat}
#     built for R 3.6. This wrapper offers a similar interface there:
#     it delegates to expect_in() when a binding with that name exists,
#     and otherwise raises an error listing the elements of 'x' missing from 'y'.
.expect_in <- function(x, y) {
    if (!exists("expect_in")) {
        # fallback: collect the elements of 'x' that have no match in 'y'
        absent <- x[match(x, y, nomatch = 0L) == 0L]
        if (length(absent) > 0L) {
            stop("Some expected items not found: ", toString(absent))
        }
    } else {
        expect_in(x, y)
    }
}
163 changes: 146 additions & 17 deletions R-package/tests/testthat/test_lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -799,37 +799,166 @@ test_that("all parameters are stored correctly with save_model_to_string()", {
data = matrix(rnorm(500L), nrow = 100L)
, label = rnorm(100L)
)
nrounds <- 4L
bst <- lgb.train(
params = list(
objective = "regression"
, metric = "l2"
objective = "mape"
, metric = c("l2", "mae")
, num_threads = .LGB_MAX_THREADS
, seed = 708L
, data_sample_strategy = "bagging"
, sub_row = 0.8234
)
, data = dtrain
, nrounds = nrounds
, nrounds = 3L
, verbose = .LGB_VERBOSITY
)

model_str <- bst$save_model_to_string()
params_in_file <- .params_from_model_string(model_str = model_str)
# entries whose values should reflect params passed to lgb.train()
non_default_param_entries <- c(
"[objective: mape]"
# 'l1' was passed in with alias 'mae'
, "[metric: l2,l1]"
, "[data_sample_strategy: bagging]"
, "[seed: 708]"
# this was passed in with alias 'sub_row'
, "[bagging_fraction: 0.8234]"
, "[num_iterations: 3]"
)

# entries with default values of params
default_param_entries <- c(
"[boosting: gbdt]"
, "[tree_learner: serial]"
, "[device_type: cpu]"
, "[data: ]"
, "[valid: ]"
, "[learning_rate: 0.1]"
, "[num_leaves: 31]"
, sprintf("[num_threads: %i]", .LGB_MAX_THREADS)
, "[deterministic: 0]"
, "[histogram_pool_size: -1]"
, "[max_depth: -1]"
, "[min_data_in_leaf: 20]"
, "[min_sum_hessian_in_leaf: 0.001]"
, "[pos_bagging_fraction: 1]"
, "[neg_bagging_fraction: 1]"
, "[bagging_freq: 0]"
, "[bagging_seed: 15415]"
, "[feature_fraction: 1]"
, "[feature_fraction_bynode: 1]"
, "[feature_fraction_seed: 32671]"
, "[extra_trees: 0]"
, "[extra_seed: 6642]"
, "[early_stopping_round: 0]"
, "[first_metric_only: 0]"
, "[max_delta_step: 0]"
, "[lambda_l1: 0]"
, "[lambda_l2: 0]"
, "[linear_lambda: 0]"
, "[min_gain_to_split: 0]"
, "[drop_rate: 0.1]"
, "[max_drop: 50]"
, "[skip_drop: 0.5]"
, "[xgboost_dart_mode: 0]"
, "[uniform_drop: 0]"
, "[drop_seed: 20623]"
, "[top_rate: 0.2]"
, "[other_rate: 0.1]"
, "[min_data_per_group: 100]"
, "[max_cat_threshold: 32]"
, "[cat_l2: 10]"
, "[cat_smooth: 10]"
, "[max_cat_to_onehot: 4]"
, "[top_k: 20]"
, "[monotone_constraints: ]"
, "[monotone_constraints_method: basic]"
, "[monotone_penalty: 0]"
, "[feature_contri: ]"
, "[forcedsplits_filename: ]"
, "[force_col_wise: 0]"
, "[force_row_wise: 0]"
, "[refit_decay_rate: 0.9]"
, "[cegb_tradeoff: 1]"
, "[cegb_penalty_split: 0]"
, "[cegb_penalty_feature_lazy: ]"
, "[cegb_penalty_feature_coupled: ]"
, "[path_smooth: 0]"
, "[interaction_constraints: ]"
, sprintf("[verbosity: %i]", .LGB_VERBOSITY)
, "[saved_feature_importance_type: 0]"
, "[use_quantized_grad: 0]"
, "[num_grad_quant_bins: 4]"
, "[quant_train_renew_leaf: 0]"
, "[stochastic_rounding: 1]"
, "[linear_tree: 0]"
, "[max_bin: 255]"
, "[max_bin_by_feature: ]"
, "[min_data_in_bin: 3]"
, "[bin_construct_sample_cnt: 200000]"
, "[data_random_seed: 2350]"
, "[is_enable_sparse: 1]"
, "[enable_bundle: 1]"
, "[use_missing: 1]"
, "[zero_as_missing: 0]"
, "[feature_pre_filter: 1]"
, "[pre_partition: 0]"
, "[two_round: 0]"
, "[header: 0]"
, "[label_column: ]"
, "[weight_column: ]"
, "[group_column: ]"
, "[ignore_column: ]"
, "[categorical_feature: ]"
, "[forcedbins_filename: ]"
, "[precise_float_parser: 0]"
, "[parser_config_file: ]"
, "[objective_seed: 4309]"
, "[num_class: 1]"
, "[is_unbalance: 0]"
, "[scale_pos_weight: 1]"
, "[sigmoid: 1]"
, "[boost_from_average: 1]"
, "[reg_sqrt: 0]"
, "[alpha: 0.9]"
, "[fair_c: 1]"
, "[poisson_max_delta_step: 0.7]"
, "[tweedie_variance_power: 1.5]"
, "[lambdarank_truncation_level: 30]"
, "[lambdarank_norm: 1]"
, "[label_gain: ]"
, "[lambdarank_position_bias_regularization: 0]"
, "[eval_at: ]"
, "[multi_error_top_k: 1]"
, "[auc_mu_weights: ]"
, "[num_machines: 1]"
, "[local_listen_port: 12400]"
, "[time_out: 120]"
, "[machine_list_filename: ]"
, "[machines: ]"
, "[gpu_platform_id: -1]"
, "[gpu_device_id: -1]"
, "[gpu_use_dp: 0]"
, "[num_gpu: 1]"
)
all_param_entries <- c(non_default_param_entries, default_param_entries)

# parameters should match what was passed from the R package
expect_equal(sum(startsWith(params_in_file, "[metric:")), 1L)
expect_equal(sum(params_in_file == "[metric: l2]"), 1L)

expect_equal(sum(startsWith(params_in_file, "[num_iterations:")), 1L)
expect_equal(sum(params_in_file == "[num_iterations: 4]"), 1L)

expect_equal(sum(startsWith(params_in_file, "[objective:")), 1L)
expect_equal(sum(params_in_file == "[objective: regression]"), 1L)

expect_equal(sum(startsWith(params_in_file, "[verbosity:")), 1L)
expect_equal(sum(params_in_file == sprintf("[verbosity: %i]", .LGB_VERBOSITY)), 1L)
model_str <- bst$save_model_to_string()
params_in_file <- .params_from_model_string(model_str = model_str)
.expect_in(all_param_entries, params_in_file)

# early stopping should be off by default
expect_equal(sum(startsWith(params_in_file, "[early_stopping_round:")), 1L)
expect_equal(sum(params_in_file == "[early_stopping_round: 0]"), 1L)

# since save_model_to_string() is used when serializing with saveRDS(), check that all parameters
# roundtrip through saveRDS()/readRDS() successfully
rds_file <- tempfile()
saveRDS(bst, rds_file)
bst_rds <- readRDS(rds_file)
model_str <- bst_rds$save_model_to_string()
params_in_file <- .params_from_model_string(model_str = model_str)
.expect_in(all_param_entries, params_in_file)
})

test_that("early_stopping, num_iterations are stored correctly in model string even with aliases", {
Expand Down
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4.1.0
4.1.0.99
2 changes: 1 addition & 1 deletion external_libs/fmt
Submodule fmt updated 86 files
+8 −0 .github/dependabot.yml
+6 −0 .github/issue_template.md
+3 −2 .github/pull_request_template.md
+30 −0 .github/workflows/cifuzz.yml
+12 −1 .github/workflows/doc.yml
+42 −10 .github/workflows/linux.yml
+20 −2 .github/workflows/macos.yml
+65 −0 .github/workflows/scorecard.yml
+61 −21 .github/workflows/windows.yml
+99 −70 CMakeLists.txt
+1,187 −3 ChangeLog.rst
+1 −1 LICENSE.rst
+55 −38 README.rst
+11 −2 doc/CMakeLists.txt
+270 −155 doc/api.rst
+9 −6 doc/build.py
+2 −2 doc/index.rst
+163 −10 doc/syntax.rst
+5 −5 include/fmt/args.h
+482 −341 include/fmt/chrono.h
+116 −122 include/fmt/color.h
+62 −170 include/fmt/compile.h
+976 −1,290 include/fmt/core.h
+820 −1,801 include/fmt/format-inl.h
+2,253 −847 include/fmt/format.h
+0 −2 include/fmt/locale.h
+70 −146 include/fmt/os.h
+129 −55 include/fmt/ostream.h
+202 −192 include/fmt/printf.h
+367 −425 include/fmt/ranges.h
+465 −0 include/fmt/std.h
+86 −64 include/fmt/xchar.h
+40 −29 src/fmt.cc
+15 −96 src/format.cc
+117 −80 src/os.cc
+3 −3 support/Vagrantfile
+0 −43 support/appveyor-build.py
+0 −31 support/appveyor.yml
+0 −1 support/bazel/.bazelrc
+1 −1 support/bazel/.bazelversion
+1 −2 support/bazel/BUILD.bazel
+5 −4 support/bazel/README.md
+1 −1 support/build.gradle
+0 −70 support/cmake/cxx14.cmake
+4 −1 support/cmake/fmt-config.cmake.in
+6 −0 support/manage.py
+1 −1 support/printable.py
+7 −0 support/rst2md.py
+30 −17 test/CMakeLists.txt
+1 −1 test/add-subdirectory-test/CMakeLists.txt
+1 −1 test/args-test.cc
+396 −18 test/chrono-test.cc
+6 −0 test/color-test.cc
+42 −4 test/compile-error-test/CMakeLists.txt
+2 −1 test/compile-fp-test.cc
+37 −39 test/compile-test.cc
+167 −232 test/core-test.cc
+18 −0 test/detect-stdfs.cc
+2 −0 test/enforce-checks-test.cc
+1 −1 test/find-package-test/CMakeLists.txt
+257 −151 test/format-impl-test.cc
+465 −388 test/format-test.cc
+1 −1 test/fuzzing/CMakeLists.txt
+2 −2 test/fuzzing/one-arg.cc
+2 −2 test/fuzzing/two-args.cc
+3 −1 test/gtest-extra-test.cc
+1 −1 test/gtest-extra.cc
+2 −7 test/gtest-extra.h
+1 −7 test/gtest/CMakeLists.txt
+2 −2 test/gtest/gmock-gtest-all.cc
+2 −2 test/mock-allocator.h
+36 −96 test/module-test.cc
+24 −69 test/os-test.cc
+69 −79 test/ostream-test.cc
+3 −90 test/posix-mock-test.cc
+0 −2 test/posix-mock.h
+14 −42 test/printf-test.cc
+198 −34 test/ranges-test.cc
+1 −1 test/scan-test.cc
+17 −14 test/scan.h
+1 −1 test/static-export-test/CMakeLists.txt
+257 −0 test/std-test.cc
+2 −3 test/test-main.cc
+4 −4 test/unicode-test.cc
+2 −6 test/util.h
+177 −95 test/xchar-test.cc
4 changes: 2 additions & 2 deletions helpers/parameter_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def gen_parameter_code(
str_to_write += ' std::string tmp_str = "";\n'
for x in infos:
for y in x:
if "[doc-only]" in y:
if "[no-automatically-extract]" in y:
continue
param_type = y["inner_type"][0]
name = y["name"][0]
Expand All @@ -345,7 +345,7 @@ def gen_parameter_code(
str_to_write += " std::stringstream str_buf;\n"
for x in infos:
for y in x:
if "[doc-only]" in y or "[no-save]" in y:
if "[no-save]" in y:
continue
param_type = y["inner_type"][0]
name = y["name"][0]
Expand Down
Loading

0 comments on commit 9a0a18d

Please sign in to comment.