Skip to content

Commit

Permalink
Merge branch 'master' into fix_classifier_refit
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb authored Sep 8, 2023
2 parents 20c47fc + e9fface commit b777288
Show file tree
Hide file tree
Showing 44 changed files with 911 additions and 285 deletions.
20 changes: 15 additions & 5 deletions .ci/lint_r_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,37 @@ LINTERS_TO_USE <- list(
, "any_duplicated" = lintr::any_duplicated_linter()
, "any_is_na" = lintr::any_is_na_linter()
, "assignment" = lintr::assignment_linter()
, "boolean_arithmetic" = lintr::boolean_arithmetic_linter()
, "braces" = lintr::brace_linter()
, "class_equals" = lintr::class_equals_linter()
, "commas" = lintr::commas_linter()
, "duplicate_argument" = lintr::duplicate_argument_linter()
, "empty_assignment" = lintr::empty_assignment_linter()
, "equals_na" = lintr::equals_na_linter()
, "for_loop_index" = lintr::for_loop_index_linter()
, "function_left" = lintr::function_left_parentheses_linter()
, "implicit_integers" = lintr::implicit_integer_linter()
, "infix_spaces" = lintr::infix_spaces_linter()
, "inner_combine" = lintr::inner_combine_linter()
, "is_numeric" = lintr::is_numeric_linter()
, "fixed_regex" = lintr::fixed_regex_linter()
, "function_return" = lintr::function_return_linter()
, "lengths" = lintr::lengths_linter()
, "literal_coercion" = lintr::literal_coercion_linter()
, "long_lines" = lintr::line_length_linter(length = 120L)
, "matrix" = lintr::matrix_apply_linter()
, "missing_argument" = lintr::missing_argument_linter()
, "no_tabs" = lintr::no_tab_linter()
, "non_portable_path" = lintr::nonportable_path_linter()
, "numeric_leading_zero" = lintr::numeric_leading_zero_linter()
, "outer_negation" = lintr::outer_negation_linter()
, "package_hooks" = lintr::package_hooks_linter()
, "paste" = lintr::paste_linter()
, "quotes" = lintr::quotes_linter()
, "redundant_equals" = lintr::redundant_equals_linter()
, "regex_subset" = lintr::regex_subset_linter()
, "routine_registration" = lintr::routine_registration_linter()
, "semicolon" = lintr::semicolon_linter()
, "seq" = lintr::seq_linter()
, "single_quotes" = lintr::single_quotes_linter()
, "spaces_inside" = lintr::spaces_inside_linter()
, "spaces_left_parens" = lintr::spaces_left_parentheses_linter()
, "sprintf" = lintr::sprintf_linter()
Expand Down Expand Up @@ -96,9 +104,11 @@ LINTERS_TO_USE <- list(
, "??" = interactive_text
)
)
, "unneeded_concatenation" = lintr::unneeded_concatenation_linter()
, "unreachable_code" = lintr::unreachable_code_linter()
, "vector_logic" = lintr::vector_logic_linter()
, "unnecessary_concatenation" = lintr::unnecessary_concatenation_linter()
, "unnecessary_lambda" = lintr::unnecessary_lambda_linter()
, "unreachable_code" = lintr::unreachable_code_linter()
, "vector_logic" = lintr::vector_logic_linter()
, "whitespace" = lintr::whitespace_linter()
)

noquote(paste0(length(FILES_TO_LINT), " R files need linting"))
Expand Down
1 change: 1 addition & 0 deletions .ci/test-python-oldest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#
echo "installing lightgbm's dependencies"
pip install \
'dataclasses' \
'numpy==1.12.0' \
'pandas==0.24.0' \
'scikit-learn==0.18.2' \
Expand Down
14 changes: 10 additions & 4 deletions .ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ if [[ $TASK == "lint" ]]; then
cpplint \
isort \
mypy \
'r-lintr>=3.0' \
'r-lintr>=3.1' \
ruff
source activate $CONDA_ENV
echo "Linting Python code"
Expand Down Expand Up @@ -119,15 +119,21 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
exit 0
fi

# older versions of Dask are incompatible with pandas>=2.0, but not all conda packages' metadata accurately reflects that
#
# ref: https://github.com/microsoft/LightGBM/issues/6030
#
# Each requirement below is wrapped in its own pair of literal single quotes
# inside the double-quoted string; every opening quote needs a matching closing quote.
CONSTRAINED_DEPENDENCIES="'dask-core>=2023.5.0' 'distributed>=2023.5.0' 'pandas>=2.0'"
if [[ $PYTHON_VERSION == "3.7" ]]; then
    # on Python 3.7, leave dask-core / distributed unpinned and keep pandas below 2.0
    CONSTRAINED_DEPENDENCIES="'dask-core' 'distributed' 'pandas<2.0'"
fi

# including python=version[build=*cpython] to ensure that conda doesn't fall back to pypy
conda create -q -y -n $CONDA_ENV \
${CONSTRAINED_DEPENDENCIES} \
cloudpickle \
dask-core \
distributed \
joblib \
matplotlib \
numpy \
pandas \
psutil \
pytest \
${CONDA_PYTHON_REQUIREMENT} \
Expand Down
10 changes: 7 additions & 3 deletions .ci/test_r_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ if [[ "${R_MAJOR_VERSION}" == "3" ]]; then
export R_LINUX_VERSION="3.6.3-1bionic"
export R_APT_REPO="bionic-cran35/"
elif [[ "${R_MAJOR_VERSION}" == "4" ]]; then
export R_MAC_VERSION=4.2.2
export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/base/R-${R_MAC_VERSION}.pkg
export R_LINUX_VERSION="4.2.2-1.2204.0"
export R_MAC_VERSION=4.3.1
export R_MAC_PKG_URL=${CRAN_MIRROR}/bin/macosx/big-sur-x86_64/base/R-${R_MAC_VERSION}-x86_64.pkg
export R_LINUX_VERSION="4.3.1-1.2204.0"
export R_APT_REPO="jammy-cran40/"
else
echo "Unrecognized R version: ${R_VERSION}"
Expand All @@ -36,7 +36,10 @@ fi
#
# `devscripts` is required for 'checkbashisms' (https://github.com/r-lib/actions/issues/111)
if [[ $OS_NAME == "linux" ]]; then
mkdir -p ~/.gnupg
echo "disable-ipv6" >> ~/.gnupg/dirmngr.conf
sudo apt-key adv \
--homedir ~/.gnupg \
--keyserver keyserver.ubuntu.com \
--recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 || exit -1
sudo add-apt-repository \
Expand All @@ -53,6 +56,7 @@ if [[ $OS_NAME == "linux" ]]; then
texlive-latex-recommended \
texlive-fonts-recommended \
texlive-fonts-extra \
tidy \
qpdf \
|| exit -1

Expand Down
54 changes: 1 addition & 53 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -7,56 +7,4 @@
# offer a reasonable automatic best-guess

# catch-all rule (this only gets matched if no rules below match)
* @guolinke @StrikerRUS @jameslamb @shiyu1994

# other catch-alls that will get matched if specific rules below are not matched
*.R @jameslamb @jmoralez
*.py @StrikerRUS @jmoralez @jameslamb @shiyu1994
*.cpp @guolinke @shiyu1994
*.h @guolinke @shiyu1994

# main C++ code
include/ @guolinke @shiyu1994
src/ @guolinke @shiyu1994
CMakeLists.txt @guolinke @jameslamb @StrikerRUS @shiyu1994
tests/c_api_test/ @guolinke @shiyu1994
tests/cpp_tests/ @guolinke @shiyu1994
tests/data/ @guolinke @shiyu1994
windows/ @guolinke @StrikerRUS @shiyu1994

# R code
build_r.R @jameslamb @StrikerRUS @jmoralez
build-cran-package.sh @jameslamb @StrikerRUS @jmoralez
R-package/ @jameslamb @jmoralez

# Python code
python-package/ @StrikerRUS @shiyu1994 @jameslamb @jmoralez

# Dask integration
python-package/lightgbm/dask.py @jameslamb @jmoralez
tests/python_package_test/test_dask.py @jameslamb @jmoralez

# helpers
helpers/ @StrikerRUS @guolinke

# CI administrative stuff
.ci/ @StrikerRUS @jameslamb
docs/ @StrikerRUS @jameslamb
examples/ @StrikerRUS @jameslamb @guolinke @jmoralez
*.yml @StrikerRUS @jameslamb
.vsts-ci.yml @StrikerRUS @jameslamb

# docker setup
docker/ @StrikerRUS @jameslamb
docker/dockerfile-cli @guolinke @shiyu1994 @StrikerRUS @jameslamb
docker/gpu/ @StrikerRUS @jameslamb
docker/dockerfile-python @StrikerRUS @shiyu1994 @jameslamb @jmoralez
docker/dockerfile-r @jameslamb @jmoralez

# GPU code
docs/GPU-*.rst @shiyu1994 @guolinke
src/treelearner/gpu_tree_learner.cpp @guolinke @shiyu1994
src/treelearner/tree_learner.cpp @guolinke @shiyu1994

# JAVA code
swig/ @guolinke @shiyu1994
* @guolinke @jameslamb @shiyu1994 @jmoralez
44 changes: 44 additions & 0 deletions .github/workflows/lock.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: 'Lock Inactive Threads'

on:
schedule:
# midnight UTC, every Wednesday
- cron: '0 0 * * 3'
# allow manual triggering from GitHub UI
workflow_dispatch:

permissions:
issues: write
pull-requests: write

concurrency:
group: lock

jobs:
action:
runs-on: ubuntu-latest
steps:
- uses: dessant/lock-threads@v4
with:
github-token: ${{ github.token }}
# after how many days of inactivity should a closed issue/PR be locked?
issue-inactive-days: '90'
pr-inactive-days: '90'
# do not lock feature request issues...
# we close those but track them in https://github.com/microsoft/LightGBM/issues/2302
exclude-any-issue-labels: '"feature request"'
# what labels should be removed prior to locking?
remove-issue-labels: 'awaiting response,awaiting review,blocking,in progress'
remove-pr-labels: 'awaiting response,awaiting review,blocking,in progress'
# what message should be posted prior to locking?
issue-comment: >
This issue has been automatically locked since there has not been any recent activity since it was closed.
To start a new related discussion, open a new issue at https://github.com/microsoft/LightGBM/issues
including a reference to this.
pr-comment: >
This pull request has been automatically locked since there has not been any recent activity since it was closed.
To start a new related discussion, open a new issue at https://github.com/microsoft/LightGBM/issues
including a reference to this.
# what should the locking status be?
issue-lock-reason: 'resolved'
pr-lock-reason: 'resolved'
14 changes: 7 additions & 7 deletions .github/workflows/r_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
- os: ubuntu-latest
task: r-package
compiler: gcc
r_version: 4.2
r_version: 4.3
build_type: cmake
container: 'ubuntu:22.04'
- os: ubuntu-latest
Expand All @@ -60,19 +60,19 @@ jobs:
- os: ubuntu-latest
task: r-package
compiler: clang
r_version: 4.2
r_version: 4.3
build_type: cmake
container: 'ubuntu:22.04'
- os: macOS-latest
task: r-package
compiler: gcc
r_version: 4.2
r_version: 4.3
build_type: cmake
container: null
- os: macOS-latest
task: r-package
compiler: clang
r_version: 4.2
r_version: 4.3
build_type: cmake
container: null
- os: windows-latest
Expand Down Expand Up @@ -125,13 +125,13 @@ jobs:
- os: ubuntu-latest
task: r-package
compiler: gcc
r_version: 4.2
r_version: 4.3
build_type: cran
container: 'ubuntu:22.04'
- os: macOS-latest
task: r-package
compiler: clang
r_version: 4.2
r_version: 4.3
build_type: cran
container: null
################
Expand All @@ -140,7 +140,7 @@ jobs:
- os: ubuntu-latest
task: r-rchk
compiler: gcc
r_version: 4.2
r_version: 4.3
build_type: cran
container: 'ubuntu:22.04'
steps:
Expand Down
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,6 @@ publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj

Expand Down
4 changes: 3 additions & 1 deletion R-package/R/lgb.interprete.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ lgb.interprete <- function(model,
leaf_index_dt <- data.table::as.data.table(x = pred_mat)
leaf_index_mat_list <- lapply(
X = leaf_index_dt
, FUN = function(x) matrix(x, ncol = num_class, byrow = TRUE)
, FUN = matrix
, ncol = num_class
, byrow = TRUE
)

# Get list of trees
Expand Down
8 changes: 3 additions & 5 deletions R-package/R/lgb.train.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ lgb.train <- function(params = list(),

# Construct datasets, if needed
data$update_params(params = params)
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature)
}
data$construct()

# Check interaction constraints
Expand All @@ -179,11 +182,6 @@ lgb.train <- function(params = list(),
data$set_colnames(colnames)
}

# Write categorical features
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature)
}

valid_contain_train <- FALSE
train_data_name <- "train"
reduced_valid_sets <- list()
Expand Down
5 changes: 4 additions & 1 deletion R-package/R/lightgbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ NULL
#' \item If passing a factor with more than two levels, will use objective \code{"multiclass"}
#' (note that parameter \code{num_class} in this case will also be determined automatically from
#' \code{label}).
#' \item Otherwise, will use objective \code{"regression"}.
#' \item Otherwise (or if passing \code{lgb.Dataset} as input), will use objective \code{"regression"}.
#' }
#'
#' \emph{New in version 4.0.0}
Expand Down Expand Up @@ -211,6 +211,9 @@ lightgbm <- function(data,
rm(temp)
} else {
data_processor <- NULL
if (objective == "auto") {
objective <- "regression"
}
}

# Set data to a temporary variable
Expand Down
2 changes: 1 addition & 1 deletion R-package/man/lightgbm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b777288

Please sign in to comment.