Skip to content

Commit

Permalink
Merge branch 'dmlc:master' into SecureBoost
Browse files Browse the repository at this point in the history
  • Loading branch information
ZiyueXu77 authored Feb 23, 2024
2 parents 04cd1cb + 729fd97 commit 087a8dd
Show file tree
Hide file tree
Showing 11 changed files with 158 additions and 72 deletions.
41 changes: 24 additions & 17 deletions .github/workflows/jvm_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,45 @@ jobs:
os: [windows-latest, ubuntu-latest, macos-11]

steps:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'

- uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
- uses: mamba-org/setup-micromamba@422500192359a097648154e8db4e39bdb6c6eed7 # v1.8.1
with:
python-version: '3.8'
architecture: 'x64'

- uses: actions/setup-java@d202f5dbf7256730fb690ec59f6381650114feb2 # v3.6.0
with:
java-version: 1.8

- name: Install Python packages
run: |
python -m pip install wheel setuptools
python -m pip install awscli
micromamba-version: '1.5.6-0'
environment-name: jvm_tests
create-args: >-
python=3.10
awscli
cache-downloads: true
cache-environment: true
init-shell: bash powershell

- name: Cache Maven packages
uses: actions/cache@6998d139ddd3e68c71e9e398d8e40b71a2f39812 # v3.2.5
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}

- name: Build xgboost4j.dll
run: |
mkdir build
cd build
cmake .. -G"Visual Studio 17 2022" -A x64 -DJVM_BINDINGS=ON
cmake --build . --config Release
if: matrix.os == 'windows-latest'

- name: Test XGBoost4J (Core)
run: |
cd jvm-packages
mvn test -B -pl :xgboost4j_2.12
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
run: |
echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
id: extract_branch
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
Expand All @@ -58,7 +64,7 @@ jobs:
cd lib/
Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll
dir
python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'windows-latest'
Expand All @@ -67,11 +73,12 @@ jobs:
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}

- name: Publish artifact libxgboost4j.dylib to S3
shell: bash -l {0}
run: |
cd lib/
mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib
ls
python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'macos-11'
Expand Down
2 changes: 1 addition & 1 deletion doc/python/callbacks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ inside iteration loop. You can also pass this callback function directly into X
# Specify which dataset and which metric should be used for early stopping.
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
metric_name='CustomErr',
data_name='Train')
data_name='Valid')
booster = xgb.train(
{'objective': 'binary:logistic',
Expand Down
2 changes: 1 addition & 1 deletion doc/python/sklearn_estimator.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ stack of trees:
.. code-block:: python
early_stop = xgb.callback.EarlyStopping(
rounds=2, metric_name='logloss', data_name='Validation_0', save_best=True
rounds=2, metric_name='logloss', data_name='validation_0', save_best=True
)
clf = xgb.XGBClassifier(tree_method="hist", callbacks=[early_stop])
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
Expand Down
4 changes: 2 additions & 2 deletions doc/tutorials/spark_estimator.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ We can create a ``SparkXGBRegressor`` estimator like:
.. code-block:: python
from xgboost.spark import SparkXGBRegressor
spark_reg_estimator = SparkXGBRegressor(
xgb_regressor = SparkXGBRegressor(
features_col="features",
label_col="label",
num_workers=2,
Expand Down Expand Up @@ -61,7 +61,7 @@ type or spark array type.

.. code-block:: python
transformed_test_spark_dataframe = xgb_regressor.predict(test_spark_dataframe)
transformed_test_spark_dataframe = xgb_regressor_model.transform(test_spark_dataframe)
The above code snippet returns a ``transformed_test_spark_dataframe`` that contains the input
Expand Down
19 changes: 12 additions & 7 deletions include/xgboost/json.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#ifndef XGBOOST_JSON_H_
#define XGBOOST_JSON_H_
Expand Down Expand Up @@ -42,7 +42,8 @@ class Value {
kBoolean,
kNull,
// typed array for ubjson
kNumberArray,
kF32Array,
kF64Array,
kU8Array,
kI32Array,
kI64Array
Expand Down Expand Up @@ -173,7 +174,11 @@ class JsonTypedArray : public Value {
/**
* @brief Typed UBJSON array for 32-bit floating point.
*/
using F32Array = JsonTypedArray<float, Value::ValueKind::kNumberArray>;
using F32Array = JsonTypedArray<float, Value::ValueKind::kF32Array>;
/**
* @brief Typed UBJSON array for 64-bit floating point.
*/
using F64Array = JsonTypedArray<double, Value::ValueKind::kF64Array>;
/**
* @brief Typed UBJSON array for uint8_t.
*/
Expand Down Expand Up @@ -457,9 +462,9 @@ class Json {
Json& operator[](int ind) const { return (*ptr_)[ind]; }

/*! \brief Return the reference to stored Json value. */
Value const& GetValue() const & { return *ptr_; }
Value const& GetValue() && { return *ptr_; }
Value& GetValue() & { return *ptr_; }
[[nodiscard]] Value const& GetValue() const& { return *ptr_; }
Value const& GetValue() && { return *ptr_; }
Value& GetValue() & { return *ptr_; }

bool operator==(Json const& rhs) const {
return *ptr_ == *(rhs.ptr_);
Expand All @@ -472,7 +477,7 @@ class Json {
return os;
}

IntrusivePtr<Value> const& Ptr() const { return ptr_; }
[[nodiscard]] IntrusivePtr<Value> const& Ptr() const { return ptr_; }

private:
IntrusivePtr<Value> ptr_{new JsonNull};
Expand Down
4 changes: 3 additions & 1 deletion include/xgboost/json_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ class JsonWriter {

virtual void Visit(JsonArray const* arr);
virtual void Visit(F32Array const* arr);
virtual void Visit(F64Array const*) { LOG(FATAL) << "Only UBJSON format can handle f64 array."; }
virtual void Visit(U8Array const* arr);
virtual void Visit(I32Array const* arr);
virtual void Visit(I64Array const* arr);
Expand Down Expand Up @@ -244,7 +245,8 @@ class UBJReader : public JsonReader {
*/
class UBJWriter : public JsonWriter {
void Visit(JsonArray const* arr) override;
void Visit(F32Array const* arr) override;
void Visit(F32Array const* arr) override;
void Visit(F64Array const* arr) override;
void Visit(U8Array const* arr) override;
void Visit(I32Array const* arr) override;
void Visit(I64Array const* arr) override;
Expand Down
65 changes: 40 additions & 25 deletions jvm-packages/create_jni.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,40 +81,55 @@ def native_build(args):
with cd(".."):
build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build"
maybe_makedirs(build_dir)
with cd(build_dir):
if sys.platform == "win32":
# Force x64 build on Windows.
maybe_generator = " -A x64"
else:
maybe_generator = ""
if sys.platform == "linux":
maybe_parallel_build = " -- -j $(nproc)"
else:
maybe_parallel_build = ""

if cli_args.log_capi_invocation == "ON":
CONFIG["LOG_CAPI_INVOCATION"] = "ON"
if sys.platform == "linux":
maybe_parallel_build = " -- -j $(nproc)"
else:
maybe_parallel_build = ""

if cli_args.log_capi_invocation == "ON":
CONFIG["LOG_CAPI_INVOCATION"] = "ON"

if cli_args.use_cuda == "ON":
CONFIG["USE_CUDA"] = "ON"
CONFIG["USE_NCCL"] = "ON"
CONFIG["USE_DLOPEN_NCCL"] = "OFF"
if cli_args.use_cuda == "ON":
CONFIG["USE_CUDA"] = "ON"
CONFIG["USE_NCCL"] = "ON"
CONFIG["USE_DLOPEN_NCCL"] = "OFF"

args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]

# if the environment sets RABIT_MOCK
if os.getenv("RABIT_MOCK", None) is not None:
args.append("-DRABIT_MOCK:BOOL=ON")
# if the environment sets RABIT_MOCK
if os.getenv("RABIT_MOCK", None) is not None:
args.append("-DRABIT_MOCK:BOOL=ON")

# if the environment sets GPU_ARCH_FLAG
gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
if gpu_arch_flag is not None:
args.append("%s" % gpu_arch_flag)
# if the environment sets GPU_ARCH_FLAG
gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
if gpu_arch_flag is not None:
args.append("%s" % gpu_arch_flag)

with cd(build_dir):
lib_dir = os.path.join(os.pardir, "lib")
if os.path.exists(lib_dir):
shutil.rmtree(lib_dir)
run("cmake .. " + " ".join(args) + maybe_generator)

# Same trick as Python build, just test all possible generators.
if sys.platform == "win32":
supported_generators = (
"", # empty, decided by cmake
'-G"Visual Studio 17 2022" -A x64',
'-G"Visual Studio 16 2019" -A x64',
'-G"Visual Studio 15 2017" -A x64',
)
for generator in supported_generators:
try:
run("cmake .. " + " ".join(args + [generator]))
break
except subprocess.CalledProcessError as e:
print(f"Failed to build with generator: {generator}", e)
with cd(os.path.pardir):
shutil.rmtree(build_dir)
maybe_makedirs(build_dir)
else:
run("cmake .. " + " ".join(args))
run("cmake --build . --config Release" + maybe_parallel_build)

with cd("demo/CLI/regression"):
Expand Down
9 changes: 6 additions & 3 deletions python-package/packager/nativelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ def build_libxgboost(
build_dir: pathlib.Path,
build_config: BuildConfiguration,
) -> pathlib.Path:
"""Build libxgboost in a temporary directory and obtain the path to built libxgboost"""
"""Build libxgboost in a temporary directory and obtain the path to built
libxgboost.
"""
logger = logging.getLogger("xgboost.packager.build_libxgboost")

if not cpp_src_dir.is_dir():
Expand All @@ -51,8 +54,8 @@ def _build(*, generator: str) -> None:
cmake_cmd.extend(build_config.get_cmake_args())

# Flag for cross-compiling for Apple Silicon
# We use an environment variable because it's the only way to pass down custom flags
# through the cibuildwheel package, which calls the `pip wheel` command.
# We use an environment variable because it's the only way to pass down custom
# flags through the cibuildwheel package, which calls the `pip wheel` command.
if "CIBW_TARGET_OSX_ARM64" in os.environ:
cmake_cmd.append("-DCMAKE_OSX_ARCHITECTURES=arm64")

Expand Down
Loading

0 comments on commit 087a8dd

Please sign in to comment.