diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index 330c037d7024..bbded088387f 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ -15,31 +15,36 @@ jobs: os: [windows-latest, ubuntu-latest, macos-11] steps: - - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: submodules: 'true' - - uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0 + - uses: mamba-org/setup-micromamba@422500192359a097648154e8db4e39bdb6c6eed7 # v1.8.1 with: - python-version: '3.8' - architecture: 'x64' - - - uses: actions/setup-java@d202f5dbf7256730fb690ec59f6381650114feb2 # v3.6.0 - with: - java-version: 1.8 - - - name: Install Python packages - run: | - python -m pip install wheel setuptools - python -m pip install awscli + micromamba-version: '1.5.6-0' + environment-name: jvm_tests + create-args: >- + python=3.10 + awscli + cache-downloads: true + cache-environment: true + init-shell: bash powershell - name: Cache Maven packages - uses: actions/cache@6998d139ddd3e68c71e9e398d8e40b71a2f39812 # v3.2.5 + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} + - name: Build xgboost4j.dll + run: | + mkdir build + cd build + cmake .. -G"Visual Studio 17 2022" -A x64 -DJVM_BINDINGS=ON + cmake --build . --config Release + if: matrix.os == 'windows-latest' + - name: Test XGBoost4J (Core) run: | cd jvm-packages @@ -47,7 +52,8 @@ jobs: - name: Extract branch name shell: bash - run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" + run: | + echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT" id: extract_branch if: | (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) && @@ -58,7 +64,7 @@ jobs: cd lib/ Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll dir - python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read + python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2 if: | (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) && matrix.os == 'windows-latest' @@ -67,11 +73,12 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} - name: Publish artifact libxgboost4j.dylib to S3 + shell: bash -l {0} run: | cd lib/ mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib ls - python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read + python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2 if: | (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) && matrix.os == 'macos-11' diff --git a/doc/python/callbacks.rst b/doc/python/callbacks.rst index 7cb257a819ed..6d8b43a11557 100644 --- a/doc/python/callbacks.rst +++ b/doc/python/callbacks.rst @@ -36,7 +36,7 @@ inside iteration loop. You can also pass this callback function directly into X # Specify which dataset and which metric should be used for early stopping. early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds, metric_name='CustomErr', - data_name='Train') + data_name='Valid') booster = xgb.train( {'objective': 'binary:logistic', diff --git a/doc/python/sklearn_estimator.rst b/doc/python/sklearn_estimator.rst index 207b9fa30920..1aaa340b1abb 100644 --- a/doc/python/sklearn_estimator.rst +++ b/doc/python/sklearn_estimator.rst @@ -62,7 +62,7 @@ stack of trees: .. code-block:: python early_stop = xgb.callback.EarlyStopping( - rounds=2, metric_name='logloss', data_name='Validation_0', save_best=True + rounds=2, metric_name='logloss', data_name='validation_0', save_best=True ) clf = xgb.XGBClassifier(tree_method="hist", callbacks=[early_stop]) clf.fit(X_train, y_train, eval_set=[(X_test, y_test)]) diff --git a/doc/tutorials/spark_estimator.rst b/doc/tutorials/spark_estimator.rst index 8bd1dcd97a76..4e608440a536 100644 --- a/doc/tutorials/spark_estimator.rst +++ b/doc/tutorials/spark_estimator.rst @@ -28,7 +28,7 @@ We can create a ``SparkXGBRegressor`` estimator like: .. code-block:: python from xgboost.spark import SparkXGBRegressor - spark_reg_estimator = SparkXGBRegressor( + xgb_regressor = SparkXGBRegressor( features_col="features", label_col="label", num_workers=2, @@ -61,7 +61,7 @@ type or spark array type. .. code-block:: python - transformed_test_spark_dataframe = xgb_regressor.predict(test_spark_dataframe) + transformed_test_spark_dataframe = xgb_regressor_model.transform(test_spark_dataframe) The above snippet code returns a ``transformed_test_spark_dataframe`` that contains the input diff --git a/include/xgboost/json.h b/include/xgboost/json.h index a5872ec3a9d6..77ca6a510c96 100644 --- a/include/xgboost/json.h +++ b/include/xgboost/json.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2023 by XGBoost Contributors + * Copyright 2019-2024, XGBoost Contributors */ #ifndef XGBOOST_JSON_H_ #define XGBOOST_JSON_H_ @@ -42,7 +42,8 @@ class Value { kBoolean, kNull, // typed array for ubjson - kNumberArray, + kF32Array, + kF64Array, kU8Array, kI32Array, kI64Array @@ -173,7 +174,11 @@ class JsonTypedArray : public Value { /** * @brief Typed UBJSON array for 32-bit floating point. */ -using F32Array = JsonTypedArray<float, Value::ValueKind::kNumberArray>; +using F32Array = JsonTypedArray<float, Value::ValueKind::kF32Array>; +/** + * @brief Typed UBJSON array for 64-bit floating point. + */ +using F64Array = JsonTypedArray<double, Value::ValueKind::kF64Array>; /** * @brief Typed UBJSON array for uint8_t. */ @@ -457,9 +462,9 @@ class Json { Json& operator[](int ind) const { return (*ptr_)[ind]; } /*! \brief Return the reference to stored Json value. */ - Value const& GetValue() const & { return *ptr_; } - Value const& GetValue() && { return *ptr_; } - Value& GetValue() & { return *ptr_; } + [[nodiscard]] Value const& GetValue() const& { return *ptr_; } + Value const& GetValue() && { return *ptr_; } + Value& GetValue() & { return *ptr_; } bool operator==(Json const& rhs) const { return *ptr_ == *(rhs.ptr_); @@ -472,7 +477,7 @@ class Json { return os; } - IntrusivePtr<Value> const& Ptr() const { return ptr_; } + [[nodiscard]] IntrusivePtr<Value> const& Ptr() const { return ptr_; } private: IntrusivePtr<Value> ptr_{new JsonNull}; diff --git a/include/xgboost/json_io.h b/include/xgboost/json_io.h index 3a73d170a4c7..ce3d25c37e19 100644 --- a/include/xgboost/json_io.h +++ b/include/xgboost/json_io.h @@ -142,6 +142,7 @@ class JsonWriter { virtual void Visit(JsonArray const* arr); virtual void Visit(F32Array const* arr); + virtual void Visit(F64Array const*) { LOG(FATAL) << "Only UBJSON format can handle f64 array."; } virtual void Visit(U8Array const* arr); virtual void Visit(I32Array const* arr); virtual void Visit(I64Array const* arr); @@ -244,7 +245,8 @@ class UBJReader : public JsonReader { */ class UBJWriter : public JsonWriter { void Visit(JsonArray const* arr) override; - void Visit(F32Array const* arr) override; + void Visit(F32Array const* arr) override; + void Visit(F64Array const* arr) override; void Visit(U8Array const* arr) override; void Visit(I32Array const* arr) override; void Visit(I64Array const* arr) override; diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index c39d354cf8cb..865d07fe8b0f 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -81,40 +81,55 @@ def native_build(args): with cd(".."): build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build" maybe_makedirs(build_dir) - with cd(build_dir): - if sys.platform == "win32": - # Force x64 build on Windows. - maybe_generator = " -A x64" - else: - maybe_generator = "" - if sys.platform == "linux": - maybe_parallel_build = " -- -j $(nproc)" - else: - maybe_parallel_build = "" - if cli_args.log_capi_invocation == "ON": - CONFIG["LOG_CAPI_INVOCATION"] = "ON" + if sys.platform == "linux": + maybe_parallel_build = " -- -j $(nproc)" + else: + maybe_parallel_build = "" + + if cli_args.log_capi_invocation == "ON": + CONFIG["LOG_CAPI_INVOCATION"] = "ON" - if cli_args.use_cuda == "ON": - CONFIG["USE_CUDA"] = "ON" - CONFIG["USE_NCCL"] = "ON" - CONFIG["USE_DLOPEN_NCCL"] = "OFF" + if cli_args.use_cuda == "ON": + CONFIG["USE_CUDA"] = "ON" + CONFIG["USE_NCCL"] = "ON" + CONFIG["USE_DLOPEN_NCCL"] = "OFF" - args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()] + args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()] - # if enviorment set rabit_mock - if os.getenv("RABIT_MOCK", None) is not None: - args.append("-DRABIT_MOCK:BOOL=ON") + # if enviorment set rabit_mock + if os.getenv("RABIT_MOCK", None) is not None: + args.append("-DRABIT_MOCK:BOOL=ON") - # if enviorment set GPU_ARCH_FLAG - gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None) - if gpu_arch_flag is not None: - args.append("%s" % gpu_arch_flag) + # if enviorment set GPU_ARCH_FLAG + gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None) + if gpu_arch_flag is not None: + args.append("%s" % gpu_arch_flag) + with cd(build_dir): lib_dir = os.path.join(os.pardir, "lib") if os.path.exists(lib_dir): shutil.rmtree(lib_dir) - run("cmake .. " + " ".join(args) + maybe_generator) + + # Same trick as Python build, just test all possible generators. + if sys.platform == "win32": + supported_generators = ( + "", # empty, decided by cmake + '-G"Visual Studio 17 2022" -A x64', + '-G"Visual Studio 16 2019" -A x64', + '-G"Visual Studio 15 2017" -A x64', + ) + for generator in supported_generators: + try: + run("cmake .. " + " ".join(args + [generator])) + break + except subprocess.CalledProcessError as e: + print(f"Failed to build with generator: {generator}", e) + with cd(os.path.pardir): + shutil.rmtree(build_dir) + maybe_makedirs(build_dir) + else: + run("cmake .. " + " ".join(args)) run("cmake --build . --config Release" + maybe_parallel_build) with cd("demo/CLI/regression"): diff --git a/python-package/packager/nativelib.py b/python-package/packager/nativelib.py index 0227cff37f25..42b510eef540 100644 --- a/python-package/packager/nativelib.py +++ b/python-package/packager/nativelib.py @@ -32,7 +32,10 @@ def build_libxgboost( build_dir: pathlib.Path, build_config: BuildConfiguration, ) -> pathlib.Path: - """Build libxgboost in a temporary directory and obtain the path to built libxgboost""" + """Build libxgboost in a temporary directory and obtain the path to built + libxgboost. + + """ logger = logging.getLogger("xgboost.packager.build_libxgboost") if not cpp_src_dir.is_dir(): @@ -51,8 +54,8 @@ def _build(*, generator: str) -> None: cmake_cmd.extend(build_config.get_cmake_args()) # Flag for cross-compiling for Apple Silicon - # We use environment variable because it's the only way to pass down custom flags - # through the cibuildwheel package, which calls `pip wheel` command. + # We use environment variable because it's the only way to pass down custom + # flags through the cibuildwheel package, which calls `pip wheel` command. if "CIBW_TARGET_OSX_ARM64" in os.environ: cmake_cmd.append("-DCMAKE_OSX_ARCHITECTURES=arm64") diff --git a/src/common/json.cc b/src/common/json.cc index 21be2a5bc674..2887eeccf855 100644 --- a/src/common/json.cc +++ b/src/common/json.cc @@ -1,11 +1,12 @@ /** - * Copyright 2019-2023, XGBoost Contributors + * Copyright 2019-2024, XGBoost Contributors */ #include "xgboost/json.h" #include <array> // for array #include <cctype> // for isdigit #include <cmath> // for isinf, isnan +#include <cstdint> // for uint8_t, uint16_t, uint32_t #include <cstdio> // for EOF #include <cstdlib> // for size_t, strtof #include <cstring> // for memcpy @@ -72,15 +73,16 @@ void JsonWriter::Visit(JsonNumber const* num) { } void JsonWriter::Visit(JsonInteger const* num) { - char i2s_buffer_[NumericLimits<int64_t>::kToCharsSize]; + std::array<char, NumericLimits<int64_t>::kToCharsSize> i2s_buffer_; auto i = num->GetInteger(); - auto ret = to_chars(i2s_buffer_, i2s_buffer_ + NumericLimits<int64_t>::kToCharsSize, i); + auto ret = + to_chars(i2s_buffer_.data(), i2s_buffer_.data() + NumericLimits<int64_t>::kToCharsSize, i); auto end = ret.ptr; CHECK(ret.ec == std::errc()); - auto digits = std::distance(i2s_buffer_, end); + auto digits = std::distance(i2s_buffer_.data(), end); auto ori_size = stream_->size(); stream_->resize(ori_size + digits); - std::memcpy(stream_->data() + ori_size, i2s_buffer_, digits); + std::memcpy(stream_->data() + ori_size, i2s_buffer_.data(), digits); } void JsonWriter::Visit(JsonNull const* ) { @@ -143,8 +145,10 @@ std::string Value::TypeStr() const { return "Null"; case ValueKind::kInteger: return "Integer"; - case ValueKind::kNumberArray: + case ValueKind::kF32Array: return "F32Array"; + case ValueKind::kF64Array: + return "F64Array"; case ValueKind::kU8Array: return "U8Array"; case ValueKind::kI32Array: @@ -262,10 +266,11 @@ bool JsonTypedArray<T, kind>::operator==(Value const& rhs) const { return std::equal(arr.cbegin(), arr.cend(), vec_.cbegin()); } -template class JsonTypedArray<float, Value::ValueKind::kNumberArray>; -template class JsonTypedArray<uint8_t, Value::ValueKind::kU8Array>; -template class JsonTypedArray<int32_t, Value::ValueKind::kI32Array>; -template class JsonTypedArray<int64_t, Value::ValueKind::kI64Array>; +template class JsonTypedArray<float, Value::ValueKind::kF32Array>; +template class JsonTypedArray<double, Value::ValueKind::kF64Array>; +template class JsonTypedArray<std::uint8_t, Value::ValueKind::kU8Array>; +template class JsonTypedArray<std::int32_t, Value::ValueKind::kI32Array>; +template class JsonTypedArray<std::int64_t, Value::ValueKind::kI64Array>; // Json Number bool JsonNumber::operator==(Value const& rhs) const { @@ -708,6 +713,8 @@ Json UBJReader::ParseArray() { switch (type) { case 'd': return ParseTypedArray<F32Array>(n); + case 'D': + return ParseTypedArray<F64Array>(n); case 'U': return ParseTypedArray<U8Array>(n); case 'l': @@ -797,6 +804,10 @@ Json UBJReader::Parse() { auto v = this->ReadPrimitive<float>(); return Json{v}; } + case 'D': { + auto v = this->ReadPrimitive<double>(); + return Json{v}; + } case 'S': { auto str = this->DecodeStr(); return Json{str}; @@ -825,10 +836,6 @@ Json UBJReader::Parse() { Integer::Int i = this->ReadPrimitive<char>(); return Json{i}; } - case 'D': { - LOG(FATAL) << "f64 is not supported."; - break; - } case 'H': { LOG(FATAL) << "High precision number is not supported."; break; @@ -882,6 +889,8 @@ void WriteTypedArray(JsonTypedArray<T, kind> const* arr, std::vector<char>* stre stream->push_back('$'); if (std::is_same<T, float>::value) { stream->push_back('d'); + } else if (std::is_same_v<T, double>) { + stream->push_back('D'); } else if (std::is_same<T, int8_t>::value) { stream->push_back('i'); } else if (std::is_same<T, uint8_t>::value) { @@ -910,6 +919,7 @@ void WriteTypedArray(JsonTypedArray<T, kind> const* arr, std::vector<char>* stre } void UBJWriter::Visit(F32Array const* arr) { WriteTypedArray(arr, stream_); } +void UBJWriter::Visit(F64Array const* arr) { WriteTypedArray(arr, stream_); } void UBJWriter::Visit(U8Array const* arr) { WriteTypedArray(arr, stream_); } void UBJWriter::Visit(I32Array const* arr) { WriteTypedArray(arr, stream_); } void UBJWriter::Visit(I64Array const* arr) { WriteTypedArray(arr, stream_); } diff --git a/tests/cpp/common/test_json.cc b/tests/cpp/common/test_json.cc index 155cf04adf9a..72163efd78cc 100644 --- a/tests/cpp/common/test_json.cc +++ b/tests/cpp/common/test_json.cc @@ -639,6 +639,40 @@ TEST(Json, TypedArray) { ASSERT_EQ(arr[i + 8], i); } } + + { + Json f64{Object{}}; + auto array = F64Array(); + auto& vec = array.GetArray(); + // Construct test data + vec.resize(18); + std::iota(vec.begin(), vec.end(), 0.0); + // special values + vec.push_back(std::numeric_limits<double>::epsilon()); + vec.push_back(std::numeric_limits<double>::max()); + vec.push_back(std::numeric_limits<double>::min()); + vec.push_back(std::numeric_limits<double>::denorm_min()); + vec.push_back(std::numeric_limits<double>::quiet_NaN()); + + static_assert( + std::is_same_v<double, typename std::remove_reference_t<decltype(vec)>::value_type>); + + f64["f64"] = std::move(array); + ASSERT_TRUE(IsA<F64Array>(f64["f64"])); + std::vector<char> out; + Json::Dump(f64, &out, std::ios::binary); + + auto loaded = Json::Load(StringView{out.data(), out.size()}, std::ios::binary); + ASSERT_TRUE(IsA<F64Array>(loaded["f64"])); + auto const& result = get<F64Array const>(loaded["f64"]); + + auto& vec1 = get<F64Array const>(f64["f64"]); + ASSERT_EQ(result.size(), vec1.size()); + for (std::size_t i = 0; i < vec1.size() - 1; ++i) { + ASSERT_EQ(result[i], vec1[i]); + } + ASSERT_TRUE(std::isnan(result.back())); + } } TEST(UBJson, Basic) { @@ -694,6 +728,7 @@ TEST(UBJson, Basic) { } } + TEST(Json, TypeCheck) { Json config{Object{}}; config["foo"] = String{"bar"}; diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc index 0b65220ab9e6..283a56fc5ddf 100644 --- a/tests/cpp/test_serialization.cc +++ b/tests/cpp/test_serialization.cc @@ -60,7 +60,7 @@ void CompareJSON(Json l, Json r) { } break; } - case Value::ValueKind::kNumberArray: { + case Value::ValueKind::kF32Array: { auto const& l_arr = get<F32Array const>(l); auto const& r_arr = get<F32Array const>(r); ASSERT_EQ(l_arr.size(), r_arr.size()); @@ -69,6 +69,15 @@ void CompareJSON(Json l, Json r) { } break; } + case Value::ValueKind::kF64Array: { + auto const& l_arr = get<F64Array const>(l); + auto const& r_arr = get<F64Array const>(r); + ASSERT_EQ(l_arr.size(), r_arr.size()); + for (size_t i = 0; i < l_arr.size(); ++i) { + ASSERT_NEAR(l_arr[i], r_arr[i], kRtEps); + } + break; + } case Value::ValueKind::kU8Array: { CompareIntArray<U8Array>(l, r); break;