From 31a609db5cdfbf843f2e565b33ddc0fcd671fd19 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 9 Jan 2025 08:25:25 -0600 Subject: [PATCH 1/8] consolidate pytest config in pyproject.toml (#6201) fixes #6194 Wheel tests in this project are emitting tons of warnings like this: > test_random_forest.py:1247 /__w/cuml/cuml/python/cuml/cuml/tests/test_random_forest.py:1247: PytestUnknownMarkWarning: Unknown pytest.mark.memleak - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html @pytest.mark.memleak I think that's because the introduction of a `pytest.ini` file in #6078 resulted in all of the `pytest` options from `pyproject.toml` being ignored. From https://docs.pytest.org/en/stable/reference/customize.html#pytest-ini > pytest.ini files take precedence over other files, even when empty. I think "take precedence" there means that if `pytest` finds a `pytest.ini`, it stops searching for other configuration files. Authors: - James Lamb (https://github.com/jameslamb) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Tim Head (https://github.com/betatim) - Jake Awe (https://github.com/AyodeAwe) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cuml/pull/6201 --- .github/CODEOWNERS | 8 +++----- .gitignore | 5 +++++ python/cuml/cuml/tests/pytest.ini | 5 ----- python/cuml/pyproject.toml | 2 ++ 4 files changed, 10 insertions(+), 10 deletions(-) delete mode 100644 python/cuml/cuml/tests/pytest.ini diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f51c061074..23d782bb99 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,11 +5,9 @@ cpp/ @rapidsai/cuml-cpp-codeowners python/ @rapidsai/cuml-python-codeowners #cmake code owners -**/CMakeLists.txt @rapidsai/cuml-cmake-codeowners -**/cmake/ @rapidsai/cuml-cmake-codeowners -python/cuml/pyproject.toml @rapidsai/cuml-cmake-codeowners -build.sh @rapidsai/cuml-cmake-codeowners -**/build.sh @rapidsai/cuml-cmake-codeowners +CMakeLists.txt @rapidsai/cuml-cmake-codeowners +*.cmake @rapidsai/cuml-cmake-codeowners +**/cmake/ @rapidsai/cuml-cmake-codeowners #CI code owners /.github/ @rapidsai/ci-codeowners diff --git a/.gitignore b/.gitignore index dc0b98d735..e7f8328d0f 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,8 @@ cpp/Doxyfile # clang tooling compile_commands.json .clangd/ + +# generally prefer 'pyproject.toml' to 'pytest.ini' for pytest options +# ref: https://github.com/rapidsai/cuml/pull/6201 +pytest.ini +!python/cuml/cuml/benchmark/automated/pytest.ini diff --git a/python/cuml/cuml/tests/pytest.ini b/python/cuml/cuml/tests/pytest.ini deleted file mode 100644 index bf70c06f84..0000000000 --- a/python/cuml/cuml/tests/pytest.ini +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
- -[pytest] -addopts = --tb=native - diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml index 2f0521fe6e..0bb733ef59 100644 --- a/python/cuml/pyproject.toml +++ b/python/cuml/pyproject.toml @@ -28,6 +28,8 @@ select = [ max_allowed_size_compressed = '1.5G' [tool.pytest.ini_options] +addopts = "--tb=native" + markers = [ "unit: Quickest tests focused on accuracy and correctness", "quality: More intense tests than unit with increased runtimes", From bfd2e220d3adf5d8c6b76dc90e3d1275054f32d5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 10 Jan 2025 00:43:42 -0800 Subject: [PATCH 2/8] Support raft's logger targets (#6208) https://github.com/rapidsai/raft/pull/2530 added new targets that we need to make global in cuML's CMake as well. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/cuml/pull/6208 --- cpp/cmake/thirdparty/get_raft.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 3240c730c5..3773e94d51 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ function(find_and_configure_raft) message(VERBOSE "CUML: raft FIND_PACKAGE_ARGUMENTS COMPONENTS ${RAFT_COMPONENTS}") rapids_cpm_find(raft ${PKG_VERSION} - GLOBAL_TARGETS raft::raft + GLOBAL_TARGETS raft::raft raft::raft_logger raft::raft_logger_impl BUILD_EXPORT_SET cuml-exports INSTALL_EXPORT_SET cuml-exports COMPONENTS ${RAFT_COMPONENTS} From 7c715c494dff71274d0fdec774bdee12a7e78827 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 13 Jan 2025 12:05:32 -0600 Subject: [PATCH 3/8] Further fixes for Scipy 1.15 update for PR and nightly CI (#6213) Nightly CI revealed a bug in the interaction between the hypothesis strategy `floating_dtypes(sizes=(32, 64))` and building sparse matrices; this PR uses `st.sampled_from((np.float32, np.float64))` instead to solve the issue. Additionally, having cudf.pandas active made one dataset in the ARIMA pytests fail, so that case is disabled while we look further into it.
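For context, a minimal sketch of the strategy swap described above. This is not code from the PR: the test body, the `test_sparse_construction_dtype` name, and the matrix shape are illustrative only, assuming `hypothesis`, `numpy`, and `scipy` are installed.

```python
import numpy as np
import scipy.sparse as sp
from hypothesis import given, strategies as st


# Drawing concrete dtype objects (np.float32 / np.float64) rather than using
# hypothesis.extra.numpy.floating_dtypes(sizes=(32, 64)) avoids the interaction
# issue described above when the drawn dtype is fed into sparse-matrix construction.
@given(dtype=st.sampled_from((np.float32, np.float64)))
def test_sparse_construction_dtype(dtype):
    X = sp.random(20, 5, density=0.3, format="csr", dtype=dtype)
    assert X.dtype == np.dtype(dtype)
```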
Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Victor Lafargue (https://github.com/viclafargue) - Simon Adorf (https://github.com/csadorf) URL: https://github.com/rapidsai/cuml/pull/6213 --- python/cuml/cuml/tests/test_arima.py | 11 ++++++++++- python/cuml/cuml/tests/test_linear_model.py | 10 +++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/python/cuml/cuml/tests/test_arima.py b/python/cuml/cuml/tests/test_arima.py index 7c16ef3774..f96def4d04 100644 --- a/python/cuml/cuml/tests/test_arima.py +++ b/python/cuml/cuml/tests/test_arima.py @@ -36,7 +36,10 @@ from cuml.internals.safe_imports import gpu_only_import import statsmodels.api as sm from sklearn.model_selection import train_test_split -from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import ( + cpu_only_import_from, + gpu_only_import_from, +) import warnings import os import pytest @@ -49,6 +52,7 @@ approx_fprime = cpu_only_import_from("scipy.optimize", "approx_fprime") cudf = gpu_only_import("cudf") +cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED") ############################################################################### @@ -410,6 +414,11 @@ def fill_interpolation(df_in): @pytest.mark.parametrize("dtype", [np.float64]) def test_integration(key, data, dtype): """Full integration test: estimate, fit, forecast""" + if ( + data.dataset == "endog_hourly_earnings_by_industry_missing_exog" + and cudf_pandas_active + ): + pytest.skip(reason="https://github.com/rapidsai/cuml/issues/6209") order, seasonal_order, intercept = extract_order(key) s = max(1, seasonal_order[3]) diff --git a/python/cuml/cuml/tests/test_linear_model.py b/python/cuml/cuml/tests/test_linear_model.py index 7960d4f0f8..559bfc96c3 100644 --- a/python/cuml/cuml/tests/test_linear_model.py +++ b/python/cuml/cuml/tests/test_linear_model.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -629,7 +629,7 @@ def test_logistic_regression_model_default(dtype): @given( - dtype=floating_dtypes(sizes=(32, 64)), + dtype=st.sampled_from((np.float32, np.float64)), order=st.sampled_from(("C", "F")), sparse_input=st.booleans(), fit_intercept=st.booleans(), @@ -661,7 +661,7 @@ def test_logistic_regression_model_digits( assert score >= acceptable_score -@given(dtype=floating_dtypes(sizes=(32, 64))) +@given(dtype=st.sampled_from((np.float32, np.float64))) def test_logistic_regression_sparse_only(dtype, nlp_20news): # sklearn score with max_iter = 10000 @@ -685,7 +685,7 @@ def test_logistic_regression_sparse_only(dtype, nlp_20news): @given( dataset=split_datasets( standard_classification_datasets( - dtypes=floating_dtypes(sizes=(32, 64)), + dtypes=st.sampled_from((np.float32, np.float64)), n_classes=st.sampled_from((2, 10)), n_features=st.just(20), n_informative=st.just(10), @@ -727,7 +727,7 @@ def test_logistic_regression_decision_function( @given( dataset=split_datasets( standard_classification_datasets( - dtypes=floating_dtypes(sizes=(32, 64)), + dtypes=st.sampled_from((np.float32, np.float64)), n_classes=st.sampled_from((2, 10)), n_features=st.just(20), n_informative=st.just(10), From 47bac702c005c077b4df082d120cafed608913d5 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 13 Jan 2025 21:05:40 -0600 Subject: [PATCH 4/8] declare cuda-python dependency for wheels, other small packaging changes (#6217) Follow-up to #6190. Proposes some miscellaneous packaging cleanup: * declares `cuml-cu{11,12}` wheels' runtime dependency on `cuda-python` - *as a result of stuff like this: https://github.com/rapidsai/cuml/blob/bfd2e220d3adf5d8c6b76dc90e3d1275054f32d5/python/cuml/cuml/svm/linear.pyx#L40-L43* * ~~adds `raft_log.txt` to `.gitignore`~~ * adds CMake option `CUML_USE_RAFT_STATIC` - *to provide a default for this: https://github.com/rapidsai/cuml/blob/bfd2e220d3adf5d8c6b76dc90e3d1275054f32d5/cpp/CMakeLists.txt#L600* * defines `BUILD_CAGRA_HNSWLIB OFF` in `get_cuvs.cmake` - *as is done for RAFT: https://github.com/rapidsai/cuml/blob/bfd2e220d3adf5d8c6b76dc90e3d1275054f32d5/cpp/cmake/thirdparty/get_raft.cmake#L58* - *cuML doesn't need the CAGRA stuff from cuVS, as far as I can tell* - *this is `ON` by default in cuVS, so this change saves a bit of build time and size: https://github.com/rapidsai/cuvs/blob/1e548f8c3a773452ce69556f4db72fc712efae02/cpp/CMakeLists.txt#L58* * explicitly passing package type to `rapids-download-wheels-from-s3` in CI scripts ## Notes for Reviewers These changes are useful independently, but will also make the `libcuml` wheels PR (https://github.com/rapidsai/cuml/pull/6199) a bit smaller and easier to review.
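Regarding the first bullet above, here is a hedged illustration of the kind of `cuda-python` usage that makes it a hard runtime dependency of the wheels. This is a generic example, not the actual code in `svm/linear.pyx`; the `driver_version` helper is made up.

```python
# Importing the low-level driver bindings from cuda-python at module load time
# is enough to make cuda-python a runtime requirement of the installed package.
from cuda import cuda


def driver_version() -> int:
    # cuda-python driver calls return an (error_code, result) tuple.
    err, version = cuda.cuDriverGetVersion()
    if err != cuda.CUresult.CUDA_SUCCESS:
        raise RuntimeError(f"cuDriverGetVersion failed: {err}")
    return version


if __name__ == "__main__":
    print(f"CUDA driver version: {driver_version()}")
```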
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cuml/pull/6217 --- .github/workflows/build.yaml | 1 + ci/test_wheel.sh | 7 +++-- cpp/CMakeLists.txt | 2 ++ cpp/cmake/thirdparty/get_cuvs.cmake | 3 +- dependencies.yaml | 46 ++++++++++++++++------------- python/cuml/CMakeLists.txt | 4 +-- python/cuml/pyproject.toml | 1 + 7 files changed, 37 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9a4b614584..945b8e4f72 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -91,3 +91,4 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: cuml + package-type: python diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 047575a2d7..76cfe9a86b 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -1,14 +1,15 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. set -euo pipefail mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/cuml*.whl)[test] +python -m pip install \ + "$(echo ./dist/cuml*.whl)[test]" RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} mkdir -p "${RAPIDS_TESTS_DIR}" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bfa9cba085..90c0c02cf3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -65,6 +65,7 @@ option(USE_CCACHE "Cache build artifacts with ccache" OFF) option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) option(CUDA_STATIC_MATH_LIBRARIES "Statically link the CUDA math libraries" OFF) option(CUML_USE_CUVS_STATIC "Build and statically link the CUVS library" OFF) +option(CUML_USE_RAFT_STATIC "Build and statically link the RAFT library" OFF) option(CUML_USE_TREELITE_STATIC "Build and statically link the treelite library" OFF) option(CUML_EXPORT_TREELITE_LINKAGE "Whether to publicly or privately link treelite to libcuml++" OFF) option(CUML_USE_CUMLPRIMS_MG_STATIC "Build and statically link the cumlprims_mg library" OFF) @@ -99,6 +100,7 @@ message(VERBOSE "CUML_CPP: Cache build artifacts with ccache: ${USE_CCACHE}") message(VERBOSE "CUML_CPP: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUML_CPP: Statically link the CUDA math libraries: ${CUDA_STATIC_MATH_LIBRARIES}") message(VERBOSE "CUML_CPP: Build and statically link CUVS libraries: ${CUML_USE_CUVS_STATIC}") +message(VERBOSE "CUML_CPP: Build and statically link RAFT library: ${CUML_USE_RAFT_STATIC}") message(VERBOSE "CUML_CPP: Build and statically link Treelite library: ${CUML_USE_TREELITE_STATIC}") set(CUML_ALGORITHMS "ALL" CACHE STRING "Experimental: Choose which algorithms are built into libcuml++.so. Can specify individual algorithms or groups in a semicolon-separated list.") diff --git a/cpp/cmake/thirdparty/get_cuvs.cmake b/cpp/cmake/thirdparty/get_cuvs.cmake index a46879c3e7..a48b4c6b1e 100644 --- a/cpp/cmake/thirdparty/get_cuvs.cmake +++ b/cpp/cmake/thirdparty/get_cuvs.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -55,6 +55,7 @@ function(find_and_configure_cuvs) OPTIONS "BUILD_TESTS OFF" "BUILD_BENCH OFF" + "BUILD_CAGRA_HNSWLIB OFF" "BUILD_MG_ALGOS ${CUVS_BUILD_MG_ALGOS}" ) diff --git a/dependencies.yaml b/dependencies.yaml index 90e6dc5fc2..f35eb74021 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -10,6 +10,7 @@ files: - cuda - cuda_version - depends_on_cudf + - depends_on_cuda_python - depends_on_cupy - depends_on_cuvs - depends_on_dask_cudf @@ -21,8 +22,8 @@ files: - depends_on_raft_dask - depends_on_rmm - docs - - py_build - - py_run + - py_build_cuml + - py_run_cuml - py_version - rapids_build_backend - test_python @@ -90,7 +91,7 @@ files: - depends_on_pylibraft - depends_on_raft_dask - depends_on_rmm - - py_run + - py_run_cuml - py_version - test_notebooks py_build_cuml: @@ -108,12 +109,13 @@ files: key: requires includes: - common_build + - depends_on_cuda_python - depends_on_cuvs - depends_on_libcumlprims - depends_on_libraft_headers - depends_on_pylibraft - depends_on_rmm - - py_build + - py_build_cuml py_run_cuml: output: pyproject pyproject_dir: python/cuml @@ -121,6 +123,7 @@ files: table: project includes: - cuda_wheels + - depends_on_cuda_python - depends_on_cudf - depends_on_cupy - depends_on_cuvs @@ -128,7 +131,7 @@ files: - depends_on_pylibraft - depends_on_raft_dask - depends_on_rmm - - py_run + - py_run_cuml py_test_cuml: output: pyproject pyproject_dir: python/cuml @@ -210,28 +213,14 @@ dependencies: cuda: "12.*" packages: - cuda-nvcc - py_build: + py_build_cuml: common: - output_types: [conda, requirements, pyproject] packages: - &cython cython>=3.0.0 - &treelite treelite==4.3.0 - specific: - - output_types: [conda, requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - packages: - - cuda-python>=12.6.2,<13.0a0 - - matrix: - cuda: "11.*" - packages: - - cuda-python>=11.8.5,<12.0a0 - - matrix: - packages: - - cuda-python - py_run: + py_run_cuml: common: - output_types: [conda, requirements, pyproject] packages: @@ -469,6 +458,21 @@ dependencies: - *scikit_learn - seaborn - *xgboost + depends_on_cuda_python: + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + packages: + - cuda-python>=12.6.2,<13.0a0 + - matrix: + cuda: "11.*" + packages: + - cuda-python>=11.8.5,<12.0a0 + - matrix: + packages: + - cuda-python depends_on_cudf: common: - output_types: conda diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt index 1dcaecc92f..7bda8d9a64 100644 --- a/python/cuml/CMakeLists.txt +++ b/python/cuml/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -174,7 +174,7 @@ endif() include("${CUML_CPP_SRC}/cmake/modules/ConfigureAlgorithms.cmake") include(cmake/ConfigureCythonAlgorithms.cmake) -if(${CUML_CPU}) +if(CUML_CPU) # libcuml requires metrics built if HDSCAN is built, which is not the case # for cuml-cpu unset(metrics_algo) diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml index 0bb733ef59..820a690d3c 100644 --- a/python/cuml/pyproject.toml +++ b/python/cuml/pyproject.toml @@ -92,6 +92,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ + "cuda-python", "cudf==25.2.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", "cuvs==25.2.*,>=0.0.0a0", From cf259f69570de9a8786b43a6c2fa4ca80cf747fb Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 14 Jan 2025 11:52:51 -0600 Subject: [PATCH 5/8] Add `as_sklearn` and `from_sklearn` APIs to serialize to CPU sklearn-estimators for supported models (#6102) Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - William Hicks (https://github.com/wphicks) - Victor Lafargue (https://github.com/viclafargue) - Tim Head (https://github.com/betatim) URL: https://github.com/rapidsai/cuml/pull/6102 --- .../cuml/cuml/experimental/accel/__main__.py | 42 +++- python/cuml/cuml/internals/base.pyx | 80 ++++++- python/cuml/cuml/manifold/t_sne.pyx | 2 +- .../cuml/tests/test_sklearn_import_export.py | 213 ++++++++++++++++++ 4 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 python/cuml/cuml/tests/test_sklearn_import_export.py diff --git a/python/cuml/cuml/experimental/accel/__main__.py b/python/cuml/cuml/experimental/accel/__main__.py index e4c4af576b..86c6c0cb41 100644 --- a/python/cuml/cuml/experimental/accel/__main__.py +++ b/python/cuml/cuml/experimental/accel/__main__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,6 +16,8 @@ import click import code +import joblib +import pickle import os import runpy import sys @@ -31,14 +33,50 @@ default=False, help="Turn strict mode for hyperparameters on.", ) +@click.option( + "--convert-to-sklearn", + type=click.Path(exists=True), + required=False, + help="Path to a pickled accelerated estimator to convert to a sklearn estimator.", +) +@click.option( + "--format", + "format", + type=click.Choice(["pickle", "joblib"], case_sensitive=False), + default="pickle", + help="Format to save the converted sklearn estimator.", +) +@click.option( + "--output", + type=click.Path(writable=True), + default="converted_sklearn_model.pkl", + help="Output path for the converted sklearn estimator file.", +) @click.argument("args", nargs=-1) -def main(module, strict, args): +def main(module, strict, convert_to_sklearn, format, output, args): if strict: os.environ["CUML_ACCEL_STRICT_MODE"] = "ON" install() + # If the user requested a conversion, handle it and exit + if convert_to_sklearn: + + with open(convert_to_sklearn, "rb") as f: + if format == "pickle": + serializer = pickle + elif format == "joblib": + serializer = joblib + accelerated_estimator = serializer.load(f) + + sklearn_estimator = accelerated_estimator.as_sklearn() + + with open(output, "wb") as f: + serializer.dump(sklearn_estimator, f) + + sys.exit() + if module: (module,) = module # run the module passing the remaining arguments diff --git a/python/cuml/cuml/internals/base.pyx b/python/cuml/cuml/internals/base.pyx index a2a7374a1f..32b2cad908 100644 --- a/python/cuml/cuml/internals/base.pyx +++ b/python/cuml/cuml/internals/base.pyx @@ -16,6 +16,7 @@ # distutils: language = c++ +import copy import os import inspect import numbers @@ -24,7 +25,7 @@ from cuml.internals.device_support import GPU_ENABLED from cuml.internals.safe_imports import ( cpu_only_import, gpu_only_import_from, - null_decorator + null_decorator, ) np = cpu_only_import('numpy') nvtx_annotate = gpu_only_import_from("nvtx", "annotate", alt=null_decorator) @@ -910,3 +911,80 @@ class UniversalBase(Base): raise ex raise ex + + def as_sklearn(self, deepcopy=False): + """ + Convert the current GPU-accelerated estimator into a scikit-learn estimator. + + This method imports and builds an equivalent CPU-backed scikit-learn model, + transferring all necessary parameters from the GPU representation to the + CPU model. After this conversion, the returned object should be a fully + compatible scikit-learn estimator, allowing you to use it in standard + scikit-learn pipelines and workflows. + + Parameters + ---------- + deepcopy : boolean (default=False) + Whether to return a deepcopy of the internal scikit-learn estimator of + the cuML models. cuML models internally have CPU based estimators that + could be updated. If you intend to use both the cuML and the scikit-learn + estimators after using the method in parallel, it is recommended to set + this to True to avoid one overwriting data of the other. + + Returns + ------- + sklearn.base.BaseEstimator + A scikit-learn compatible estimator instance that mirrors the trained + state of the current GPU-accelerated estimator. + + """ + self.import_cpu_model() + self.build_cpu_model() + self.gpu_to_cpu() + if deepcopy: + return copy.deepcopy(self._cpu_model) + else: + return self._cpu_model + + @classmethod + def from_sklearn(cls, model): + """ + Create a GPU-accelerated estimator from a scikit-learn estimator. 
+ + This class method takes an existing scikit-learn estimator and converts it + into the corresponding GPU-backed estimator. It imports any required CPU + model definitions, stores the given scikit-learn model internally, and then + transfers the model parameters and state onto the GPU. + + Parameters + ---------- + model : sklearn.base.BaseEstimator + A fitted scikit-learn estimator from which to create the GPU-accelerated + version. + + Returns + ------- + cls + A new instance of the GPU-accelerated estimator class that mirrors the + state of the input scikit-learn estimator. + + Notes + ----- + - `output_type` of the estimator is set to "numpy" + by default, as these cannot be inferred from training arguments. If + something different is required, then please use cuML's output_type + configuration utilities. + """ + estimator = cls() + estimator.import_cpu_model() + estimator._cpu_model = model + estimator.cpu_to_gpu() + + # we need to set an output type here since + # we cannot infer from training args. + # Setting to numpy seems like a reasonable default for matching the + # deserialized class by default. + estimator.output_type = "numpy" + estimator.output_mem_type = MemoryType.host + + return estimator diff --git a/python/cuml/cuml/manifold/t_sne.pyx b/python/cuml/cuml/manifold/t_sne.pyx index 7ff8702a2c..b984d47818 100644 --- a/python/cuml/cuml/manifold/t_sne.pyx +++ b/python/cuml/cuml/manifold/t_sne.pyx @@ -728,4 +728,4 @@ class TSNE(UniversalBase, def get_attr_names(self): return ["embedding", "kl_divergence_", "n_features_in_", "learning_rate_", - "n_iter_"] + "n_iter_", "embedding_"] diff --git a/python/cuml/cuml/tests/test_sklearn_import_export.py b/python/cuml/cuml/tests/test_sklearn_import_export.py new file mode 100644 index 0000000000..19e4fb2e5c --- /dev/null +++ b/python/cuml/cuml/tests/test_sklearn_import_export.py @@ -0,0 +1,213 @@ +# Copyright (c) 2024-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pytest +import numpy as np + +from cuml.cluster import KMeans, DBSCAN +from cuml.decomposition import PCA, TruncatedSVD +from cuml.linear_model import ( + LinearRegression, + LogisticRegression, + ElasticNet, + Ridge, + Lasso, +) +from cuml.manifold import TSNE +from cuml.neighbors import NearestNeighbors + +from cuml.testing.utils import array_equal + +from numpy.testing import assert_allclose + +from sklearn.datasets import make_blobs, make_classification, make_regression +from sklearn.utils.validation import check_is_fitted +from sklearn.cluster import KMeans as SkKMeans, DBSCAN as SkDBSCAN +from sklearn.decomposition import PCA as SkPCA, TruncatedSVD as SkTruncatedSVD +from sklearn.linear_model import ( + LinearRegression as SkLinearRegression, + LogisticRegression as SkLogisticRegression, + ElasticNet as SkElasticNet, + Ridge as SkRidge, + Lasso as SkLasso, +) +from sklearn.manifold import TSNE as SkTSNE +from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors + +############################################################################### +# Helper functions # +############################################################################### + + +@pytest.fixture +def random_state(): + return 42 + + +def assert_estimator_roundtrip( + cuml_model, sklearn_class, X, y=None, transform=False +): + """ + Generic assertion helper to test round-trip conversion: + fit original custom model + convert to sklearn + convert back to custom model + compare predictions or transform outputs + """ + # Fit original model + if y is not None: + cuml_model.fit(X, y) + else: + cuml_model.fit(X) + + # Convert to sklearn model + sklearn_model = cuml_model.as_sklearn() + check_is_fitted(sklearn_model) + + assert isinstance(sklearn_model, sklearn_class) + + # Convert back + roundtrip_model = type(cuml_model).from_sklearn(sklearn_model) + + # Ensure roundtrip model is fitted + check_is_fitted(roundtrip_model) + + # Compare predictions or transforms + if transform: + original_output = cuml_model.transform(X) + roundtrip_output = roundtrip_model.transform(X) + array_equal(original_output, roundtrip_output) + else: + # For predict methods + if hasattr(cuml_model, "predict"): + original_pred = cuml_model.predict(X) + roundtrip_pred = roundtrip_model.predict(X) + array_equal(original_pred, roundtrip_pred) + # For models that only produce labels_ or similar attributes (e.g., clustering) + elif hasattr(cuml_model, "labels_"): + array_equal(cuml_model.labels_, roundtrip_model.labels_) + else: + # If we get here, need a custom handling for that type + raise NotImplementedError( + "No known method to compare outputs of this model." 
+ ) + + +############################################################################### +# Tests # +############################################################################### + + +def test_kmeans(random_state): + # Using sklearn directly for demonstration + X, _ = make_blobs( + n_samples=50, n_features=2, centers=3, random_state=random_state + ) + original = KMeans(n_clusters=3, random_state=random_state) + assert_estimator_roundtrip(original, SkKMeans, X) + + +def test_dbscan(random_state): + X, _ = make_blobs( + n_samples=50, n_features=2, centers=3, random_state=random_state + ) + original = DBSCAN(eps=0.5, min_samples=5) + # DBSCAN assigns labels_ after fit + original.fit(X) + sklearn_model = original.as_sklearn() + roundtrip_model = DBSCAN.from_sklearn(sklearn_model) + array_equal(original.labels_, roundtrip_model.labels_) + + +def test_pca(random_state): + X = np.random.RandomState(random_state).rand(50, 5) + original = PCA(n_components=2, random_state=random_state) + assert_estimator_roundtrip(original, SkPCA, X, transform=True) + + +def test_truncated_svd(random_state): + X = np.random.RandomState(random_state).rand(50, 5) + original = TruncatedSVD(n_components=2, random_state=random_state) + assert_estimator_roundtrip(original, SkTruncatedSVD, X, transform=True) + + +def test_linear_regression(random_state): + X, y = make_regression( + n_samples=50, n_features=5, noise=0.1, random_state=random_state + ) + original = LinearRegression() + assert_estimator_roundtrip(original, SkLinearRegression, X, y) + + +def test_logistic_regression(random_state): + X, y = make_classification( + n_samples=50, n_features=5, n_informative=3, random_state=random_state + ) + original = LogisticRegression(random_state=random_state, max_iter=500) + assert_estimator_roundtrip(original, SkLogisticRegression, X, y) + + +def test_elasticnet(random_state): + X, y = make_regression( + n_samples=50, n_features=5, noise=0.1, random_state=random_state + ) + original = ElasticNet(random_state=random_state) + assert_estimator_roundtrip(original, SkElasticNet, X, y) + + +def test_ridge(random_state): + X, y = make_regression( + n_samples=50, n_features=5, noise=0.1, random_state=random_state + ) + original = Ridge(alpha=1.0, random_state=random_state) + assert_estimator_roundtrip(original, SkRidge, X, y) + + +def test_lasso(random_state): + X, y = make_regression( + n_samples=50, n_features=5, noise=0.1, random_state=random_state + ) + original = Lasso(alpha=0.1, random_state=random_state) + assert_estimator_roundtrip(original, SkLasso, X, y) + + +def test_tsne(random_state): + # TSNE is a bit tricky as it is non-deterministic. For test simplicity: + X = np.random.RandomState(random_state).rand(50, 5) + original = TSNE(n_components=2, random_state=random_state) + original.fit(X) + sklearn_model = original.as_sklearn() + roundtrip_model = TSNE.from_sklearn(sklearn_model) + # Since TSNE is non-deterministic, exact match is unlikely. + # We can at least check output dimensions are the same. 
+ original_embedding = original.embedding_ + sklearn_embedding = sklearn_model.embedding_ + roundtrip_embedding = roundtrip_model.embedding_ + + array_equal(original_embedding, sklearn_embedding) + array_equal(original_embedding, roundtrip_embedding) + + +def test_nearest_neighbors(random_state): + X = np.random.RandomState(random_state).rand(50, 5) + original = NearestNeighbors(n_neighbors=5) + original.fit(X) + sklearn_model = original.as_sklearn() + roundtrip_model = NearestNeighbors.from_sklearn(sklearn_model) + # Check that the kneighbors results are the same + dist_original, ind_original = original.kneighbors(X) + dist_roundtrip, ind_roundtrip = roundtrip_model.kneighbors(X) + assert_allclose(dist_original, dist_roundtrip) + assert_allclose(ind_original, ind_roundtrip) From f29293ffdde2046c0fc43bc566ccb16bc33fcf65 Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Wed, 15 Jan 2025 12:34:59 -0600 Subject: [PATCH 6/8] Avoid duplicate log entries (#6222) Previously the logger would have two sinks configured (the default + our custom one). This PR clears the default logger before registering our custom one. Authors: - Jim Crist-Harif (https://github.com/jcrist) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cuml/pull/6222 --- python/cuml/cuml/internals/logger.pxd | 1 + python/cuml/cuml/internals/logger.pyx | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cuml/cuml/internals/logger.pxd b/python/cuml/cuml/internals/logger.pxd index 6556cb0505..d39c7d24df 100644 --- a/python/cuml/cuml/internals/logger.pxd +++ b/python/cuml/cuml/internals/logger.pxd @@ -47,6 +47,7 @@ IF GPUBUILD == 1: cdef cppclass sink_vector: void push_back(const sink_ptr& sink) except + void pop_back() except + + void clear() cdef extern from "cuml/common/logger.hpp" namespace "ML" nogil: cdef cppclass logger: diff --git a/python/cuml/cuml/internals/logger.pyx b/python/cuml/cuml/internals/logger.pyx index 0bc09126bd..5dec86fd18 100644 --- a/python/cuml/cuml/internals/logger.pyx +++ b/python/cuml/cuml/internals/logger.pyx @@ -319,5 +319,6 @@ def flush(): IF GPUBUILD == 1: - # Set callback functions to handle redirected sys.stdout in Python + # Clear existing sinks and add a callback sink to redirect to sys.stdout + default_logger().sinks().clear() default_logger().sinks().push_back( make_shared[callback_sink_mt](_log_callback, _log_flush)) From d95cae5fb27ba076f7b295697cf27789e28d93fa Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 17 Jan 2025 14:15:07 -0600 Subject: [PATCH 7/8] Use GCC 13 in CUDA 12 conda builds. (#6221) ## Description conda-forge is using GCC 13 for CUDA 12 builds. This PR updates CUDA 12 conda builds to use GCC 13, for alignment. These PRs should be merged in a specific order, see https://github.com/rapidsai/build-planning/issues/129 for details. 
--------- Co-authored-by: divyegala --- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-125_arch-x86_64.yaml | 4 ++-- .../clang_tidy_cuda-118_arch-x86_64.yaml | 2 +- .../cpp_all_cuda-118_arch-x86_64.yaml | 2 +- .../cpp_all_cuda-125_arch-x86_64.yaml | 4 ++-- .../recipes/cuml-cpu/conda_build_config.yaml | 8 ++++--- conda/recipes/cuml/conda_build_config.yaml | 14 ++++++------- conda/recipes/cuml/meta.yaml | 8 +++---- conda/recipes/libcuml/conda_build_config.yaml | 14 ++++++------- conda/recipes/libcuml/meta.yaml | 16 +++++--------- cpp/CMakeLists.txt | 21 +++++++++++++++++++ cpp/src/hdbscan/condensed_hierarchy.cu | 8 ++++--- cpp/src/hdbscan/detail/utils.h | 8 ++++--- cpp/test/CMakeLists.txt | 10 ++++++++- dependencies.yaml | 16 ++++++++++---- 15 files changed, 86 insertions(+), 51 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index bf73b938a5..b97b4f0ca6 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -75,7 +75,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - statsmodels -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - treelite==4.3.0 - umap-learn==0.5.6 - xgboost>=2.1.0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 72539f2d18..5c8439cf50 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -25,7 +25,7 @@ dependencies: - dask-ml - doxygen=1.9.1 - fmt>=11.0.2,<12 -- gcc_linux-64=11.* +- gcc_linux-64=13.* - graphviz - hdbscan>=0.8.39,<0.8.40 - hypothesis>=6.0,<7 @@ -71,7 +71,7 @@ dependencies: - sphinx-copybutton - sphinx-markdown-tables - statsmodels -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - treelite==4.3.0 - umap-learn==0.5.6 - xgboost>=2.1.0 diff --git a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml index 836e34b0a9..f307fd45e3 100644 --- a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml +++ b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml @@ -33,6 +33,6 @@ dependencies: - ninja - nvcc_linux-64=11.8 - spdlog>=1.14.1,<1.15 -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 - tomli name: clang_tidy_cuda-118_arch-x86_64 diff --git a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml index 8442e61e86..6220cd8a50 100644 --- a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml @@ -31,5 +31,5 @@ dependencies: - ninja - nvcc_linux-64=11.8 - spdlog>=1.14.1,<1.15 -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 name: cpp_all_cuda-118_arch-x86_64 diff --git a/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml index d199d744e0..5b553bc95d 100644 --- a/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/cpp_all_cuda-125_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cuda-version=12.5 - cxx-compiler - fmt>=11.0.2,<12 -- gcc_linux-64=11.* +- gcc_linux-64=13.* - libcublas-dev - libcufft-dev - libcumlprims==25.2.*,>=0.0.0a0 @@ -27,5 +27,5 @@ dependencies: - librmm==25.2.*,>=0.0.0a0 - ninja - spdlog>=1.14.1,<1.15 -- sysroot_linux-64==2.17 +- sysroot_linux-64==2.28 name: cpp_all_cuda-125_arch-x86_64 diff --git a/conda/recipes/cuml-cpu/conda_build_config.yaml 
b/conda/recipes/cuml-cpu/conda_build_config.yaml index a6f636917a..354e9fb334 100644 --- a/conda/recipes/cuml-cpu/conda_build_config.yaml +++ b/conda/recipes/cuml-cpu/conda_build_config.yaml @@ -1,8 +1,10 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cmake_version: - ">=3.26.4,!=3.30.0" @@ -11,4 +13,4 @@ c_stdlib: - sysroot c_stdlib_version: - - "=2.17" + - "=2.28" diff --git a/conda/recipes/cuml/conda_build_config.yaml b/conda/recipes/cuml/conda_build_config.yaml index eb829ba4de..ded1456b11 100644 --- a/conda/recipes/cuml/conda_build_config.yaml +++ b/conda/recipes/cuml/conda_build_config.yaml @@ -1,14 +1,14 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cmake_version: - ">=3.26.4,!=3.30.0" @@ -17,7 +17,7 @@ c_stdlib: - sysroot c_stdlib_version: - - "=2.17" + - "=2.28" treelite_version: - "=4.3.0" diff --git a/conda/recipes/cuml/meta.yaml b/conda/recipes/cuml/meta.yaml index 35157fb3cc..a84f2738f9 100644 --- a/conda/recipes/cuml/meta.yaml +++ b/conda/recipes/cuml/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} @@ -33,10 +33,8 @@ build: - SCCACHE_S3_KEY_PREFIX=cuml-linux64 # [linux64] - SCCACHE_S3_USE_SSL ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} - cuda-python @@ -46,7 +44,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} diff --git a/conda/recipes/libcuml/conda_build_config.yaml b/conda/recipes/libcuml/conda_build_config.yaml index f802440354..b9e0670a70 100644 --- a/conda/recipes/libcuml/conda_build_config.yaml +++ b/conda/recipes/libcuml/conda_build_config.yaml @@ -1,20 +1,20 @@ c_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cxx_compiler_version: - - 11 + - 13 # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - 11 # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc + - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] + - nvcc # [os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] c_stdlib: - sysroot c_stdlib_version: - - "=2.17" + - "=2.28" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/conda/recipes/libcuml/meta.yaml b/conda/recipes/libcuml/meta.yaml index f4a65c50f7..4d193a0ae8 100644 --- a/conda/recipes/libcuml/meta.yaml +++ b/conda/recipes/libcuml/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} @@ -14,9 +14,7 @@ source: build: ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% endif %} + - {{ compiler('cuda') }} script_env: - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY @@ -38,7 +36,7 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} + - {{ compiler('cuda') }} ={{ cuda_version }} {% else %} - {{ compiler('cuda') }} {% endif %} @@ -84,10 +82,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} requirements: @@ -131,10 +127,8 @@ outputs: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - {{ compiler('cuda') }} + {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} requirements: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 90c0c02cf3..118f3f0e28 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -422,6 +422,27 @@ if(BUILD_CUML_CPP_LIBRARY) src/hdbscan/hdbscan.cu src/hdbscan/condensed_hierarchy.cu src/hdbscan/prediction_data.cu) + + # When using GCC 13, some maybe-uninitialized warnings appear from CCCL and are treated as errors. 
+ # See this issue: https://github.com/rapidsai/cuml/issues/6225 + set_property( + SOURCE src/hdbscan/condensed_hierarchy.cu + APPEND_STRING + PROPERTY COMPILE_FLAGS + " -Xcompiler=-Wno-maybe-uninitialized" + ) + set_property( + SOURCE src/hdbscan/hdbscan.cu + APPEND_STRING + PROPERTY COMPILE_FLAGS + " -Xcompiler=-Wno-maybe-uninitialized" + ) + set_property( + SOURCE src/hdbscan/prediction_data.cu + APPEND_STRING + PROPERTY COMPILE_FLAGS + " -Xcompiler=-Wno-maybe-uninitialized" + ) endif() if(all_algo OR holtwinters_algo) diff --git a/cpp/src/hdbscan/condensed_hierarchy.cu b/cpp/src/hdbscan/condensed_hierarchy.cu index 76f1a19cf8..5744bc51c8 100644 --- a/cpp/src/hdbscan/condensed_hierarchy.cu +++ b/cpp/src/hdbscan/condensed_hierarchy.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -157,8 +158,9 @@ void CondensedHierarchy::condense(value_idx* full_parents, thrust::cuda::par.on(stream), full_sizes, full_sizes + size, - cuda::proclaim_return_type([=] __device__(value_idx a) -> bool { return a != -1; }), - 0, + cuda::proclaim_return_type( + [=] __device__(value_idx a) -> value_idx { return static_cast(a != -1); }), + static_cast(0), thrust::plus()); parents.resize(n_edges, stream); diff --git a/cpp/src/hdbscan/detail/utils.h b/cpp/src/hdbscan/detail/utils.h index b151628429..4456416a6f 100644 --- a/cpp/src/hdbscan/detail/utils.h +++ b/cpp/src/hdbscan/detail/utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -114,8 +115,9 @@ Common::CondensedHierarchy make_cluster_tree( thrust_policy, sizes, sizes + condensed_tree.get_n_edges(), - cuda::proclaim_return_type([=] __device__(value_idx a) -> bool { return a > 1; }), - 0, + cuda::proclaim_return_type( + [=] __device__(value_idx a) -> value_idx { return static_cast(a > 1); }), + static_cast(0), thrust::plus()); // remove leaves from condensed tree diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 0576217965..6cfd9d2d2f 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -149,6 +149,14 @@ if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.2") # An HDBSCAN gtest is failing w/ CUDA 11.2 for some reason. if(all_algo OR hdbscan_algo) ConfigureTest(PREFIX SG NAME HDBSCAN_TEST sg/hdbscan_test.cu ML_INCLUDE) + # When using GCC 13, some maybe-uninitialized warnings appear from CCCL and are treated as errors. 
+ # See this issue: https://github.com/rapidsai/cuml/issues/6225 + set_property( + SOURCE sg/hdbscan_test.cu + APPEND_STRING + PROPERTY COMPILE_FLAGS + " -Xcompiler=-Wno-maybe-uninitialized" + ) endif() endif() diff --git a/dependencies.yaml b/dependencies.yaml index f35eb74021..6761744857 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -190,29 +190,37 @@ dependencies: - matrix: arch: x86_64 packages: - - gcc_linux-64=11.* - - sysroot_linux-64==2.17 + - sysroot_linux-64==2.28 - matrix: arch: aarch64 packages: - - gcc_linux-aarch64=11.* - - sysroot_linux-aarch64==2.17 + - sysroot_linux-aarch64==2.28 - output_types: conda matrices: - matrix: arch: x86_64 cuda: "11.8" packages: + - gcc_linux-64=11.* - nvcc_linux-64=11.8 - matrix: arch: aarch64 cuda: "11.8" packages: + - gcc_linux-64=11.* - nvcc_linux-aarch64=11.8 - matrix: + arch: x86_64 + cuda: "12.*" + packages: + - cuda-nvcc + - gcc_linux-64=13.* + - matrix: + arch: aarch64 cuda: "12.*" packages: - cuda-nvcc + - gcc_linux-aarch64=13.* py_build_cuml: common: - output_types: [conda, requirements, pyproject] From 01e19bba9821954b062a04fbf31d3522afa4b0b1 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 21 Jan 2025 17:35:49 -0600 Subject: [PATCH 8/8] Ignore cudf's __dataframe__ deprecation. (#6229) Currently CI is failing due to https://github.com/rapidsai/cudf/pull/17736. The `__dataframe__` protocol appears to be used internally by scikit-learn: https://github.com/scikit-learn/scikit-learn/blob/311bf6badd74bb69081eb90e2643f15706d3473c/sklearn/utils/validation.py#L389 Errors look like: ``` FAILED test_metrics.py::test_sklearn_search - FutureWarning: Using `__dataframe__` is deprecated ``` This PR ignores the `FutureWarning` to allow CI to pass. --------- Co-authored-by: Dante Gama Dessavre --- python/cuml/cuml/tests/test_input_utils.py | 16 ++++++++++------ .../cuml/tests/test_kneighbors_classifier.py | 4 +++- python/cuml/cuml/tests/test_metrics.py | 2 ++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/python/cuml/cuml/tests/test_input_utils.py b/python/cuml/cuml/tests/test_input_utils.py index b2570f8f17..4d76c84900 100644 --- a/python/cuml/cuml/tests/test_input_utils.py +++ b/python/cuml/cuml/tests/test_input_utils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -35,6 +35,7 @@ np = cpu_only_import("numpy") nbcuda = gpu_only_import_from("numba", "cuda") +cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED") pdDF = cpu_only_import_from("pandas", "DataFrame") @@ -446,11 +447,14 @@ def test_tocupy_missing_values_handling(): assert str(array.dtype) == "float64" assert cp.isnan(array[1]) - with pytest.raises(ValueError): - df = cudf.Series(data=[7, None, 3]) - array, n_rows, n_cols, dtype = input_to_cupy_array( - df, fail_on_null=True - ) + # cudf.pandas now mimics pandas better for handling None, so we don't + # need to fail and raise this error when cudf.pandas is active. 
+ if not cudf_pandas_active: + with pytest.raises(ValueError): + df = cudf.Series(data=[7, None, 3]) + array, n_rows, n_cols, dtype = input_to_cupy_array( + df, fail_on_null=True + ) @pytest.mark.cudf_pandas diff --git a/python/cuml/cuml/tests/test_kneighbors_classifier.py b/python/cuml/cuml/tests/test_kneighbors_classifier.py index d39ef3bae5..cb387b21d1 100644 --- a/python/cuml/cuml/tests/test_kneighbors_classifier.py +++ b/python/cuml/cuml/tests/test_kneighbors_classifier.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -218,6 +218,8 @@ def test_predict_large_n_classes(datatype): assert array_equal(y_hat.astype(np.int32), y_test.astype(np.int32)) +# Ignore FutureWarning: Using `__dataframe__` is deprecated +@pytest.mark.filterwarnings("ignore::FutureWarning") @pytest.mark.parametrize("n_samples", [100]) @pytest.mark.parametrize("n_features", [40]) @pytest.mark.parametrize("n_neighbors", [4]) diff --git a/python/cuml/cuml/tests/test_metrics.py b/python/cuml/cuml/tests/test_metrics.py index 5886ff68d4..40ea25ed35 100644 --- a/python/cuml/cuml/tests/test_metrics.py +++ b/python/cuml/cuml/tests/test_metrics.py @@ -163,6 +163,8 @@ def test_r2_score(datatype, use_handle): np.testing.assert_almost_equal(score, 0.98, decimal=7) +# Ignore FutureWarning: Using `__dataframe__` is deprecated +@pytest.mark.filterwarnings("ignore::FutureWarning") def test_sklearn_search(): """Test ensures scoring function works with sklearn machinery""" import numpy as np
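The `@pytest.mark.filterwarnings("ignore::FutureWarning")` markers added in the final patch suppress the deprecation warning on a per-test basis. A minimal, self-contained sketch of that mechanism follows; it is not part of the patch, and the helper and test names are illustrative only.

```python
import warnings

import pytest


def _calls_deprecated_interchange_protocol():
    # Stand-in for scikit-learn internally touching cudf's __dataframe__ protocol.
    warnings.warn("Using `__dataframe__` is deprecated", FutureWarning)
    return 42


# Without this marker, a pytest run configured to error on FutureWarning
# (e.g. `-W error::FutureWarning`) would fail this test.
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_ignores_dataframe_deprecation():
    assert _calls_deprecated_interchange_protocol() == 42
```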