rapidsai · raydouglass · Nov 21, 2024 · Oct 7, 2024 · Oct 8, 2024 · Oct 8, 2024
diff --git a/ci/run_cuml_singlegpu_pytests.sh b/ci/run_cuml_singlegpu_pytests.sh
@@ -5,3 +5,7 @@
 cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuml/cuml/tests
 
 python -m pytest --cache-clear --ignore=dask -m "not memleak" "$@" .
+
+# The cd above already moved into .../python/cuml/cuml/tests, so step into the
+# accel tests relative to it. Re-running realpath on "${BASH_SOURCE[0]}" here
+# would resolve a relative script path against the new working directory and
+# point at the wrong location.
+cd ./experimental/accel
+
+python -m pytest -p cuml.experimental.accel --cache-clear "$@" .
@@ -225,6 +225,19 @@ class DBSCAN(UniversalBase,
     core_sample_indices_ = CumlArrayDescriptor(order="C")
     labels_ = CumlArrayDescriptor(order="C")
 
+    # Value translation table keyed by hyperparameter name; each inner dict
+    # maps an incoming value to the value used in its place ("auto" for
+    # "algorithm" becomes "brute").
+    # NOTE(review): presumably applied by the interop/accelerator layer to
+    # scikit-learn-style arguments, with the string "NotImplemented" marking
+    # values that have no cuML equivalent -- confirm where this is consumed.
+    _hyperparam_interop_translator = {
+        "metric": {
+            "manhattan": "NotImplemented",
+            "chebyshev": "NotImplemented",
+            "minkowski": "NotImplemented",
+        },
+        "algorithm": {
+            "auto": "brute",
+            "ball_tree": "NotImplemented",
+            "kd_tree": "NotImplemented",
+        },
+    }
+
     @device_interop_preparation
     def __init__(self, *,
                  eps=0.5,
@@ -263,7 +276,7 @@ class DBSCAN(UniversalBase,
         opg that is set to `False` for SG, `True` for OPG (multi-GPU)
         """
         if out_dtype not in ["int32", np.int32, "int64", np.int64]:
-            raise ValueError("Invalid value for out_dtype. "
+            raise ValueError(f"Invalid value for out_dtype: {out_dtype}. "
                              "Valid values are {'int32', 'int64', "
                              "np.int32, np.int64}")
 
@@ -422,7 +435,7 @@ class DBSCAN(UniversalBase,
 
     @generate_docstring(skip_parameters_heading=True)
     @enable_device_interop
-    def fit(self, X, out_dtype="int32", sample_weight=None,
+    def fit(self, X, y=None, out_dtype="int32", sample_weight=None,
             convert_dtype=True) -> "DBSCAN":
         """
         Perform DBSCAN clustering from features.
@@ -447,7 +460,7 @@ class DBSCAN(UniversalBase,
                                        'description': 'Cluster labels',
                                        'shape': '(n_samples, 1)'})
     @enable_device_interop
-    def fit_predict(self, X, out_dtype="int32", sample_weight=None) -> CumlArray:
+    def fit_predict(self, X, y=None, out_dtype="int32", sample_weight=None) -> CumlArray:
         """
         Performs clustering on X and returns cluster labels.
 
@@ -463,7 +476,7 @@ class DBSCAN(UniversalBase,
             negative weight may inhibit its eps-neighbor from being core.
             default: None (which is equivalent to weight 1 for all samples).
         """
-        self.fit(X, out_dtype, sample_weight)
+        self.fit(X, out_dtype=out_dtype, sample_weight=sample_weight)
         return self.labels_
 
     @classmethod

@@ -485,6 +485,19 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
     mst_dst_ = CumlArrayDescriptor()
     mst_weights_ = CumlArrayDescriptor()
 
+    # Value translation table keyed by hyperparameter name; each inner dict
+    # maps an incoming value to the value used in its place.
+    # NOTE(review): this table is identical to the one defined on DBSCAN;
+    # confirm that these "metric"/"algorithm" values are the ones the hdbscan
+    # package actually accepts, and that the string "NotImplemented" is the
+    # marker the consumer of this table expects.
+    _hyperparam_interop_translator = {
+        "metric": {
+            "manhattan": "NotImplemented",
+            "chebyshev": "NotImplemented",
+            "minkowski": "NotImplemented",
+        },
+        "algorithm": {
+            "auto": "brute",
+            "ball_tree": "NotImplemented",
+            "kd_tree": "NotImplemented",
+        },
+    }
+
     @device_interop_preparation
     def __init__(self, *,
                  min_cluster_size=5,

@@ -564,7 +564,7 @@ class KMeans(UniversalBase,
                                        'description': 'Cluster indexes',
                                        'shape': '(n_samples, 1)'})
     @enable_device_interop
-    def predict(self, X, convert_dtype=True, sample_weight=None,
+    def predict(self, X, y=None, convert_dtype=True, sample_weight=None,
                 normalize_weights=True) -> CumlArray:
         """
         Predict the closest cluster each sample in X belongs to.
@@ -583,7 +583,7 @@ class KMeans(UniversalBase,
                                        'description': 'Transformed data',
                                        'shape': '(n_samples, n_clusters)'})
     @enable_device_interop
-    def transform(self, X, convert_dtype=True) -> CumlArray:
+    def transform(self, X, y=None, convert_dtype=True) -> CumlArray:
         """
         Transform X to a cluster-distance space.
 
@@ -687,7 +687,7 @@ class KMeans(UniversalBase,
                                        'description': 'Transformed data',
                                        'shape': '(n_samples, n_clusters)'})
     @enable_device_interop
-    def fit_transform(self, X, convert_dtype=False,
+    def fit_transform(self, X, y=None, convert_dtype=False,
                       sample_weight=None) -> CumlArray:
         """
         Compute clustering and transform X to cluster-distance space.

@@ -280,6 +280,16 @@ class PCA(UniversalBase,
     noise_variance_ = CumlArrayDescriptor(order='F')
     trans_input_ = CumlArrayDescriptor(order='F')
 
+    # Value translation table keyed by hyperparameter name; each inner dict
+    # maps an incoming value to the value used in its place. The "arpack" and
+    # "randomized" solvers both become "full", and iterated_power="auto"
+    # becomes 15 (the same value as this class's iterated_power default).
+    # NOTE(review): presumably applied by the interop/accelerator layer to
+    # scikit-learn-style arguments -- confirm where this is consumed.
+    _hyperparam_interop_translator = {
+        "svd_solver": {
+            "arpack": "full",
+            "randomized": "full"
+        },
+        "iterated_power": {
+            "auto": 15,
+        },
+    }
+
     @device_interop_preparation
     def __init__(self, *, copy=True, handle=None, iterated_power=15,
                  n_components=None, random_state=None, svd_solver='auto',

@@ -240,6 +240,13 @@ class TruncatedSVD(UniversalBase,
     explained_variance_ratio_ = CumlArrayDescriptor(order='F')
     singular_values_ = CumlArrayDescriptor(order='F')
 
+    # Value translation table keyed by hyperparameter name; the inner dict
+    # maps an incoming value to the value used in its place. Both "randomized"
+    # and "arpack" become "full" (the same value as this class's algorithm
+    # default).
+    # NOTE(review): presumably applied by the interop/accelerator layer to
+    # scikit-learn-style arguments -- confirm where this is consumed.
+    _hyperparam_interop_translator = {
+        "algorithm": {
+            "randomized": "full",
+            "arpack": "full",
+        },
+    }
+
     @device_interop_preparation
     def __init__(self, *, algorithm='full', handle=None, n_components=1,
                  n_iter=15, random_state=None, tol=1e-7,

@@ -0,0 +1,68 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import importlib
+
+from .magics import load_ipython_extension
+
+from cuml.internals import logger
+from cuml.internals.global_settings import GlobalSettings
+from cuml.internals.memory_utils import set_global_output_type
+
+__all__ = ["load_ipython_extension", "install"]
+
+
+def _install_for_library(library_name):
+    """Import the ``_wrappers`` submodule for *library_name*.
+
+    The submodule is resolved relative to this module's ``__name__``. Any
+    exception raised by the import propagates to the caller; when the import
+    completes, ``True`` is returned.
+    """
+    wrapper_module = f"._wrappers.{library_name}"
+    importlib.import_module(wrapper_module, package=__name__)
+    return True
+
+
+def install():
+    """Turn on cuML Accelerator Mode.
+
+    Sets the logger to info level with the pattern "%v", imports the wrapper
+    modules for sklearn, umap and hdbscan (in that order), records the
+    outcome on ``GlobalSettings`` and sets the global output type to "numpy".
+    """
+    logger.set_level(logger.level_info)
+    logger.set_pattern("%v")
+    logger.info("cuML: Installing experimental accelerator...")
+
+    # The wrapper modules are imported eagerly, in this fixed order.
+    load_results = [
+        _install_for_library(library_name=name)
+        for name in ("sklearn", "umap", "hdbscan")
+    ]
+
+    GlobalSettings().accelerator_loaded = all(load_results)
+    GlobalSettings().accelerator_active = True
+
+    status_message = (
+        "cuML: experimental accelerator successfully initialized..."
+        if GlobalSettings().accelerator_loaded
+        else "cuML: experimental accelerator failed to initialize..."
+    )
+    logger.info(status_message)
+
+    set_global_output_type("numpy")
+
+
+def pytest_load_initial_conftests(early_config, parser, args):
+    """Implement pytest's ``pytest_load_initial_conftests`` hook.
+
+    Calls ``install()``. The hook arguments are accepted but not used.
+
+    Raises
+    ------
+    RuntimeError
+        If ``install()`` raises ``RuntimeError``. The original exception is
+        attached as ``__cause__`` so the underlying failure stays visible.
+    """
+    # https://docs.pytest.org/en/7.1.x/reference/\
+    # reference.html#pytest.hookspec.pytest_load_initial_conftests
+    try:
+        install()
+    except RuntimeError as err:
+        # Chain explicitly: the message below assumes one specific cause, so
+        # keep the real error attached for debugging.
+        raise RuntimeError(
+            "An existing plugin has already loaded sklearn. Interposing failed."
+        ) from err
@@ -0,0 +1,70 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import click
+import code
+import os
+import runpy
+import sys
+
+from . import install
+
+
+# NOTE: ``main`` is documented with comments rather than a docstring on
+# purpose: click would use a docstring here as the command's ``--help`` text.
+#
+# Command-line entry point. Optionally sets CUML_ACCEL_STRICT_MODE, calls
+# ``install()``, then does one of:
+#   * ``-m <module> [args...]``  -> run the module via runpy.run_module,
+#   * ``<script> [args...]``     -> run the script via runpy.run_path,
+#   * no arguments               -> start an interactive interpreter.
+@click.command()
+@click.option("-m", "module", required=False, help="Module to run")
+@click.option(
+    "--strict",
+    is_flag=True,
+    default=False,
+    help="Turn strict mode for hyperparameters on.",
+)
+@click.argument("args", nargs=-1)
+def main(module, strict, args):
+
+    if strict:
+        os.environ["CUML_ACCEL_STRICT_MODE"] = "ON"
+
+    install()
+
+    if module:
+        # ``-m`` is a single-valued option, so click passes ``module`` as a
+        # plain string; unpacking it as a 1-tuple would raise ValueError for
+        # any name longer than one character.
+        # ``args`` is a tuple (nargs=-1), so splice it into the list rather
+        # than using ``list + tuple``, which raises TypeError.
+        # run the module passing the remaining arguments
+        # as if it were run with python -m <module> <args>
+        sys.argv[:] = [module, *args]  # not thread safe?
+        runpy.run_module(module, run_name="__main__")
+    elif len(args) >= 1:
+        # Remove ourself from argv and continue
+        sys.argv[:] = args
+        runpy.run_path(args[0], run_name="__main__")
+    else:
+        if sys.stdin.isatty():
+            banner = f"Python {sys.version} on {sys.platform}"
+            site_import = not sys.flags.no_site
+            if site_import:
+                cprt = 'Type "help", "copyright", "credits" or "license" for more information.'
+                banner += "\n" + cprt
+        else:
+            # Don't show prompts or banners if stdin is not a TTY
+            sys.ps1 = ""
+            sys.ps2 = ""
+            banner = ""
+
+        # Launch an interactive interpreter
+        code.interact(banner=banner, exitmsg="")
+
+
+# Run the click command only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,34 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Registry of wrapped estimators: each key is an estimator class name and each
+# value is a ``(module path, class name)`` pair of strings naming a class under
+# the ``cuml`` package. In every entry the key equals the class name in the
+# pair.
+# NOTE(review): presumably used by the accelerator to locate the cuML
+# replacement for an estimator of the same name -- confirm against the
+# code that reads this table.
+wrapped_estimators = {
+    "KMeans": ("cuml.cluster", "KMeans"),
+    "DBSCAN": ("cuml.cluster", "DBSCAN"),
+    "PCA": ("cuml.decomposition", "PCA"),
+    "TruncatedSVD": ("cuml.decomposition", "TruncatedSVD"),
+    "KernelRidge": ("cuml.kernel_ridge", "KernelRidge"),
+    "LinearRegression": ("cuml.linear_model", "LinearRegression"),
+    "LogisticRegression": ("cuml.linear_model", "LogisticRegression"),
+    "ElasticNet": ("cuml.linear_model", "ElasticNet"),
+    "Ridge": ("cuml.linear_model", "Ridge"),
+    "Lasso": ("cuml.linear_model", "Lasso"),
+    "TSNE": ("cuml.manifold", "TSNE"),
+    "NearestNeighbors": ("cuml.neighbors", "NearestNeighbors"),
+    "KNeighborsClassifier": ("cuml.neighbors", "KNeighborsClassifier"),
+    "KNeighborsRegressor": ("cuml.neighbors", "KNeighborsRegressor"),
+    "UMAP": ("cuml.manifold", "UMAP"),
+    "HDBSCAN": ("cuml.cluster", "HDBSCAN"),
+}
@@ -0,0 +1,24 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from ..estimator_proxy import intercept
+
+
+# Module-level ``HDBSCAN`` built by ``intercept`` for class "HDBSCAN" of the
+# "hdbscan" module, with "cuml.cluster" given as the accelerated module.
+# NOTE(review): presumably a proxy that dispatches to cuML's implementation --
+# see ``estimator_proxy.intercept`` for the actual behavior.
+HDBSCAN = intercept(
+    original_module="hdbscan",
+    accelerated_module="cuml.cluster",
+    original_class_name="HDBSCAN",
+)