Skip to content

Commit

Permalink
Style fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
dantegd committed Nov 20, 2024
1 parent 510da44 commit dfa7828
Show file tree
Hide file tree
Showing 20 changed files with 128 additions and 134 deletions.
5 changes: 3 additions & 2 deletions python/cuml/cuml/experimental/accel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def install():
logger.set_level(logger.level_info)
logger.set_pattern("%v")


logger.info("cuML: Installing experimental accelerator...")
loader_sklearn = _install_for_library(library_name="sklearn")
loader_umap = _install_for_library(library_name="umap")
Expand All @@ -49,7 +48,9 @@ def install():
GlobalSettings().accelerator_active = True

if GlobalSettings().accelerator_loaded:
logger.info("cuML: experimental accelerator succesfully initialized...")
logger.info(
"cuML: experimental accelerator successfully initialized..."
)
else:
logger.info("cuML: experimental accelerator failed to initialize...")

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/experimental/accel/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
)
@click.argument("args", nargs=-1)
def main(module, strict, args):

if strict:
os.environ["CUML_ACCEL_STRICT_MODE"] = "ON"

Expand Down
103 changes: 53 additions & 50 deletions python/cuml/cuml/experimental/accel/estimator_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ def __init__(self, original_module: types.ModuleType) -> None:
self._original_module = original_module
self._proxy_estimators: Dict[str, Type[Any]] = {}

def add_estimator(self, class_name: str, proxy_estimator: Type[Any]) -> None:
def add_estimator(
self, class_name: str, proxy_estimator: Type[Any]
) -> None:
"""
Add a proxy estimator for a specified class name.
Parameters
Expand All @@ -75,14 +77,14 @@ def __getattr__(self, name: str) -> Any:
The attribute from the proxy estimator or the original module.
"""
if name in self._proxy_estimators:
use_proxy = getattr(GlobalSettings(), 'accelerator_active', False)
use_proxy = getattr(GlobalSettings(), "accelerator_active", False)
if use_proxy:
return self._proxy_estimators[name]
else:
return getattr(self._original_module, name)
else:
return getattr(self._original_module, name)

def __dir__(self) -> List[str]:
"""
Provide a list of attributes available in the proxy module.
Expand All @@ -92,7 +94,7 @@ def __dir__(self) -> List[str]:
A list of attribute names from the original module.
"""
return dir(self._original_module)


def intercept(
original_module: str,
Expand All @@ -102,52 +104,52 @@ def intercept(
):
"""
Factory function that creates class definitions of ProxyEstimators that
accelerate estimators of the original class.
accelerate estimators of the original class.
This function dynamically creates a new class called `ProxyEstimator` that
inherits from the GPU-accelerated class in the `accelerated_module`
(e.g., cuML) and acts as a drop-in replacement for the original class in
`original_module` (e.g., scikit-learn). Then, this class can be used to
This function dynamically creates a new class called `ProxyEstimator` that
inherits from the GPU-accelerated class in the `accelerated_module`
(e.g., cuML) and acts as a drop-in replacement for the original class in
`original_module` (e.g., scikit-learn). Then, this class can be used to
create instances of ProxyEstimators that dispatch to either library.
**Design of the ProxyEstimator Class Inside**
**`ProxyEstimator` Class:**
- The `ProxyEstimator` class inherits from the GPU-accelerated
- The `ProxyEstimator` class inherits from the GPU-accelerated
class (`class_b`) obtained from the `accelerated_module`.
- It serves as a wrapper that adds additional functionality
- It serves as a wrapper that adds additional functionality
to maintain compatibility with the original CPU-based estimator.
Key methods and attributes:
- `__init__`: Initializes the proxy estimator, stores a
- `__init__`: Initializes the proxy estimator, stores a
reference to the original class before ModuleAccelerator
replaces the original module, translates hyperparameters,
replaces the original module, translates hyperparameters,
and initializes the parent (cuML) class.
- `__repr__` and `__str__`: Provide string representations
- `__repr__` and `__str__`: Provide string representations
that reference the original CPU-based class.
- Attribute `_cpu_model_class`: Stores a reference to the
- Attribute `_cpu_model_class`: Stores a reference to the
original CPU-based estimator class.
- Attribute `_gpuaccel`: Indicates whether GPU acceleration
- Attribute `_gpuaccel`: Indicates whether GPU acceleration
is enabled.
- By designing the `ProxyEstimator` in this way, we can
seamlessly replace the original CPU-based estimator with a
GPU-accelerated version without altering the existing codebase.
The metaclass ensures that the class behaves and appears
like the original estimator, while the proxy class manages
- By designing the `ProxyEstimator` in this way, we can
seamlessly replace the original CPU-based estimator with a
GPU-accelerated version without altering the existing codebase.
The metaclass ensures that the class behaves and appears
like the original estimator, while the proxy class manages
the underlying acceleration and compatibility.
**Serialization/Pickling of ProxyEstimators**
Since pickle has strict rules about serializing classes, we cannot
Since pickle has strict rules about serializing classes, we cannot
(reasonably) create a method that just pickles and unpickles a ProxyEstimator
as if it were just an instance of the original module.
as if it was just an instance of the original module.
Therefore, doing a pickling of ProxyEstimator will make it serialize to
a file that can be opened in systems with cuML installed (CPU or GPU).
Therefore, doing a pickling of ProxyEstimator will make it serialize to
a file that can be opened in systems with cuML installed (CPU or GPU).
To serialize for non-cuML systems, the to_sklearn and from_sklearn APIs
are being introduced in
https://github.com/rapidsai/cuml/pull/6102
Parameters
----------
original_module : str
Expand All @@ -162,13 +164,13 @@ class (`class_b`) obtained from the `accelerated_module`.
Returns
-------
A class definition of ProxyEstimator that inherits from
A class definition of ProxyEstimator that inherits from
the accelerated library class (cuML).
Examples
--------
>>> from module_accelerator import intercept
>>> ProxyEstimator = intercept('sklearn.linear_model',
>>> ProxyEstimator = intercept('sklearn.linear_model',
... 'cuml.linear_model', 'LinearRegression')
>>> model = ProxyEstimator()
Expand All @@ -192,14 +194,15 @@ class ProxyEstimator(class_b):
A proxy estimator class that wraps the accelerated estimator and provides
compatibility with the original estimator interface.
The ProxyEstimator inherits from the accelerated estimator class and
wraps additional functionality to maintain compatibility with the original
The ProxyEstimator inherits from the accelerated estimator class and
wraps additional functionality to maintain compatibility with the original
CPU-based estimator.
It handles the translation of hyperparameters and the transfer of models
It handles the translation of hyperparameters and the transfer of models
between CPU and GPU.
"""

def __init__(self, *args, **kwargs):
self._cpu_model_class = (
original_class_a # Store a reference to the original class
Expand All @@ -216,7 +219,7 @@ def __init__(self, *args, **kwargs):
def __repr__(self):
"""
Return a formal string representation of the object.
Returns
-------
str
Expand All @@ -228,7 +231,7 @@ def __repr__(self):
def __str__(self):
"""
Return an informal string representation of the object.
Returns
-------
str
Expand Down Expand Up @@ -267,7 +270,7 @@ def __reduce__(self):
Returns
-------
tuple
A tuple containing the callable to reconstruct the object
A tuple containing the callable to reconstruct the object
and the arguments for reconstruction.
Notes
Expand All @@ -284,7 +287,7 @@ def __reduce__(self):
self.__getstate__(),
),
)

logger.debug(
f"Created proxy estimator: ({module_b}, {original_class_name}, {ProxyEstimator})"
)
Expand All @@ -298,25 +301,25 @@ def __reduce__(self):
GlobalSettings().accelerated_modules[original_module] = proxy_module

proxy_module.add_estimator(
class_name=original_class_name,
proxy_estimator=ProxyEstimator
class_name=original_class_name, proxy_estimator=ProxyEstimator
)

sys.modules[original_module] = proxy_module

return ProxyEstimator


def reconstruct_proxy(
original_module: str,
accelerated_module: str,
class_name: str,
args: Tuple,
kwargs: Dict):
def reconstruct_proxy(
original_module: str,
accelerated_module: str,
class_name: str,
args: Tuple,
kwargs: Dict,
):
"""
Function to enable pickling of ProxyEstimators since they are defined inside
a function, which Pickle doesn't like without a function or something
that has an absolute import path like this function.
that has an absolute import path like this function.
Parameters
----------
Expand All @@ -330,11 +333,11 @@ def reconstruct_proxy(
Args of class to be deserialized (typically empty for ProxyEstimators)
kwargs : Dict
Keyword arguments to reconstruct the ProxyEstimator instance, typically
state from __setstate__ method.
state from __setstate__ method.
Returns
-------
Instance of ProxyEstimator constructed with the kwargs passed to the function.
Instance of ProxyEstimator constructed with the kwargs passed to the function.
"""
# We probably don't need to intercept again here, since we already stored
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/internals/global_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ def __init__(self):

self.shared_state.update(
{
"_output_type": None,
"_output_type": None,
"root_cm": None,
"accelerator_active": False,
"accelerator_loaded": False,
"accelerated_modules": {}
"accelerated_modules": {},
}
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,23 @@ def test_dbscan_eps(clustering_data, eps):
X, y_true = clustering_data
dbscan = DBSCAN(eps=eps).fit(X)
y_pred = dbscan.labels_
ari = adjusted_rand_score(y_true, y_pred)
adjusted_rand_score(y_true, y_pred)


@pytest.mark.parametrize("min_samples", [1, 5, 10, 20])
def test_dbscan_min_samples(clustering_data, min_samples):
    """Smoke test: DBSCAN fits and ARI computes for a range of min_samples."""
    X, y_true = clustering_data
    labels = DBSCAN(eps=0.5, min_samples=min_samples).fit(X).labels_
    # Score is computed for its side effect only (no assertion in original).
    adjusted_rand_score(y_true, labels)


@pytest.mark.parametrize("metric", ["euclidean", "manhattan", "chebyshev"])
def test_dbscan_metric(clustering_data, metric):
    """Smoke test: DBSCAN fits and ARI computes under each distance metric."""
    X, y_true = clustering_data
    model = DBSCAN(eps=0.5, metric=metric)
    model.fit(X)
    # Score is computed for its side effect only (no assertion in original).
    adjusted_rand_score(y_true, model.labels_)


@pytest.mark.parametrize(
Expand All @@ -63,23 +63,23 @@ def test_dbscan_algorithm(clustering_data, algorithm):
X, y_true = clustering_data
dbscan = DBSCAN(eps=0.5, algorithm=algorithm).fit(X)
y_pred = dbscan.labels_
ari = adjusted_rand_score(y_true, y_pred)
adjusted_rand_score(y_true, y_pred)


@pytest.mark.parametrize("leaf_size", [10, 30, 50])
def test_dbscan_leaf_size(clustering_data, leaf_size):
    """Smoke test: DBSCAN fits and ARI computes for several leaf sizes."""
    X, y_true = clustering_data
    model = DBSCAN(eps=0.5, leaf_size=leaf_size)
    model.fit(X)
    # Score is computed for its side effect only (no assertion in original).
    adjusted_rand_score(y_true, model.labels_)


@pytest.mark.parametrize("p", [1, 2, 3])
def test_dbscan_p(clustering_data, p):
    """Smoke test: DBSCAN with the Minkowski metric fits for several p values."""
    X, y_true = clustering_data
    labels = DBSCAN(eps=0.5, metric="minkowski", p=p).fit(X).labels_
    # Score is computed for its side effect only (no assertion in original).
    adjusted_rand_score(y_true, labels)


def test_dbscan_consistency(clustering_data):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_hdbscan_approx_min_span_tree(synthetic_data):
X, _ = synthetic_data
clusterer = hdbscan.HDBSCAN(approx_min_span_tree=True)
clusterer.fit(X)
# this parameter is ignored in cuML
# this parameter is ignored in cuML


@pytest.mark.parametrize("n_jobs", [1, -1])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_hdbscan_membership_vector(synthetic_data):
clusterer = hdbscan.HDBSCAN(prediction_data=True)
clusterer.fit(X_train)
point = X_train[0].reshape((1, 2))
membership = hdbscan.membership_vector(clusterer, point)
hdbscan.membership_vector(clusterer, point)


def test_hdbscan_all_points_membership_vectors(synthetic_data):
Expand Down Expand Up @@ -108,7 +108,7 @@ def test_hdbscan_condensed_tree(synthetic_data):
condensed_tree, "to_pandas"
), "Condensed tree should have a 'to_pandas' method"
# Convert to pandas DataFrame and check columns
df = condensed_tree.to_pandas()
condensed_tree.to_pandas()


def test_hdbscan_single_linkage_tree_attribute(synthetic_data):
Expand Down Expand Up @@ -146,7 +146,7 @@ def test_hdbscan_prediction_membership_vector(synthetic_data):
clusterer = hdbscan.HDBSCAN(prediction_data=True)
clusterer.fit(X_train)
point = X_train[0].reshape((1, 2))
membership = prediction.membership_vector(clusterer, point)
prediction.membership_vector(clusterer, point)


def test_hdbscan_prediction_all_points_membership_vectors(synthetic_data):
Expand Down
Loading

0 comments on commit dfa7828

Please sign in to comment.