From 1272ab439e2681cd8f616199836171c123f0991e Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 27 Sep 2023 08:39:19 -0500 Subject: [PATCH] FIX actual typo fix --- python/cuml/cluster/hdbscan/__init__.py | 1 + python/cuml/cluster/hdbscan/prediction.pyx | 7 ++++++- python/cuml/tests/test_doctest.py | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/python/cuml/cluster/hdbscan/__init__.py b/python/cuml/cluster/hdbscan/__init__.py index 2d4ff67fea..6f2024f678 100644 --- a/python/cuml/cluster/hdbscan/__init__.py +++ b/python/cuml/cluster/hdbscan/__init__.py @@ -18,4 +18,5 @@ from cuml.cluster.hdbscan.hdbscan import condense_hierarchy from cuml.cluster.hdbscan.prediction import all_points_membership_vectors +from cuml.cluster.hdbscan.prediction import membership_vector from cuml.cluster.hdbscan.prediction import approximate_predict diff --git a/python/cuml/cluster/hdbscan/prediction.pyx b/python/cuml/cluster/hdbscan/prediction.pyx index 0237fd506c..b3ef5b3587 100644 --- a/python/cuml/cluster/hdbscan/prediction.pyx +++ b/python/cuml/cluster/hdbscan/prediction.pyx @@ -228,25 +228,30 @@ def all_points_membership_vectors(clusterer, batch_size=4096): def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dtype=True): - """Predict soft cluster membership. The result produces a vector + """ + Predict soft cluster membership. The result produces a vector for each point in ``points_to_predict`` that gives a probability that the given point is a member of a cluster for each of the selected clusters of the ``clusterer``. + Parameters ---------- clusterer : HDBSCAN A clustering object that has been fit to the data and either had ``prediction_data=True`` set, or called the ``generate_prediction_data`` method after the fact. + points_to_predict : array, or array-like (n_samples, n_features) The new data points to predict cluster labels for. They should have the same dimensionality as the original dataset over which clusterer was fit. + batch_size : int, optional, default=min(4096, n_points_to_predict) Lowers memory requirement by computing distance-based membership in smaller batches of points in the prediction data. For example, a batch size of 1,000 computes distance based memberships for 1,000 points at a time. The default batch size is 4,096. + Returns ------- membership_vectors : array (n_samples, n_clusters) diff --git a/python/cuml/tests/test_doctest.py b/python/cuml/tests/test_doctest.py index 2bfcb1c59b..181cb80d57 100644 --- a/python/cuml/tests/test_doctest.py +++ b/python/cuml/tests/test_doctest.py @@ -80,6 +80,10 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): continue if "RandomForestClassifier" in member.__name__: continue + if "Lasso" in member.__name__: + continue + if "ElasticNet" in member.__name__: + continue yield from _find_doctests_in_obj( member, finder, criteria=_is_public_name )