Skip to content

Commit

Permalink
FIX actual typo fix
Browse files Browse the repository at this point in the history
  • Loading branch information
dantegd committed Sep 27, 2023
1 parent db61f17 commit 1272ab4
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
1 change: 1 addition & 0 deletions python/cuml/cluster/hdbscan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@
from cuml.cluster.hdbscan.hdbscan import condense_hierarchy

from cuml.cluster.hdbscan.prediction import all_points_membership_vectors
from cuml.cluster.hdbscan.prediction import membership_vector
from cuml.cluster.hdbscan.prediction import approximate_predict
7 changes: 6 additions & 1 deletion python/cuml/cluster/hdbscan/prediction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -228,25 +228,30 @@ def all_points_membership_vectors(clusterer, batch_size=4096):


def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dtype=True):
"""Predict soft cluster membership. The result produces a vector
"""
Predict soft cluster membership. The result produces a vector
for each point in ``points_to_predict`` that gives a probability that
the given point is a member of a cluster for each of the selected clusters
of the ``clusterer``.
Parameters
----------
clusterer : HDBSCAN
A clustering object that has been fit to the data and
either had ``prediction_data=True`` set, or called the
``generate_prediction_data`` method after the fact.
points_to_predict : array, or array-like (n_samples, n_features)
The new data points to predict cluster labels for. They should
have the same dimensionality as the original dataset over which
clusterer was fit.
batch_size : int, optional, default=min(4096, n_points_to_predict)
Lowers memory requirement by computing distance-based membership
in smaller batches of points in the prediction data. For example, a
batch size of 1,000 computes distance based memberships for 1,000
points at a time. The default batch size is 4,096.
Returns
-------
membership_vectors : array (n_samples, n_clusters)
Expand Down
4 changes: 4 additions & 0 deletions python/cuml/tests/test_doctest.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None):
continue
if "RandomForestClassifier" in member.__name__:
continue
if "Lasso" in member.__name__:
continue
if "ElasticNet" in member.__name__:
continue
yield from _find_doctests_in_obj(
member, finder, criteria=_is_public_name
)
Expand Down

0 comments on commit 1272ab4

Please sign in to comment.