Resolve KDTree/BallTree valid metrics once, in a scikit-learn-version-aware way.

When the installed scikit-learn is >= 1.3.0, KDTree.valid_metrics and
BallTree.valid_metrics are called as methods; on older versions they are read
as plain attributes. The result is stored at import time in the module-level
names kdtree_valid_metrics / balltree_valid_metrics, and every metric check in
hdbscan_.py and robust_single_linkage_.py uses those names instead of touching
the tree classes directly.

The "boruvka_kdtree" branch validates the metric against kdtree_valid_metrics,
which matches the tree it builds and the error message it raises.

requirements.txt gains "packaging" (used for the version comparison); the
file's trailing newline is left untouched.

diff --git a/hdbscan/hdbscan_.py b/hdbscan/hdbscan_.py
--- a/hdbscan/hdbscan_.py
+++ b/hdbscan/hdbscan_.py
@@ -4,8 +4,10 @@
          of Applications with Noise
 """
 
+import sklearn
 import numpy as np
 
+from packaging.version import Version
 from sklearn.base import BaseEstimator, ClusterMixin
 from sklearn.metrics import pairwise_distances
 from scipy.sparse import issparse
@@ -37,7 +39,14 @@
 from .plots import CondensedTree, SingleLinkageTree, MinimumSpanningTree
 from .prediction import PredictionData
 
-FAST_METRICS = KDTree.valid_metrics + BallTree.valid_metrics + ["cosine", "arccos"]
+if Version(sklearn.__version__) >= Version("1.3.0"):
+    kdtree_valid_metrics = KDTree.valid_metrics()
+    balltree_valid_metrics = BallTree.valid_metrics()
+else:
+    kdtree_valid_metrics = KDTree.valid_metrics
+    balltree_valid_metrics = BallTree.valid_metrics
+
+FAST_METRICS = kdtree_valid_metrics + balltree_valid_metrics + ["cosine", "arccos"]
 
 # Author: Leland McInnes
 #         Steve Astels
@@ -742,19 +751,19 @@ def hdbscan(
                     _hdbscan_generic
                 )(X, min_samples, alpha, metric, p, leaf_size, gen_min_span_tree, **kwargs)
             elif algorithm == "prims_kdtree":
-                if metric not in KDTree.valid_metrics:
+                if metric not in kdtree_valid_metrics:
                     raise ValueError("Cannot use Prim's with KDTree for this" " metric!")
                 (single_linkage_tree, result_min_span_tree) = memory.cache(
                     _hdbscan_prims_kdtree
                 )(X, min_samples, alpha, metric, p, leaf_size, gen_min_span_tree, **kwargs)
             elif algorithm == "prims_balltree":
-                if metric not in BallTree.valid_metrics:
+                if metric not in balltree_valid_metrics:
                     raise ValueError("Cannot use Prim's with BallTree for this" " metric!")
                 (single_linkage_tree, result_min_span_tree) = memory.cache(
                     _hdbscan_prims_balltree
                 )(X, min_samples, alpha, metric, p, leaf_size, gen_min_span_tree, **kwargs)
             elif algorithm == "boruvka_kdtree":
-                if metric not in BallTree.valid_metrics:
+                if metric not in kdtree_valid_metrics:
                     raise ValueError("Cannot use Boruvka with KDTree for this" " metric!")
                 (single_linkage_tree, result_min_span_tree) = memory.cache(
                     _hdbscan_boruvka_kdtree
@@ -771,7 +780,7 @@ def hdbscan(
                     **kwargs
                 )
             elif algorithm == "boruvka_balltree":
-                if metric not in BallTree.valid_metrics:
+                if metric not in balltree_valid_metrics:
                     raise ValueError("Cannot use Boruvka with BallTree for this" " metric!")
                 if (X.shape[0] // leaf_size) > 16000:
                     warn(
@@ -802,7 +811,7 @@ def hdbscan(
                 (single_linkage_tree, result_min_span_tree) = memory.cache(
                     _hdbscan_generic
                 )(X, min_samples, alpha, metric, p, leaf_size, gen_min_span_tree, **kwargs)
-            elif metric in KDTree.valid_metrics:
+            elif metric in kdtree_valid_metrics:
                 # TO DO: Need heuristic to decide when to go to boruvka;
                 # still debugging for now
                 if X.shape[1] > 60:
@@ -1237,9 +1246,9 @@ def generate_prediction_data(self):
 
         if self.metric in FAST_METRICS:
             min_samples = self.min_samples or self.min_cluster_size
-            if self.metric in KDTree.valid_metrics:
+            if self.metric in kdtree_valid_metrics:
                 tree_type = "kdtree"
-            elif self.metric in BallTree.valid_metrics:
+            elif self.metric in balltree_valid_metrics:
                 tree_type = "balltree"
             else:
                 warn("Metric {} not supported for prediction data!".format(self.metric))
diff --git a/hdbscan/robust_single_linkage_.py b/hdbscan/robust_single_linkage_.py
index 5662190e..e5c5ec83 100644
--- a/hdbscan/robust_single_linkage_.py
+++ b/hdbscan/robust_single_linkage_.py
@@ -2,8 +2,10 @@
 """
 Robust Single Linkage: Density based single linkage clustering.
 """
+import sklearn
 import numpy as np
 
+from packaging.version import Version
 from sklearn.base import BaseEstimator, ClusterMixin
 from sklearn.metrics import pairwise_distances
 from scipy.sparse import issparse
@@ -24,7 +26,14 @@
 #
 # License: BSD 3 clause
 
-FAST_METRICS = KDTree.valid_metrics + BallTree.valid_metrics
+if Version(sklearn.__version__) >= Version("1.3.0"):
+    kdtree_valid_metrics = KDTree.valid_metrics()
+    balltree_valid_metrics = BallTree.valid_metrics()
+else:
+    kdtree_valid_metrics = KDTree.valid_metrics
+    balltree_valid_metrics = BallTree.valid_metrics
+
+FAST_METRICS = kdtree_valid_metrics + balltree_valid_metrics
 
 
 def _rsl_generic(X, k=5, alpha=1.4142135623730951, metric='euclidean',
@@ -266,7 +275,7 @@ def robust_single_linkage(X, cut, k=5, alpha=1.4142135623730951,
         # We can't do much with sparse matrices ...
         single_linkage_tree = memory.cache(_rsl_generic)(
             X, k, alpha, metric, **kwargs)
-    elif metric in KDTree.valid_metrics:
+    elif metric in kdtree_valid_metrics:
         # Need heuristic to decide when to go to boruvka;
         # still debugging for now
         if X.shape[1] > 128:
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 cython>=0.27
 numpy>=1.20
+packaging
 scipy>= 1.0
 scikit-learn>=0.20
 joblib>=1.0