zama-ai · kcelia · Sep 21, 2023 · Jul 20, 2023 · Jul 20, 2023 · Jul 21, 2023
@@ -33,6 +33,7 @@
 from concrete.ml.sklearn.base import (
     BaseTreeEstimatorMixin,
     QuantizedTorchEstimatorMixin,
+    SklearnKNeighborsMixin,
     SklearnLinearModelMixin,
 )
 
@@ -482,7 +483,12 @@ def check_is_good_execution_for_cml_vs_circuit_impl(
             else:
                 assert isinstance(
                     model,
-                    (QuantizedTorchEstimatorMixin, BaseTreeEstimatorMixin, SklearnLinearModelMixin),
+                    (
+                        QuantizedTorchEstimatorMixin,
+                        BaseTreeEstimatorMixin,
+                        SklearnLinearModelMixin,
+                        SklearnKNeighborsMixin,
+                    ),
                 )
 
                 if model._is_a_public_cml_model:  # pylint: disable=protected-access
@@ -492,8 +498,14 @@ def check_is_good_execution_for_cml_vs_circuit_impl(
                     # tests), especially since these results are tested in other tests such as the
                     # `check_subfunctions_in_fhe`
                     if is_classifier_or_partial_classifier(model):
-                        results_cnp_circuit = model.predict_proba(*inputs, fhe=fhe_mode)
-                        results_model = model.predict_proba(*inputs, fhe="disable")
+                        if isinstance(model, SklearnKNeighborsMixin):
+                            # KNN does not support `predict_proba` yet: compare `predict`
+                            # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962
+                            results_cnp_circuit = model.predict(*inputs, fhe=fhe_mode)
+                            results_model = model.predict(*inputs, fhe="disable")
+                        else:
+                            results_cnp_circuit = model.predict_proba(*inputs, fhe=fhe_mode)
+                            results_model = model.predict_proba(*inputs, fhe="disable")
 
                     else:
                         results_cnp_circuit = model.predict(*inputs, fhe=fhe_mode)

@@ -87,6 +87,9 @@ def _get_fully_qualified_name(object_class: Type) -> str:
         "skorch.dataset.Dataset",
         "skorch.dataset.ValidSplit",
         "inspect._empty",
+        "sklearn.neighbors._classification.KNeighborsClassifier",
+        "sklearn.metrics._dist_metrics.EuclideanDistance",
+        "sklearn.neighbors._kd_tree.KDTree",
     ]
 )
 

@@ -18,6 +18,7 @@
     DecisionTreeRegressor,
     ElasticNet,
     GammaRegressor,
+    KNeighborsClassifier,
     Lasso,
     LinearRegression,
     LinearSVC,
@@ -66,6 +67,7 @@
 ]
 
 _classifier_models = [
+    KNeighborsClassifier,
     DecisionTreeClassifier,
     RandomForestClassifier,
     XGBClassifier,
@@ -95,9 +97,25 @@
         id=get_model_name(model),
     )
     for model in _classifier_models
+    if get_model_name(model) != "KNeighborsClassifier"
     for n_classes in [2, 4]
+] + [
+    pytest.param(
+        model,
+        {
+            "n_samples": 6,
+            "n_features": 2,
+            "n_classes": n_classes,
+            "n_informative": 2,
+            "n_redundant": 0,
+        },
+        id=get_model_name(model),
+    )
+    for model in [KNeighborsClassifier]
+    for n_classes in [2]
 ]
 
+
 # Get the data-sets. The data generation is seeded in load_data.
 # Only LinearRegression supports multi targets
 # GammaRegressor, PoissonRegressor and TweedieRegressor only handle positive target values
@@ -141,8 +159,8 @@ def get_random_extract_of_sklearn_models_and_datasets():
             unique_model_classes.append(m)
 
     # To avoid to make mistakes and return empty list
-    assert len(sklearn_models_and_datasets) == 28
-    assert len(unique_model_classes) == 18
+    assert len(sklearn_models_and_datasets) == 29
+    assert len(unique_model_classes) == 19
 
     return unique_model_classes
 

@@ -61,7 +61,11 @@
 from tqdm import tqdm
 
 from ..common.utils import is_brevitas_model, is_model_class_in_a_list
-from ..sklearn import get_sklearn_neural_net_models, get_sklearn_tree_models
+from ..sklearn import (
+    get_sklearn_neighbors_models,
+    get_sklearn_neural_net_models,
+    get_sklearn_tree_models,
+)
 from ..torch.compile import compile_brevitas_qat_model, compile_torch_model
 
 
@@ -126,7 +130,10 @@ def compile_and_simulated_fhe_inference(
         dequantized_output = quantized_module.forward(calibration_data, fhe="simulate")
 
     elif is_model_class_in_a_list(
-        estimator, get_sklearn_neural_net_models() + get_sklearn_tree_models()
+        estimator,
+        get_sklearn_neural_net_models()
+        + get_sklearn_tree_models()
+        + get_sklearn_neighbors_models(),
     ):
         if not estimator.is_fitted:
             estimator.fit(calibration_data, ground_truth)

@@ -3,9 +3,16 @@
 
 from ..common.debugging.custom_assert import assert_true
 from ..common.utils import is_classifier_or_partial_classifier, is_regressor_or_partial_regressor
-from .base import _ALL_SKLEARN_MODELS, _LINEAR_MODELS, _NEURALNET_MODELS, _TREE_MODELS
+from .base import (
+    _ALL_SKLEARN_MODELS,
+    _LINEAR_MODELS,
+    _NEIGHBORS_MODELS,
+    _NEURALNET_MODELS,
+    _TREE_MODELS,
+)
 from .glm import GammaRegressor, PoissonRegressor, TweedieRegressor
 from .linear_model import ElasticNet, Lasso, LinearRegression, LogisticRegression, Ridge
+from .neighbors import KNeighborsClassifier
 from .qnn import NeuralNetClassifier, NeuralNetRegressor
 from .rf import RandomForestClassifier, RandomForestRegressor
 from .svm import LinearSVC, LinearSVR
@@ -31,6 +38,7 @@ def get_sklearn_models():
         "linear": sorted(list(_LINEAR_MODELS), key=lambda m: m.__name__),
         "tree": sorted(list(_TREE_MODELS), key=lambda m: m.__name__),
         "neural_net": sorted(list(_NEURALNET_MODELS), key=lambda m: m.__name__),
+        "neighbors": sorted(list(_NEIGHBORS_MODELS), key=lambda m: m.__name__),
     }
     return ans
 
@@ -123,3 +131,21 @@ def get_sklearn_neural_net_models(
     """
     prelist = get_sklearn_models()["neural_net"]
     return _filter_models(prelist, classifier, regressor, str_in_class_name)
+
+
+def get_sklearn_neighbors_models(
+    classifier: bool = True, regressor: bool = True, str_in_class_name: List[str] = None
+):
+    """Get the neighbor models that are available in Concrete ML.
+
+    Args:
+        classifier (bool): whether classifiers should be part of the returned list
+        regressor (bool): whether regressors should be part of the returned list
+        str_in_class_name (List[str]): if not None, a string or list of strings; only models
+            having it as a substring in their class name are returned
+
+    Returns:
+        the list of neighbor models in Concrete ML that match the given filters
+    """
+    neighbor_models = get_sklearn_models()["neighbors"]
+    return _filter_models(neighbor_models, classifier, regressor, str_in_class_name)