From 0fc4ad8fe773acdd5e13cbb8a4e0ee9a3241c557 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 20 Jul 2023 22:03:41 +0200 Subject: [PATCH 01/51] chore: update base.py with concrete ml v --- src/concrete/ml/sklearn/base.py | 284 ++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 5ac220efd..da645ca63 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -60,11 +60,13 @@ # Silence Hummingbird warnings warnings.filterwarnings("ignore") from hummingbird.ml import convert as hb_convert # noqa: E402 +from hummingbird.ml.operator_converters import constants _ALL_SKLEARN_MODELS: Set[Type] = set() _LINEAR_MODELS: Set[Type] = set() _TREE_MODELS: Set[Type] = set() _NEURALNET_MODELS: Set[Type] = set() +_NEIGHBORS_MODELS: Set[Type] = set() # Define the supported types for both the input data and the target values. Since the Pandas # library is currently only a dev dependencies, we cannot import it. We therefore need to use type @@ -1690,3 +1692,285 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> y_logits = self.decision_function(X, fhe=fhe) y_proba = self.post_processing(y_logits) return y_proba + + +# pylint: disable=invalid-name,too-many-instance-attributes +# from sklearn.neighbors._base import NeighborsBase +class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): + """A Mixin class for sklearn neighbors models with FHE. + + This class inherits from sklearn.base.BaseEstimator in order to have access to scikit-learn's + `get_params` and `set_params` methods. + """ + + def __init_subclass__(cls): + for klass in cls.__mro__: + # pylint: disable-next=protected-access + if getattr(klass, "_is_a_public_cml_model", False): + _NEIGHBORS_MODELS.add(cls) + _ALL_SKLEARN_MODELS.add(cls) + + def __init__(self, n_bits: Union[int, Dict[str, int]] = 8): + """Initialize the FHE KNeighbors model. + + Args: + n_bits (int, Dict[str, int]): Number of bits to quantize the model. If an int is passed + for n_bits, the value will be used for quantizing inputs and weights. If a dict is + passed, then it should contain "op_inputs" and "op_weights" as keys with + corresponding number of quantization bits so that: + - op_inputs : number of bits to quantize the input values + - op_weights: number of bits to quantize the learned parameters + Default to 8. + """ + self.n_bits: Union[int, Dict[str, int]] = n_bits + + #: The quantizer to use for quantizing the model's weights + self._weight_quantizer: Optional[UniformQuantizer] = None + + #: The model's quantized data points + self._q_points: Optional[numpy.ndarray] = None + + BaseEstimator.__init__(self) + + def _set_onnx_model(self, test_input: numpy.ndarray) -> None: + """Retrieve the model's ONNX graph using Hummingbird conversion. + + Args: + test_input (numpy.ndarray): An input data used to trace the model execution. 
+ """ + + # Check that the underlying sklearn model has been set and fit + assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() + + self.onnx_model_ = hb_convert( + self.sklearn_model, + backend="onnx", + test_input=test_input, + # https://github.com/microsoft/hummingbird/issues/422 + extra_config={ + "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, + constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], + }, + ).model + + self._clean_graph() + + def _clean_graph(self) -> None: + """Clean the ONNX graph from undesired nodes.""" + + assert self.onnx_model_ is not None, self._is_not_fitted_error_message() + # Remove cast operators as they are not needed + remove_node_types(onnx_model=self.onnx_model_, op_types_to_remove=["Cast"]) + + def fit(self, X: Data, y: Target, **fit_parameters): + # Reset for double fit + self._is_fitted = False + + # LinearRegression handles multi-labels data + X, y = check_X_y_and_assert_multi_output(X, y) + + # Fit the scikit-learn model + self._fit_sklearn_model(X, y, **fit_parameters) + + # Check that the underlying sklearn model has been set and fit + assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() + + # Retrieve the ONNX graph + self._set_onnx_model(X) + + # Convert the n_bits attribute into a proper dictionary + n_bits = get_n_bits_dict(self.n_bits) + + input_n_bits = n_bits["op_inputs"] + input_options = QuantizationOptions(n_bits=input_n_bits, is_signed=True) + + # Quantize the inputs and store the associated quantizer + q_inputs = QuantizedArray(n_bits=input_n_bits, values=X, options=input_options) + input_quantizer = q_inputs.quantizer + self.input_quantizers.append(input_quantizer) + + weights_n_bits = n_bits["op_weights"] + weight_options = QuantizationOptions(n_bits=weights_n_bits, is_signed=True) + + # Quantize the weights and store the associated quantizer + # Transpose and expand are necessary in order to make sure the weight array has the correct + # shape when calling the Gemm operator on it + points = self.sklearn_model._fit_X + q_points = QuantizedArray( + n_bits=n_bits["op_weights"], + values=numpy.expand_dims(points, axis=1) if len(points.shape) == 1 else points, + options=weight_options, + ) + self._q_points = q_points.qvalues + weight_quantizer = q_points.quantizer + self._weight_quantizer = weight_quantizer + + # mypy + assert input_quantizer.scale is not None + assert weight_quantizer.scale is not None + + # Compute the scale and zero-point of the matmul's outputs, following the same steps from + # the QuantizedGemm operator, which are based on equations detailed in + # https://arxiv.org/abs/1712.05877 + + output_quant_params = UniformQuantizationParameters( + scale=input_quantizer.scale * weight_quantizer.scale, + zero_point=input_quantizer.zero_point + * ( + numpy.sum(self._q_points, axis=0, keepdims=True) + - X.shape[1] * weight_quantizer.zero_point + ), + offset=0, + ) + + output_quantizer = UniformQuantizer(params=output_quant_params, no_clipping=True) + + # Since the matmul and the bias both use the same scale and zero-points, we obtain that + # y = S*(q_y - 2*Z) when de-quantizing the values. 
We therefore need to multiply the initial + # output zero_point by 2 + assert output_quantizer.zero_point is not None + output_quantizer.zero_point *= 2 + self.output_quantizers.append(output_quantizer) + + # Updating post-processing parameters + self._set_post_processing_params() + + self._is_fitted = True + + return self + + def quantize_input(self, X: numpy.ndarray) -> numpy.ndarray: + self.check_model_is_fitted() + q_X = self.input_quantizers[0].quant(X) + + assert q_X.dtype == numpy.int64, "Inputs were not quantized to int64 values" + return q_X + + def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray: + self.check_model_is_fitted() + + # De-quantize the output values + y_preds = self.output_quantizers[0].dequant(q_y_preds) + + return y_preds + + def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]: + # Define the inference function to compile. + # This function can neither be a class method nor a static one because self we want to avoid + # having self as a parameter while still being able to access some of its attribute + def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray: + """Compile the circuit in FHE using only the inputs as parameters. + + Args: + q_X (numpy.ndarray): The quantized input data + + Returns: + numpy.ndarray: The circuit is outputs. + """ + return self._inference(q_X) + + # Create the compiler instance + compiler = Compiler(inference_to_compile, {"q_X": "encrypted"}) + + return compiler + + + def kneighbors(self, X=None, n_neighbors=None, return_distance=True): + pass + + def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray: + assert self._weight_quantizer is not None, self._is_not_fitted_error_message() + + print("_inference ici") + print(q_X.shape) + print(q_X[:3]) + + assert self.sklearn_model.weights == "uniform", "uniform only, NTM" + + neigh_dist, neigh_ind = self.kneighbors(q_X) + + classes_ = self.classes_ + _y = self._y + + #n_queries = _num_samples(X) + + # weights = _get_weights(neigh_dist, self.weights) + # if weights is None: + # weights = np.ones_like(neigh_ind) + + # all_rows = np.arange(n_queries) + # probabilities = [] + # for k, classes_k in enumerate(classes_): + # pred_labels = _y[:, k][neigh_ind] + # proba_k = np.zeros((n_queries, classes_k.size)) + + # # a simple ':' index doesn't work right + # for i, idx in enumerate(pred_labels.T): # loop is O(n_neighbors) + # proba_k[all_rows, idx] += weights[:, i] + + # # normalize 'votes' into real [0,1] probabilities + # normalizer = proba_k.sum(axis=1)[:, np.newaxis] + # normalizer[normalizer == 0.0] = 1.0 + # proba_k /= normalizer + + # probabilities.append(proba_k) + + # if not self.outputs_2d_: + # probabilities = probabilities[0] + + # return probabilities + return q_X @ self._q_points.T + +class SklearnKNeighborsClassifierMixin( + BaseClassifier, SklearnKNeighborsMixin, sklearn.base.ClassifierMixin, sklearn.neighbors._base.NeighborsBase, ABC +): + """A Mixin class for sklearn neighbors classifiers with FHE. + + This class is used to create a neighbors classifier class that inherits from + sklearn.base.ClassifierMixin, which essentially gives access to scikit-learn's `score` method + for classifiers. + + Additionally, this class adjusts some of the tree-based base class's methods in order to make + them compliant with classification workflows. 
+    """
+
+    def _clean_graph(self) -> None:
+        assert self.onnx_model_ is not None, self._is_not_fitted_error_message()
+
+        # Remove any operators following Gemm, as they will be done in the clear
+        assert self.onnx_model_ is not None
+        # There is no Gemm node
+        try:
+            clean_graph_after_node_op_type(self.onnx_model_, node_op_type="Gemm")
+        except ValueError:
+            print('No Gemm node in the graph')
+        SklearnKNeighborsMixin._clean_graph(self)
+
+    def predict(
+        self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE
+    ) -> numpy.ndarray:
+        """Predict confidence scores.
+
+        Args:
+            X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame
+                or List.
+            fhe (Union[FheMode, str]): The mode to use for prediction.
+                Can be FheMode.DISABLE for Concrete ML Python inference,
+                FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution.
+                Can also be the string representation of any of these values.
+                Default to FheMode.DISABLE.
+
+        Returns:
+            numpy.ndarray: The predicted confidence scores.
+        """
+        # Here, we want to use SklearnKNeighborsMixin's `predict` method as confidence scores are
+        # the dot product's output values, without any post-processing
+        # TODO
+        y_preds = SklearnKNeighborsMixin.predict(self, X, fhe=fhe)
+        return y_preds
+
+    def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
+        # TODO
+        y_predict = self.predict(X, fhe=fhe)
+        y_proba = self.post_processing(y_predict)
+        return y_proba
\ No newline at end of file
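The classifier introduced in the next patch follows the same fit / compile / predict workflow as the other Concrete ML estimators. A minimal usage sketch, for illustration only (not part of the patch series), assuming the later patches are applied so that `KNeighborsClassifier` is exported from `concrete.ml.sklearn`, and using the string form of the FHE modes documented above:

    import numpy
    from concrete.ml.sklearn import KNeighborsClassifier

    # Small synthetic binary classification set: 20 samples, 2 features
    rng = numpy.random.RandomState(0)
    X = rng.rand(20, 2)
    y = (X[:, 0] + X[:, 1] > 1.0).astype(numpy.int64)

    model = KNeighborsClassifier(n_bits=8, n_neighbors=3)
    model.fit(X, y)

    # Compile on representative inputs, then predict with FHE simulation
    model.compile(X)
    y_pred = model.predict(X[:5], fhe="simulate")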

From 8dc0199d25003049d6e5a7d4a26b20e58ab93178 Mon Sep 17 00:00:00 2001
From: kcelia
Date: Thu, 20 Jul 2023 22:04:06 +0200
Subject: [PATCH 02/51] chore: v2

---
 src/concrete/ml/sklearn/neighbors.py | 149 +++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 src/concrete/ml/sklearn/neighbors.py

diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py
new file mode 100644
index 000000000..5bce4721f
--- /dev/null
+++ b/src/concrete/ml/sklearn/neighbors.py
@@ -0,0 +1,149 @@
+"""Implement sklearn neighbors model."""
+from typing import Any, Dict
+
+import sklearn.neighbors
+
+from .base import SklearnKNeighborsClassifierMixin
+
+
+# pylint: disable=invalid-name,too-many-instance-attributes
+class KNeighborsClassifier(SklearnKNeighborsClassifierMixin):
+    """A k-nearest neighbors classifier model with FHE.
+
+    Parameters:
+        n_bits (int, Dict[str, int]): Number of bits to quantize the model. If an int is passed
+            for n_bits, the value will be used for quantizing inputs and weights. If a dict is
+            passed, then it should contain "op_inputs" and "op_weights" as keys with
+            corresponding number of quantization bits so that:
+            - op_inputs : number of bits to quantize the input values
+            - op_weights: number of bits to quantize the learned parameters
+            Default to 8.
+
+    For more details on KNeighborsClassifier, please refer to the scikit-learn documentation:
+    https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
+    """
+
+    sklearn_model_class = sklearn.neighbors.KNeighborsClassifier
+    _is_a_public_cml_model = True
+
+    def __init__(
+        self,
+        n_bits=8,
+        n_neighbors=5,
+        *,
+        weights="uniform",
+        algorithm="auto",
+        leaf_size=30,
+        p=2,
+        metric="minkowski",
+        metric_params=None,
+        n_jobs=None,
+    ):
+        # Call SklearnKNeighborsClassifierMixin's __init__ method
+        super().__init__(n_bits=n_bits)
+
+        self.n_neighbors = n_neighbors
+        self.algorithm = algorithm
+        self.leaf_size = leaf_size
+        self.p = p
+        self.metric = metric
+        self.metric_params = metric_params
+        self.n_jobs = n_jobs
+
+        self.weights = weights
+
+    def dump_dict(self) -> Dict[str, Any]:
+        assert self._weight_quantizer is not None, self._is_not_fitted_error_message()
+
+        metadata: Dict[str, Any] = {}
+
+        # Concrete ML
+        metadata["n_bits"] = self.n_bits
+        metadata["sklearn_model"] = self.sklearn_model
+        metadata["_is_fitted"] = self._is_fitted
+        metadata["_is_compiled"] = self._is_compiled
+        metadata["input_quantizers"] = self.input_quantizers  # TODO: DOUBT
+        metadata["_weight_quantizer"] = self._weight_quantizer  # TODO: DOUBT
+        metadata["output_quantizers"] = self.output_quantizers  # TODO: DOUBT
+        metadata["onnx_model_"] = self.onnx_model_
+        metadata["post_processing_params"] = self.post_processing_params
+        metadata["cml_dumped_class_name"] = type(self).__name__
+        metadata["_q_points"] = self._q_points
+
+        # Scikit-learn
+
+        metadata["target_classes_"] = self.target_classes_
+        metadata["n_classes_"] = self.n_classes_
+        metadata["sklearn_model_class"] = self.sklearn_model_class
+        metadata["n_neighbors"] = self.n_neighbors
+        metadata["algorithm"] = self.algorithm
+        metadata["weights"] = self.weights
+        metadata["leaf_size"] = self.leaf_size
+        metadata["p"] = self.p
+        metadata["metric"] = self.metric
+        metadata["metric_params"] = self.metric_params
+        metadata["n_jobs"] = self.n_jobs
+
+        return metadata
+
+    @classmethod
+    def load_dict(cls, metadata: Dict):
+
+        # Instantiate the model
+        obj = KNeighborsClassifier()
+
+        # Concrete-ML
+        obj.n_bits = metadata["n_bits"]
+        obj.sklearn_model = metadata["sklearn_model"]
+        obj._is_fitted = metadata["_is_fitted"]
+        obj._is_compiled = metadata["_is_compiled"]
+        obj.input_quantizers = metadata["input_quantizers"]
+        obj.output_quantizers = metadata["output_quantizers"]
+        obj._weight_quantizer = metadata["_weight_quantizer"]
+        obj.onnx_model_ = metadata["onnx_model_"]
+
+        obj.post_processing_params = metadata["post_processing_params"]
+
+        # Classifier
+        obj.target_classes_ = metadata["target_classes_"]
+        obj.n_classes_ = metadata["n_classes_"]
+
+        # Scikit-Learn
+        obj.n_neighbors = metadata["n_neighbors"]
+        obj.weights = metadata["weights"]
+        obj.algorithm = metadata["algorithm"]
+        obj.leaf_size = metadata["leaf_size"]
+        obj.p = metadata["p"]
+        obj.metric = metadata["metric"]
+        obj.metric_params = metadata["metric_params"]
+        obj.n_jobs = metadata["n_jobs"]
+        return obj
+
+
+class _KNeighborsRegressor:
+    pass
+
+
+class _RadiusNeighborsClassifier:
+    """Find the neighbors within a given radius of a point or points.
+
+    Return the indices of, and distances to, each point from the dataset lying in a ball of
+    size `radius` around the points of the query array.
+
+    Points lying on the boundary are included in the results.
+
+    The result points are not necessarily sorted by distance to their query point.
+ + """ + + pass + + +class _RadiusNeighborsRegressor: + pass + + +class _NearestNeighbors: + pass From 771648fd0944c1a8b267f341bf29f161c5616dd5 Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 21 Jul 2023 09:55:25 +0200 Subject: [PATCH 03/51] chore: keep one class quantization not working properly add similarity point encrypted argsort and topk in clear --- src/concrete/ml/sklearn/base.py | 328 +++++++++++++++++++++++---- src/concrete/ml/sklearn/neighbors.py | 6 +- 2 files changed, 288 insertions(+), 46 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index da645ca63..2109239f0 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -61,6 +61,7 @@ warnings.filterwarnings("ignore") from hummingbird.ml import convert as hb_convert # noqa: E402 from hummingbird.ml.operator_converters import constants +import numpy as np _ALL_SKLEARN_MODELS: Set[Type] = set() _LINEAR_MODELS: Set[Type] = set() @@ -608,6 +609,8 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. ValueError, ) + print("monkey") + # Check that the model is properly fitted self.check_model_is_fitted() @@ -1696,7 +1699,7 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> # pylint: disable=invalid-name,too-many-instance-attributes # from sklearn.neighbors._base import NeighborsBase -class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): +class _SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): """A Mixin class for sklearn neighbors models with FHE. This class inherits from sklearn.base.BaseEstimator in order to have access to scikit-learn's @@ -1921,56 +1924,295 @@ def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray: # return probabilities return q_X @ self._q_points.T -class SklearnKNeighborsClassifierMixin( - BaseClassifier, SklearnKNeighborsMixin, sklearn.base.ClassifierMixin, sklearn.neighbors._base.NeighborsBase, ABC -): - """A Mixin class for sklearn neighbors classifiers with FHE. +# class SklearnKNeighborsClassifierMixin(BaseClassifier, SklearnKNeighborsMixin, sklearn.base.ClassifierMixin, sklearn.neighbors._base.NeighborsBase, ABC +# ): +# """A Mixin class for sklearn neighbors classifiers with FHE. + +# This class is used to create a neighbors classifier class that inherits from +# sklearn.base.ClassifierMixin, which essentially gives access to scikit-learn's `score` method +# for classifiers. + +# Additionally, this class adjusts some of the tree-based base class's methods in order to make +# them compliant with classification workflows. +# """ + +# def _clean_graph(self) -> None: +# assert self.onnx_model_ is not None, self._is_not_fitted_error_message() + +# # Remove any operators following gemm, as they will be done in the clear +# assert self.onnx_model_ is not None +# # There is no Gemm node +# try: +# clean_graph_after_node_op_type(self.onnx_model_, node_op_type="Gemm") +# except ValueError: +# print('No Gemm node in the graph') +# SklearnKNeighborsMixin._clean_graph(self) + +# def predict( +# self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE +# ) -> numpy.ndarray: +# """Predict confidence scores. + +# Args: +# X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame +# or List. +# fhe (Union[FheMode, str]): The mode to use for prediction. +# Can be FheMode.DISABLE for Concrete ML Python inference, +# FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution. 
+# Can also be the string representation of any of these values. +# Default to FheMode.DISABLE. + +# Returns: +# numpy.ndarray: The predicted confidence scores. +# """ +# # Here, we want to use SklearnKNeighborsMixin's `predict` method as confidence scores are +# # the dot product's output values, without any post-processing +# # TODO +# y_preds = SklearnKNeighborsMixin.predict(self, X, fhe=fhe) +# return y_preds + +# def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: +# # TODO +# y_predict = self.predict(X, fhe=fhe) +# y_proba = self.post_processing(y_predict) +# return y_proba - This class is used to create a neighbors classifier class that inherits from - sklearn.base.ClassifierMixin, which essentially gives access to scikit-learn's `score` method - for classifiers. - Additionally, this class adjusts some of the tree-based base class's methods in order to make - them compliant with classification workflows. + +# pylint: disable=invalid-name,too-many-instance-attributes +class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): + """A Mixin class for sklearn linear models with FHE. + + This class inherits from sklearn.base.BaseEstimator in order to have access to scikit-learn's + `get_params` and `set_params` methods. """ - def _clean_graph(self) -> None: - assert self.onnx_model_ is not None, self._is_not_fitted_error_message() + def __init_subclass__(cls): + for klass in cls.__mro__: + # pylint: disable-next=protected-access + if getattr(klass, "_is_a_public_cml_model", False): + _NEIGHBORS_MODELS.add(cls) # Changed + _ALL_SKLEARN_MODELS.add(cls) - # Remove any operators following gemm, as they will be done in the clear - assert self.onnx_model_ is not None - # There is no Gemm node - try: - clean_graph_after_node_op_type(self.onnx_model_, node_op_type="Gemm") - except ValueError: - print('No Gemm node in the graph') - SklearnKNeighborsMixin._clean_graph(self) - - def predict( - self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE - ) -> numpy.ndarray: - """Predict confidence scores. + def __init__(self, n_bits: Union[int, Dict[str, int]] = 8): + """Initialize the FHE knn model. Args: - X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame - or List. - fhe (Union[FheMode, str]): The mode to use for prediction. - Can be FheMode.DISABLE for Concrete ML Python inference, - FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution. - Can also be the string representation of any of these values. - Default to FheMode.DISABLE. + n_bits (int, Dict[str, int]): Number of bits to quantize the model. If an int is passed + for n_bits, the value will be used for quantizing inputs and weights. If a dict is + passed, then it should contain "op_inputs" and "op_weights" as keys with + corresponding number of quantization bits so that: + - op_inputs : number of bits to quantize the input values + - op_weights: number of bits to quantize the learned parameters + Default to 8. + """ + self.n_bits: Union[int, Dict[str, int]] = n_bits - Returns: - numpy.ndarray: The predicted confidence scores. + #: The quantizer to use for quantizing the model's weights + self._weight_quantizer: Optional[UniformQuantizer] = None + + #: The model's quantized weights + self._q_weights: Optional[numpy.ndarray] = None + + BaseEstimator.__init__(self) + + def _set_onnx_model(self, test_input: numpy.ndarray) -> None: + """Retrieve the model's ONNX graph using Hummingbird conversion. 
+ + Args: + test_input (numpy.ndarray): An input data used to trace the model execution. """ - # Here, we want to use SklearnKNeighborsMixin's `predict` method as confidence scores are - # the dot product's output values, without any post-processing - # TODO - y_preds = SklearnKNeighborsMixin.predict(self, X, fhe=fhe) + # Check that the underlying sklearn model has been set and fit + assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() + + self.onnx_model_ = hb_convert( + self.sklearn_model, + backend="onnx", + test_input=test_input, + extra_config={"onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, + constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0] # Changed + }, + ).model + + self._clean_graph() + + def _clean_graph(self) -> None: + """Clean the ONNX graph from undesired nodes.""" + assert self.onnx_model_ is not None, self._is_not_fitted_error_message() + + # Remove cast operators as they are not needed + remove_node_types(onnx_model=self.onnx_model_, op_types_to_remove=["Cast"]) + + def fit(self, X: Data, y: Target, **fit_parameters): + # Reset for double fit + self._is_fitted = False + + # LinearRegression handles multi-labels data + X, y = check_X_y_and_assert_multi_output(X, y) + + # Fit the scikit-learn model + self._fit_sklearn_model(X, y, **fit_parameters) + + # Check that the underlying sklearn model has been set and fit + assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() + + # Retrieve the ONNX graph + self._set_onnx_model(X) + + # Convert the n_bits attribute into a proper dictionary + n_bits = get_n_bits_dict(self.n_bits) + + input_n_bits = n_bits["op_inputs"] + input_options = QuantizationOptions(n_bits=input_n_bits, is_signed=True) + + # Quantize the inputs and store the associated quantizer + q_inputs = QuantizedArray(n_bits=input_n_bits, values=X, options=input_options) + input_quantizer = q_inputs.quantizer + self.input_quantizers.append(input_quantizer) + + weights_n_bits = n_bits["op_weights"] + weight_options = QuantizationOptions(n_bits=weights_n_bits, is_signed=True) + + # Quantize the weights and store the associated quantizer + # Transpose and expand are necessary in order to make sure the weight array has the correct + # shape when calling the Gemm operator on it + weights = self.sklearn_model._fit_X.T # Changed + q_weights = QuantizedArray( + n_bits=n_bits["op_weights"], + values=numpy.expand_dims(weights, axis=1) if len(weights.shape) == 1 else weights, + options=weight_options, + ) + self._q_weights = q_weights.qvalues + weight_quantizer = q_weights.quantizer + self._weight_quantizer = weight_quantizer + + # mypy + assert input_quantizer.scale is not None + assert weight_quantizer.scale is not None + + # Compute the scale and zero-point of the matmul's outputs, following the same steps from + # the QuantizedGemm operator, which are based on equations detailed in + # https://arxiv.org/abs/1712.05877 + + output_quant_params = UniformQuantizationParameters( + scale=input_quantizer.scale * weight_quantizer.scale, + zero_point=input_quantizer.zero_point + * ( + numpy.sum(self._q_weights, axis=0, keepdims=True) + - X.shape[1] * weight_quantizer.zero_point + ), + offset=0, + ) + print(output_quant_params) + self.output_quant_params = output_quant_params + + + output_quantizer = UniformQuantizer(params=output_quant_params, no_clipping=True) + + # Since the matmul and the bias both use the same scale and zero-points, we obtain that + # y = S*(q_y - 2*Z) when de-quantizing the values. 
We therefore need to multiply the initial + # output zero_point by 2 + assert output_quantizer.zero_point is not None + output_quantizer.zero_point *= 2 + self.output_quantizers.append(output_quantizer) + + # Updating post-processing parameters + self._set_post_processing_params() + + self._is_fitted = True + + return self + + def quantize_input(self, X: numpy.ndarray) -> numpy.ndarray: + self.check_model_is_fitted() + q_X = self.input_quantizers[0].quant(X) + + assert q_X.dtype == numpy.int64, "Inputs were not quantized to int64 values" + return q_X + + def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray: + self.check_model_is_fitted() + + # De-quantize the output values + y_preds = self.output_quantizers[0].dequant(q_y_preds) + return y_preds - def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: - # TODO - y_predict = self.predict(X, fhe=fhe) - y_proba = self.post_processing(y_predict) - return y_proba \ No newline at end of file + def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]: + # Define the inference function to compile. + # This function can neither be a class method nor a static one because self we want to avoid + # having self as a parameter while still being able to access some of its attribute + def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray: + """Compile the circuit in FHE using only the inputs as parameters. + + Args: + q_X (numpy.ndarray): The quantized input data + + Returns: + numpy.ndarray: The circuit is outputs. + """ + return self._inference(q_X) + + # Create the compiler instance + compiler = Compiler(inference_to_compile, {"q_X": "encrypted"}) + + return compiler + + + def top_k_indices(self, distance_matrix, k): + print("TOP K") + return numpy.argsort(distance_matrix, 1)[:,:k] #0 ou 1 + + # Get the number of queries (rows) and points (columns) + n_queries, n_points = distance_matrix.shape + + # Initialize an array to store the top-k indices for each query + top_k_indices_array = np.empty((n_queries, k), dtype=int) + + for i in range(n_queries): + print("$$$$$$$", i) + # Sort the distances for the current query and get the indices of the sorted elements + sorted_indices = np.argsort(distance_matrix[i]) + print(distance_matrix[i]) + # Get the top-k indices for the current query and store them in the result array + top_k_indices_array[i] = sorted_indices[:k] + + return top_k_indices_array + + def majority_vote(self, nearest_classes): + # Get the number of queries (rows) and k (number of nearest points) + n_queries, k = nearest_classes.shape + + # Compute the majority vote for each query + majority_votes = np.empty(n_queries, dtype=int) + for i in range(n_queries): + # Use bincount to count occurrences of each class and find the most common one + class_counts = np.bincount(nearest_classes[i]) + majority_votes[i] = np.argmax(class_counts) + + return majority_votes + + + def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray: + assert self._weight_quantizer is not None, self._is_not_fitted_error_message() + + # Quantizing weights and inputs makes an additional term appear in the inference function + print(q_X.shape, self._q_weights.shape) + + distances_matrix = q_X @ self._q_weights # TODO: replace with real minkovski distance + #from sklearn.metrics.pairwise import euclidean_distances + # y_pred = euclidean_distances(q_X, self._q_weights.T) + + self.distances_matrix = distances_matrix + return distances_matrix + + def predict(self, X: Data, fhe: Union[FheMode, str] = 
FheMode.DISABLE) -> numpy.ndarray: + distances_matrix = super().predict(X, fhe) + print(distances_matrix) + + indices = self.top_k_indices(distances_matrix, self.sklearn_model.n_neighbors) + y_pred = self.majority_vote(self.sklearn_model._y[indices]) + + return y_pred + + diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 5bce4721f..07d5d4bea 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -3,11 +3,11 @@ import sklearn.linear_model -from .base import SklearnKNeighborsClassifierMixin +from .base import SklearnKNeighborsMixin # pylint: disable=invalid-name,too-many-instance-attributes -class KNeighborsClassifier(SklearnKNeighborsClassifierMixin): +class KNeighborsClassifier(SklearnKNeighborsMixin): """A k-nearest classifier model with FHE. Parameters: @@ -49,7 +49,6 @@ def __init__( self.metric = metric self.metric_params = metric_params self.n_jobs = n_jobs - self.weights = weights def dump_dict(self) -> Dict[str, Any]: @@ -83,6 +82,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["metric"] = self.metric metadata["metric_params"] = self.metric_params metadata["n_jobs"] = self.n_jobs + print(self._fit_X) return metadata From 4fc02ea7c29442ef9ec3dc14f874c3811a8f4960 Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 21 Jul 2023 09:56:05 +0200 Subject: [PATCH 04/51] chore: remove other classes --- src/concrete/ml/sklearn/base.py | 283 -------------------------------- 1 file changed, 283 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 2109239f0..cc289c0f7 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1696,289 +1696,6 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> y_proba = self.post_processing(y_logits) return y_proba - -# pylint: disable=invalid-name,too-many-instance-attributes -# from sklearn.neighbors._base import NeighborsBase -class _SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): - """A Mixin class for sklearn neighbors models with FHE. - - This class inherits from sklearn.base.BaseEstimator in order to have access to scikit-learn's - `get_params` and `set_params` methods. - """ - - def __init_subclass__(cls): - for klass in cls.__mro__: - # pylint: disable-next=protected-access - if getattr(klass, "_is_a_public_cml_model", False): - _NEIGHBORS_MODELS.add(cls) - _ALL_SKLEARN_MODELS.add(cls) - - def __init__(self, n_bits: Union[int, Dict[str, int]] = 8): - """Initialize the FHE KNeighbors model. - - Args: - n_bits (int, Dict[str, int]): Number of bits to quantize the model. If an int is passed - for n_bits, the value will be used for quantizing inputs and weights. If a dict is - passed, then it should contain "op_inputs" and "op_weights" as keys with - corresponding number of quantization bits so that: - - op_inputs : number of bits to quantize the input values - - op_weights: number of bits to quantize the learned parameters - Default to 8. - """ - self.n_bits: Union[int, Dict[str, int]] = n_bits - - #: The quantizer to use for quantizing the model's weights - self._weight_quantizer: Optional[UniformQuantizer] = None - - #: The model's quantized data points - self._q_points: Optional[numpy.ndarray] = None - - BaseEstimator.__init__(self) - - def _set_onnx_model(self, test_input: numpy.ndarray) -> None: - """Retrieve the model's ONNX graph using Hummingbird conversion. 
- - Args: - test_input (numpy.ndarray): An input data used to trace the model execution. - """ - - # Check that the underlying sklearn model has been set and fit - assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() - - self.onnx_model_ = hb_convert( - self.sklearn_model, - backend="onnx", - test_input=test_input, - # https://github.com/microsoft/hummingbird/issues/422 - extra_config={ - "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, - constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], - }, - ).model - - self._clean_graph() - - def _clean_graph(self) -> None: - """Clean the ONNX graph from undesired nodes.""" - - assert self.onnx_model_ is not None, self._is_not_fitted_error_message() - # Remove cast operators as they are not needed - remove_node_types(onnx_model=self.onnx_model_, op_types_to_remove=["Cast"]) - - def fit(self, X: Data, y: Target, **fit_parameters): - # Reset for double fit - self._is_fitted = False - - # LinearRegression handles multi-labels data - X, y = check_X_y_and_assert_multi_output(X, y) - - # Fit the scikit-learn model - self._fit_sklearn_model(X, y, **fit_parameters) - - # Check that the underlying sklearn model has been set and fit - assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() - - # Retrieve the ONNX graph - self._set_onnx_model(X) - - # Convert the n_bits attribute into a proper dictionary - n_bits = get_n_bits_dict(self.n_bits) - - input_n_bits = n_bits["op_inputs"] - input_options = QuantizationOptions(n_bits=input_n_bits, is_signed=True) - - # Quantize the inputs and store the associated quantizer - q_inputs = QuantizedArray(n_bits=input_n_bits, values=X, options=input_options) - input_quantizer = q_inputs.quantizer - self.input_quantizers.append(input_quantizer) - - weights_n_bits = n_bits["op_weights"] - weight_options = QuantizationOptions(n_bits=weights_n_bits, is_signed=True) - - # Quantize the weights and store the associated quantizer - # Transpose and expand are necessary in order to make sure the weight array has the correct - # shape when calling the Gemm operator on it - points = self.sklearn_model._fit_X - q_points = QuantizedArray( - n_bits=n_bits["op_weights"], - values=numpy.expand_dims(points, axis=1) if len(points.shape) == 1 else points, - options=weight_options, - ) - self._q_points = q_points.qvalues - weight_quantizer = q_points.quantizer - self._weight_quantizer = weight_quantizer - - # mypy - assert input_quantizer.scale is not None - assert weight_quantizer.scale is not None - - # Compute the scale and zero-point of the matmul's outputs, following the same steps from - # the QuantizedGemm operator, which are based on equations detailed in - # https://arxiv.org/abs/1712.05877 - - output_quant_params = UniformQuantizationParameters( - scale=input_quantizer.scale * weight_quantizer.scale, - zero_point=input_quantizer.zero_point - * ( - numpy.sum(self._q_points, axis=0, keepdims=True) - - X.shape[1] * weight_quantizer.zero_point - ), - offset=0, - ) - - output_quantizer = UniformQuantizer(params=output_quant_params, no_clipping=True) - - # Since the matmul and the bias both use the same scale and zero-points, we obtain that - # y = S*(q_y - 2*Z) when de-quantizing the values. 
We therefore need to multiply the initial - # output zero_point by 2 - assert output_quantizer.zero_point is not None - output_quantizer.zero_point *= 2 - self.output_quantizers.append(output_quantizer) - - # Updating post-processing parameters - self._set_post_processing_params() - - self._is_fitted = True - - return self - - def quantize_input(self, X: numpy.ndarray) -> numpy.ndarray: - self.check_model_is_fitted() - q_X = self.input_quantizers[0].quant(X) - - assert q_X.dtype == numpy.int64, "Inputs were not quantized to int64 values" - return q_X - - def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray: - self.check_model_is_fitted() - - # De-quantize the output values - y_preds = self.output_quantizers[0].dequant(q_y_preds) - - return y_preds - - def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]: - # Define the inference function to compile. - # This function can neither be a class method nor a static one because self we want to avoid - # having self as a parameter while still being able to access some of its attribute - def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray: - """Compile the circuit in FHE using only the inputs as parameters. - - Args: - q_X (numpy.ndarray): The quantized input data - - Returns: - numpy.ndarray: The circuit is outputs. - """ - return self._inference(q_X) - - # Create the compiler instance - compiler = Compiler(inference_to_compile, {"q_X": "encrypted"}) - - return compiler - - - def kneighbors(self, X=None, n_neighbors=None, return_distance=True): - pass - - def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray: - assert self._weight_quantizer is not None, self._is_not_fitted_error_message() - - print("_inference ici") - print(q_X.shape) - print(q_X[:3]) - - assert self.sklearn_model.weights == "uniform", "uniform only, NTM" - - neigh_dist, neigh_ind = self.kneighbors(q_X) - - classes_ = self.classes_ - _y = self._y - - #n_queries = _num_samples(X) - - # weights = _get_weights(neigh_dist, self.weights) - # if weights is None: - # weights = np.ones_like(neigh_ind) - - # all_rows = np.arange(n_queries) - # probabilities = [] - # for k, classes_k in enumerate(classes_): - # pred_labels = _y[:, k][neigh_ind] - # proba_k = np.zeros((n_queries, classes_k.size)) - - # # a simple ':' index doesn't work right - # for i, idx in enumerate(pred_labels.T): # loop is O(n_neighbors) - # proba_k[all_rows, idx] += weights[:, i] - - # # normalize 'votes' into real [0,1] probabilities - # normalizer = proba_k.sum(axis=1)[:, np.newaxis] - # normalizer[normalizer == 0.0] = 1.0 - # proba_k /= normalizer - - # probabilities.append(proba_k) - - # if not self.outputs_2d_: - # probabilities = probabilities[0] - - # return probabilities - return q_X @ self._q_points.T - -# class SklearnKNeighborsClassifierMixin(BaseClassifier, SklearnKNeighborsMixin, sklearn.base.ClassifierMixin, sklearn.neighbors._base.NeighborsBase, ABC -# ): -# """A Mixin class for sklearn neighbors classifiers with FHE. - -# This class is used to create a neighbors classifier class that inherits from -# sklearn.base.ClassifierMixin, which essentially gives access to scikit-learn's `score` method -# for classifiers. - -# Additionally, this class adjusts some of the tree-based base class's methods in order to make -# them compliant with classification workflows. 
-# """ - -# def _clean_graph(self) -> None: -# assert self.onnx_model_ is not None, self._is_not_fitted_error_message() - -# # Remove any operators following gemm, as they will be done in the clear -# assert self.onnx_model_ is not None -# # There is no Gemm node -# try: -# clean_graph_after_node_op_type(self.onnx_model_, node_op_type="Gemm") -# except ValueError: -# print('No Gemm node in the graph') -# SklearnKNeighborsMixin._clean_graph(self) - -# def predict( -# self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE -# ) -> numpy.ndarray: -# """Predict confidence scores. - -# Args: -# X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame -# or List. -# fhe (Union[FheMode, str]): The mode to use for prediction. -# Can be FheMode.DISABLE for Concrete ML Python inference, -# FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution. -# Can also be the string representation of any of these values. -# Default to FheMode.DISABLE. - -# Returns: -# numpy.ndarray: The predicted confidence scores. -# """ -# # Here, we want to use SklearnKNeighborsMixin's `predict` method as confidence scores are -# # the dot product's output values, without any post-processing -# # TODO -# y_preds = SklearnKNeighborsMixin.predict(self, X, fhe=fhe) -# return y_preds - -# def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: -# # TODO -# y_predict = self.predict(X, fhe=fhe) -# y_proba = self.post_processing(y_predict) -# return y_proba - - - # pylint: disable=invalid-name,too-many-instance-attributes class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): """A Mixin class for sklearn linear models with FHE. From 481950dd0b82fa549425972dd989043b996bb307 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 25 Jul 2023 20:28:33 +0200 Subject: [PATCH 05/51] chore: update --- docs/advanced_examples/LinearRegression.ipynb | 14 ++++++++ src/concrete/ml/sklearn/base.py | 34 ++++++++----------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/docs/advanced_examples/LinearRegression.ipynb b/docs/advanced_examples/LinearRegression.ipynb index e453e857b..054233813 100644 --- a/docs/advanced_examples/LinearRegression.ipynb +++ b/docs/advanced_examples/LinearRegression.ipynb @@ -588,6 +588,20 @@ "metadata": { "execution": { "timeout": 10800 + }, + "kernelspec": { + "display_name": "Python 3.10.6 ('.venv': poetry)", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.6" + }, + "vscode": { + "interpreter": { + "hash": "d11d2d767e01a44b3e69d0864f5db4163d647e8ae5c68b7694f10d9d57d10ac5" + } } }, "nbformat": 4, diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index cc289c0f7..442ec912c 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1880,21 +1880,6 @@ def top_k_indices(self, distance_matrix, k): print("TOP K") return numpy.argsort(distance_matrix, 1)[:,:k] #0 ou 1 - # Get the number of queries (rows) and points (columns) - n_queries, n_points = distance_matrix.shape - - # Initialize an array to store the top-k indices for each query - top_k_indices_array = np.empty((n_queries, k), dtype=int) - - for i in range(n_queries): - print("$$$$$$$", i) - # Sort the distances for the current query and get the indices of the sorted elements - sorted_indices = np.argsort(distance_matrix[i]) - print(distance_matrix[i]) - # Get the top-k indices for the current query and store them in the 
result array
-            top_k_indices_array[i] = sorted_indices[:k]
-
-        return top_k_indices_array
 
     def majority_vote(self, nearest_classes):
         # Get the number of queries (rows) and k (number of nearest points)
@@ -1914,14 +1899,25 @@ def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray:
         assert self._weight_quantizer is not None, self._is_not_fitted_error_message()
 
         # Quantizing weights and inputs makes an additional term appear in the inference function
-        print(q_X.shape, self._q_weights.shape)
+        print("ici", "q_X.shape", q_X.shape, "self._q_weights.shape", self._q_weights.shape)
+
+        #distances_matrix = q_X @ self._q_weights # TODO: replace with real minkovski distance
 
-        distances_matrix = q_X @ self._q_weights # TODO: replace with real minkovski distance
+        # sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))
         #from sklearn.metrics.pairwise import euclidean_distances
         # y_pred = euclidean_distances(q_X, self._q_weights.T)
 
-        self.distances_matrix = distances_matrix
-        return distances_matrix
+        distances = []
+        for x_i in q_X:
+            distance_xi = []
+            print(f"{x_i.shape=}")
+            for point_i in self._q_weights:
+                print(f"{point_i.shape=}")
+                distance_xi.append(np.sqrt(np.dot(x_i, x_i) - 2 * np.dot(x_i, point_i) + np.dot(point_i, point_i)))
+            distances.append(distance_xi)
+
+        self.distances_matrix = np.array(distances)
+        return self.distances_matrix
 
     def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
        distances_matrix = super().predict(X, fhe)
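The nested loop above evaluates sqrt(x.x - 2*x.p + p.p) pair by pair; the next patch replaces it with a vectorized form of the same identity. A plaintext numpy sketch (illustrative only, not part of the patches) checking that the expansion ||x - p||^2 = x.x - 2*x.p + p.p matches direct pairwise squared distances:

    import numpy

    rng = numpy.random.RandomState(0)
    q_X = rng.randint(0, 16, size=(4, 3))        # quantized queries
    q_points = rng.randint(0, 16, size=(10, 3))  # quantized training points

    # Vectorized expansion of the squared euclidean distance
    d2 = (
        numpy.sum(q_X**2, axis=1).reshape(-1, 1)
        + numpy.sum(q_points**2, axis=1).reshape(1, -1)
        - 2 * q_X @ q_points.T
    )

    # Reference: direct pairwise squared distances, shape (4, 10)
    ref = ((q_X[:, None, :] - q_points[None, :, :]) ** 2).sum(axis=2)
    assert numpy.array_equal(d2, ref)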
@@ -1708,7 +1710,7 @@ def __init_subclass__(cls): for klass in cls.__mro__: # pylint: disable-next=protected-access if getattr(klass, "_is_a_public_cml_model", False): - _NEIGHBORS_MODELS.add(cls) # Changed + _NEIGHBORS_MODELS.add(cls) # Changed _ALL_SKLEARN_MODELS.add(cls) def __init__(self, n_bits: Union[int, Dict[str, int]] = 8): @@ -1746,9 +1748,10 @@ def _set_onnx_model(self, test_input: numpy.ndarray) -> None: self.sklearn_model, backend="onnx", test_input=test_input, - extra_config={"onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, - constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0] # Changed - }, + extra_config={ + "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, + constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], # Changed + }, ).model self._clean_graph() @@ -1790,47 +1793,37 @@ def fit(self, X: Data, y: Target, **fit_parameters): weights_n_bits = n_bits["op_weights"] weight_options = QuantizationOptions(n_bits=weights_n_bits, is_signed=True) - # Quantize the weights and store the associated quantizer - # Transpose and expand are necessary in order to make sure the weight array has the correct - # shape when calling the Gemm operator on it - weights = self.sklearn_model._fit_X.T # Changed - q_weights = QuantizedArray( + # Quantize the _X_fit and store the associated quantizer + # Weights in KNN algorithms are the train data points + # pylint: disable=protected-access + _X_fit = self.sklearn_model._fit_X + q_X_fit = QuantizedArray( n_bits=n_bits["op_weights"], - values=numpy.expand_dims(weights, axis=1) if len(weights.shape) == 1 else weights, + values=numpy.expand_dims(_X_fit, axis=1) if len(_X_fit.shape) == 1 else _X_fit, options=weight_options, ) - self._q_weights = q_weights.qvalues - weight_quantizer = q_weights.quantizer - self._weight_quantizer = weight_quantizer + self._q_X_fit = q_X_fit.qvalues + self._q_X_fit_quantizer = q_X_fit.quantizer # mypy - assert input_quantizer.scale is not None - assert weight_quantizer.scale is not None + assert self._q_X_fit_quantizer.scale is not None - # Compute the scale and zero-point of the matmul's outputs, following the same steps from - # the QuantizedGemm operator, which are based on equations detailed in + # We assume that the query has the same distribution as the data in _X_fit. + # therefore, they use the same scaling and zero point. # https://arxiv.org/abs/1712.05877 - output_quant_params = UniformQuantizationParameters( - scale=input_quantizer.scale * weight_quantizer.scale, - zero_point=input_quantizer.zero_point - * ( - numpy.sum(self._q_weights, axis=0, keepdims=True) - - X.shape[1] * weight_quantizer.zero_point - ), + self.output_quant_params = UniformQuantizationParameters( + scale=self._q_X_fit_quantizer.scale, + zero_point=self._q_X_fit_quantizer.zero_point, offset=0, ) - print(output_quant_params) - self.output_quant_params = output_quant_params - - output_quantizer = UniformQuantizer(params=output_quant_params, no_clipping=True) + output_quantizer = UniformQuantizer(params=self.output_quant_params, no_clipping=True) # Since the matmul and the bias both use the same scale and zero-points, we obtain that # y = S*(q_y - 2*Z) when de-quantizing the values. 
 
         # Updating post-processing parameters
         self._set_post_processing_params()
 
         self._is_fitted = True
 
         return self
 
     def quantize_input(self, X: numpy.ndarray) -> numpy.ndarray:
         self.check_model_is_fitted()
         q_X = self.input_quantizers[0].quant(X)
 
         assert q_X.dtype == numpy.int64, "Inputs were not quantized to int64 values"
         return q_X
 
     def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray:
         self.check_model_is_fitted()
 
         # De-quantize the output values
         y_preds = self.output_quantizers[0].dequant(q_y_preds)
 
         return y_preds
 
     def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]:
         # Define the inference function to compile.
         # This function cannot be a class method or a static one because we want to avoid
         # having `self` as a parameter while still being able to access some of its attributes
         def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray:
             """Compile the circuit in FHE using only the inputs as parameters.
 
             Args:
                 q_X (numpy.ndarray): The quantized input data
 
             Returns:
                 numpy.ndarray: The circuit's outputs.
             """
             return self._inference(q_X)
 
         # Create the compiler instance
         compiler = Compiler(inference_to_compile, {"q_X": "encrypted"})
+        print("Compile SklearnKNeighborsMixin", type(compiler))
 
         return compiler
 
     def top_k_indices(self, distance_matrix, k):
-        print("TOP K")
-        return numpy.argsort(distance_matrix, 1)[:,:k] #0 ou 1
+        print("Top_k_indices")
+        # Sort the distances in ascending order
+        # Pick up the k smallest distances
+        # Sort along axis 1
+        return numpy.argsort(distance_matrix, axis=1)[:, :k]
 
     def majority_vote(self, nearest_classes):
         # Get the number of queries (rows) and k (number of nearest points)
-        n_queries, k = nearest_classes.shape
-
+        n_queries, _ = nearest_classes.shape
         # Compute the majority vote for each query
-        majority_votes = np.empty(n_queries, dtype=int)
+        majority_votes = np.array([0] * n_queries, dtype=int)
         for i in range(n_queries):
             # Use bincount to count occurrences of each class and find the most common one
             class_counts = np.bincount(nearest_classes[i])
             majority_votes[i] = np.argmax(class_counts)
 
         return majority_votes
 
     def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray:
-        assert self._weight_quantizer is not None, self._is_not_fitted_error_message()
-
-        # Quantizing weights and inputs makes an additional term appear in the inference function
-        print("ici", "q_X.shape", q_X.shape, "self._q_weights.shape", self._q_weights.shape)
-
-        #distances_matrix = q_X @ self._q_weights # TODO: replace with real minkovski distance
-
-        # sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))
-        #from sklearn.metrics.pairwise import euclidean_distances
-        # y_pred = euclidean_distances(q_X, self._q_weights.T)
-
-        distances = []
-        for x_i in q_X:
-            distance_xi = []
-            print(f"{x_i.shape=}")
-            for point_i in self._q_weights:
-                print(f"{point_i.shape=}")
-                distance_xi.append(np.sqrt(np.dot(x_i, x_i) - 2 * np.dot(x_i, point_i) + np.dot(point_i, point_i)))
-            distances.append(distance_xi)
-
-        self.distances_matrix = np.array(distances)
-        return self.distances_matrix
+        assert self._q_X_fit_quantizer is not None, self._is_not_fitted_error_message()
+
+        # np.newaxis, [..., None] ->
+        # ValueError: Indexing with 'None' & 'Ellipsis' is not supported
+        # dot is used for a tensor of one dimension
+        # @ is used for matrices; when it is a matrix, @ -> matmul
+
+        distance_matrix = (
+            np.sum(q_X**2, axis=1).reshape(-1, 1)
+            + np.sum(self._q_X_fit**2, axis=1).reshape(1, -1)
+            - 2 * q_X @ self._q_X_fit.T
+        )
+        return distance_matrix
 
     def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
-        distances_matrix = super().predict(X, fhe)
-        print(distances_matrix)
-
-        indices = self.top_k_indices(distances_matrix, self.sklearn_model.n_neighbors)
-        y_pred = self.majority_vote(self.sklearn_model._y[indices])
-
-        return y_pred
+        self.distances_matrix = np.array(np.sqrt(super().predict(X, fhe)))
+
+        k_indices = self.top_k_indices(self.distances_matrix, self.sklearn_model.n_neighbors)
+        # pylint: disable=protected-access
+        label_k_indices = self.sklearn_model._y[k_indices]
+        y_pred = self.majority_vote(label_k_indices)
+
+        return y_pred
+
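Per the commit message, top_k_indices and majority_vote above run on de-quantized distances on the client side, and they reduce to an argsort plus a bincount. A plaintext sketch of that post-processing, with made-up distances and labels (illustrative only):

    import numpy

    # Distances for 2 queries against 5 training points (client side, in the clear)
    distances = numpy.array([[0.9, 0.1, 0.4, 0.8, 0.2],
                             [0.3, 0.7, 0.6, 0.2, 0.9]])
    labels = numpy.array([1, 0, 0, 1, 1])

    k = 3
    k_indices = numpy.argsort(distances, axis=1)[:, :k]  # k smallest distances
    votes = labels[k_indices]                            # labels of the k neighbors

    # Majority vote per query, as in majority_vote() above
    y_pred = numpy.array([numpy.argmax(numpy.bincount(row)) for row in votes])
    print(y_pred)  # -> [0 1]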
diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py
index 07d5d4bea..4aea79fb5 100644
--- a/src/concrete/ml/sklearn/neighbors.py
+++ b/src/concrete/ml/sklearn/neighbors.py
@@ -61,16 +61,16 @@ def dump_dict(self) -> Dict[str, Any]:
         metadata["sklearn_model"] = self.sklearn_model
         metadata["_is_fitted"] = self._is_fitted
         metadata["_is_compiled"] = self._is_compiled
-        metadata["input_quantizers"] = self.input_quantizers  # TODO: DOUBT
-        metadata["_weight_quantizer"] = self._weight_quantizer  # TODO: DOUBT
-        metadata["output_quantizers"] = self.output_quantizers  # TODO: DOUBT
+        metadata["input_quantizers"] = self.input_quantizers
+        metadata["_weight_quantizer"] = self._weight_quantizer
+        metadata["output_quantizers"] = self.output_quantizers
         metadata["onnx_model_"] = self.onnx_model_
         metadata["post_processing_params"] = self.post_processing_params
         metadata["cml_dumped_class_name"] = type(self).__name__
         metadata["_q_points"] = self._q_points
 
         # Scikit-learn
-
+
         metadata["target_classes_"] = self.target_classes_
         metadata["n_classes_"] = self.n_classes_
         metadata["sklearn_model_class"] = self.sklearn_model_class

From af2550a3c1c84572cf401c18694d64a0fa55ccf8 Mon Sep 17 00:00:00 2001
From: kcelia
Date: Wed, 26 Jul 2023 18:55:43 +0200
Subject: [PATCH 07/51] chore: previous version

---
 src/concrete/ml/sklearn/base.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index 3a5f9857b..c2d395b48 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -1897,19 +1897,23 @@ def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray:
         # @ is used for matrices; when it is a matrix, @ -> matmul
 
         distance_matrix = (
-            np.sum(q_X**2, axis=1).reshape(-1, 1)
+            np.sum(q_X**2).reshape(1)
             + np.sum(self._q_X_fit**2, axis=1).reshape(1, -1)
             - 2 * q_X @ self._q_X_fit.T
         )
         return distance_matrix
 
     def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
+        distances = []
+        # TODO: include in _inference
+        for query in X:
+            d = super().predict(query, fhe)[0]
+            distances.append(np.sqrt(d))
 
-        self.distances_matrix = np.array(np.sqrt(super().predict(X, fhe)))
+        self.distances_matrix = np.array(distances)
         k_indices = self.top_k_indices(self.distances_matrix, self.sklearn_model.n_neighbors)
         # pylint: disable=protected-access
         label_k_indices = self.sklearn_model._y[k_indices]
         y_pred = self.majority_vote(label_k_indices)
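A side note on the np.sqrt in the per-query predict above: argsort depends only on the ordering of the values, and the square root is strictly increasing on non-negative inputs, so the selected neighbors are identical whether the client ranks squared distances or true euclidean distances. A quick illustrative check (not part of the patches):

    import numpy

    rng = numpy.random.RandomState(0)
    squared = rng.rand(4, 10)  # stand-in for squared distances

    assert numpy.array_equal(
        numpy.argsort(squared, axis=1),
        numpy.argsort(numpy.sqrt(squared), axis=1),
    )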
import LinearSVC, LinearSVR from .tree import DecisionTreeClassifier, DecisionTreeRegressor from .xgb import XGBClassifier, XGBRegressor +from .neighbors import KNeighborsClassifier def get_sklearn_models(): diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index c2d395b48..e10e1d54a 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1750,6 +1750,8 @@ def _set_onnx_model(self, test_input: numpy.ndarray) -> None: test_input=test_input, extra_config={ "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, + # pylint: disable=protected-access + # pylint: disable=no-member constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], # Changed }, ).model diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 13c33e12c..a04038154 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -58,12 +58,12 @@ is_regressor_or_partial_regressor, ) from concrete.ml.pytest.utils import ( - _classifiers_and_datasets, + _classifiers_and_datasets, # ICI instantiate_model_generic, sklearn_models_and_datasets, ) from concrete.ml.sklearn import ( - get_sklearn_linear_models, + get_sklearn_linear_models, # ICI get_sklearn_neural_net_models, get_sklearn_tree_models, ) From 98de388c16f5d27e6ba2edc1b32fc3139d6a9a19 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 28 Aug 2023 10:11:32 +0200 Subject: [PATCH 09/51] chore: first testing version --- src/concrete/ml/pytest/utils.py | 1 + tests/sklearn/test_common.py | 5 +++-- tests/sklearn/test_sklearn_models.py | 10 +++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py index dc9e67bc2..887377c9c 100644 --- a/src/concrete/ml/pytest/utils.py +++ b/src/concrete/ml/pytest/utils.py @@ -67,6 +67,7 @@ ] _classifier_models = [ + KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, XGBClassifier, diff --git a/tests/sklearn/test_common.py b/tests/sklearn/test_common.py index 3ce9dcede..65f52928c 100644 --- a/tests/sklearn/test_common.py +++ b/tests/sklearn/test_common.py @@ -12,6 +12,7 @@ get_sklearn_linear_models, get_sklearn_neural_net_models, get_sklearn_tree_models, + get_sklearn_neighbors_models, ) @@ -19,7 +20,7 @@ def test_sklearn_args(): """Check that all arguments from the underlying sklearn model are exposed.""" test_counter = 0 for model_class in ( - get_sklearn_linear_models() + get_sklearn_neural_net_models() + get_sklearn_tree_models() + get_sklearn_linear_models() + get_sklearn_neural_net_models() + get_sklearn_tree_models() + get_sklearn_neighbors_models() ): model_class = get_model_class(model_class) @@ -32,7 +33,7 @@ def test_sklearn_args(): ) test_counter += 1 - assert test_counter == 18 + assert test_counter == 19 @pytest.mark.parametrize("model_class, parameters", sklearn_models_and_datasets) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index a04038154..602c7b9bf 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -66,6 +66,7 @@ get_sklearn_linear_models, # ICI get_sklearn_neural_net_models, get_sklearn_tree_models, + get_sklearn_neighbors_models, ) # Allow multiple runs in FHE to make sure we always have the correct output @@ -471,6 +472,9 @@ def check_subfunctions(fitted_model, model_class, x): ): fitted_model.predict_proba(x) + if get_model_name(fitted_model) == "KNeighborsClassifier": + pytest.skip("Skipping subfunctions test for 
KNN, doesn't work for now") + if is_classifier_or_partial_classifier(model_class): fitted_model.predict_proba(x) @@ -559,6 +563,7 @@ def cast_input(x, y, input_type): # Sometimes, we miss convergence, which is not a problem for our test with warnings.catch_warnings(): warnings.simplefilter("ignore", category=ConvergenceWarning) + model.fit(x, y) # Make sure `predict` is working when FHE is disabled @@ -566,6 +571,8 @@ def cast_input(x, y, input_type): # Similarly, we test `predict_proba` for classifiers if is_classifier_or_partial_classifier(model): + if get_model_name(model_class) == "KNeighborsClassifier": + pytest.skip("Skipping predict_proba for KNN, doesn't work for now") model.predict_proba(x) # If n_bits is above N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS, do not compile the model @@ -762,6 +769,8 @@ def get_hyper_param_combinations(model_class): "importance_type": ["weight", "gain"], "base_score": [0.5, None], } + elif model_class in get_sklearn_neighbors_models(): + hyper_param_combinations = {"n_neighbors": [3, 5]} else: assert is_model_class_in_a_list( @@ -1302,7 +1311,6 @@ def test_input_support( ): """Test all models with Pandas, List or Torch inputs.""" x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) - if verbose: print("Run input_support") From 795842e521381a0c216eb55912fb105a76685810 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 28 Aug 2023 10:13:16 +0200 Subject: [PATCH 10/51] chore: add `_NEIGHBORS_MODELS` and `get_sklearn_neighbors_models` to __init__ --- src/concrete/ml/sklearn/__init__.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/concrete/ml/sklearn/__init__.py b/src/concrete/ml/sklearn/__init__.py index 4c9286a06..fb81e88d5 100644 --- a/src/concrete/ml/sklearn/__init__.py +++ b/src/concrete/ml/sklearn/__init__.py @@ -3,7 +3,7 @@ from ..common.debugging.custom_assert import assert_true from ..common.utils import is_classifier_or_partial_classifier, is_regressor_or_partial_regressor -from .base import _ALL_SKLEARN_MODELS, _LINEAR_MODELS, _NEURALNET_MODELS, _TREE_MODELS +from .base import _ALL_SKLEARN_MODELS, _LINEAR_MODELS, _NEURALNET_MODELS, _TREE_MODELS, _NEIGHBORS_MODELS from .glm import GammaRegressor, PoissonRegressor, TweedieRegressor from .linear_model import ElasticNet, Lasso, LinearRegression, LogisticRegression, Ridge from .qnn import NeuralNetClassifier, NeuralNetRegressor @@ -32,6 +32,7 @@ def get_sklearn_models(): "linear": sorted(list(_LINEAR_MODELS), key=lambda m: m.__name__), "tree": sorted(list(_TREE_MODELS), key=lambda m: m.__name__), "neural_net": sorted(list(_NEURALNET_MODELS), key=lambda m: m.__name__), + "neighbors": sorted(list(_NEIGHBORS_MODELS), key=lambda m: m.__name__), } return ans @@ -124,3 +125,21 @@ def get_sklearn_neural_net_models( """ prelist = get_sklearn_models()["neural_net"] return _filter_models(prelist, classifier, regressor, str_in_class_name) + + +def get_sklearn_neighbors_models( + classifier: bool = True, regressor: bool = True, str_in_class_name: List[str] = None +): + """Return the list of available neighbor models in Concrete ML. 
+ + Args: + classifier (bool): whether you want classifiers or not + regressor (bool): whether you want regressors or not + str_in_class_name (List[str]): if not None, only return models with the given string or + list of strings as a substring in their class name + + Returns: + the lists of neighbor models in Concrete ML + """ + prelist = get_sklearn_models()["neighbors"] + return _filter_models(prelist, classifier, regressor, str_in_class_name) From bab9ee74122c5753dcb31463db38c59733621de3 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 28 Aug 2023 10:15:02 +0200 Subject: [PATCH 11/51] chore: add a new inheritance layer for classification --- src/concrete/ml/sklearn/base.py | 18 +++++++++++++++++- src/concrete/ml/sklearn/neighbors.py | 4 ++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index e10e1d54a..9a87fd01c 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1698,8 +1698,11 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> return y_proba + + # pylint: disable=invalid-name,too-many-instance-attributes class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): + """A Mixin class for sklearn linear models with FHE. This class inherits from sklearn.base.BaseEstimator in order to have access to scikit-learn's @@ -1713,6 +1716,7 @@ def __init_subclass__(cls): _NEIGHBORS_MODELS.add(cls) # Changed _ALL_SKLEARN_MODELS.add(cls) + def __init__(self, n_bits: Union[int, Dict[str, int]] = 8): """Initialize the FHE knn model. @@ -1769,7 +1773,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Reset for double fit self._is_fitted = False - # LinearRegression handles multi-labels data + # KNeighbors handles multi-labels data X, y = check_X_y_and_assert_multi_output(X, y) # Fit the scikit-learn model @@ -1911,6 +1915,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. #TODO: include in _inference for query in X: d = super().predict(query, fhe)[0] + assert any(d < 0) or any(np.isnan(d)), "!!!!!!!! Non valid values" distances.append(np.sqrt(d)) self.distances_matrix = np.array(distances) @@ -1921,3 +1926,14 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. y_pred = self.majority_vote(label_k_indices) return y_pred + + +class SklearnKNeighborsClassifierMixin(SklearnKNeighborsMixin, sklearn.base.ClassifierMixin, ABC): + """A Mixin class for sklearn linear regressors with FHE. + + This class is used to create a linear regressor class that inherits from + sklearn.base.RegressorMixin, which essentially gives access to scikit-learn's `score` method + for regressors. + """ + + # sklearn.base.ClassifierMixin --> is_classifier_or_partial_classifier(KNeighborsClassifier) : True \ No newline at end of file diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 4aea79fb5..27e521efe 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -3,11 +3,11 @@ import sklearn.linear_model -from .base import SklearnKNeighborsMixin +from .base import SklearnKNeighborsClassifierMixin # pylint: disable=invalid-name,too-many-instance-attributes -class KNeighborsClassifier(SklearnKNeighborsMixin): +class KNeighborsClassifier(SklearnKNeighborsClassifierMixin): """A k-nearest classifier model with FHE. 
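For orientation, the behaviour these patches converge on is the classical k-nearest-neighbors rule: build a distance matrix between the queries and the fitted points, keep the k closest indices, then take a majority vote over their labels. A cleartext sketch of that pipeline (illustrative only; X_fit and y_fit stand for the fitted data and its integer labels, not actual class attributes):

    import numpy

    def knn_predict_clear(X_query, X_fit, y_fit, k):
        # Squared pairwise distances via ||x - y||^2 = <x, x> - 2<x, y> + <y, y>
        sq_dist = (
            numpy.sum(X_query**2, axis=1, keepdims=True)
            - 2 * X_query @ X_fit.T
            + numpy.sum(X_fit**2, axis=1)[None, :]
        )
        # Indices of the k closest fitted points, then a majority vote per query
        k_indices = numpy.argsort(sq_dist, axis=1)[:, :k]
        return numpy.array([numpy.argmax(numpy.bincount(row)) for row in y_fit[k_indices]])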
Parameters: From cf76270524afb27fe50e2f9f9d5c10c7429e4919 Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 1 Sep 2023 10:35:02 +0200 Subject: [PATCH 12/51] chore: update serialize testing --- src/concrete/ml/sklearn/base.py | 22 ++++++++++++++-------- src/concrete/ml/sklearn/neighbors.py | 7 ++----- tests/sklearn/test_sklearn_models.py | 1 + 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 9a87fd01c..e7974df5b 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1734,9 +1734,6 @@ def __init__(self, n_bits: Union[int, Dict[str, int]] = 8): #: The quantizer to use for quantizing the model's weights self._weight_quantizer: Optional[UniformQuantizer] = None - #: The model's quantized weights - self._q_weights: Optional[numpy.ndarray] = None - BaseEstimator.__init__(self) def _set_onnx_model(self, test_input: numpy.ndarray) -> None: @@ -1809,7 +1806,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): options=weight_options, ) self._q_X_fit = q_X_fit.qvalues - self._q_X_fit_quantizer = q_X_fit.quantizer + self._q_X_fit_quantizer = self._weight_quantizer = q_X_fit.quantizer # mypy assert self._q_X_fit_quantizer.scale is not None @@ -1904,18 +1901,26 @@ def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray: distance_matrix = ( np.sum(q_X**2).reshape(1) - + np.sum(self._q_X_fit**2, axis=1).reshape(1, -1) - 2 * q_X @ self._q_X_fit.T + + np.sum(self._q_X_fit**2, axis=1).reshape(1, -1) ) + + #distance_matrix = np.sum(self._q_X_fit **2 + q_X**2 - 2 * self._q_X_fit * q_X, axis=1) + return distance_matrix def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: + X = check_array_and_assert(X) + distances = [] #TODO: include in _inference for query in X: - d = super().predict(query, fhe)[0] - assert any(d < 0) or any(np.isnan(d)), "!!!!!!!! Non valid values" + + d = super().predict(query[None], fhe)[0] + #assert any(d < 0) or any(np.isnan(d)), "!!!!!!!! Not valid values" + if any(d < 0) or any(np.isnan(d)): + print("!!!!!!!!!!!!!!!!!!!!!", d[:5], "y_item shape", query.shape, "distance:", d.shape) distances.append(np.sqrt(d)) self.distances_matrix = np.array(distances) @@ -1936,4 +1941,5 @@ class SklearnKNeighborsClassifierMixin(SklearnKNeighborsMixin, sklearn.base.Clas for regressors. 
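The distance expression above relies on the expansion ||x - y||^2 = <x, x> - 2<x, y> + <y, y>, which replaces an explicit loop over pairs with sums, squares and a single matmul. Note that the `np.sum(q_X**2).reshape(1)` form only broadcasts correctly when `q_X` holds a single query row, which is why `predict` here iterates query by query. A quick numpy check of the identity (no Concrete ML involved):

    import numpy

    rng = numpy.random.default_rng(0)
    q_X = rng.integers(0, 16, size=(4, 3))
    X_fit = rng.integers(0, 16, size=(10, 3))

    expanded = (
        numpy.sum(q_X**2, axis=1, keepdims=True)
        - 2 * q_X @ X_fit.T
        + numpy.sum(X_fit**2, axis=1).reshape(1, -1)
    )
    direct = numpy.sum((q_X[:, None, :] - X_fit[None, :, :]) ** 2, axis=2)
    assert numpy.array_equal(expanded, direct)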
""" - # sklearn.base.ClassifierMixin --> is_classifier_or_partial_classifier(KNeighborsClassifier) : True \ No newline at end of file + # sklearn.base.ClassifierMixin --> is_classifier_or_partial_classifier(KNeighborsClassifier) : True + \ No newline at end of file diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 27e521efe..814a8d4fb 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -62,17 +62,15 @@ def dump_dict(self) -> Dict[str, Any]: metadata["_is_fitted"] = self._is_fitted metadata["_is_compiled"] = self._is_compiled metadata["input_quantizers"] = self.input_quantizers - metadata["_weight_quantizer"] = self._weight_quantizer + #metadata["_weight_quantizer"] = self._weight_quantizer + metadata["_q_X_fit_quantizer"] = self._q_X_fit_quantizer metadata["output_quantizers"] = self.output_quantizers metadata["onnx_model_"] = self.onnx_model_ metadata["post_processing_params"] = self.post_processing_params metadata["cml_dumped_class_name"] = type(self).__name__ - metadata["_q_points"] = self._q_points # Scikit-learn - metadata["classes_"] = self.target_classes_ - metadata["n_classes_"] = self.n_classes_ metadata["sklearn_model_class"] = self.sklearn_model_class metadata["n_neighbors"] = self.n_neighbors metadata["algorithm"] = self.algorithm @@ -82,7 +80,6 @@ def dump_dict(self) -> Dict[str, Any]: metadata["metric"] = self.metric metadata["metric_params"] = self.metric_params metadata["n_jobs"] = self.n_jobs - print(self._fit_X) return metadata diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 602c7b9bf..a90677687 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -200,6 +200,7 @@ def check_correctness_with_sklearn( "XGBClassifier": 0.7, "RandomForestClassifier": 0.8, "NeuralNetClassifier": 0.7, + "KNeighborsClassifier": 0.9, } model_name = get_model_name(model_class) From ab6f93dcabdd97a9ff46eb3bb80319cbab38a4ef Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 1 Sep 2023 11:59:24 +0200 Subject: [PATCH 13/51] chore: fix serialization test --- .../ml/common/serialization/decoder.py | 3 +++ src/concrete/ml/sklearn/base.py | 19 ++++++++----------- src/concrete/ml/sklearn/neighbors.py | 13 ++++++------- tests/sklearn/test_sklearn_models.py | 4 ++-- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/concrete/ml/common/serialization/decoder.py b/src/concrete/ml/common/serialization/decoder.py index eebe4e25a..bd2f8ee74 100644 --- a/src/concrete/ml/common/serialization/decoder.py +++ b/src/concrete/ml/common/serialization/decoder.py @@ -87,6 +87,9 @@ def _get_fully_qualified_name(object_class: Type) -> str: "skorch.dataset.Dataset", "skorch.dataset.ValidSplit", "inspect._empty", + "sklearn.neighbors._classification.KNeighborsClassifier", + "sklearn.metrics._dist_metrics.EuclideanDistance", + "sklearn.neighbors._kd_tree.KDTree", ] ) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index e7974df5b..de1561d71 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1703,7 +1703,7 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> # pylint: disable=invalid-name,too-many-instance-attributes class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): - """A Mixin class for sklearn linear models with FHE. + """A Mixin class for sklearn KNeighbors models with FHE. 
This class inherits from sklearn.base.BaseEstimator in order to have access to scikit-learn's `get_params` and `set_params` methods. @@ -1713,7 +1713,7 @@ def __init_subclass__(cls): for klass in cls.__mro__: # pylint: disable-next=protected-access if getattr(klass, "_is_a_public_cml_model", False): - _NEIGHBORS_MODELS.add(cls) # Changed + _NEIGHBORS_MODELS.add(cls) _ALL_SKLEARN_MODELS.add(cls) @@ -1751,8 +1751,7 @@ def _set_onnx_model(self, test_input: numpy.ndarray) -> None: test_input=test_input, extra_config={ "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, - # pylint: disable=protected-access - # pylint: disable=no-member + # pylint: disable=protected-access, no-member constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], # Changed }, ).model @@ -1868,7 +1867,6 @@ def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray: # Create the compiler instance compiler = Compiler(inference_to_compile, {"q_X": "encrypted"}) - print("Compile SklearnKNeighborsMixin", type(compiler)) return compiler @@ -1934,12 +1932,11 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. class SklearnKNeighborsClassifierMixin(SklearnKNeighborsMixin, sklearn.base.ClassifierMixin, ABC): - """A Mixin class for sklearn linear regressors with FHE. + """A Mixin class for sklearn KNeighbors classifiers with FHE. - This class is used to create a linear regressor class that inherits from - sklearn.base.RegressorMixin, which essentially gives access to scikit-learn's `score` method - for regressors. + This class is used to create a KNeighbors classifier class that inherits from + SklearnKNeighborsMixin and sklearn.base.ClassifierMixin. + By inheriting from sklearn.base.ClassifierMixin, it allows this class to be recognized + as a classifier." 
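The serialization edits in the surrounding patches all follow one invariant: `dump_dict` and `load_dict` must stay mirror images, so every key written by one (here `_q_X_fit`, `_q_X_fit_quantizer`, later `_y`) is read back by the other. A minimal sketch of the invariant, using a hypothetical Toy class:

    class Toy:
        def __init__(self):
            self._q_X_fit = None

        def dump_dict(self):
            return {"_q_X_fit": self._q_X_fit}

        @classmethod
        def load_dict(cls, metadata):
            obj = cls()
            obj._q_X_fit = metadata["_q_X_fit"]
            return obj

    toy = Toy()
    toy._q_X_fit = [1, 2, 3]
    assert Toy.load_dict(toy.dump_dict())._q_X_fit == toy._q_X_fit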
""" - # sklearn.base.ClassifierMixin --> is_classifier_or_partial_classifier(KNeighborsClassifier) : True - \ No newline at end of file diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 814a8d4fb..5292632d7 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -62,15 +62,16 @@ def dump_dict(self) -> Dict[str, Any]: metadata["_is_fitted"] = self._is_fitted metadata["_is_compiled"] = self._is_compiled metadata["input_quantizers"] = self.input_quantizers - #metadata["_weight_quantizer"] = self._weight_quantizer + metadata["_weight_quantizer"] = self._weight_quantizer metadata["_q_X_fit_quantizer"] = self._q_X_fit_quantizer + metadata["_q_X_fit"] = self._q_X_fit + metadata["output_quantizers"] = self.output_quantizers metadata["onnx_model_"] = self.onnx_model_ metadata["post_processing_params"] = self.post_processing_params metadata["cml_dumped_class_name"] = type(self).__name__ # Scikit-learn - metadata["sklearn_model_class"] = self.sklearn_model_class metadata["n_neighbors"] = self.n_neighbors metadata["algorithm"] = self.algorithm @@ -97,19 +98,17 @@ def load_dict(cls, metadata: Dict): obj.input_quantizers = metadata["input_quantizers"] obj.output_quantizers = metadata["output_quantizers"] obj._weight_quantizer = metadata["_weight_quantizer"] + obj._q_X_fit_quantizer = metadata["_q_X_fit_quantizer"] + obj._q_X_fit = metadata["_q_X_fit"] + obj.onnx_model_ = metadata["onnx_model_"] obj.post_processing_params = metadata["post_processing_params"] - # Classifier - obj.target_classes_ = metadata["target_classes_"] - obj.n_classes_ = metadata["n_classes_"] - # Scikit-Learn obj.n_neighbors = metadata["n_neighbors"] obj.weights = metadata["weights"] obj.algorithm = metadata["algorithm"] - obj.leaf_size = metadata["leaf_size"] obj.p = metadata["p"] obj.metric = metadata["metric"] obj.metric_params = metadata["metric_params"] diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index a90677687..315a11e64 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -58,12 +58,12 @@ is_regressor_or_partial_regressor, ) from concrete.ml.pytest.utils import ( - _classifiers_and_datasets, # ICI + _classifiers_and_datasets, instantiate_model_generic, sklearn_models_and_datasets, ) from concrete.ml.sklearn import ( - get_sklearn_linear_models, # ICI + get_sklearn_linear_models, get_sklearn_neural_net_models, get_sklearn_tree_models, get_sklearn_neighbors_models, From 9898062a4f64a7d0c96f979c1e8e874a79edfc49 Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 1 Sep 2023 15:35:47 +0200 Subject: [PATCH 14/51] chore: fix gridsearch test + make conformance --- src/concrete/ml/pytest/utils.py | 2 +- src/concrete/ml/sklearn/__init__.py | 10 ++++++++-- src/concrete/ml/sklearn/base.py | 17 ++++------------- tests/sklearn/test_common.py | 7 +++++-- tests/sklearn/test_sklearn_models.py | 10 ++++++++-- 5 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py index 887377c9c..5cb5f2e0d 100644 --- a/src/concrete/ml/pytest/utils.py +++ b/src/concrete/ml/pytest/utils.py @@ -14,11 +14,11 @@ from ..common.serialization.loaders import load, loads from ..common.utils import get_model_class, get_model_name, is_model_class_in_a_list, is_pandas_type from ..sklearn import ( - KNeighborsClassifier, DecisionTreeClassifier, DecisionTreeRegressor, ElasticNet, GammaRegressor, + KNeighborsClassifier, Lasso, 
LinearRegression, LinearSVC, diff --git a/src/concrete/ml/sklearn/__init__.py b/src/concrete/ml/sklearn/__init__.py index fb81e88d5..06e5545f3 100644 --- a/src/concrete/ml/sklearn/__init__.py +++ b/src/concrete/ml/sklearn/__init__.py @@ -3,15 +3,21 @@ from ..common.debugging.custom_assert import assert_true from ..common.utils import is_classifier_or_partial_classifier, is_regressor_or_partial_regressor -from .base import _ALL_SKLEARN_MODELS, _LINEAR_MODELS, _NEURALNET_MODELS, _TREE_MODELS, _NEIGHBORS_MODELS +from .base import ( + _ALL_SKLEARN_MODELS, + _LINEAR_MODELS, + _NEIGHBORS_MODELS, + _NEURALNET_MODELS, + _TREE_MODELS, +) from .glm import GammaRegressor, PoissonRegressor, TweedieRegressor from .linear_model import ElasticNet, Lasso, LinearRegression, LogisticRegression, Ridge +from .neighbors import KNeighborsClassifier from .qnn import NeuralNetClassifier, NeuralNetRegressor from .rf import RandomForestClassifier, RandomForestRegressor from .svm import LinearSVC, LinearSVR from .tree import DecisionTreeClassifier, DecisionTreeRegressor from .xgb import XGBClassifier, XGBRegressor -from .neighbors import KNeighborsClassifier def get_sklearn_models(): diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index de1561d71..1da2112a7 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -521,7 +521,6 @@ def compile( """ # Reset for double compile self._is_compiled = False - print("1. Compile based estimator") # Check that the model is correctly fitted self.check_model_is_fitted() @@ -1698,8 +1697,6 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> return y_proba - - # pylint: disable=invalid-name,too-many-instance-attributes class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): @@ -1716,7 +1713,6 @@ def __init_subclass__(cls): _NEIGHBORS_MODELS.add(cls) _ALL_SKLEARN_MODELS.add(cls) - def __init__(self, n_bits: Union[int, Dict[str, int]] = 8): """Initialize the FHE knn model. @@ -1752,7 +1748,7 @@ def _set_onnx_model(self, test_input: numpy.ndarray) -> None: extra_config={ "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, # pylint: disable=protected-access, no-member - constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], # Changed + constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], }, ).model @@ -1903,7 +1899,7 @@ def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray: + np.sum(self._q_X_fit**2, axis=1).reshape(1, -1) ) - #distance_matrix = np.sum(self._q_X_fit **2 + q_X**2 - 2 * self._q_X_fit * q_X, axis=1) + # distance_matrix = np.sum(self._q_X_fit **2 + q_X**2 - 2 * self._q_X_fit * q_X, axis=1) return distance_matrix @@ -1912,13 +1908,9 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. X = check_array_and_assert(X) distances = [] - #TODO: include in _inference for query in X: - d = super().predict(query[None], fhe)[0] - #assert any(d < 0) or any(np.isnan(d)), "!!!!!!!! Not valid values" - if any(d < 0) or any(np.isnan(d)): - print("!!!!!!!!!!!!!!!!!!!!!", d[:5], "y_item shape", query.shape, "distance:", d.shape) + # assert any(d < 0) or any(np.isnan(d)), "!!!!!!!! Not valid values" distances.append(np.sqrt(d)) self.distances_matrix = np.array(distances) @@ -1935,8 +1927,7 @@ class SklearnKNeighborsClassifierMixin(SklearnKNeighborsMixin, sklearn.base.Clas """A Mixin class for sklearn KNeighbors classifiers with FHE. 
This class is used to create a KNeighbors classifier class that inherits from - SklearnKNeighborsMixin and sklearn.base.ClassifierMixin. + SklearnKNeighborsMixin and sklearn.base.ClassifierMixin. By inheriting from sklearn.base.ClassifierMixin, it allows this class to be recognized as a classifier." """ - diff --git a/tests/sklearn/test_common.py b/tests/sklearn/test_common.py index 65f52928c..54ba6d378 100644 --- a/tests/sklearn/test_common.py +++ b/tests/sklearn/test_common.py @@ -10,9 +10,9 @@ from concrete.ml.pytest.utils import sklearn_models_and_datasets from concrete.ml.sklearn import ( get_sklearn_linear_models, + get_sklearn_neighbors_models, get_sklearn_neural_net_models, get_sklearn_tree_models, - get_sklearn_neighbors_models, ) @@ -20,7 +20,10 @@ def test_sklearn_args(): """Check that all arguments from the underlying sklearn model are exposed.""" test_counter = 0 for model_class in ( - get_sklearn_linear_models() + get_sklearn_neural_net_models() + get_sklearn_tree_models() + get_sklearn_neighbors_models() + get_sklearn_linear_models() + + get_sklearn_neural_net_models() + + get_sklearn_tree_models() + + get_sklearn_neighbors_models() ): model_class = get_model_class(model_class) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 315a11e64..a9bdea69b 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -64,9 +64,9 @@ ) from concrete.ml.sklearn import ( get_sklearn_linear_models, + get_sklearn_neighbors_models, get_sklearn_neural_net_models, get_sklearn_tree_models, - get_sklearn_neighbors_models, ) # Allow multiple runs in FHE to make sure we always have the correct output @@ -672,6 +672,12 @@ def check_grid_search(model_class, x, y, scoring): # Sometimes, we miss convergence, which is not a problem for our test warnings.simplefilter("ignore", category=ConvergenceWarning) + if get_model_name(model_class) == "KNeighborsClassifier" and scoring in [ + "roc_auc", + "average_precision", + ]: + pytest.skip("Skipping predict_proba for KNN, doesn't work for now") + _ = GridSearchCV( model_class(), param_grid, cv=5, scoring=scoring, error_score="raise", n_jobs=1 ).fit(x, y) @@ -771,7 +777,7 @@ def get_hyper_param_combinations(model_class): "base_score": [0.5, None], } elif model_class in get_sklearn_neighbors_models(): - hyper_param_combinations = {"n_neighbors": [3, 5]} + hyper_param_combinations = {"n_neighbors": [2, 4]} else: assert is_model_class_in_a_list( From a13d02820695d41967e1a962e83cfa1f04fd4b3a Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 1 Sep 2023 16:16:04 +0200 Subject: [PATCH 15/51] chore: update conformance --- src/concrete/ml/sklearn/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 1da2112a7..5df3f8f34 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1867,7 +1867,6 @@ def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray: return compiler def top_k_indices(self, distance_matrix, k): - print("Top_k_indices") # Sort the distance in the ascending order # Pick up the k smallest distanes # Sort by index 1 From ead5c4549ac24334a782cd9d12c03092fab41dea Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 1 Sep 2023 16:28:06 +0200 Subject: [PATCH 16/51] chore: correct pairwise euclidean_distances mistake in the dims --- src/concrete/ml/sklearn/base.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py 
b/src/concrete/ml/sklearn/base.py index 5df3f8f34..fc9551dee 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1893,13 +1893,11 @@ def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray: # @ is used for matrices quand c'est une matrice @ -> matmul distance_matrix = ( - np.sum(q_X**2).reshape(1) + numpy.sum(q_X**2, axis=1, keepdims=True) - 2 * q_X @ self._q_X_fit.T - + np.sum(self._q_X_fit**2, axis=1).reshape(1, -1) + + numpy.expand_dims(numpy.sum(self._q_X_fit**2, axis=1), 0) ) - # distance_matrix = np.sum(self._q_X_fit **2 + q_X**2 - 2 * self._q_X_fit * q_X, axis=1) - return distance_matrix def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: From 9336b8425bc14beed49ebd8f2422c27f88bd32f4 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 4 Sep 2023 09:40:06 +0200 Subject: [PATCH 17/51] chore: remove other classes --- docs/advanced_examples/LinearRegression.ipynb | 14 --------- src/concrete/ml/sklearn/neighbors.py | 29 ------------------- 2 files changed, 43 deletions(-) diff --git a/docs/advanced_examples/LinearRegression.ipynb b/docs/advanced_examples/LinearRegression.ipynb index 054233813..e453e857b 100644 --- a/docs/advanced_examples/LinearRegression.ipynb +++ b/docs/advanced_examples/LinearRegression.ipynb @@ -588,20 +588,6 @@ "metadata": { "execution": { "timeout": 10800 - }, - "kernelspec": { - "display_name": "Python 3.10.6 ('.venv': poetry)", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.10.6" - }, - "vscode": { - "interpreter": { - "hash": "d11d2d767e01a44b3e69d0864f5db4163d647e8ae5c68b7694f10d9d57d10ac5" - } } }, "nbformat": 4, diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 5292632d7..a42452ef8 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -114,32 +114,3 @@ def load_dict(cls, metadata: Dict): obj.metric_params = metadata["metric_params"] obj.n_jobs = metadata["n_jobs"] return obj - - -class _KNeighborsRegressor: - pass - - -class _RadiusNeighborsClassifier: - """ - - Find the neighbors within a given radius of a point or points. - - Return the indices and distances of each point from the dataset lying in a ball with size radius - around the points of the query array. - - Points lying on the boundary are included in the results. - - The result points are not necessarily sorted by distance to their query point. 
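The dimension fix in patch 16 works because of broadcasting: the query norms must form a column vector and the fitted-data norms a row vector, so that their sum expands to the full (n_queries, n_fit) matrix. A small shape check in plain numpy:

    import numpy

    q_X = numpy.arange(6).reshape(2, 3)        # 2 queries, 3 features
    X_fit = numpy.arange(12).reshape(4, 3)     # 4 fitted points, 3 features

    col = numpy.sum(q_X**2, axis=1, keepdims=True)           # shape (2, 1)
    row = numpy.expand_dims(numpy.sum(X_fit**2, axis=1), 0)  # shape (1, 4)
    cross = 2 * q_X @ X_fit.T                                # shape (2, 4)
    assert (col - cross + row).shape == (2, 4)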
-
-    """
-
-    pass
-
-
-class _RadiusNeighborsRegressor:
-    pass
-
-
-class _NearestNeighbors:
-    pass

From a5747174e71a2b1381f991092c1dcc170ab0e3f1 Mon Sep 17 00:00:00 2001
From: kcelia
Date: Mon, 4 Sep 2023 11:58:01 +0200
Subject: [PATCH 18/51] chore: fix make pcc

---
 src/concrete/ml/sklearn/base.py | 67 +++++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 20 deletions(-)

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index fc9551dee..58dc75f1f 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -59,9 +59,8 @@
 # pylint: disable=wrong-import-position,wrong-import-order
 # Silence Hummingbird warnings
 warnings.filterwarnings("ignore")
-import numpy as np
 from hummingbird.ml import convert as hb_convert  # noqa: E402
-from hummingbird.ml.operator_converters import constants
+from hummingbird.ml.operator_converters import constants  # noqa: E402

 _ALL_SKLEARN_MODELS: Set[Type] = set()
 _LINEAR_MODELS: Set[Type] = set()
@@ -1699,7 +1698,6 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) ->

 # pylint: disable=invalid-name,too-many-instance-attributes
 class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC):
-
     """A Mixin class for sklearn KNeighbors models with FHE.

     This class inherits from sklearn.base.BaseEstimator in order to have access to scikit-learn's
@@ -1729,6 +1727,8 @@ def __init__(self, n_bits: Union[int, Dict[str, int]] = 8):

         #: The quantizer to use for quantizing the model's weights
         self._weight_quantizer: Optional[UniformQuantizer] = None
+        self._q_X_fit_quantizer: Optional[UniformQuantizer] = None
+        self._q_X_fit: numpy.ndarray

         BaseEstimator.__init__(self)
@@ -1768,6 +1768,8 @@ def fit(self, X: Data, y: Target, **fit_parameters):
         # KNeighbors handles multi-labels data
         X, y = check_X_y_and_assert_multi_output(X, y)

+        self._y = y
+
         # Fit the scikit-learn model
         self._fit_sklearn_model(X, y, **fit_parameters)
@@ -1866,32 +1868,57 @@ def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray:

         return compiler

-    def top_k_indices(self, distance_matrix, k):
-        # Sort the distance in the ascending order
-        # Pick up the k smallest distanes
-        # Sort by index 1
+    @staticmethod
+    def top_k_indices(distance_matrix: numpy.ndarray, k: int) -> numpy.ndarray:
+        """Get the indices of the top-k smallest distances for each point.
+
+        Args:
+            distance_matrix (numpy.ndarray): Represents the pairwise euclidean distance between
+                the query and other points
+            k (int): The top nearest neighbors to consider
+
+        Returns:
+            numpy.ndarray: The k nearest neighbors for the corresponding query, sorted in
+                ascending order.
+        """
+
+        # Sort the distances in ascending order and select the k smallest distances
         return numpy.argsort(distance_matrix, axis=1)[:, :k]

-    def majority_vote(self, nearest_classes):
+    @staticmethod
+    def majority_vote(nearest_classes: numpy.ndarray):
+        """Determine the most common class among the nearest neighbors for each query.
+
+        Args:
+            nearest_classes (numpy.ndarray): The class labels of the nearest neighbors for a query
+
+        Returns:
+            numpy.ndarray: The majority-voted class label for the corresponding query.
+ """ # Get the number of queries (rows) and k (number of nearest points) n_queries, _ = nearest_classes.shape # Compute the majority vote for each query - majority_votes = np.array([0] * n_queries, dtype=int) + majority_votes = numpy.array([0] * n_queries, dtype=int) for i in range(n_queries): # Use bincount to count occurrences of each class and find the most common one - class_counts = np.bincount(nearest_classes[i]) - majority_votes[i] = np.argmax(class_counts) + class_counts = numpy.bincount(nearest_classes[i]) + majority_votes[i] = numpy.argmax(class_counts) return majority_votes def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray: - assert self._q_X_fit_quantizer is not None, self._is_not_fitted_error_message() + """Inference function. + + Args: + q_X (numpy.ndarray): The quantized input values. - # np.newaxis, [..., None] -> - # ValueError: Indexing with 'None' & 'Ellipsis' is not supported - # dot is used for a tensor of one dimension - # @ is used for matrices quand c'est une matrice @ -> matmul + Returns: + numpy.ndarray: The quantized predicted values. + """ + assert self._q_X_fit_quantizer is not None, self._is_not_fitted_error_message() + # Pairwise euclidean distance + # dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y)) distance_matrix = ( numpy.sum(q_X**2, axis=1, keepdims=True) - 2 * q_X @ self._q_X_fit.T @@ -1908,13 +1935,13 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. for query in X: d = super().predict(query[None], fhe)[0] # assert any(d < 0) or any(np.isnan(d)), "!!!!!!!! Not valid values" - distances.append(np.sqrt(d)) + distances.append(numpy.sqrt(d)) - self.distances_matrix = np.array(distances) + self.distances_matrix = numpy.array(distances) - k_indices = self.top_k_indices(self.distances_matrix, self.sklearn_model.n_neighbors) + k_indices = self.top_k_indices(self.distances_matrix, self.n_neighbors) # pylint: disable=protected-access - label_k_indices = self.sklearn_model._y[k_indices] + label_k_indices = self._y[k_indices] y_pred = self.majority_vote(label_k_indices) return y_pred From 833d4689b4c296d803dfc1f0e788bc8e7d3d834f Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 4 Sep 2023 13:34:59 +0200 Subject: [PATCH 19/51] chore: update test/common --- src/concrete/ml/pytest/utils.py | 4 ++-- tests/common/test_skearn_model_lists.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py index 5cb5f2e0d..d50fe9519 100644 --- a/src/concrete/ml/pytest/utils.py +++ b/src/concrete/ml/pytest/utils.py @@ -143,8 +143,8 @@ def get_random_extract_of_sklearn_models_and_datasets(): unique_model_classes.append(m) # To avoid to make mistakes and return empty list - assert len(sklearn_models_and_datasets) == 28 - assert len(unique_model_classes) == 18 + assert len(sklearn_models_and_datasets) == 30 + assert len(unique_model_classes) == 19 return unique_model_classes diff --git a/tests/common/test_skearn_model_lists.py b/tests/common/test_skearn_model_lists.py index cd7fe34a2..dc38c716b 100644 --- a/tests/common/test_skearn_model_lists.py +++ b/tests/common/test_skearn_model_lists.py @@ -8,6 +8,7 @@ LogisticRegression, Ridge, ) +from concrete.ml.sklearn.neighbors import KNeighborsClassifier from concrete.ml.sklearn.qnn import NeuralNetClassifier, NeuralNetRegressor from concrete.ml.sklearn.rf import RandomForestClassifier, RandomForestRegressor from concrete.ml.sklearn.svm import LinearSVC, LinearSVR @@ -18,10 +19,12 @@ def 
test_get_sklearn_models(): """List all available models in Concrete ML.""" dic = get_sklearn_models() + cml_list = dic["all"] linear_list = dic["linear"] tree_list = dic["tree"] neuralnet_list = dic["neural_net"] + neighbors_list = dic["neighbors"] print("All models: ") for m in cml_list: @@ -39,6 +42,10 @@ def test_get_sklearn_models(): for m in neuralnet_list: print(f" {m}") + print("Neighbors models: ") + for m in neighbors_list: + print(f" {m}") + # Check values expected_neuralnet_list = [NeuralNetClassifier, NeuralNetRegressor] assert ( @@ -69,12 +76,18 @@ def test_get_sklearn_models(): Ridge, TweedieRegressor, ] + + expected_neighbors_list = [KNeighborsClassifier] + assert ( linear_list == expected_linear_list ), "Please change the expected number of models if you add new models" # Check number assert cml_list == sorted( - expected_linear_list + expected_neuralnet_list + expected_tree_list, + expected_linear_list + + expected_neuralnet_list + + expected_tree_list + + expected_neighbors_list, key=lambda m: m.__name__, ), "Please change the expected number of models if you add new models" From 749359203b84674bbbcdf19b15dba611f155faa6 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 4 Sep 2023 15:21:56 +0200 Subject: [PATCH 20/51] chore: fix parameter search tests --- src/concrete/ml/search_parameters/p_error_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/concrete/ml/search_parameters/p_error_search.py b/src/concrete/ml/search_parameters/p_error_search.py index bc882937c..65953e3d7 100644 --- a/src/concrete/ml/search_parameters/p_error_search.py +++ b/src/concrete/ml/search_parameters/p_error_search.py @@ -61,7 +61,7 @@ from tqdm import tqdm from ..common.utils import is_brevitas_model, is_model_class_in_a_list -from ..sklearn import get_sklearn_neural_net_models, get_sklearn_tree_models +from ..sklearn import get_sklearn_neural_net_models, get_sklearn_tree_models, get_sklearn_neighbors_models from ..torch.compile import compile_brevitas_qat_model, compile_torch_model @@ -126,7 +126,7 @@ def compile_and_simulated_fhe_inference( dequantized_output = quantized_module.forward(calibration_data, fhe="simulate") elif is_model_class_in_a_list( - estimator, get_sklearn_neural_net_models() + get_sklearn_tree_models() + estimator, get_sklearn_neural_net_models() + get_sklearn_tree_models() + get_sklearn_neighbors_models() ): if not estimator.is_fitted: estimator.fit(calibration_data, ground_truth) From e06bd40497486508ae526c6b44796a2fa1650089 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 4 Sep 2023 15:51:30 +0200 Subject: [PATCH 21/51] chore: fix deployment tests reformat search_parameters/p_error_search.py --- conftest.py | 16 +++++++++++++--- .../ml/search_parameters/p_error_search.py | 11 +++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/conftest.py b/conftest.py index 5209a325e..c4fa713c7 100644 --- a/conftest.py +++ b/conftest.py @@ -33,6 +33,7 @@ from concrete.ml.sklearn.base import ( BaseTreeEstimatorMixin, QuantizedTorchEstimatorMixin, + SklearnKNeighborsMixin, SklearnLinearModelMixin, ) @@ -482,7 +483,12 @@ def check_is_good_execution_for_cml_vs_circuit_impl( else: assert isinstance( model, - (QuantizedTorchEstimatorMixin, BaseTreeEstimatorMixin, SklearnLinearModelMixin), + ( + QuantizedTorchEstimatorMixin, + BaseTreeEstimatorMixin, + SklearnLinearModelMixin, + SklearnKNeighborsMixin, + ), ) if model._is_a_public_cml_model: # pylint: disable=protected-access @@ -492,8 +498,12 @@ def 
check_is_good_execution_for_cml_vs_circuit_impl( # tests), especially since these results are tested in other tests such as the # `check_subfunctions_in_fhe` if is_classifier_or_partial_classifier(model): - results_cnp_circuit = model.predict_proba(*inputs, fhe=fhe_mode) - results_model = model.predict_proba(*inputs, fhe="disable") + if isinstance(model, SklearnKNeighborsMixin): + results_cnp_circuit = model.predict(*inputs, fhe=fhe_mode) + results_model = model.predict(*inputs, fhe="disable") + else: + results_cnp_circuit = model.predict_proba(*inputs, fhe=fhe_mode) + results_model = model.predict_proba(*inputs, fhe="disable") else: results_cnp_circuit = model.predict(*inputs, fhe=fhe_mode) diff --git a/src/concrete/ml/search_parameters/p_error_search.py b/src/concrete/ml/search_parameters/p_error_search.py index 65953e3d7..dbed2c1f7 100644 --- a/src/concrete/ml/search_parameters/p_error_search.py +++ b/src/concrete/ml/search_parameters/p_error_search.py @@ -61,7 +61,11 @@ from tqdm import tqdm from ..common.utils import is_brevitas_model, is_model_class_in_a_list -from ..sklearn import get_sklearn_neural_net_models, get_sklearn_tree_models, get_sklearn_neighbors_models +from ..sklearn import ( + get_sklearn_neighbors_models, + get_sklearn_neural_net_models, + get_sklearn_tree_models, +) from ..torch.compile import compile_brevitas_qat_model, compile_torch_model @@ -126,7 +130,10 @@ def compile_and_simulated_fhe_inference( dequantized_output = quantized_module.forward(calibration_data, fhe="simulate") elif is_model_class_in_a_list( - estimator, get_sklearn_neural_net_models() + get_sklearn_tree_models() + get_sklearn_neighbors_models() + estimator, + get_sklearn_neural_net_models() + + get_sklearn_tree_models() + + get_sklearn_neighbors_models(), ): if not estimator.is_fitted: estimator.fit(calibration_data, ground_truth) From 70053540534260c4ffa892fa9b3b5412e48b3ff6 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 4 Sep 2023 17:57:33 +0200 Subject: [PATCH 22/51] chore: reduce dataset size for knn --- src/concrete/ml/pytest/utils.py | 6 +++--- src/concrete/ml/sklearn/base.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py index d50fe9519..d19e2df8c 100644 --- a/src/concrete/ml/pytest/utils.py +++ b/src/concrete/ml/pytest/utils.py @@ -88,10 +88,10 @@ pytest.param( model, { - "n_samples": 1000, - "n_features": 10, + "n_samples": 100 if get_model_name(model) == "KNeighborsClassifier" else 1000, + "n_features": 5 if get_model_name(model) == "KNeighborsClassifier" else 10, "n_classes": n_classes, - "n_informative": 10, + "n_informative": 4 if get_model_name(model) == "KNeighborsClassifier" else 10, "n_redundant": 0, }, id=get_model_name(model), diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 58dc75f1f..e99079825 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -608,8 +608,6 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. 
ValueError, ) - # print("monkey") - # Check that the model is properly fitted self.check_model_is_fitted() From 538f03abc7d8bd6d7b034e73aa609f629f1b1d13 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 5 Sep 2023 11:29:08 +0200 Subject: [PATCH 23/51] chore: add self._y --- src/concrete/ml/sklearn/neighbors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index a42452ef8..6a28fc678 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -65,6 +65,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["_weight_quantizer"] = self._weight_quantizer metadata["_q_X_fit_quantizer"] = self._q_X_fit_quantizer metadata["_q_X_fit"] = self._q_X_fit + metadata["_y"] = self._y metadata["output_quantizers"] = self.output_quantizers metadata["onnx_model_"] = self.onnx_model_ @@ -100,6 +101,7 @@ def load_dict(cls, metadata: Dict): obj._weight_quantizer = metadata["_weight_quantizer"] obj._q_X_fit_quantizer = metadata["_q_X_fit_quantizer"] obj._q_X_fit = metadata["_q_X_fit"] + obj._y = metadata["_y"] obj.onnx_model_ = metadata["onnx_model_"] From a7bab6def159b4e7253adb6690727e473b15e1bc Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 5 Sep 2023 11:29:53 +0200 Subject: [PATCH 24/51] chore: fix test_p_error_global_p_error_simulation test --- tests/sklearn/test_sklearn_models.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index a9bdea69b..74d379ea5 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1533,7 +1533,9 @@ def test_p_error_global_p_error_simulation( model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) # Check if model is linear - is_linear_model = is_model_class_in_a_list(model_class, get_sklearn_linear_models()) + is_linear_model = is_model_class_in_a_list( + model_class, get_sklearn_linear_models() + get_sklearn_neighbors_models() + ) # Compile with a large p_error to be sure the result is random. 
 model.compile(x, **error_param)

@@ -1541,9 +1543,14 @@
     def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_RUN):
         """Detect divergence between simulated/FHE execution and clear run."""
         predict_function = (
-            model.predict_proba if is_classifier_or_partial_classifier(model) else model.predict
+            model.predict_proba
+            if is_classifier_or_partial_classifier(model)
+            # predict_proba is not implemented yet for KNeighborsClassifier
+            and get_model_name(model) != "KNeighborsClassifier"
+            else model.predict
         )
         y_expected = predict_function(x, fhe="disable")
+
         for i in range(max_iterations):
             y_pred = predict_function(x[i : i + 1], fhe=fhe).ravel()
             if not numpy.array_equal(y_pred, y_expected[i : i + 1].ravel()):

From 7bcaddc00022fb1b225a08c3cdd45c9c4ee41e80 Mon Sep 17 00:00:00 2001
From: kcelia
Date: Tue, 5 Sep 2023 12:36:48 +0200
Subject: [PATCH 25/51] chore: fix test_quantization

---
 tests/sklearn/test_sklearn_models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py
index 74d379ea5..8d12d9462 100644
--- a/tests/sklearn/test_sklearn_models.py
+++ b/tests/sklearn/test_sklearn_models.py
@@ -719,7 +719,8 @@ def check_sklearn_equivalence(model_class, n_bits, x, y, check_accuracy, check_r
         y_pred_sklearn = sklearn_model.decision_function(x)

     # Else, compute the model's predicted probabilities
-    else:
+    # predict_proba is not implemented for KNeighborsClassifier for now
+    elif get_model_name(model_class) != "KNeighborsClassifier":
         y_pred_cml = model.predict_proba(x)
         y_pred_sklearn = sklearn_model.predict_proba(x)

From 20edf010a1759322332bcf15635203dd77fdbe4f Mon Sep 17 00:00:00 2001
From: kcelia
Date: Tue, 5 Sep 2023 15:03:56 +0200
Subject: [PATCH 26/51] chore: add encrypted argsort

---
 src/concrete/ml/sklearn/base.py | 98 ++++++++++++++++++++++++++++-----
 1 file changed, 84 insertions(+), 14 deletions(-)

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index e99079825..6d5f0495a 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -26,6 +26,8 @@
 from concrete.fhe.dtypes.integer import Integer
 from sklearn.base import clone

+from concrete import fhe
+
 from ..common.check_inputs import check_array_and_assert, check_X_y_and_assert_multi_output
 from ..common.debugging.custom_assert import assert_true
 from ..common.serialization.dumpers import dump, dumps
@@ -1915,29 +1917,97 @@ def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray:
         """
         assert self._q_X_fit_quantizer is not None, self._is_not_fitted_error_message()

-        # Pairwise euclidean distance
-        # dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))
-        distance_matrix = (
-            numpy.sum(q_X**2, axis=1, keepdims=True)
-            - 2 * q_X @ self._q_X_fit.T
-            + numpy.expand_dims(numpy.sum(self._q_X_fit**2, axis=1), 0)
-        )
+        def pairwise_euclidean_distance(q_X):
+            # 1. Pairwise euclidean distance
+            # dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))
+            return (
+                numpy.sum(q_X**2, axis=1, keepdims=True)
+                - 2 * q_X @ self._q_X_fit.T
+                + numpy.expand_dims(numpy.sum(self._q_X_fit**2, axis=1), 0)
+            )
+
+        distance_matrix = pairwise_euclidean_distance(q_X)
+
+        # The square root is not applied here: squared distances preserve the neighbor ordering
+
+        # 2.
Sorting + def topk_sorting(x): + def gather1d(x, indices): + """Select x[indices].""" + arr = [] + for i in indices: + arr.append(x[i]) + enc_arr = fhe.array(arr) + return enc_arr + + def scatter1d(x, v, indices): + for idx, i in enumerate(indices): + x[i] = v[idx] + return x + + def mul_tlu(a, b): + # (a - b)^2 - (a + b)^2 = -4ab => ab = ((a + b)^2 - (a - b)^2) / 4 + return (((a + b) ** 2 - (a - b) ** 2) / 4).astype(numpy.int64) + + idx = numpy.arange(x.size) + fhe.zeros(x.shape) + comparisons = numpy.zeros(x.shape) + n = x.size + k = self.n_neighbors + + ln2n = int(numpy.ceil(numpy.log2(n))) + for t in range(ln2n - 1, -1, -1): + p = 2**t + r = 0 + d = p + + for bq in range(ln2n - 1, t - 1, -1): # q = 2^(t-1), 2^(t-2), ..., p + q = 2**bq + range_i = numpy.array( + [i for i in range(0, n - d) if i & p == r and comparisons[i] < k] + ) + + if len(range_i) == 0: + continue + + a = gather1d(x, range_i) # x[range_i] + a_i = gather1d(idx, range_i) # idx[range_i] + b = gather1d(x, range_i + d) # x[range_i + d] + b_i = gather1d(idx, range_i + d) # idx[range_i + d] + + diff = a - b + sign = diff < 0 + + max_x = a + numpy.maximum(0, b - a) + x = scatter1d(x, a + b - max_x, range_i) # x[range_i] = a + b - max_x + x = scatter1d(x, max_x, range_i + d) # x[range_i + d] = max_x + + max_idx = a_i + mul_tlu((b_i - a_i), sign) + idx = scatter1d(idx, a_i + b_i - max_idx, range_i) + idx = scatter1d(idx, max_idx, range_i + d) + + comparisons[range_i + d] = comparisons[range_i + d] + 1 + d = q - p + r = p + + return numpy.concatenate((x.reshape((1, -1)), idx.reshape((1, -1))), axis=0) + + _, sorted_args = topk_sorting(distance_matrix[0]) + sorted_args = sorted_args.astype(numpy.int16) - return distance_matrix + return sorted_args def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: X = check_array_and_assert(X) - distances = [] + sorted_args_matrix = [] for query in X: - d = super().predict(query[None], fhe)[0] - # assert any(d < 0) or any(np.isnan(d)), "!!!!!!!! 
Not valid values" - distances.append(numpy.sqrt(d)) + arg_sort = super().predict(query[None], fhe)[0] + sorted_args_matrix.append(arg_sort) - self.distances_matrix = numpy.array(distances) + self.sorted_args_matrix = numpy.array(sorted_args_matrix) - k_indices = self.top_k_indices(self.distances_matrix, self.n_neighbors) + k_indices = self.top_k_indices(self.sorted_args_matrix, self.n_neighbors) # pylint: disable=protected-access label_k_indices = self._y[k_indices] y_pred = self.majority_vote(label_k_indices) From e9f2c2197a3ee3e86bef0f6b7bd762884a754dab Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 5 Sep 2023 16:15:42 +0200 Subject: [PATCH 27/51] chore: decrease even more the knn dataset size --- src/concrete/ml/pytest/utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py index d19e2df8c..b53b00498 100644 --- a/src/concrete/ml/pytest/utils.py +++ b/src/concrete/ml/pytest/utils.py @@ -88,18 +88,19 @@ pytest.param( model, { - "n_samples": 100 if get_model_name(model) == "KNeighborsClassifier" else 1000, - "n_features": 5 if get_model_name(model) == "KNeighborsClassifier" else 10, + "n_samples": 50 if get_model_name(model) == "KNeighborsClassifier" else 1000, + "n_features": 3 if get_model_name(model) == "KNeighborsClassifier" else 10, "n_classes": n_classes, - "n_informative": 4 if get_model_name(model) == "KNeighborsClassifier" else 10, + "n_informative": 2 if get_model_name(model) == "KNeighborsClassifier" else 10, "n_redundant": 0, }, id=get_model_name(model), ) for model in _classifier_models - for n_classes in [2, 4] + for n_classes in [2] ] + # Get the data-sets. The data generation is seeded in load_data. # Only LinearRegression supports multi targets # GammaRegressor, PoissonRegressor and TweedieRegressor only handle positive target values From 600f72cc3a48d5368760fc31a0826848526ce14c Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 5 Sep 2023 16:16:25 +0200 Subject: [PATCH 28/51] chore: correct argsort and topk_indice naming --- src/concrete/ml/sklearn/base.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 6d5f0495a..213f5e2c6 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1992,7 +1992,6 @@ def mul_tlu(a, b): return numpy.concatenate((x.reshape((1, -1)), idx.reshape((1, -1))), axis=0) _, sorted_args = topk_sorting(distance_matrix[0]) - sorted_args = sorted_args.astype(numpy.int16) return sorted_args @@ -2003,14 +2002,14 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. 
sorted_args_matrix = [] for query in X: arg_sort = super().predict(query[None], fhe)[0] - sorted_args_matrix.append(arg_sort) + sorted_args_matrix.append(arg_sort.astype(numpy.int64)) self.sorted_args_matrix = numpy.array(sorted_args_matrix) - k_indices = self.top_k_indices(self.sorted_args_matrix, self.n_neighbors) + # k_indices = self.top_k_indices(self.sorted_args_matrix, self.n_neighbors) # pylint: disable=protected-access - label_k_indices = self._y[k_indices] - y_pred = self.majority_vote(label_k_indices) + label_k_indices = self._y[self.sorted_args_matrix] + y_pred = self.majority_vote(label_k_indices[None]) return y_pred From 654983dc89ddeb663e299cc196f187ad0739d0fa Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 5 Sep 2023 16:36:48 +0200 Subject: [PATCH 29/51] chore: remove topk_indice --- src/concrete/ml/sklearn/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 213f5e2c6..665ea1022 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -2007,6 +2007,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. self.sorted_args_matrix = numpy.array(sorted_args_matrix) # k_indices = self.top_k_indices(self.sorted_args_matrix, self.n_neighbors) + # pylint: disable=protected-access label_k_indices = self._y[self.sorted_args_matrix] y_pred = self.majority_vote(label_k_indices[None]) From e014ff0c27d4cd374b791b74c13ae84061e87415 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 6 Sep 2023 15:28:17 +0200 Subject: [PATCH 30/51] chore: simplify multiplication --- src/concrete/ml/sklearn/base.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 665ea1022..5397291d3 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1946,8 +1946,7 @@ def scatter1d(x, v, indices): return x def mul_tlu(a, b): - # (a - b)^2 - (a + b)^2 = -4ab => ab = ((a + b)^2 - (a - b)^2) / 4 - return (((a + b) ** 2 - (a - b) ** 2) / 4).astype(numpy.int64) + return a * b idx = numpy.arange(x.size) + fhe.zeros(x.shape) comparisons = numpy.zeros(x.shape) @@ -1965,7 +1964,6 @@ def mul_tlu(a, b): range_i = numpy.array( [i for i in range(0, n - d) if i & p == r and comparisons[i] < k] ) - if len(range_i) == 0: continue @@ -1982,16 +1980,19 @@ def mul_tlu(a, b): x = scatter1d(x, max_x, range_i + d) # x[range_i + d] = max_x max_idx = a_i + mul_tlu((b_i - a_i), sign) - idx = scatter1d(idx, a_i + b_i - max_idx, range_i) - idx = scatter1d(idx, max_idx, range_i + d) + idx = scatter1d( + idx, a_i + b_i - max_idx, range_i + ) # idx[range_i] = a_i + b_i - max_idx + idx = scatter1d(idx, max_idx, range_i + d) # idx[range_i + d] = max_idx comparisons[range_i + d] = comparisons[range_i + d] + 1 + d = q - p r = p return numpy.concatenate((x.reshape((1, -1)), idx.reshape((1, -1))), axis=0) - _, sorted_args = topk_sorting(distance_matrix[0]) + _, sorted_args = topk_sorting(distance_matrix.flatten()) return sorted_args @@ -2007,7 +2008,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. 
self.sorted_args_matrix = numpy.array(sorted_args_matrix) # k_indices = self.top_k_indices(self.sorted_args_matrix, self.n_neighbors) - + # pylint: disable=protected-access label_k_indices = self._y[self.sorted_args_matrix] y_pred = self.majority_vote(label_k_indices[None]) From ef248592748a7d771fb49c55f088599d8acc60a0 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 7 Sep 2023 14:43:09 +0200 Subject: [PATCH 31/51] chore: fix inference --- src/concrete/ml/sklearn/base.py | 34 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 5397291d3..e4666ec79 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1895,14 +1895,8 @@ def majority_vote(nearest_classes: numpy.ndarray): Returns: numpy.ndarray: The majority-voted class label for the corresponding query. """ - # Get the number of queries (rows) and k (number of nearest points) - n_queries, _ = nearest_classes.shape - # Compute the majority vote for each query - majority_votes = numpy.array([0] * n_queries, dtype=int) - for i in range(n_queries): - # Use bincount to count occurrences of each class and find the most common one - class_counts = numpy.bincount(nearest_classes[i]) - majority_votes[i] = numpy.argmax(class_counts) + class_counts = numpy.bincount(nearest_classes) + majority_votes = numpy.argmax(class_counts) return majority_votes @@ -2000,20 +1994,18 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. X = check_array_and_assert(X) - sorted_args_matrix = [] + y_preds = [] for query in X: - arg_sort = super().predict(query[None], fhe)[0] - sorted_args_matrix.append(arg_sort.astype(numpy.int64)) - - self.sorted_args_matrix = numpy.array(sorted_args_matrix) - - # k_indices = self.top_k_indices(self.sorted_args_matrix, self.n_neighbors) - - # pylint: disable=protected-access - label_k_indices = self._y[self.sorted_args_matrix] - y_pred = self.majority_vote(label_k_indices[None]) - - return y_pred + # Argsort + arg_sort = super().predict(query[None], fhe) + arg_sort = arg_sort.astype(numpy.int64) + # Majority vote + # pylint: disable=protected-access + label_indices = self._y[arg_sort] + y_pred = self.majority_vote(label_indices) + y_preds.append(y_pred) + + return numpy.array(y_preds) class SklearnKNeighborsClassifierMixin(SklearnKNeighborsMixin, sklearn.base.ClassifierMixin, ABC): From 108f9228b54edb9aebb2bb22f458d4fa434aae8d Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 7 Sep 2023 15:38:52 +0200 Subject: [PATCH 32/51] chore: remove dequantization for sortargmax --- src/concrete/ml/sklearn/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index e4666ec79..63b1666ef 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1843,10 +1843,10 @@ def quantize_input(self, X: numpy.ndarray) -> numpy.ndarray: def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray: self.check_model_is_fitted() - # De-quantize the output values - y_preds = self.output_quantizers[0].dequant(q_y_preds) + # We compute the sorted argmax in FHE, which are integers. + # No need to de-quantize the output values - return y_preds + return q_y_preds def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]: # Define the inference function to compile. 
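The majority_vote rewrite above collapses the per-query loop into a single bincount/argmax pair, since predict now feeds it one query at a time. In clear text the reduced logic is simply:

    import numpy

    # Labels of the k nearest neighbors for a single query (k = 5 here)
    nearest_classes = numpy.array([1, 0, 1, 1, 2])

    # bincount tallies occurrences per class id, argmax picks the most frequent;
    # ties resolve to the smallest class id, matching numpy.argmax semantics
    class_counts = numpy.bincount(nearest_classes)  # -> [1, 3, 1]
    majority = numpy.argmax(class_counts)           # -> 1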
@@ -1998,7 +1998,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. for query in X: # Argsort arg_sort = super().predict(query[None], fhe) - arg_sort = arg_sort.astype(numpy.int64) + arg_sort = arg_sort.astype(numpy.int64)[: self.n_neighbors] # Majority vote # pylint: disable=protected-access label_indices = self._y[arg_sort] From 49785069955b8ddc2a0586cdae4c43506efb3db7 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 7 Sep 2023 17:31:51 +0200 Subject: [PATCH 33/51] chore: reduce even more the dataset size of knn --- src/concrete/ml/pytest/utils.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py index b53b00498..7781594ef 100644 --- a/src/concrete/ml/pytest/utils.py +++ b/src/concrete/ml/pytest/utils.py @@ -88,15 +88,30 @@ pytest.param( model, { - "n_samples": 50 if get_model_name(model) == "KNeighborsClassifier" else 1000, - "n_features": 3 if get_model_name(model) == "KNeighborsClassifier" else 10, + "n_samples": 1000, + "n_features": 10, "n_classes": n_classes, - "n_informative": 2 if get_model_name(model) == "KNeighborsClassifier" else 10, + "n_informative": 10, "n_redundant": 0, }, id=get_model_name(model), ) for model in _classifier_models + if get_model_name(model) != "KNeighborsClassifier" + for n_classes in [2, 4] +] + [ + pytest.param( + model, + { + "n_samples": 10, + "n_features": 3, + "n_classes": n_classes, + "n_informative": 2, + "n_redundant": 0, + }, + id=get_model_name(model), + ) + for model in [KNeighborsClassifier] for n_classes in [2] ] From fb633d39c6fb23bf6667585ff2cd0857ac7ed312 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 7 Sep 2023 17:33:03 +0200 Subject: [PATCH 34/51] chore: decrease the defaut n_bit of knn class to 4. for some inputs, the compilation is not possible --- src/concrete/ml/sklearn/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 63b1666ef..41baf6f76 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1711,7 +1711,7 @@ def __init_subclass__(cls): _NEIGHBORS_MODELS.add(cls) _ALL_SKLEARN_MODELS.add(cls) - def __init__(self, n_bits: Union[int, Dict[str, int]] = 8): + def __init__(self, n_bits: Union[int, Dict[str, int]] = 4): """Initialize the FHE knn model. 
Args: From 2cc6b263f92599f7e6cec1de429b731a603180a0 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 7 Sep 2023 17:34:31 +0200 Subject: [PATCH 35/51] chore: fix test_dump_onn --- tests/sklearn/test_dump_onnx.py | 45 +++++++++++++++++++++++++++- tests/sklearn/test_sklearn_models.py | 4 +++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index 00b22c4a9..f1949a6ca 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -9,6 +9,7 @@ import pytest from sklearn.exceptions import ConvergenceWarning +from concrete import fhe from concrete.ml.common.utils import is_model_class_in_a_list from concrete.ml.pytest.utils import get_model_name, sklearn_models_and_datasets from concrete.ml.sklearn import get_sklearn_tree_models @@ -35,6 +36,11 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau model.set_params(**model_params) + if get_model_name(model) == "KNeighborsClassifier": + model.n_bits = 4 + default_configuration.parameter_selection_strategy = fhe.ParameterSelectionStrategy.MONO + default_configuration.single_precision = True + with warnings.catch_warnings(): # Sometimes, we miss convergence, which is not a problem for our test warnings.simplefilter("ignore", category=ConvergenceWarning) @@ -44,7 +50,6 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau with warnings.catch_warnings(): # Use FHE simulation to not have issues with precision model.compile(x, default_configuration) - # Get ONNX model onnx_model = model.onnx_model @@ -56,6 +61,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau "RandomForestClassifier", "RandomForestRegressor", "XGBClassifier", + "KNeighborsClassifier", ]: while len(onnx_model.graph.initializer) > 0: del onnx_model.graph.initializer[0] @@ -415,6 +421,43 @@ def test_dump( ) { %variable = Gemm[alpha = 1, beta = 1](%input_0, %_operators.0.coefficients, %_operators.0.intercepts) return %variable +}""", + "KNeighborsClassifier": """graph torch_jit ( + %input_0[DOUBLE, symx3] +) { + %/_operators.0/Constant_output_0 = Constant[value = ]() + %/_operators.0/Unsqueeze_output_0 = Unsqueeze(%input_0, %/_operators.0/Constant_output_0) + %/_operators.0/Constant_1_output_0 = Constant[value = ]() + %/_operators.0/Sub_output_0 = Sub(%/_operators.0/Unsqueeze_output_0, %onnx::Sub_46) + %/_operators.0/Constant_2_output_0 = Constant[value = ]() + %/_operators.0/Pow_output_0 = Pow(%/_operators.0/Sub_output_0, %/_operators.0/Constant_2_output_0) + %/_operators.0/Constant_3_output_0 = Constant[value = ]() + %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0, noop_with_empty_axes = 0](%/_operators.0/Pow_output_0, %/_operators.0/Constant_3_output_0) + %/_operators.0/Pow_1_output_0 = Pow(%/_operators.0/ReduceSum_output_0, %/_operators.0/Constant_1_output_0) + %/_operators.0/Constant_4_output_0 = Constant[value = ]() + %/_operators.0/TopK_output_0, %/_operators.0/TopK_output_1 = TopK[axis = 1, largest = 0, sorted = 1](%/_operators.0/Pow_1_output_0, %/_operators.0/Constant_4_output_0) + %/_operators.0/Constant_5_output_0 = Constant[value = ]() + %/_operators.0/Reshape_output_0 = Reshape[allowzero = 0](%/_operators.0/TopK_output_1, %/_operators.0/Constant_5_output_0) + %/_operators.0/Gather_output_0 = Gather[axis = 0](%_operators.0.train_labels, %/_operators.0/Reshape_output_0) + %/_operators.0/Shape_output_0 = Shape(%/_operators.0/TopK_output_1) + 
%/_operators.0/ConstantOfShape_output_0 = ConstantOfShape[value = ](%/_operators.0/Shape_output_0) + %/_operators.0/Constant_6_output_0 = Constant[value = ]() + %/_operators.0/Reshape_1_output_0 = Reshape[allowzero = 0](%/_operators.0/Gather_output_0, %/_operators.0/Constant_6_output_0) + %/_operators.0/Constant_7_output_0 = Constant[value = ]() + %/_operators.0/ScatterElements_output_0 = ScatterElements[axis = 1](%/_operators.0/Constant_7_output_0, %/_operators.0/Reshape_1_output_0, %/_operators.0/ConstantOfShape_output_0) + %/_operators.0/Constant_8_output_0 = Constant[value = ]() + %/_operators.0/Add_output_0 = Add(%/_operators.0/Constant_8_output_0, %/_operators.0/ScatterElements_output_0) + %onnx::ReduceSum_36 = Constant[value = ]() + %/_operators.0/ReduceSum_1_output_0 = ReduceSum[keepdims = 1](%/_operators.0/Add_output_0, %onnx::ReduceSum_36) + %/_operators.0/Constant_9_output_0 = Constant[value = ]() + %/_operators.0/Equal_output_0 = Equal(%/_operators.0/ReduceSum_1_output_0, %/_operators.0/Constant_9_output_0) + %/_operators.0/Constant_10_output_0 = Constant[value = ]() + %/_operators.0/Where_output_0 = Where(%/_operators.0/Equal_output_0, %/_operators.0/Constant_10_output_0, %/_operators.0/ReduceSum_1_output_0) + %/_operators.0/Constant_11_output_0 = Constant[value = ]() + %/_operators.0/Pow_2_output_0 = Pow(%/_operators.0/Where_output_0, %/_operators.0/Constant_11_output_0) + %onnx::ArgMax_44 = Mul(%/_operators.0/Pow_2_output_0, %/_operators.0/Add_output_0) + %variable = ArgMax[axis = 1, keepdims = 0, select_last_index = 0](%onnx::ArgMax_44) + return %variable, %onnx::ArgMax_44 }""", } diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 8d12d9462..c27293692 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -474,6 +474,7 @@ def check_subfunctions(fitted_model, model_class, x): fitted_model.predict_proba(x) if get_model_name(fitted_model) == "KNeighborsClassifier": + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 pytest.skip("Skipping subfunctions test for KNN, doesn't work for now") if is_classifier_or_partial_classifier(model_class): @@ -573,6 +574,7 @@ def cast_input(x, y, input_type): # Similarly, we test `predict_proba` for classifiers if is_classifier_or_partial_classifier(model): if get_model_name(model_class) == "KNeighborsClassifier": + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 pytest.skip("Skipping predict_proba for KNN, doesn't work for now") model.predict_proba(x) @@ -676,6 +678,7 @@ def check_grid_search(model_class, x, y, scoring): "roc_auc", "average_precision", ]: + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 pytest.skip("Skipping predict_proba for KNN, doesn't work for now") _ = GridSearchCV( @@ -720,6 +723,7 @@ def check_sklearn_equivalence(model_class, n_bits, x, y, check_accuracy, check_r # Else, compute the model's predicted probabilities # predict_proba not implemented for KNeighborsClassifier for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 elif get_model_name(model_class) != "KNeighborsClassifier": y_pred_cml = model.predict_proba(x) y_pred_sklearn = sklearn_model.predict_proba(x) From fd5ff5814aa78b5340c9da2398dff072681bc57d Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 8 Sep 2023 11:19:11 +0200 Subject: [PATCH 36/51] chore: fix double_fit test for KNN --- tests/sklearn/test_sklearn_models.py | 36 ++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 
deletions(-) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index c27293692..543b970c0 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -275,10 +275,18 @@ def check_double_fit(model_class, n_bits, x_1, x_2, y_1, y_2): # Check that the new quantizers are different from the first ones. This is because we # currently expect all quantizers to be re-computed when re-fitting a model - assert all( - quantizer_1 != quantizer_2 - for (quantizer_1, quantizer_2) in zip(quantizers_1, quantizers_2) - ) + + # For now, in KNN, we compute the pairwise Euclidean distance between the encrypted + # X and each element in the database. + # Then, we return the indices of the k closest distances to this point. + # The exact precision of computation of the quantization and dequantization parameters + # is not relevant in this case. That's why the assertion test is being ignored + # for now in the context of the KNN algorithm. + if get_model_name(model) != "KNeighborsClassifier": + assert all( + quantizer_1 != quantizer_2 + for (quantizer_1, quantizer_2) in zip(quantizers_1, quantizers_2) + ) # Set the same torch seed manually before re-fitting the neural network if is_model_class_in_a_list(model_class, get_sklearn_neural_net_models()): @@ -298,13 +306,21 @@ def check_double_fit(model_class, n_bits, x_1, x_2, y_1, y_2): # Check that the new quantizers are identical from the first ones. Again, we expect the # quantizers to be re-computed when re-fitting. Since we used the same dataset as the first # fit, we also expect these quantizers to be the same. - assert all( - quantizer_1 == quantizer_3 - for (quantizer_1, quantizer_3) in zip( - input_quantizers_1 + output_quantizers_1, - input_quantizers_3 + output_quantizers_3, + + # For now, in KNN, we compute the pairwise Euclidean distance between the encrypted + # X and each element in the database. + # Then, we return the indices of the k closest distances to this point. + # The exact precision of computation of the quantization and dequantization parameters + # is not relevant in this case. That's why the assertion test is being ignored + # for now in the context of the KNN algorithm. + if get_model_name(model) != "KNeighborsClassifier": + assert all( + quantizer_1 == quantizer_3 + for (quantizer_1, quantizer_3) in zip( + input_quantizers_1 + output_quantizers_1, + input_quantizers_3 + output_quantizers_3, + ) ) - ) def check_serialization(model, x, use_dump_method): From 3faad7a58a96b1b03565995ea1090bbb2658e5c7 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 11 Sep 2023 13:46:19 +0200 Subject: [PATCH 37/51] chore: fix tests/common and tests/deployment --- src/concrete/ml/sklearn/base.py | 4 +-- src/concrete/ml/sklearn/neighbors.py | 4 +-- .../test_pbs_error_probability_settings.py | 21 ++++++++++++-- tests/deployment/test_client_server.py | 18 ++++++++++-- tests/sklearn/test_sklearn_models.py | 28 +++++++++++-------- 5 files changed, 55 insertions(+), 20 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 41baf6f76..4ecc21b0d 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1711,7 +1711,7 @@ def __init_subclass__(cls): _NEIGHBORS_MODELS.add(cls) _ALL_SKLEARN_MODELS.add(cls) - def __init__(self, n_bits: Union[int, Dict[str, int]] = 4): + def __init__(self, n_bits: Union[int, Dict[str, int]] = 5): """Initialize the FHE knn model. 
Args: @@ -1721,7 +1721,7 @@ def __init__(self, n_bits: Union[int, Dict[str, int]] = 4): corresponding number of quantization bits so that: - op_inputs : number of bits to quantize the input values - op_weights: number of bits to quantize the learned parameters - Default to 8. + Default to 3. """ self.n_bits: Union[int, Dict[str, int]] = n_bits diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 6a28fc678..2a4982643 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -28,8 +28,8 @@ class KNeighborsClassifier(SklearnKNeighborsClassifierMixin): def __init__( self, - n_bits=8, - n_neighbors=5, + n_bits=3, + n_neighbors=3, *, weights="uniform", algorithm="auto", diff --git a/tests/common/test_pbs_error_probability_settings.py b/tests/common/test_pbs_error_probability_settings.py index 31aad3aea..4066119eb 100644 --- a/tests/common/test_pbs_error_probability_settings.py +++ b/tests/common/test_pbs_error_probability_settings.py @@ -4,9 +4,12 @@ import numpy import pytest +from concrete.fhe.compilation import Configuration from sklearn.exceptions import ConvergenceWarning from torch import nn +from concrete import fhe +from concrete.ml.common.utils import get_model_name from concrete.ml.pytest.torch_models import FCSmall from concrete.ml.pytest.utils import sklearn_models_and_datasets from concrete.ml.torch.compile import compile_torch_model @@ -26,7 +29,7 @@ {"global_p_error": 0.038, "p_error": 0.39}, ], ) -def test_config_sklearn(model_class, parameters, kwargs, load_data): +def test_config_sklearn(model_class, parameters, kwargs, load_data, default_configuration): """Testing with p_error and global_p_error configs with sklearn models.""" x, y = load_data(model_class, **parameters) @@ -38,12 +41,24 @@ def test_config_sklearn(model_class, parameters, kwargs, load_data): # Fit the model model.fit(x, y) + if get_model_name(model_class) == "KNeighborsClassifier": + + default_configuration = Configuration( + dump_artifacts_on_unexpected_failures=False, + enable_unsafe_features=True, + use_insecure_key_cache=True, + insecure_key_cache_location="ConcreteNumpyKeyCache", + parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO, + single_precision=True, + ) + if kwargs.get("p_error", None) is not None and kwargs.get("global_p_error", None) is not None: with pytest.raises(ValueError) as excinfo: - model.compile(x, verbose=True, **kwargs) + model.compile(x, default_configuration, verbose=True, **kwargs) assert "Please only set one of (p_error, global_p_error) values" in str(excinfo.value) else: - model.compile(x, verbose=True, **kwargs) + + model.compile(x, default_configuration, verbose=True, **kwargs) # We still need to check that we have the expected probabilities # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2206 diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py index 783cd07ab..f5e4a8e43 100644 --- a/tests/deployment/test_client_server.py +++ b/tests/deployment/test_client_server.py @@ -9,9 +9,12 @@ import numpy import pytest +from concrete.fhe.compilation import Configuration from sklearn.exceptions import ConvergenceWarning from torch import nn +from concrete import fhe +from concrete.ml.common.utils import get_model_name from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer from concrete.ml.pytest.torch_models import FCSmall from concrete.ml.pytest.utils import instantiate_model_generic, 
sklearn_models_and_datasets @@ -67,7 +70,7 @@ def cleanup(self): @pytest.mark.parametrize("model_class, parameters", sklearn_models_and_datasets) -@pytest.mark.parametrize("n_bits", [3]) +@pytest.mark.parametrize("n_bits", [2]) def test_client_server_sklearn( default_configuration, model_class, @@ -95,10 +98,21 @@ def test_client_server_sklearn( # Compile extra_params = {"global_p_error": 1 / 100_000} + if get_model_name(model_class) == "KNeighborsClassifier": + + default_configuration = Configuration( + dump_artifacts_on_unexpected_failures=False, + enable_unsafe_features=True, + use_insecure_key_cache=True, + insecure_key_cache_location="ConcreteNumpyKeyCache", + parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO, + single_precision=True, + ) + # Running the simulation using a model that is not compiled should not be possible with pytest.raises(AttributeError, match=".* model is not compiled.*"): client_server_simulation(x_train, x_test, model, default_configuration) - + # With n_bits = 3, KNN is not compilable fhe_circuit = model.compile( x_train, default_configuration, **extra_params, show_mlir=(n_bits <= 8) ) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 543b970c0..aa45d837f 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -276,11 +276,11 @@ def check_double_fit(model_class, n_bits, x_1, x_2, y_1, y_2): # Check that the new quantizers are different from the first ones. This is because we # currently expect all quantizers to be re-computed when re-fitting a model - # For now, in KNN, we compute the pairwise Euclidean distance between the encrypted - # X and each element in the database. - # Then, we return the indices of the k closest distances to this point. - # The exact precision of computation of the quantization and dequantization parameters - # is not relevant in this case. That's why the assertion test is being ignored + # For now, in KNN, we compute the pairwise Euclidean distance between the encrypted + # X and each element in the database. + # Then, we return the indices of the k closest distances to this point. + # The exact precision of computation of the quantization and dequantization parameters + # is not relevant in this case. That's why the assertion test is being ignored # for now in the context of the KNN algorithm. if get_model_name(model) != "KNeighborsClassifier": assert all( @@ -307,11 +307,11 @@ def check_double_fit(model_class, n_bits, x_1, x_2, y_1, y_2): # quantizers to be re-computed when re-fitting. Since we used the same dataset as the first # fit, we also expect these quantizers to be the same. - # For now, in KNN, we compute the pairwise Euclidean distance between the encrypted - # X and each element in the database. - # Then, we return the indices of the k closest distances to this point. - # The exact precision of computation of the quantization and dequantization parameters - # is not relevant in this case. That's why the assertion test is being ignored + # For now, in KNN, we compute the pairwise Euclidean distance between the encrypted + # X and each element in the database. + # Then, we return the indices of the k closest distances to this point. + # The exact precision of computation of the quantization and dequantization parameters + # is not relevant in this case. That's why the assertion test is being ignored # for now in the context of the KNN algorithm. 
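The comments above justify skipping the quantizer (in)equality assertions for KNN. What those assertions compare are the scale and zero-point each fit derives from its calibration data, and two different datasets generally produce different parameters. A small illustration with textbook min-max uniform quantization (Concrete ML's UniformQuantizer computes equivalent quantities internally; this is not its actual code):

    import numpy

    def quant_params(values, n_bits=3):
        # Signed min-max uniform quantization: map [v_min, v_max] onto the
        # integer grid [-2^(n-1), 2^(n-1) - 1]
        q_min, q_max = -(2 ** (n_bits - 1)), 2 ** (n_bits - 1) - 1
        v_min, v_max = values.min(), values.max()
        scale = (v_max - v_min) / (q_max - q_min)
        zero_point = numpy.round(q_min - v_min / scale)
        return scale, zero_point

    rng = numpy.random.default_rng(0)
    x_1 = rng.normal(size=100)
    x_2 = rng.normal(loc=2.0, size=100)

    # Different data ranges lead to different (scale, zero_point) pairs, which
    # is why re-fitting on new data is expected to change the quantizers
    print(quant_params(x_1))
    print(quant_params(x_2))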
if get_model_name(model) != "KNeighborsClassifier": assert all( @@ -1461,10 +1461,16 @@ def test_predict_correctness( print("Compile the model") with warnings.catch_warnings(): + from concrete import fhe + + if get_model_name(model) == "KNeighborsClassifier": + default_configuration.parameter_selection_strategy = ( + fhe.ParameterSelectionStrategy.MONO + ) fhe_circuit = model.compile( x, default_configuration, - show_mlir=verbose and (n_bits <= 8), + show_mlir=False, ) check_properties_of_circuit(model_class, fhe_circuit, check_circuit_has_no_tlu) From c1ef09ba0492f640b05f7d844582fea62af816f8 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 11 Sep 2023 14:14:36 +0200 Subject: [PATCH 38/51] chore: fix parameter_search test --- src/concrete/ml/pytest/utils.py | 2 +- .../ml/search_parameters/p_error_search.py | 21 +++++++++++++++++-- .../test_p_error_binary_search.py | 8 ++++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py index 7781594ef..3bef4b8a1 100644 --- a/src/concrete/ml/pytest/utils.py +++ b/src/concrete/ml/pytest/utils.py @@ -159,7 +159,7 @@ def get_random_extract_of_sklearn_models_and_datasets(): unique_model_classes.append(m) # To avoid to make mistakes and return empty list - assert len(sklearn_models_and_datasets) == 30 + assert len(sklearn_models_and_datasets) == 29 assert len(unique_model_classes) == 19 return unique_model_classes diff --git a/src/concrete/ml/search_parameters/p_error_search.py b/src/concrete/ml/search_parameters/p_error_search.py index dbed2c1f7..7205a08fd 100644 --- a/src/concrete/ml/search_parameters/p_error_search.py +++ b/src/concrete/ml/search_parameters/p_error_search.py @@ -58,9 +58,12 @@ import numpy import torch +from concrete.fhe.compilation import Configuration from tqdm import tqdm -from ..common.utils import is_brevitas_model, is_model_class_in_a_list +from concrete import fhe + +from ..common.utils import get_model_name, is_brevitas_model, is_model_class_in_a_list from ..sklearn import ( get_sklearn_neighbors_models, get_sklearn_neural_net_models, @@ -108,6 +111,16 @@ def compile_and_simulated_fhe_inference( """ compile_params: Dict = {} + + default_configuration = Configuration( + dump_artifacts_on_unexpected_failures=False, + enable_unsafe_features=True, + use_insecure_key_cache=True, + insecure_key_cache_location="ConcreteNumpyKeyCache", + parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO + if get_model_name(estimator) == "KNeighborsClassifier" + else fhe.ParameterSelectionStrategy.MULTI, + ) compile_function: Callable[..., Any] dequantized_output: numpy.ndarray @@ -138,7 +151,11 @@ def compile_and_simulated_fhe_inference( if not estimator.is_fitted: estimator.fit(calibration_data, ground_truth) - estimator.compile(calibration_data, p_error=p_error) + estimator.compile( + calibration_data, + p_error=p_error, + configuration=default_configuration, + ) predict_method = getattr(estimator, predict) dequantized_output = predict_method(calibration_data, fhe="simulate") diff --git a/tests/parameter_search/test_p_error_binary_search.py b/tests/parameter_search/test_p_error_binary_search.py index d4cc20495..5ab3ffee6 100644 --- a/tests/parameter_search/test_p_error_binary_search.py +++ b/tests/parameter_search/test_p_error_binary_search.py @@ -312,7 +312,13 @@ def test_binary_search_for_built_in_models(model_class, parameters, threshold, p # Skorch but since Scikit-Learn does not, we don't as well. 
This issue could be fixed by making
    # neural networks not inherit from Skorch.
    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3373
-    if predict == "predict_proba" and get_model_name(model_class) == "NeuralNetRegressor":
+    # Skipping predict_proba for KNN, as it is not implemented for now.
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962
+
+    if predict == "predict_proba" and get_model_name(model_class) in [
+        "NeuralNetRegressor",
+        "KNeighborsClassifier",
+    ]:
         return
 
     metric = r2_score if is_regressor_or_partial_regressor(model) else binary_classification_metric

From b6ec7fcf1f5961241747050afdcefd8d23084360 Mon Sep 17 00:00:00 2001
From: kcelia
Date: Mon, 11 Sep 2023 15:06:25 +0200
Subject: [PATCH 39/51] chore: fix test_mono_param_warning

---
 tests/sklearn/test_sklearn_models.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py
index aa45d837f..6b2677ddb 100644
--- a/tests/sklearn/test_sklearn_models.py
+++ b/tests/sklearn/test_sklearn_models.py
@@ -1461,11 +1461,10 @@ def test_predict_correctness(
     print("Compile the model")
     with warnings.catch_warnings():
-        from concrete import fhe
 
         if get_model_name(model) == "KNeighborsClassifier":
             default_configuration.parameter_selection_strategy = (
-                fhe.ParameterSelectionStrategy.MONO
+                ParameterSelectionStrategy.MONO
             )
         fhe_circuit = model.compile(
             x,
             default_configuration,
@@ -1690,6 +1689,10 @@ def test_mono_parameter_warnings(
     if is_model_class_in_a_list(model_class, get_sklearn_linear_models()):
         return
 
+    # KNN only works with ParameterSelectionStrategy.MONO
+    if is_model_class_in_a_list(model_class, get_sklearn_neighbors_models()):
+        pytest.skip("Skipping mono-parameter warning test for KNN, as it only supports MONO")
+
     n_bits = min(N_BITS_REGULAR_BUILDS)
 
     model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option)

From f158db7ca85de41367ea16ebc698168a85842189 Mon Sep 17 00:00:00 2001
From: kcelia
Date: Mon, 11 Sep 2023 15:52:38 +0200
Subject: [PATCH 40/51] chore: fix grid_search test

---
 tests/sklearn/test_sklearn_models.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py
index 6b2677ddb..f925c3e50 100644
--- a/tests/sklearn/test_sklearn_models.py
+++ b/tests/sklearn/test_sklearn_models.py
@@ -41,7 +41,7 @@ import torch
 from concrete.fhe import ParameterSelectionStrategy
 from sklearn.decomposition import PCA
-from sklearn.exceptions import ConvergenceWarning
+from sklearn.exceptions import ConvergenceWarning, UndefinedMetricWarning
 from sklearn.metrics import make_scorer, matthews_corrcoef, top_k_accuracy_score
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
@@ -681,6 +681,10 @@ def check_grid_search(model_class, x, y, scoring):
             "n_estimators": [5, 10],
             "n_jobs": [1],
         }
+    elif model_class in get_sklearn_neighbors_models():
+        param_grid = {
+            "n_bits": [3],
+        }
     else:
         param_grid = {
             "n_bits": [20],
@@ -689,6 +693,7 @@ def check_grid_search(model_class, x, y, scoring):
     with warnings.catch_warnings():
         # Sometimes, we miss convergence, which is not a problem for our test
         warnings.simplefilter("ignore", category=ConvergenceWarning)
+        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
 
         if get_model_name(model_class) == "KNeighborsClassifier" and scoring in [
             "roc_auc",

From 4ea78faf60dac6baf7784e7d97b9521de46da643 Mon Sep 17 00:00:00 2001
From: kcelia
Date: Mon, 11 Sep 2023 15:53:09 +0200
Subject:
[PATCH 41/51] chore: fix predict_correctness --- tests/sklearn/test_sklearn_models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index f925c3e50..67872af72 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1447,6 +1447,9 @@ def test_predict_correctness( f"number_of_tests_in_non_fhe = {number_of_tests_in_non_fhe})" ) + if n_bits > 5 and get_model_name(model) == "KNeighborsClassifier": + pytest.skip("Use less than 5 bits with KNN.") + y_pred = model.predict(x[:number_of_tests_in_non_fhe]) list_of_possibilities = [False, True] From 41c7cc5e28580971474823a95386833520cb9667 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 12 Sep 2023 15:19:43 +0200 Subject: [PATCH 42/51] chore: fix check_fitted_compiled_error_raises --- tests/sklearn/test_sklearn_models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 67872af72..73c01b8b1 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -894,6 +894,9 @@ def check_fitted_compiled_error_raises(model_class, n_bits, x, y): model.predict(x) if is_classifier_or_partial_classifier(model_class): + if get_model_name(model) == "KNeighborsClassifier": + print("merde") + pytest.skip("predict_proba not implement for KNN") # Predicting probabilities using an untrained linear or tree-based classifier should not # be possible if not is_model_class_in_a_list(model_class, get_sklearn_neural_net_models()): From 1797dcfa2a593fe9ea766e697ce3c62dd2732225 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 12 Sep 2023 19:05:59 +0200 Subject: [PATCH 43/51] chore: update --- .../ml/search_parameters/p_error_search.py | 7 +- src/concrete/ml/sklearn/base.py | 75 ++++++++++++++----- src/concrete/ml/sklearn/neighbors.py | 1 + tests/sklearn/test_sklearn_models.py | 7 +- 4 files changed, 67 insertions(+), 23 deletions(-) diff --git a/src/concrete/ml/search_parameters/p_error_search.py b/src/concrete/ml/search_parameters/p_error_search.py index 7205a08fd..eec213001 100644 --- a/src/concrete/ml/search_parameters/p_error_search.py +++ b/src/concrete/ml/search_parameters/p_error_search.py @@ -58,11 +58,10 @@ import numpy import torch +from concrete.fhe import ParameterSelectionStrategy from concrete.fhe.compilation import Configuration from tqdm import tqdm -from concrete import fhe - from ..common.utils import get_model_name, is_brevitas_model, is_model_class_in_a_list from ..sklearn import ( get_sklearn_neighbors_models, @@ -117,9 +116,9 @@ def compile_and_simulated_fhe_inference( enable_unsafe_features=True, use_insecure_key_cache=True, insecure_key_cache_location="ConcreteNumpyKeyCache", - parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO + parameter_selection_strategy=ParameterSelectionStrategy.MONO if get_model_name(estimator) == "KNeighborsClassifier" - else fhe.ParameterSelectionStrategy.MULTI, + else ParameterSelectionStrategy.MULTI, ) compile_function: Callable[..., Any] dequantized_output: numpy.ndarray diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 4ecc21b0d..262a25072 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -19,6 +19,8 @@ import skorch.net import torch from brevitas.export.onnx.qonnx.manager import QONNXManager as BrevitasONNXManager +from concrete.fhe import array as fhe_array +from concrete.fhe import zeros as fhe_zeros 
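The grid-search fix above restricts KNN to a single low n_bits candidate. A hedged usage sketch of the same idea from the outside, assuming Concrete ML's KNeighborsClassifier is importable from concrete.ml.sklearn and exposes n_bits as a constructor parameter, as the param_grid suggests:

    # Sketch under assumptions: the estimator and its n_bits parameter come from
    # Concrete ML; the data is a stand-in for the test fixtures.
    from sklearn.datasets import make_classification
    from sklearn.model_selection import GridSearchCV
    from concrete.ml.sklearn import KNeighborsClassifier

    x, y = make_classification(
        n_samples=10, n_features=3, n_informative=2, n_redundant=0, random_state=0
    )

    # Keep the grid tiny: each candidate is fitted and scored in the clear, and
    # a low n_bits keeps any later FHE compilation tractable
    search = GridSearchCV(KNeighborsClassifier(n_neighbors=3), {"n_bits": [3]}, cv=2)
    search.fit(x, y)
    print(search.best_params_)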
from concrete.fhe.compilation.artifacts import DebugArtifacts from concrete.fhe.compilation.circuit import Circuit from concrete.fhe.compilation.compiler import Compiler @@ -26,8 +28,6 @@ from concrete.fhe.dtypes.integer import Integer from sklearn.base import clone -from concrete import fhe - from ..common.check_inputs import check_array_and_assert, check_X_y_and_assert_multi_output from ..common.debugging.custom_assert import assert_true from ..common.serialization.dumpers import dump, dumps @@ -1711,7 +1711,7 @@ def __init_subclass__(cls): _NEIGHBORS_MODELS.add(cls) _ALL_SKLEARN_MODELS.add(cls) - def __init__(self, n_bits: Union[int, Dict[str, int]] = 5): + def __init__(self, n_bits: Union[int, Dict[str, int]] = 3): """Initialize the FHE knn model. Args: @@ -1920,40 +1920,73 @@ def pairwise_euclidean_distance(q_X): + numpy.expand_dims(numpy.sum(self._q_X_fit**2, axis=1), 0) ) - distance_matrix = pairwise_euclidean_distance(q_X) + def topk_sorting(x): + """Argsort in FHE. - # sqr not done + Args: + x (numpy.ndarray): The quantized input values. + + Returns: + numpy.ndarray: The argsort. + """ - # 2. Sorting - def topk_sorting(x): def gather1d(x, indices): - """Select x[indices].""" + """Select elements from the input array `x` using the provided `indices`. + + Args: + x (numpy.ndarray): The encrypted input array + indices (numpy.ndarray): The desired indexes + + Returns: + numpy.ndarray: The selected encrypted indexes. + """ arr = [] for i in indices: arr.append(x[i]) - enc_arr = fhe.array(arr) + enc_arr = fhe_array(arr) return enc_arr def scatter1d(x, v, indices): + """Rearrange elements of `x` with values from `v` at the specified `indices`. + + Args: + x (numpy.ndarray): The encrypted input array in which items will be updated + v (numpy.ndarray): The array containing values to be inserted into `x` + at the specified `indices`. + indices (numpy.ndarray): The indices indicating where to insert the elements + from `v` into `x`. + + Returns: + numpy.ndarray: The updated encrypted `x` + """ for idx, i in enumerate(indices): x[i] = v[idx] return x def mul_tlu(a, b): + """Matrix multiplication. + + Args: + a (numpy.ndarray): An encrypted array + b (numpy.ndarray): An encrypted array + + Returns: + numpy.ndarray: The result of a * b + """ return a * b - idx = numpy.arange(x.size) + fhe.zeros(x.shape) comparisons = numpy.zeros(x.shape) - n = x.size - k = self.n_neighbors + idx = numpy.arange(x.size) + fhe_zeros(x.shape) + n, k = x.size, self.n_neighbors ln2n = int(numpy.ceil(numpy.log2(n))) + for t in range(ln2n - 1, -1, -1): p = 2**t r = 0 d = p - for bq in range(ln2n - 1, t - 1, -1): # q = 2^(t-1), 2^(t-2), ..., p + for bq in range(ln2n - 1, t - 1, -1): q = 2**bq range_i = numpy.array( [i for i in range(0, n - d) if i & p == r and comparisons[i] < k] @@ -1974,9 +2007,9 @@ def mul_tlu(a, b): x = scatter1d(x, max_x, range_i + d) # x[range_i + d] = max_x max_idx = a_i + mul_tlu((b_i - a_i), sign) - idx = scatter1d( - idx, a_i + b_i - max_idx, range_i - ) # idx[range_i] = a_i + b_i - max_idx + + # idx[range_i] = a_i + b_i - max_idx + idx = scatter1d(idx, a_i + b_i - max_idx, range_i) idx = scatter1d(idx, max_idx, range_i + d) # idx[range_i + d] = max_idx comparisons[range_i + d] = comparisons[range_i + d] + 1 @@ -1984,9 +2017,15 @@ def mul_tlu(a, b): d = q - p r = p - return numpy.concatenate((x.reshape((1, -1)), idx.reshape((1, -1))), axis=0) + return idx + + # 1. 
Pairwise_euclidiean distance + distance_matrix = pairwise_euclidean_distance(q_X) + + # sqr not done - _, sorted_args = topk_sorting(distance_matrix.flatten()) + # 2. Sorting args + sorted_args = topk_sorting(distance_matrix.flatten()) return sorted_args diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 2a4982643..d7dad8639 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -50,6 +50,7 @@ def __init__( self.metric_params = metric_params self.n_jobs = n_jobs self.weights = weights + self._y = None def dump_dict(self) -> Dict[str, Any]: assert self._weight_quantizer is not None, self._is_not_fitted_error_message() diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 73c01b8b1..df617b543 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1550,6 +1550,7 @@ def test_p_error_global_p_error_simulation( parameters, error_param, load_data, + default_configuration, is_weekly_option, ): """Test p_error and global_p_error simulation. @@ -1565,6 +1566,10 @@ def test_p_error_global_p_error_simulation( # Get data-set n_bits = min(N_BITS_REGULAR_BUILDS) + if get_model_name(model_class) == "KNeighborsClassifier": + n_bits = min(n_bits, 5) + default_configuration.parameter_selection_strategy = ParameterSelectionStrategy.MONO + default_configuration.single_precision = True # Initialize and fit the model model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) @@ -1575,7 +1580,7 @@ def test_p_error_global_p_error_simulation( ) # Compile with a large p_error to be sure the result is random. - model.compile(x, **error_param) + model.compile(x, default_configuration, **error_param) def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_RUN): """Detect divergence between simulated/FHE execution and clear run.""" From e94026c404f38e1635aed20470b809cd3d54a717 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 13 Sep 2023 16:27:47 +0200 Subject: [PATCH 44/51] chore: fix bug in prediction + fix p_error_simulation test --- src/concrete/ml/sklearn/base.py | 22 +++++++++++++++---- tests/sklearn/test_sklearn_models.py | 33 ++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 262a25072..ae0801f4f 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -657,7 +657,10 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. # Execute the inference in FHE or with simulation q_y_pred_i = predict_method(q_X_i) - q_y_pred_list.append(q_y_pred_i[0]) + if self.__class__.__name__ == "KNeighborsClassifier": + q_y_pred_list.append(q_y_pred_i) + else: + q_y_pred_list.append(q_y_pred_i[0]) q_y_pred = numpy.array(q_y_pred_list) @@ -1846,6 +1849,10 @@ def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray: # We compute the sorted argmax in FHE, which are integers. 
# No need to de-quantize the output values + assert q_y_preds.shape[-1] == self.n_neighbors, ( + f"Shape error: `q_y_preds` must be shape of ({self.n_neighbors},) and got:" + f"`{q_y_preds.shape}`" + ) return q_y_preds def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]: @@ -2017,7 +2024,14 @@ def mul_tlu(a, b): d = q - p r = p - return idx + x = [] + for i in range((self.n_neighbors)): + x.append(idx[i]) + x = fhe_array(x) + + assert x.shape[0] == self.n_neighbors + + return x # 1. Pairwise_euclidiean distance distance_matrix = pairwise_euclidean_distance(q_X) @@ -2037,10 +2051,10 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. for query in X: # Argsort arg_sort = super().predict(query[None], fhe) - arg_sort = arg_sort.astype(numpy.int64)[: self.n_neighbors] + assert arg_sort.size == self.n_neighbors # Majority vote # pylint: disable=protected-access - label_indices = self._y[arg_sort] + label_indices = self._y[arg_sort.flatten()] y_pred = self.majority_vote(label_indices) y_preds.append(y_pred) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index df617b543..83f2db7e9 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -117,7 +117,9 @@ def get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option): """Prepare the the (x, y) data-set.""" - if not is_model_class_in_a_list(model_class, get_sklearn_linear_models()): + if not is_model_class_in_a_list( + model_class, get_sklearn_linear_models() + get_sklearn_neighbors_models() + ): if n_bits in N_BITS_WEEKLY_ONLY_BUILDS and not is_weekly_option: pytest.skip("Skipping some tests in non-weekly builds, except for linear models") @@ -130,7 +132,9 @@ def get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option): def preamble(model_class, parameters, n_bits, load_data, is_weekly_option): """Prepare the fitted model, and the (x, y) data-set.""" - if not is_model_class_in_a_list(model_class, get_sklearn_linear_models()): + if not is_model_class_in_a_list( + model_class, get_sklearn_linear_models() + get_sklearn_neighbors_models() + ): if n_bits in N_BITS_WEEKLY_ONLY_BUILDS and not is_weekly_option: pytest.skip("Skipping some tests in non-weekly builds") @@ -895,7 +899,6 @@ def check_fitted_compiled_error_raises(model_class, n_bits, x, y): if is_classifier_or_partial_classifier(model_class): if get_model_name(model) == "KNeighborsClassifier": - print("merde") pytest.skip("predict_proba not implement for KNN") # Predicting probabilities using an untrained linear or tree-based classifier should not # be possible @@ -1567,17 +1570,17 @@ def test_p_error_global_p_error_simulation( # Get data-set n_bits = min(N_BITS_REGULAR_BUILDS) if get_model_name(model_class) == "KNeighborsClassifier": - n_bits = min(n_bits, 5) + n_bits = min(n_bits, 2) default_configuration.parameter_selection_strategy = ParameterSelectionStrategy.MONO - default_configuration.single_precision = True # Initialize and fit the model model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) # Check if model is linear - is_linear_model = is_model_class_in_a_list( - model_class, get_sklearn_linear_models() + get_sklearn_neighbors_models() - ) + is_linear_model = is_model_class_in_a_list(model_class, get_sklearn_linear_models()) + + # Check if model is linear + is_knn_model = is_model_class_in_a_list(model_class, get_sklearn_neighbors_models()) # Compile with a large p_error to be sure the result is 
random. model.compile(x, default_configuration, **error_param) @@ -1599,9 +1602,21 @@ def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_ return True return False + if is_knn_model: + # In the case of KNN, a large `p_error` results in indexes larger than expected, which will + # trigger an IndexError + with pytest.raises(IndexError, match=".* is out of bounds for axis 0 with size .*"): + simulation_diff_found = check_for_divergent_predictions(x, model, fhe="simulate") + fhe_diff_found = check_for_divergent_predictions(x, model, fhe="execute") + + assert simulation_diff_found, ( + "Due to large p_error, " + "simulate predictions should be different from the expected predictions." + ) + return + simulation_diff_found = check_for_divergent_predictions(x, model, fhe="simulate") fhe_diff_found = check_for_divergent_predictions(x, model, fhe="execute") - # Check for differences in predictions # Remark that, with the old VL, linear models (or, more generally, circuits without PBS) were # badly simulated. It has been fixed in the new simulation. From aeb9196bfd8e3392e133059659dd6da84a77e864 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 13 Sep 2023 16:42:12 +0200 Subject: [PATCH 45/51] chore: resume show_mlir --- tests/sklearn/test_sklearn_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 83f2db7e9..931d0c322 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1483,7 +1483,7 @@ def test_predict_correctness( fhe_circuit = model.compile( x, default_configuration, - show_mlir=False, + show_mlir=verbose and (n_bits <= 8), ) check_properties_of_circuit(model_class, fhe_circuit, check_circuit_has_no_tlu) From 0794adc050047d6600fddac9edaa6ed25062c2a1 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 18 Sep 2023 10:46:02 +0200 Subject: [PATCH 46/51] chore: reduce knn dataset --- src/concrete/ml/pytest/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py index 3bef4b8a1..b996f15c4 100644 --- a/src/concrete/ml/pytest/utils.py +++ b/src/concrete/ml/pytest/utils.py @@ -103,8 +103,8 @@ pytest.param( model, { - "n_samples": 10, - "n_features": 3, + "n_samples": 6, + "n_features": 2, "n_classes": n_classes, "n_informative": 2, "n_redundant": 0, From ca03c3cf6809990c01931a53f88c90b798e8a60c Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 18 Sep 2023 10:47:04 +0200 Subject: [PATCH 47/51] chore: update fix workaround remove top_k_indices --- src/concrete/ml/sklearn/base.py | 47 ++++++++++++--------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index ae0801f4f..83f40bf1f 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -632,6 +632,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. for q_X_i in q_X: # Expected encrypt_run_decrypt output shape is (1, n_features) while q_X_i # is of shape (n_features,) + q_X_i = numpy.expand_dims(q_X_i, 0) # For mypy, even though we already check this with self.check_model_is_compiled() @@ -657,10 +658,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. 
# Execute the inference in FHE or with simulation q_y_pred_i = predict_method(q_X_i) - if self.__class__.__name__ == "KNeighborsClassifier": - q_y_pred_list.append(q_y_pred_i) - else: - q_y_pred_list.append(q_y_pred_i[0]) + q_y_pred_list.append(q_y_pred_i[0]) q_y_pred = numpy.array(q_y_pred_list) @@ -1849,7 +1847,7 @@ def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray: # We compute the sorted argmax in FHE, which are integers. # No need to de-quantize the output values - assert q_y_preds.shape[-1] == self.n_neighbors, ( + assert q_y_preds[0].shape[-1] == self.n_neighbors, ( f"Shape error: `q_y_preds` must be shape of ({self.n_neighbors},) and got:" f"`{q_y_preds.shape}`" ) @@ -1875,23 +1873,6 @@ def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray: return compiler - @staticmethod - def top_k_indices(distance_matrix: numpy.ndarray, k: int) -> numpy.ndarray: - """Get the indices of the top-k smallest distances for each point. - - Args: - distance_matrix (numpy.ndarray): Represents the pairwise euclidean distance between - the query and other points - k (int): The top nearest neighbors to consider - - Returns: - numpy.ndarray: The k nearest neighbors for the corresponding query, sorted in - ascending order. - """ - - # Sort the distances in an ascending order and select the k smallest distanes - return numpy.argsort(distance_matrix, axis=1)[:, :k] - @staticmethod def majority_vote(nearest_classes: numpy.ndarray): """Determine the most common class among nearest neighborsfor each query. @@ -2024,24 +2005,31 @@ def mul_tlu(a, b): d = q - p r = p - x = [] + topk_indexes = [] for i in range((self.n_neighbors)): - x.append(idx[i]) - x = fhe_array(x) + topk_indexes.append(idx[i]) + + topk_indexes = fhe_array(topk_indexes) - assert x.shape[0] == self.n_neighbors + assert topk_indexes.shape[0] == self.n_neighbors - return x + return topk_indexes # 1. Pairwise_euclidiean distance + # from concrete import fhe + # with fhe.tag(f"distance_matrix"): distance_matrix = pairwise_euclidean_distance(q_X) - # sqr not done + # The square root in the Euclidean distance calculation is not applied. + # Being a monotonic function, it does not affect the logic of the calculation, notably for + # for the argsort # 2. Sorting args + # with fhe.tag(f"sorted_args"): + sorted_args = topk_sorting(distance_matrix.flatten()) - return sorted_args + return numpy.expand_dims(sorted_args, axis=0) def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: @@ -2051,7 +2039,6 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. 
for query in X: # Argsort arg_sort = super().predict(query[None], fhe) - assert arg_sort.size == self.n_neighbors # Majority vote # pylint: disable=protected-access label_indices = self._y[arg_sort.flatten()] From 9d0a4dd2cd0c043ef16caaa58743e35b5ef2a0c2 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 18 Sep 2023 10:56:08 +0200 Subject: [PATCH 48/51] chore: force the configuration of KNN to run under MONO settings --- conftest.py | 2 + .../ml/search_parameters/p_error_search.py | 20 +-- src/concrete/ml/sklearn/base.py | 144 +++++++++--------- src/concrete/ml/sklearn/neighbors.py | 21 ++- .../test_pbs_error_probability_settings.py | 21 +-- tests/deployment/test_client_server.py | 15 +- tests/sklearn/test_dump_onnx.py | 10 +- tests/sklearn/test_sklearn_models.py | 81 ++++------ .../credit_scoring/CreditScoring.ipynb | 10 +- 9 files changed, 141 insertions(+), 183 deletions(-) diff --git a/conftest.py b/conftest.py index c4fa713c7..32ba7bae0 100644 --- a/conftest.py +++ b/conftest.py @@ -499,6 +499,8 @@ def check_is_good_execution_for_cml_vs_circuit_impl( # `check_subfunctions_in_fhe` if is_classifier_or_partial_classifier(model): if isinstance(model, SklearnKNeighborsMixin): + # For KNN `predict_proba` is not supported for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 results_cnp_circuit = model.predict(*inputs, fhe=fhe_mode) results_model = model.predict(*inputs, fhe="disable") else: diff --git a/src/concrete/ml/search_parameters/p_error_search.py b/src/concrete/ml/search_parameters/p_error_search.py index eec213001..dbed2c1f7 100644 --- a/src/concrete/ml/search_parameters/p_error_search.py +++ b/src/concrete/ml/search_parameters/p_error_search.py @@ -58,11 +58,9 @@ import numpy import torch -from concrete.fhe import ParameterSelectionStrategy -from concrete.fhe.compilation import Configuration from tqdm import tqdm -from ..common.utils import get_model_name, is_brevitas_model, is_model_class_in_a_list +from ..common.utils import is_brevitas_model, is_model_class_in_a_list from ..sklearn import ( get_sklearn_neighbors_models, get_sklearn_neural_net_models, @@ -110,16 +108,6 @@ def compile_and_simulated_fhe_inference( """ compile_params: Dict = {} - - default_configuration = Configuration( - dump_artifacts_on_unexpected_failures=False, - enable_unsafe_features=True, - use_insecure_key_cache=True, - insecure_key_cache_location="ConcreteNumpyKeyCache", - parameter_selection_strategy=ParameterSelectionStrategy.MONO - if get_model_name(estimator) == "KNeighborsClassifier" - else ParameterSelectionStrategy.MULTI, - ) compile_function: Callable[..., Any] dequantized_output: numpy.ndarray @@ -150,11 +138,7 @@ def compile_and_simulated_fhe_inference( if not estimator.is_fitted: estimator.fit(calibration_data, ground_truth) - estimator.compile( - calibration_data, - p_error=p_error, - configuration=default_configuration, - ) + estimator.compile(calibration_data, p_error=p_error) predict_method = getattr(estimator, predict) dequantized_output = predict_method(calibration_data, fhe="simulate") diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 83f40bf1f..97fb8149b 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -632,7 +632,6 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. 
for q_X_i in q_X:
             # Expected encrypt_run_decrypt output shape is (1, n_features) while q_X_i
             # is of shape (n_features,)
-
             q_X_i = numpy.expand_dims(q_X_i, 0)
 
             # For mypy, even though we already check this with self.check_model_is_compiled()
@@ -657,10 +656,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.
             # Execute the inference in FHE or with simulation
             q_y_pred_i = predict_method(q_X_i)
 
-            if self.__class__.__name__ == "KNeighborsClassifier":
-                q_y_pred_list.append(q_y_pred_i)
-            else:
-                q_y_pred_list.append(q_y_pred_i[0])
+            q_y_pred_list.append(q_y_pred_i[0])
 
         q_y_pred = numpy.array(q_y_pred_list)
 
@@ -1697,7 +1696,7 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) ->
     return y_proba
 
 
-# pylint: disable=invalid-name,too-many-instance-attributes
+# pylint: disable-next=invalid-name,too-many-instance-attributes
 class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC):
     """A Mixin class for sklearn KNeighbors models with FHE.
 
@@ -1712,24 +1711,22 @@ def __init_subclass__(cls):
             _NEIGHBORS_MODELS.add(cls)
             _ALL_SKLEARN_MODELS.add(cls)
 
-    def __init__(self, n_bits: Union[int, Dict[str, int]] = 3):
+    def __init__(self, n_bits: int = 3):
         """Initialize the FHE knn model.
 
         Args:
-            n_bits (int, Dict[str, int]): Number of bits to quantize the model. If an int is passed
-                for n_bits, the value will be used for quantizing inputs and weights. If a dict is
-                passed, then it should contain "op_inputs" and "op_weights" as keys with
-                corresponding number of quantization bits so that:
-                - op_inputs : number of bits to quantize the input values
-                - op_weights: number of bits to quantize the learned parameters
-                Default to 3.
+            n_bits (int): Number of bits to quantize the model. The value will be used for
+                quantizing inputs and X_fit. Default to 3.
         """
-        self.n_bits: Union[int, Dict[str, int]] = n_bits
-
-        #: The quantizer to use for quantizing the model's weights
-        self._weight_quantizer: Optional[UniformQuantizer] = None
-        self._q_X_fit_quantizer: Optional[UniformQuantizer] = None
+        self.n_bits: int = n_bits
+        # _q_X_fit: In distance metric algorithms, `_q_X_fit` stores the training set to compute
+        # the similarity or distance measures. 
There is no `weights` attribute because there isn't + # a training phase self._q_X_fit: numpy.ndarray + # _y: Labels of `_q_X_fit` + self._y: numpy.ndarray + # _q_X_fit_quantizer: The quantizer to use for quantizing the model's training set + self._q_X_fit_quantizer: Optional[UniformQuantizer] = None BaseEstimator.__init__(self) @@ -1748,7 +1745,7 @@ def _set_onnx_model(self, test_input: numpy.ndarray) -> None: test_input=test_input, extra_config={ "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, - # pylint: disable=protected-access, no-member + # pylint: disable-next=protected-access, no-member constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], }, ).model @@ -1765,6 +1762,8 @@ def _clean_graph(self) -> None: def fit(self, X: Data, y: Target, **fit_parameters): # Reset for double fit self._is_fitted = False + self.input_quantizers = [] + self.output_quantizers = [] # KNeighbors handles multi-labels data X, y = check_X_y_and_assert_multi_output(X, y) @@ -1780,31 +1779,23 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Retrieve the ONNX graph self._set_onnx_model(X) - # Convert the n_bits attribute into a proper dictionary - n_bits = get_n_bits_dict(self.n_bits) - - input_n_bits = n_bits["op_inputs"] - input_options = QuantizationOptions(n_bits=input_n_bits, is_signed=True) - # Quantize the inputs and store the associated quantizer - q_inputs = QuantizedArray(n_bits=input_n_bits, values=X, options=input_options) + input_options = QuantizationOptions(n_bits=self.n_bits, is_signed=True) + q_inputs = QuantizedArray(n_bits=self.n_bits, values=X, options=input_options) input_quantizer = q_inputs.quantizer self.input_quantizers.append(input_quantizer) - weights_n_bits = n_bits["op_weights"] - weight_options = QuantizationOptions(n_bits=weights_n_bits, is_signed=True) - # Quantize the _X_fit and store the associated quantizer - # Weights in KNN algorithms are the train data points - # pylint: disable=protected-access + # pylint: disable-next=protected-access _X_fit = self.sklearn_model._fit_X + # We assume that the inputs have the same distribution as the _X_fit q_X_fit = QuantizedArray( - n_bits=n_bits["op_weights"], + n_bits=self.n_bits, values=numpy.expand_dims(_X_fit, axis=1) if len(_X_fit.shape) == 1 else _X_fit, - options=weight_options, + options=input_options, ) self._q_X_fit = q_X_fit.qvalues - self._q_X_fit_quantizer = self._weight_quantizer = q_X_fit.quantizer + self._q_X_fit_quantizer = q_X_fit.quantizer # mypy assert self._q_X_fit_quantizer.scale is not None @@ -1821,9 +1812,6 @@ def fit(self, X: Data, y: Target, **fit_parameters): output_quantizer = UniformQuantizer(params=self.output_quant_params, no_clipping=True) - # Since the matmul and the bias both use the same scale and zero-points, we obtain that - # y = S*(q_y - 2*Z) when de-quantizing the values. We therefore need to multiply the initial - # output zero_point by 2 assert output_quantizer.zero_point is not None self.output_quantizers.append(output_quantizer) @@ -1843,14 +1831,8 @@ def quantize_input(self, X: numpy.ndarray) -> numpy.ndarray: def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray: self.check_model_is_fitted() - # We compute the sorted argmax in FHE, which are integers. 
# No need to de-quantize the output values
-
-        assert q_y_preds[0].shape[-1] == self.n_neighbors, (
-            f"Shape error: `q_y_preds` must be shape of ({self.n_neighbors},) and got:"
-            f"`{q_y_preds.shape}`"
-        )
 
         return q_y_preds
 
     def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]:
@@ -1911,6 +1893,8 @@ def pairwise_euclidean_distance(q_X):
         def topk_sorting(x):
             """Argsort in FHE.
 
+            Time complexity: O(nlog²(k))
+
             Args:
                 x (numpy.ndarray): The quantized input values.
 
@@ -1951,68 +1935,70 @@ def scatter1d(x, v, indices):
                     x[i] = v[idx]
                 return x
 
-            def mul_tlu(a, b):
-                """Matrix multiplication.
-
-                Args:
-                    a (numpy.ndarray): An encrypted array
-                    b (numpy.ndarray): An encrypted array
-
-                Returns:
-                    numpy.ndarray: The result of a * b
-                """
-                return a * b
-
             comparisons = numpy.zeros(x.shape)
             idx = numpy.arange(x.size) + fhe_zeros(x.shape)
 
             n, k = x.size, self.n_neighbors
             ln2n = int(numpy.ceil(numpy.log2(n)))
 
+            # Number of stages
            for t in range(ln2n - 1, -1, -1):
                 p = 2**t
                 r = 0
+                # d: Length of the bitonic sequence
                 d = p
 
                 for bq in range(ln2n - 1, t - 1, -1):
                     q = 2**bq
+                    # Determine the range of indexes to be compared
                     range_i = numpy.array(
                         [i for i in range(0, n - d) if i & p == r and comparisons[i] < k]
                     )
                     if len(range_i) == 0:
+                        # Edge case, for k=1
                         continue
 
-                    a = gather1d(x, range_i)  # x[range_i]
-                    a_i = gather1d(idx, range_i)  # idx[range_i]
-                    b = gather1d(x, range_i + d)  # x[range_i + d]
-                    b_i = gather1d(idx, range_i + d)  # idx[range_i + d]
+                    # Select 2 bitonic sequences `a` and `b` of length `d`
+                    # a = x[range_i]: first bitonic sequence
+                    a = gather1d(x, range_i)
+                    a_i = gather1d(idx, range_i)
+                    # b = x[range_i + d]: Second bitonic sequence
+                    # b_i = idx[range_i]: Indexes of a_i elements in the original x
+                    b = gather1d(x, range_i + d)
+                    b_i = gather1d(idx, range_i + d)
 
+                    # Select max(a, b)
                     diff = a - b
-                    sign = diff < 0
                     max_x = a + numpy.maximum(0, b - a)
-                    x = scatter1d(x, a + b - max_x, range_i)  # x[range_i] = a + b - max_x
-                    x = scatter1d(x, max_x, range_i + d)  # x[range_i + d] = max_x
-                    max_idx = a_i + mul_tlu((b_i - a_i), sign)
+                    # Swap if a > b
+                    # x[range_i] = min(a, b): First bitonic sequence gets min(a, b)
+                    x = scatter1d(x, a + b - max_x, range_i)
+                    # x[range_i + d] = max(a, b): Second bitonic sequence gets max(a, b)
+                    x = scatter1d(x, max_x, range_i + d)
+
+                    # Max index selection
+                    sign = diff < 0
+                    max_idx = a_i + (b_i - a_i) * sign
 
-                    # idx[range_i] = a_i + b_i - max_idx
+                    # Update indexes array according to the max items
+                    # idx[range_i] = a_i + b_i - max_idx <=> min_idx
                     idx = scatter1d(idx, a_i + b_i - max_idx, range_i)
-                    idx = scatter1d(idx, max_idx, range_i + d)  # idx[range_i + d] = max_idx
+                    # idx[range_i + d] = max_idx
+                    idx = scatter1d(idx, max_idx, range_i + d)
 
+                    # Update
                     comparisons[range_i + d] = comparisons[range_i + d] + 1
-
                 d = q - p
                 r = p
 
+            # Return only the topk indexes
             topk_indexes = []
             for i in range((self.n_neighbors)):
                 topk_indexes.append(idx[i])
 
             topk_indexes = fhe_array(topk_indexes)
 
-            assert topk_indexes.shape[0] == self.n_neighbors
-
             return topk_indexes
 
         # 1. Pairwise Euclidean distance
@@ -2020,9 +2006,10 @@ def mul_tlu(a, b):
         # with fhe.tag(f"distance_matrix"):
         distance_matrix = pairwise_euclidean_distance(q_X)
 
-        # The square root in the Euclidean distance calculation is not applied.
+        # The square root in the Euclidean distance calculation is not applied to speed up FHE
+        # computations.
         # Being a monotonic function, it does not affect the logic of the calculation, notably for
-        # for the argsort
+        # the argsort.
 
         # 2.
Sorting args
         # with fhe.tag(f"sorted_args"):
@@ -2031,6 +2018,25 @@ def mul_tlu(a, b):
 
         return numpy.expand_dims(sorted_args, axis=0)
 
+    # KNN works only for MONO in the latest Concrete Python version
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
+    def compile(self, *args, **kwargs) -> Circuit:
+        # If a configuration instance is given as a positional parameter, set the strategy to
+        # mono-parameter
+        if len(args) >= 2:
+            configuration = force_mono_parameter_in_configuration(args[1])
+            args_list = list(args)
+            args_list[1] = configuration
+            args = tuple(args_list)
+
+        # Else, retrieve the configuration in kwargs if it exists, or create a new one, and set the
+        # strategy to mono-parameter
+        else:
+            configuration = kwargs.get("configuration", None)
+            kwargs["configuration"] = force_mono_parameter_in_configuration(configuration)
+
+        return BaseEstimator.compile(self, *args, **kwargs)
+
     def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
         X = check_array_and_assert(X)
 
@@ -2040,7 +2046,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.
             # Argsort
             arg_sort = super().predict(query[None], fhe)
             # Majority vote
-            # pylint: disable=protected-access
+            # pylint: disable-next=protected-access
             label_indices = self._y[arg_sort.flatten()]
             y_pred = self.majority_vote(label_indices)
             y_preds.append(y_pred)
diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py
index d7dad8639..12f0d9015 100644
--- a/src/concrete/ml/sklearn/neighbors.py
+++ b/src/concrete/ml/sklearn/neighbors.py
@@ -1,8 +1,10 @@
 """Implement sklearn linear model."""
 from typing import Any, Dict
 
+import numpy
 import sklearn.linear_model
 
+from ..common.debugging.custom_assert import assert_true
 from .base import SklearnKNeighborsClassifierMixin
 
 
@@ -28,7 +30,7 @@ class KNeighborsClassifier(SklearnKNeighborsClassifierMixin):
 
     def __init__(
         self,
-        n_bits=3,
+        n_bits=2,
         n_neighbors=3,
         *,
         weights="uniform",
@@ -42,6 +44,18 @@ def __init__(
         # Call SklearnKNeighborsClassifierMixin's __init__ method
         super().__init__(n_bits=n_bits)
 
+        assert_true(
+            algorithm in ["brute", "auto"], f"Algorithm = `{algorithm}` is not supported in FHE."
+        )
+        assert_true(
+            not callable(metric), "The KNeighborsClassifier does not support custom metrics."
+ ) + assert_true( + p == 2 and metric == "minkowski", + "Only `L2` norm is supported with `p=2` and `metric = 'minkowski'`", + ) + + self._y: numpy.ndarray self.n_neighbors = n_neighbors self.algorithm = algorithm self.leaf_size = leaf_size @@ -50,10 +64,9 @@ def __init__( self.metric_params = metric_params self.n_jobs = n_jobs self.weights = weights - self._y = None def dump_dict(self) -> Dict[str, Any]: - assert self._weight_quantizer is not None, self._is_not_fitted_error_message() + assert self._q_X_fit_quantizer is not None, self._is_not_fitted_error_message() metadata: Dict[str, Any] = {} @@ -63,7 +76,6 @@ def dump_dict(self) -> Dict[str, Any]: metadata["_is_fitted"] = self._is_fitted metadata["_is_compiled"] = self._is_compiled metadata["input_quantizers"] = self.input_quantizers - metadata["_weight_quantizer"] = self._weight_quantizer metadata["_q_X_fit_quantizer"] = self._q_X_fit_quantizer metadata["_q_X_fit"] = self._q_X_fit metadata["_y"] = self._y @@ -99,7 +111,6 @@ def load_dict(cls, metadata: Dict): obj._is_compiled = metadata["_is_compiled"] obj.input_quantizers = metadata["input_quantizers"] obj.output_quantizers = metadata["output_quantizers"] - obj._weight_quantizer = metadata["_weight_quantizer"] obj._q_X_fit_quantizer = metadata["_q_X_fit_quantizer"] obj._q_X_fit = metadata["_q_X_fit"] obj._y = metadata["_y"] diff --git a/tests/common/test_pbs_error_probability_settings.py b/tests/common/test_pbs_error_probability_settings.py index 4066119eb..31aad3aea 100644 --- a/tests/common/test_pbs_error_probability_settings.py +++ b/tests/common/test_pbs_error_probability_settings.py @@ -4,12 +4,9 @@ import numpy import pytest -from concrete.fhe.compilation import Configuration from sklearn.exceptions import ConvergenceWarning from torch import nn -from concrete import fhe -from concrete.ml.common.utils import get_model_name from concrete.ml.pytest.torch_models import FCSmall from concrete.ml.pytest.utils import sklearn_models_and_datasets from concrete.ml.torch.compile import compile_torch_model @@ -29,7 +26,7 @@ {"global_p_error": 0.038, "p_error": 0.39}, ], ) -def test_config_sklearn(model_class, parameters, kwargs, load_data, default_configuration): +def test_config_sklearn(model_class, parameters, kwargs, load_data): """Testing with p_error and global_p_error configs with sklearn models.""" x, y = load_data(model_class, **parameters) @@ -41,24 +38,12 @@ def test_config_sklearn(model_class, parameters, kwargs, load_data, default_conf # Fit the model model.fit(x, y) - if get_model_name(model_class) == "KNeighborsClassifier": - - default_configuration = Configuration( - dump_artifacts_on_unexpected_failures=False, - enable_unsafe_features=True, - use_insecure_key_cache=True, - insecure_key_cache_location="ConcreteNumpyKeyCache", - parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO, - single_precision=True, - ) - if kwargs.get("p_error", None) is not None and kwargs.get("global_p_error", None) is not None: with pytest.raises(ValueError) as excinfo: - model.compile(x, default_configuration, verbose=True, **kwargs) + model.compile(x, verbose=True, **kwargs) assert "Please only set one of (p_error, global_p_error) values" in str(excinfo.value) else: - - model.compile(x, default_configuration, verbose=True, **kwargs) + model.compile(x, verbose=True, **kwargs) # We still need to check that we have the expected probabilities # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2206 diff --git a/tests/deployment/test_client_server.py 
b/tests/deployment/test_client_server.py index f5e4a8e43..7df681a1a 100644 --- a/tests/deployment/test_client_server.py +++ b/tests/deployment/test_client_server.py @@ -9,12 +9,9 @@ import numpy import pytest -from concrete.fhe.compilation import Configuration from sklearn.exceptions import ConvergenceWarning from torch import nn -from concrete import fhe -from concrete.ml.common.utils import get_model_name from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer from concrete.ml.pytest.torch_models import FCSmall from concrete.ml.pytest.utils import instantiate_model_generic, sklearn_models_and_datasets @@ -98,20 +95,10 @@ def test_client_server_sklearn( # Compile extra_params = {"global_p_error": 1 / 100_000} - if get_model_name(model_class) == "KNeighborsClassifier": - - default_configuration = Configuration( - dump_artifacts_on_unexpected_failures=False, - enable_unsafe_features=True, - use_insecure_key_cache=True, - insecure_key_cache_location="ConcreteNumpyKeyCache", - parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO, - single_precision=True, - ) - # Running the simulation using a model that is not compiled should not be possible with pytest.raises(AttributeError, match=".* model is not compiled.*"): client_server_simulation(x_train, x_test, model, default_configuration) + # With n_bits = 3, KNN is not compilable fhe_circuit = model.compile( x_train, default_configuration, **extra_params, show_mlir=(n_bits <= 8) diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index f1949a6ca..e2957788f 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -9,7 +9,6 @@ import pytest from sklearn.exceptions import ConvergenceWarning -from concrete import fhe from concrete.ml.common.utils import is_model_class_in_a_list from concrete.ml.pytest.utils import get_model_name, sklearn_models_and_datasets from concrete.ml.sklearn import get_sklearn_tree_models @@ -37,9 +36,9 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau model.set_params(**model_params) if get_model_name(model) == "KNeighborsClassifier": - model.n_bits = 4 - default_configuration.parameter_selection_strategy = fhe.ParameterSelectionStrategy.MONO - default_configuration.single_precision = True + # KNN works only for small quantization bits + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 + model.n_bits = 2 with warnings.catch_warnings(): # Sometimes, we miss convergence, which is not a problem for our test @@ -50,6 +49,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau with warnings.catch_warnings(): # Use FHE simulation to not have issues with precision model.compile(x, default_configuration) + # Get ONNX model onnx_model = model.onnx_model @@ -423,7 +423,7 @@ def test_dump( return %variable }""", "KNeighborsClassifier": """graph torch_jit ( - %input_0[DOUBLE, symx3] + %input_0[DOUBLE, symx2] ) { %/_operators.0/Constant_output_0 = Constant[value = ]() %/_operators.0/Unsqueeze_output_0 = Unsqueeze(%input_0, %/_operators.0/Constant_output_0) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 931d0c322..816ae4553 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -225,6 +225,7 @@ def check_correctness_with_sklearn( def check_double_fit(model_class, n_bits, x_1, x_2, y_1, y_2): """Check double fit.""" + model = instantiate_model_generic(model_class, 
n_bits=n_bits) # Sometimes, we miss convergence, which is not a problem for our test @@ -280,17 +281,10 @@ def check_double_fit(model_class, n_bits, x_1, x_2, y_1, y_2): # Check that the new quantizers are different from the first ones. This is because we # currently expect all quantizers to be re-computed when re-fitting a model - # For now, in KNN, we compute the pairwise Euclidean distance between the encrypted - # X and each element in the database. - # Then, we return the indices of the k closest distances to this point. - # The exact precision of computation of the quantization and dequantization parameters - # is not relevant in this case. That's why the assertion test is being ignored - # for now in the context of the KNN algorithm. - if get_model_name(model) != "KNeighborsClassifier": - assert all( - quantizer_1 != quantizer_2 - for (quantizer_1, quantizer_2) in zip(quantizers_1, quantizers_2) - ) + assert all( + quantizer_1 != quantizer_2 + for (quantizer_1, quantizer_2) in zip(quantizers_1, quantizers_2) + ) # Set the same torch seed manually before re-fitting the neural network if is_model_class_in_a_list(model_class, get_sklearn_neural_net_models()): @@ -311,20 +305,13 @@ def check_double_fit(model_class, n_bits, x_1, x_2, y_1, y_2): # quantizers to be re-computed when re-fitting. Since we used the same dataset as the first # fit, we also expect these quantizers to be the same. - # For now, in KNN, we compute the pairwise Euclidean distance between the encrypted - # X and each element in the database. - # Then, we return the indices of the k closest distances to this point. - # The exact precision of computation of the quantization and dequantization parameters - # is not relevant in this case. That's why the assertion test is being ignored - # for now in the context of the KNN algorithm. 
- if get_model_name(model) != "KNeighborsClassifier": - assert all( - quantizer_1 == quantizer_3 - for (quantizer_1, quantizer_3) in zip( - input_quantizers_1 + output_quantizers_1, - input_quantizers_3 + output_quantizers_3, - ) + assert all( + quantizer_1 == quantizer_3 + for (quantizer_1, quantizer_3) in zip( + input_quantizers_1 + output_quantizers_1, + input_quantizers_3 + output_quantizers_3, ) + ) def check_serialization(model, x, use_dump_method): @@ -585,7 +572,6 @@ def cast_input(x, y, input_type): # Sometimes, we miss convergence, which is not a problem for our test with warnings.catch_warnings(): warnings.simplefilter("ignore", category=ConvergenceWarning) - model.fit(x, y) # Make sure `predict` is working when FHE is disabled @@ -656,8 +642,8 @@ def check_pipeline(model_class, x, y): param_grid = { "model__n_bits": [2, 3], } - - grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=3) + # Since the data-set is really small for KNN, we have to decrease the number of splits + grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=2) # Sometimes, we miss convergence, which is not a problem for our test with warnings.catch_warnings(): @@ -686,9 +672,7 @@ def check_grid_search(model_class, x, y, scoring): "n_jobs": [1], } elif model_class in get_sklearn_neighbors_models(): - param_grid = { - "n_bits": [3], - } + param_grid = {"n_bits": [2], "n_neighbors": [2]} else: param_grid = { "n_bits": [20], @@ -707,7 +691,7 @@ def check_grid_search(model_class, x, y, scoring): pytest.skip("Skipping predict_proba for KNN, doesn't work for now") _ = GridSearchCV( - model_class(), param_grid, cv=5, scoring=scoring, error_score="raise", n_jobs=1 + model_class(), param_grid, cv=2, scoring=scoring, error_score="raise", n_jobs=1 ).fit(x, y) @@ -807,7 +791,8 @@ def get_hyper_param_combinations(model_class): "base_score": [0.5, None], } elif model_class in get_sklearn_neighbors_models(): - hyper_param_combinations = {"n_neighbors": [2, 4]} + # Use small `n_neighbors` values for KNN, because the data-set is too small for now + hyper_param_combinations = {"n_neighbors": [1, 2]} else: assert is_model_class_in_a_list( @@ -1350,6 +1335,7 @@ def test_input_support( ): """Test all models with Pandas, List or Torch inputs.""" x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) + if verbose: print("Run input_support") @@ -1452,7 +1438,8 @@ def test_predict_correctness( "Inference in the clear (with " f"number_of_tests_in_non_fhe = {number_of_tests_in_non_fhe})" ) - + # KNN works only for smaller quantization bits + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 if n_bits > 5 and get_model_name(model) == "KNeighborsClassifier": pytest.skip("Use less than 5 bits with KNN.") @@ -1475,11 +1462,6 @@ def test_predict_correctness( print("Compile the model") with warnings.catch_warnings(): - - if get_model_name(model) == "KNeighborsClassifier": - default_configuration.parameter_selection_strategy = ( - ParameterSelectionStrategy.MONO - ) fhe_circuit = model.compile( x, default_configuration, @@ -1553,7 +1535,6 @@ def test_p_error_global_p_error_simulation( parameters, error_param, load_data, - default_configuration, is_weekly_option, ): """Test p_error and global_p_error simulation. 
@@ -1567,23 +1548,24 @@ def test_p_error_global_p_error_simulation( if "global_p_error" in error_param: pytest.skip("global_p_error behave very differently depending on the type of model.") - # Get data-set - n_bits = min(N_BITS_REGULAR_BUILDS) if get_model_name(model_class) == "KNeighborsClassifier": - n_bits = min(n_bits, 2) - default_configuration.parameter_selection_strategy = ParameterSelectionStrategy.MONO + # KNN works only for smaller quantization bits + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 + n_bits = min([2] + N_BITS_REGULAR_BUILDS) + else: + n_bits = min(N_BITS_REGULAR_BUILDS) - # Initialize and fit the model + # Get data-set, initialize and fit the model model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) # Check if model is linear is_linear_model = is_model_class_in_a_list(model_class, get_sklearn_linear_models()) - # Check if model is linear + # Check if model is a distance metrics model is_knn_model = is_model_class_in_a_list(model_class, get_sklearn_neighbors_models()) # Compile with a large p_error to be sure the result is random. - model.compile(x, default_configuration, **error_param) + model.compile(x, **error_param) def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_RUN): """Detect divergence between simulated/FHE execution and clear run.""" @@ -1595,7 +1577,6 @@ def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_ else model.predict ) y_expected = predict_function(x, fhe="disable") - for i in range(max_iterations): y_pred = predict_function(x[i : i + 1], fhe=fhe).ravel() if not numpy.array_equal(y_pred, y_expected[i : i + 1].ravel()): @@ -1617,6 +1598,7 @@ def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_ simulation_diff_found = check_for_divergent_predictions(x, model, fhe="simulate") fhe_diff_found = check_for_divergent_predictions(x, model, fhe="execute") + # Check for differences in predictions # Remark that, with the old VL, linear models (or, more generally, circuits without PBS) were # badly simulated. It has been fixed in the new simulation. 
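For context on the hunks above, a minimal sketch of the compilation API these p_error tests exercise (the model choice, data shapes and values are illustrative only; either `p_error` or `global_p_error` may be set, but not both, as the earlier test_config_sklearn asserts):

    import numpy
    from concrete.ml.sklearn import KNeighborsClassifier

    x = numpy.random.rand(20, 2)
    y = numpy.random.randint(0, 2, size=20)

    model = KNeighborsClassifier(n_bits=2, n_neighbors=3)
    model.fit(x, y)

    # Either error probability can be set on its own
    model.compile(x, p_error=0.01)
    model.compile(x, global_p_error=0.01)

    # Setting both raises:
    # ValueError: Please only set one of (p_error, global_p_error) values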
@@ -1720,9 +1702,10 @@ def test_mono_parameter_warnings( if is_model_class_in_a_list(model_class, get_sklearn_linear_models()): return - # KNN works only for ParameterSelectionStrategy.MULTI + # KNN is manually forced to use mono-parameter + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978 if is_model_class_in_a_list(model_class, get_sklearn_neighbors_models()): - pytest.skip("Skipping predict_proba for KNN, doesn't work for now") + return n_bits = min(N_BITS_REGULAR_BUILDS) diff --git a/use_case_examples/credit_scoring/CreditScoring.ipynb b/use_case_examples/credit_scoring/CreditScoring.ipynb index b5af7d35c..c4ce77f6c 100644 --- a/use_case_examples/credit_scoring/CreditScoring.ipynb +++ b/use_case_examples/credit_scoring/CreditScoring.ipynb @@ -20,11 +20,7 @@ "from functools import partial\n", "\n", "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import StandardScaler" + "import pandas as pd" ] }, { @@ -36,6 +32,10 @@ "# Importing the models, from both scikit-learn and Concrete ML\n", "from sklearn.ensemble import RandomForestClassifier as SklearnRandomForestClassifier\n", "from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression\n", + "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", "from sklearn.tree import DecisionTreeClassifier as SklearnDecisionTreeClassifier\n", "from xgboost import XGBClassifier as SklearnXGBoostClassifier\n", "\n", From a59aa96c3e534b8ee7ddd3b61f969fa9829b57f0 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 20 Sep 2023 11:37:46 +0200 Subject: [PATCH 49/51] chore: predict returns the topk labels --- src/concrete/ml/sklearn/base.py | 47 ++++++++++++++++----------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 97fb8149b..bf0090f2e 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1768,7 +1768,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): # KNeighbors handles multi-labels data X, y = check_X_y_and_assert_multi_output(X, y) - self._y = y + self._y = numpy.array(y) # Fit the scikit-learn model self._fit_sklearn_model(X, y, **fit_parameters) @@ -1890,7 +1890,7 @@ def pairwise_euclidean_distance(q_X): + numpy.expand_dims(numpy.sum(self._q_X_fit**2, axis=1), 0) ) - def topk_sorting(x): + def topk_sorting(x, labels): """Argsort in FHE. 
Time complexity: O(nlog²(k))
@@ -1936,7 +1936,7 @@ def scatter1d(x, v, indices):
                 return x
 
             comparisons = numpy.zeros(x.shape)
-            idx = numpy.arange(x.size) + fhe_zeros(x.shape)
+            labels = labels + fhe_zeros(labels.shape)
 
             n, k = x.size, self.n_neighbors
             ln2n = int(numpy.ceil(numpy.log2(n)))
@@ -1960,12 +1960,16 @@ def scatter1d(x, v, indices):
                     # Select 2 bitonic sequences `a` and `b` of length `d`
                     # a = x[range_i]: first bitonic sequence
                     a = gather1d(x, range_i)
-                    a_i = gather1d(idx, range_i)
                     # b = x[range_i + d]: Second bitonic sequence
-                    # b_i = idx[range_i]: Indexes of a_i elements in the original x
                     b = gather1d(x, range_i + d)
-                    b_i = gather1d(idx, range_i + d)
+
+                    # labels_a = labels[range_i]: Labels of the first sequence's elements
+                    labels_a = gather1d(labels, range_i)
+                    # labels_b = labels[range_i + d]: Labels of the second sequence's elements
+                    labels_b = gather1d(labels, range_i + d)
 
                     # Select max(a, b)
                     diff = a - b
@@ -1978,14 +1982,12 @@ def scatter1d(x, v, indices):
                     x = scatter1d(x, max_x, range_i + d)
 
-                    # Max index selection
-                    sign = diff < 0
-                    max_idx = a_i + (b_i - a_i) * sign
+                    # Max label selection
+                    sign = diff <= 0
 
-                    # Update indexes array according to the max items
-                    # idx[range_i] = a_i + b_i - max_idx <=> min_idx
-                    idx = scatter1d(idx, a_i + b_i - max_idx, range_i)
-                    # idx[range_i + d] = max_idx
-                    idx = scatter1d(idx, max_idx, range_i + d)
+                    # Update labels array according to the max items
+                    max_labels = labels_a + (labels_b - labels_a) * sign
+                    labels = scatter1d(labels, labels_a + labels_b - max_labels, range_i)
+                    labels = scatter1d(labels, max_labels, range_i + d)
 
                     # Update
                     comparisons[range_i + d] = comparisons[range_i + d] + 1
@@ -1993,13 +1995,11 @@ def scatter1d(x, v, indices):
                 r = p
 
-            # Return only the topk indexes
-            topk_indexes = []
+            # Return only the topk labels
+            topk_labels = []
             for i in range((self.n_neighbors)):
-                topk_indexes.append(idx[i])
-
-            topk_indexes = fhe_array(topk_indexes)
+                topk_labels.append(labels[i])
 
-            return topk_indexes
+            return fhe_array(topk_labels)
 
         # 1. Pairwise Euclidean distance
         # from concrete import fhe
@@ -2014,9 +2014,10 @@ def scatter1d(x, v, indices):
 
         # 2. Sorting args
         # with fhe.tag(f"sorted_args"):
-        sorted_args = topk_sorting(distance_matrix.flatten())
+        # pylint: disable-next=protected-access
+        topk_labels = topk_sorting(distance_matrix.flatten(), self._y)
 
-        return numpy.expand_dims(sorted_args, axis=0)
+        return numpy.expand_dims(topk_labels, axis=0)
 
     # KNN works only for MONO in the latest Concrete Python version
     # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
@@ -2044,11 +2045,9 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.
y_preds = [] for query in X: # Argsort - arg_sort = super().predict(query[None], fhe) + topk_labels = super().predict(query[None], fhe) # Majority vote - # pylint: disable-next=protected-access - label_indices = self._y[arg_sort.flatten()] - y_pred = self.majority_vote(label_indices) + y_pred = self.majority_vote(topk_labels.flatten()) y_preds.append(y_pred) return numpy.array(y_preds) From d5b6e4662aed2fbb9638b4bdb701c051bc488d8a Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 20 Sep 2023 11:47:11 +0200 Subject: [PATCH 50/51] chore: update check_for_divergent_predictions test for KNN --- src/concrete/ml/sklearn/neighbors.py | 11 +++-------- tests/sklearn/test_sklearn_models.py | 18 +----------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 12f0d9015..3fed38276 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -13,13 +13,8 @@ class KNeighborsClassifier(SklearnKNeighborsClassifierMixin): """A k-nearest classifier model with FHE. Parameters: - n_bits (int, Dict[str, int]): Number of bits to quantize the model. If an int is passed - for n_bits, the value will be used for quantizing inputs and weights. If a dict is - passed, then it should contain "op_inputs" and "op_weights" as keys with - corresponding number of quantization bits so that: - - op_inputs : number of bits to quantize the input values - - op_weights: number of bits to quantize the learned parameters - Default to 8. + n_bits (int): Number of bits to quantize the model. The value will be used for quantizing + inputs and X_fit. Default to 3. For more details on KNeighborsClassifier please refer to the scikit-learn documentation: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html @@ -85,7 +80,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["post_processing_params"] = self.post_processing_params metadata["cml_dumped_class_name"] = type(self).__name__ - # Scikit-learn + # scikit-learn metadata["sklearn_model_class"] = self.sklearn_model_class metadata["n_neighbors"] = self.n_neighbors metadata["algorithm"] = self.algorithm diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 816ae4553..0342198ba 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -642,7 +642,7 @@ def check_pipeline(model_class, x, y): param_grid = { "model__n_bits": [2, 3], } - # Since the data-set is really small for KNN, we have to decrease the number of splits + # We need a small number of splits, especially for the KNN model, which has a small data-set grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=2) # Sometimes, we miss convergence, which is not a problem for our test @@ -1561,9 +1561,6 @@ def test_p_error_global_p_error_simulation( # Check if model is linear is_linear_model = is_model_class_in_a_list(model_class, get_sklearn_linear_models()) - # Check if model is a distance metrics model - is_knn_model = is_model_class_in_a_list(model_class, get_sklearn_neighbors_models()) - # Compile with a large p_error to be sure the result is random. 
model.compile(x, **error_param) @@ -1583,19 +1580,6 @@ def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_ return True return False - if is_knn_model: - # In the case of KNN, a large `p_error` results in indexes larger than expected, which will - # trigger an IndexError - with pytest.raises(IndexError, match=".* is out of bounds for axis 0 with size .*"): - simulation_diff_found = check_for_divergent_predictions(x, model, fhe="simulate") - fhe_diff_found = check_for_divergent_predictions(x, model, fhe="execute") - - assert simulation_diff_found, ( - "Due to large p_error, " - "simulate predictions should be different from the expected predictions." - ) - return - simulation_diff_found = check_for_divergent_predictions(x, model, fhe="simulate") fhe_diff_found = check_for_divergent_predictions(x, model, fhe="execute") From fd2c1c7e3a06ef59e797ddc19df0eb0b9bb3627e Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 20 Sep 2023 15:39:56 +0200 Subject: [PATCH 51/51] chore: add post_processing --- src/concrete/ml/sklearn/base.py | 82 +++++++++++++++----------- src/concrete/ml/sklearn/neighbors.py | 10 ++-- tests/deployment/test_client_server.py | 24 ++++++-- tests/sklearn/test_sklearn_models.py | 3 +- 4 files changed, 74 insertions(+), 45 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index bf0090f2e..0184615a7 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1719,14 +1719,14 @@ def __init__(self, n_bits: int = 3): quantizing inputs and X_fit. Default to 3. """ self.n_bits: int = n_bits - # _q_X_fit: In distance metric algorithms, `_q_X_fit` stores the training set to compute + # _q_fit_X: In distance metric algorithms, `_q_fit_X` stores the training set to compute # the similarity or distance measures. 
There is no `weights` attribute because there isn't
         # a training phase
-        self._q_X_fit: numpy.ndarray
-        # _y: Labels of `_q_X_fit`
+        self._q_fit_X: numpy.ndarray
+        # _y: Labels of `_q_fit_X`
         self._y: numpy.ndarray
-        # _q_X_fit_quantizer: The quantizer to use for quantizing the model's training set
-        self._q_X_fit_quantizer: Optional[UniformQuantizer] = None
+        # _q_fit_X_quantizer: The quantizer to use for quantizing the model's training set
+        self._q_fit_X_quantizer: Optional[UniformQuantizer] = None
 
         BaseEstimator.__init__(self)
 
@@ -1768,8 +1768,6 @@ def fit(self, X: Data, y: Target, **fit_parameters):
         # KNeighbors handles multi-labels data
         X, y = check_X_y_and_assert_multi_output(X, y)
 
-        self._y = numpy.array(y)
-
         # Fit the scikit-learn model
         self._fit_sklearn_model(X, y, **fit_parameters)
@@ -1785,28 +1783,30 @@ def fit(self, X: Data, y: Target, **fit_parameters):
         input_quantizer = q_inputs.quantizer
         self.input_quantizers.append(input_quantizer)
 
-        # Quantize the _X_fit and store the associated quantizer
+        # Quantize the _fit_X and store the associated quantizer
         # pylint: disable-next=protected-access
-        _X_fit = self.sklearn_model._fit_X
-        # We assume that the inputs have the same distribution as the _X_fit
-        q_X_fit = QuantizedArray(
+        _fit_X = self.sklearn_model._fit_X
+        # We assume that the inputs have the same distribution as the _fit_X
+        q_fit_X = QuantizedArray(
             n_bits=self.n_bits,
-            values=numpy.expand_dims(_X_fit, axis=1) if len(_X_fit.shape) == 1 else _X_fit,
+            values=numpy.expand_dims(_fit_X, axis=1) if len(_fit_X.shape) == 1 else _fit_X,
             options=input_options,
         )
-        self._q_X_fit = q_X_fit.qvalues
-        self._q_X_fit_quantizer = q_X_fit.quantizer
+        self._q_fit_X = q_fit_X.qvalues
+        self._q_fit_X_quantizer = q_fit_X.quantizer
 
         # mypy
-        assert self._q_X_fit_quantizer.scale is not None
+        assert self._q_fit_X_quantizer.scale is not None
+
+        self._y = numpy.array(y)
 
-        # We assume that the query has the same distribution as the data in _X_fit.
-        # therefore, they use the same scaling and zero point.
+        # We assume that the query has the same distribution as the data in _fit_X.
+        # Therefore, they use the same scaling and zero point.
         # https://arxiv.org/abs/1712.05877
 
         self.output_quant_params = UniformQuantizationParameters(
-            scale=self._q_X_fit_quantizer.scale,
-            zero_point=self._q_X_fit_quantizer.zero_point,
+            scale=self._q_fit_X_quantizer.scale,
+            zero_point=self._q_fit_X_quantizer.zero_point,
             offset=0,
         )
 
@@ -1879,15 +1879,15 @@ def _inference(self, q_X: numpy.ndarray) -> numpy.ndarray:
         Returns:
             numpy.ndarray: The quantized predicted values.
         """
-        assert self._q_X_fit_quantizer is not None, self._is_not_fitted_error_message()
+        assert self._q_fit_X_quantizer is not None, self._is_not_fitted_error_message()
 
         def pairwise_euclidean_distance(q_X):
             # 1. Pairwise euclidean distance
             # dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))
             return (
                 numpy.sum(q_X**2, axis=1, keepdims=True)
-                - 2 * q_X @ self._q_X_fit.T
-                + numpy.expand_dims(numpy.sum(self._q_X_fit**2, axis=1), 0)
+                - 2 * q_X @ self._q_fit_X.T
+                + numpy.expand_dims(numpy.sum(self._q_fit_X**2, axis=1), 0)
             )
 
         def topk_sorting(x, labels):
@@ -1896,7 +1896,8 @@ def topk_sorting(x, labels):
             Time complexity: O(nlog²(k))
 
             Args:
-                x (numpy.ndarray): The quantized input values.
+                x (numpy.ndarray): The quantized input values
+                labels (numpy.ndarray): The labels of the training data-set
 
             Returns:
-                numpy.ndarray: The argsort.
+                numpy.ndarray: The labels of the k nearest neighbors.
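To make the bitonic-sort hunks easier to review, here is a clear-text numpy sketch of the compare-and-swap step that `topk_sorting` applies to each pair of elements, with the label propagation introduced in this series (no FHE ops; the names mirror the patch, but the helper itself is illustrative):

    import numpy

    def compare_exchange(x, labels, i, j):
        # One bitonic compare-and-swap: min goes to slot i, max goes to slot j,
        # and each value's label travels with it.
        a, b = x[i], x[j]
        diff = a - b
        # max(a, b), computed without branching as in the FHE circuit
        max_x = a + numpy.maximum(0, b - a)
        x[i], x[j] = a + b - max_x, max_x
        # 1 when b holds the max (a <= b), 0 otherwise
        is_a_lower_than_b = diff <= 0
        max_label = labels[i] + (labels[j] - labels[i]) * is_a_lower_than_b
        labels[i], labels[j] = labels[i] + labels[j] - max_label, max_label
        return x, labels

For example, with x = numpy.array([5, 2]) and labels = numpy.array([1, 0]), compare_exchange(x, labels, 0, 1) yields x = [2, 5] and labels = [0, 1]: the smaller value moves left and keeps its own label.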
@@ -1982,10 +1983,10 @@ def scatter1d(x, v, indices):
                     x = scatter1d(x, max_x, range_i + d)
 
                     # Max label selection
-                    sign = diff <= 0
+                    is_a_lower_than_b = diff <= 0
 
                     # Update labels array according to the max items
-                    max_labels = labels_a + (labels_b - labels_a) * sign
+                    max_labels = labels_a + (labels_b - labels_a) * is_a_lower_than_b
                     labels = scatter1d(labels, labels_a + labels_b - max_labels, range_i)
                     labels = scatter1d(labels, max_labels, range_i + d)
@@ -2002,8 +2003,6 @@ def scatter1d(x, v, indices):
             return fhe_array(topk_labels)
 
         # 1. Pairwise Euclidean distance
-        # from concrete import fhe
-        # with fhe.tag(f"distance_matrix"):
         distance_matrix = pairwise_euclidean_distance(q_X)
 
         # The square root in the Euclidean distance calculation is not applied to speed up FHE
@@ -2011,10 +2010,6 @@ def scatter1d(x, v, indices):
         # Being a monotonic function, it does not affect the logic of the calculation, notably for
         # the argsort.
 
-        # 2. Sorting args
-        # with fhe.tag(f"sorted_args"):
-
-        # pylint: disable-next=protected-access
         topk_labels = topk_sorting(distance_matrix.flatten(), self._y)
 
         return numpy.expand_dims(topk_labels, axis=0)
@@ -2038,17 +2033,34 @@ def compile(self, *args, **kwargs) -> Circuit:
 
         return BaseEstimator.compile(self, *args, **kwargs)
 
+    def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
+        """Perform the majority vote.
+
+        For KNN, the de-quantization step is not required because _inference returns the labels
+        of the k-nearest neighbors.
+
+        Args:
+            y_preds (numpy.ndarray): The top-k nearest labels
+
+        Returns:
+            numpy.ndarray: The majority vote.
+        """
+        y_preds_processed = []
+        for y in y_preds:
+            vote = self.majority_vote(y.flatten())
+            y_preds_processed.append(vote)
+
+        return numpy.array(y_preds_processed)
+
     def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
         X = check_array_and_assert(X)
 
-        y_preds = []
+        topk_labels = []
         for query in X:
-            # Argsort
-            topk_labels = super().predict(query[None], fhe)
-            # Majority vote
-            y_pred = self.majority_vote(topk_labels.flatten())
-            y_preds.append(y_pred)
+            topk_labels.append(super().predict(query[None], fhe))
+
+        y_preds = self.post_processing(numpy.array(topk_labels))
 
         return numpy.array(y_preds)
 
diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py
index 3fed38276..368c9690b 100644
--- a/src/concrete/ml/sklearn/neighbors.py
+++ b/src/concrete/ml/sklearn/neighbors.py
@@ -61,7 +61,7 @@ def __init__(
         self.weights = weights
 
     def dump_dict(self) -> Dict[str, Any]:
-        assert self._q_X_fit_quantizer is not None, self._is_not_fitted_error_message()
+        assert self._q_fit_X_quantizer is not None, self._is_not_fitted_error_message()
 
         metadata: Dict[str, Any] = {}
 
@@ -71,8 +71,8 @@ def dump_dict(self) -> Dict[str, Any]:
         metadata["_is_fitted"] = self._is_fitted
         metadata["_is_compiled"] = self._is_compiled
         metadata["input_quantizers"] = self.input_quantizers
-        metadata["_q_X_fit_quantizer"] = self._q_X_fit_quantizer
-        metadata["_q_X_fit"] = self._q_X_fit
+        metadata["_q_fit_X_quantizer"] = self._q_fit_X_quantizer
+        metadata["_q_fit_X"] = self._q_fit_X
         metadata["_y"] = self._y
         metadata["output_quantizers"] = self.output_quantizers
 
@@ -106,8 +106,8 @@ def load_dict(cls, metadata: Dict):
         obj._is_compiled = metadata["_is_compiled"]
         obj.input_quantizers = metadata["input_quantizers"]
         obj.output_quantizers = metadata["output_quantizers"]
-        obj._q_X_fit_quantizer = metadata["_q_X_fit_quantizer"]
-        obj._q_X_fit = metadata["_q_X_fit"]
+        obj._q_fit_X_quantizer =
metadata["_q_fit_X_quantizer"]
+        obj._q_fit_X = metadata["_q_fit_X"]
         obj._y = metadata["_y"]
         obj.onnx_model_ = metadata["onnx_model_"]
diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py
index 7df681a1a..05c7fd53a 100644
--- a/tests/deployment/test_client_server.py
+++ b/tests/deployment/test_client_server.py
@@ -14,11 +14,15 @@
 
 from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer
 from concrete.ml.pytest.torch_models import FCSmall
-from concrete.ml.pytest.utils import instantiate_model_generic, sklearn_models_and_datasets
+from concrete.ml.pytest.utils import (
+    get_model_name,
+    instantiate_model_generic,
+    sklearn_models_and_datasets,
+)
 from concrete.ml.quantization.quantized_module import QuantizedModule
 from concrete.ml.torch.compile import compile_torch_model
 
-# pylint: disable=too-many-statements
+# pylint: disable=too-many-statements,too-many-locals
 
 
 class OnDiskNetwork:
@@ -67,7 +71,7 @@ def cleanup(self):
 
 @pytest.mark.parametrize("model_class, parameters", sklearn_models_and_datasets)
-@pytest.mark.parametrize("n_bits", [2])
+@pytest.mark.parametrize("n_bits", [3])
 def test_client_server_sklearn(
     default_configuration,
     model_class,
@@ -99,10 +103,17 @@ def test_client_server_sklearn(
     with pytest.raises(AttributeError, match=".* model is not compiled.*"):
         client_server_simulation(x_train, x_test, model, default_configuration)
 
-    # With n_bits = 3, KNN is not compilable
     fhe_circuit = model.compile(
         x_train, default_configuration, **extra_params, show_mlir=(n_bits <= 8)
     )
+
+    if get_model_name(model) == "KNeighborsClassifier":
+        # Fit the model
+        with warnings.catch_warnings():
+            # Sometimes, we miss convergence, which is not a problem for our test
+            warnings.simplefilter("ignore", category=ConvergenceWarning)
+            model.fit(x, y)
+
     max_bit_width = fhe_circuit.graph.maximum_integer_bit_width()
     print(f"Max width {max_bit_width}")
 
@@ -259,5 +270,10 @@ def client_server_simulation(x_train, x_test, model, default_configuration):
         y_pred_on_client_dequantized, y_pred_model_server_ds_dequantized
     )
 
+    # Make sure the clear predictions are the same for the server
+    if get_model_name(model) == "KNeighborsClassifier":
+        y_pred_model_clear = model.predict(x_test, fhe="disable")
+        numpy.testing.assert_array_equal(y_pred_model_clear, y_pred_model_server_ds_dequantized)
+
     # Clean up
     network.cleanup()
diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py
index 0342198ba..307e412d3 100644
--- a/tests/sklearn/test_sklearn_models.py
+++ b/tests/sklearn/test_sklearn_models.py
@@ -1569,7 +1569,8 @@ def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_
         predict_function = (
             model.predict_proba
             if is_classifier_or_partial_classifier(model)
-            # predict_prob not implemented yet for KNeighborsClassifier
+            # `predict_proba` not implemented yet for KNeighborsClassifier
+            # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962
             and get_model_name(model) != "KNeighborsClassifier"
             else model.predict
         )
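As a closing illustration of the new `post_processing` flow: `_inference` now returns the top-k labels directly, so the only remaining clear-side work is the majority vote. A hypothetical stand-in for `majority_vote` (its implementation is not shown in this series) could look like:

    import numpy

    def majority_vote(labels):
        # Most frequent label among the k nearest neighbors
        return numpy.bincount(labels.astype(numpy.int64)).argmax()

    # Two queries, k=3 neighbors each
    topk_labels = numpy.array([[0, 1, 1], [2, 2, 0]])
    y_preds = numpy.array([majority_vote(row.flatten()) for row in topk_labels])
    # y_preds -> array([1, 2])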