From 60e9688fef2e543fa246d5313c6cf3b7f307c564 Mon Sep 17 00:00:00 2001
From: kcelia <celia.kherfallah@zama.ai>
Date: Mon, 18 Sep 2023 10:56:08 +0200
Subject: [PATCH] chore: force the configuration of KNN to run under MONO
 settings

---
 src/concrete/ml/sklearn/base.py               | 29 ++++++++++++---
 .../test_pbs_error_probability_settings.py    | 14 --------
 tests/deployment/test_client_server.py        | 15 +-------
 tests/sklearn/test_dump_onnx.py               |  9 +++--
 tests/sklearn/test_sklearn_models.py          | 36 +++++++++----------
 .../credit_scoring/CreditScoring.ipynb        | 10 +++---
 6 files changed, 52 insertions(+), 61 deletions(-)

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index 83f40bf1f1..a070510adf 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -1426,7 +1426,7 @@ class BaseTreeClassifierMixin(
     """
 
 
-# pylint: disable=invalid-name,too-many-instance-attributes
+# pylint: disable-next=invalid-name,too-many-instance-attributes
 class SklearnLinearModelMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC):
     """A Mixin class for sklearn linear models with FHE.
 
@@ -1697,7 +1697,7 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) ->
         return y_proba
 
 
-# pylint: disable=invalid-name,too-many-instance-attributes
+# pylint: disable-next=invalid-name,too-many-instance-attributes
 class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC):
     """A Mixin class for sklearn KNeighbors models with FHE.
 
@@ -1748,7 +1748,7 @@ def _set_onnx_model(self, test_input: numpy.ndarray) -> None:
             test_input=test_input,
             extra_config={
                 "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT,
-                # pylint: disable=protected-access, no-member
+                # pylint: disable-next=protected-access, no-member
                 constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0],
             },
         ).model
@@ -1796,7 +1796,7 @@ def fit(self, X: Data, y: Target, **fit_parameters):
 
         # Quantize the _X_fit and store the associated quantizer
         # Weights in KNN algorithms are the train data points
-        # pylint: disable=protected-access
+        # pylint: disable-next=protected-access
         _X_fit = self.sklearn_model._fit_X
         q_X_fit = QuantizedArray(
             n_bits=n_bits["op_weights"],
@@ -2031,6 +2031,25 @@ def mul_tlu(a, b):
 
         return numpy.expand_dims(sorted_args, axis=0)
 
+    # KNN works only for MONO in the latest concrete Python version
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
+    def compile(self, *args, **kwargs) -> Circuit:
+        # If a configuration instance is given as a positional parameter, set the strategy to
+        # multi-parameter
+        if len(args) >= 2:
+            configuration = force_mono_parameter_in_configuration(args[1])
+            args_list = list(args)
+            args_list[1] = configuration
+            args = tuple(args_list)
+
+        # Else, retrieve the configuration in kwargs if it exists, or create a new one, and set the
+        # strategy to multi-parameter
+        else:
+            configuration = kwargs.get("configuration", None)
+            kwargs["configuration"] = force_mono_parameter_in_configuration(configuration)
+
+        return BaseEstimator.compile(self, *args, **kwargs)
+
     def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
 
         X = check_array_and_assert(X)
@@ -2040,7 +2059,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.
             # Argsort
             arg_sort = super().predict(query[None], fhe)
             # Majority vote
-            # pylint: disable=protected-access
+            # pylint: disable-next=protected-access
             label_indices = self._y[arg_sort.flatten()]
             y_pred = self.majority_vote(label_indices)
             y_preds.append(y_pred)
diff --git a/tests/common/test_pbs_error_probability_settings.py b/tests/common/test_pbs_error_probability_settings.py
index 4066119eb9..55062dd52a 100644
--- a/tests/common/test_pbs_error_probability_settings.py
+++ b/tests/common/test_pbs_error_probability_settings.py
@@ -4,12 +4,9 @@
 
 import numpy
 import pytest
-from concrete.fhe.compilation import Configuration
 from sklearn.exceptions import ConvergenceWarning
 from torch import nn
 
-from concrete import fhe
-from concrete.ml.common.utils import get_model_name
 from concrete.ml.pytest.torch_models import FCSmall
 from concrete.ml.pytest.utils import sklearn_models_and_datasets
 from concrete.ml.torch.compile import compile_torch_model
@@ -41,17 +38,6 @@ def test_config_sklearn(model_class, parameters, kwargs, load_data, default_conf
         # Fit the model
         model.fit(x, y)
 
-    if get_model_name(model_class) == "KNeighborsClassifier":
-
-        default_configuration = Configuration(
-            dump_artifacts_on_unexpected_failures=False,
-            enable_unsafe_features=True,
-            use_insecure_key_cache=True,
-            insecure_key_cache_location="ConcreteNumpyKeyCache",
-            parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO,
-            single_precision=True,
-        )
-
     if kwargs.get("p_error", None) is not None and kwargs.get("global_p_error", None) is not None:
         with pytest.raises(ValueError) as excinfo:
             model.compile(x, default_configuration, verbose=True, **kwargs)
diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py
index f5e4a8e438..7df681a1a5 100644
--- a/tests/deployment/test_client_server.py
+++ b/tests/deployment/test_client_server.py
@@ -9,12 +9,9 @@
 
 import numpy
 import pytest
-from concrete.fhe.compilation import Configuration
 from sklearn.exceptions import ConvergenceWarning
 from torch import nn
 
-from concrete import fhe
-from concrete.ml.common.utils import get_model_name
 from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer
 from concrete.ml.pytest.torch_models import FCSmall
 from concrete.ml.pytest.utils import instantiate_model_generic, sklearn_models_and_datasets
@@ -98,20 +95,10 @@ def test_client_server_sklearn(
     # Compile
     extra_params = {"global_p_error": 1 / 100_000}
 
-    if get_model_name(model_class) == "KNeighborsClassifier":
-
-        default_configuration = Configuration(
-            dump_artifacts_on_unexpected_failures=False,
-            enable_unsafe_features=True,
-            use_insecure_key_cache=True,
-            insecure_key_cache_location="ConcreteNumpyKeyCache",
-            parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO,
-            single_precision=True,
-        )
-
     # Running the simulation using a model that is not compiled should not be possible
     with pytest.raises(AttributeError, match=".* model is not compiled.*"):
         client_server_simulation(x_train, x_test, model, default_configuration)
+
     # With n_bits = 3, KNN is not compilable
     fhe_circuit = model.compile(
         x_train, default_configuration, **extra_params, show_mlir=(n_bits <= 8)
diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py
index f1949a6ca3..16646bc47f 100644
--- a/tests/sklearn/test_dump_onnx.py
+++ b/tests/sklearn/test_dump_onnx.py
@@ -9,7 +9,6 @@
 import pytest
 from sklearn.exceptions import ConvergenceWarning
 
-from concrete import fhe
 from concrete.ml.common.utils import is_model_class_in_a_list
 from concrete.ml.pytest.utils import get_model_name, sklearn_models_and_datasets
 from concrete.ml.sklearn import get_sklearn_tree_models
@@ -37,9 +36,9 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau
         model.set_params(**model_params)
 
     if get_model_name(model) == "KNeighborsClassifier":
-        model.n_bits = 4
-        default_configuration.parameter_selection_strategy = fhe.ParameterSelectionStrategy.MONO
-        default_configuration.single_precision = True
+        # KNN works only for small quantization bits
+        # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979
+        model.n_bits = 2
 
     with warnings.catch_warnings():
         # Sometimes, we miss convergence, which is not a problem for our test
@@ -423,7 +422,7 @@ def test_dump(
   return %variable
 }""",
         "KNeighborsClassifier": """graph torch_jit (
-  %input_0[DOUBLE, symx3]
+  %input_0[DOUBLE, symx2]
 ) {
   %/_operators.0/Constant_output_0 = Constant[value = <Tensor>]()
   %/_operators.0/Unsqueeze_output_0 = Unsqueeze(%input_0, %/_operators.0/Constant_output_0)
diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py
index 931d0c3222..15e858269a 100644
--- a/tests/sklearn/test_sklearn_models.py
+++ b/tests/sklearn/test_sklearn_models.py
@@ -656,8 +656,8 @@ def check_pipeline(model_class, x, y):
         param_grid = {
             "model__n_bits": [2, 3],
         }
-
-    grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=3)
+    # Since the data-set is really small for KNN, we have to decrease the number of splits
+    grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=2)
 
     # Sometimes, we miss convergence, which is not a problem for our test
     with warnings.catch_warnings():
@@ -686,9 +686,7 @@ def check_grid_search(model_class, x, y, scoring):
             "n_jobs": [1],
         }
     elif model_class in get_sklearn_neighbors_models():
-        param_grid = {
-            "n_bits": [3],
-        }
+        param_grid = {"n_bits": [2], "n_neighbors": [2]}
     else:
         param_grid = {
             "n_bits": [20],
@@ -706,8 +704,11 @@ def check_grid_search(model_class, x, y, scoring):
             # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962
             pytest.skip("Skipping predict_proba for KNN, doesn't work for now")
 
+        # pylint: disable=invalid-name
+        cv = 2 if get_model_name(model_class) == "KNeighborsClassifier" else 5
+
         _ = GridSearchCV(
-            model_class(), param_grid, cv=5, scoring=scoring, error_score="raise", n_jobs=1
+            model_class(), param_grid, cv=cv, scoring=scoring, error_score="raise", n_jobs=1
         ).fit(x, y)
 
 
@@ -807,7 +808,8 @@ def get_hyper_param_combinations(model_class):
             "base_score": [0.5, None],
         }
     elif model_class in get_sklearn_neighbors_models():
-        hyper_param_combinations = {"n_neighbors": [2, 4]}
+        # Use small `n_neighbors` values for KNN, because the data-set is too small for now
+        hyper_param_combinations = {"n_neighbors": [2]}
     else:
 
         assert is_model_class_in_a_list(
@@ -1476,10 +1478,6 @@ def test_predict_correctness(
 
             with warnings.catch_warnings():
 
-                if get_model_name(model) == "KNeighborsClassifier":
-                    default_configuration.parameter_selection_strategy = (
-                        ParameterSelectionStrategy.MONO
-                    )
                 fhe_circuit = model.compile(
                     x,
                     default_configuration,
@@ -1567,13 +1565,14 @@ def test_p_error_global_p_error_simulation(
     if "global_p_error" in error_param:
         pytest.skip("global_p_error behave very differently depending on the type of model.")
 
-    # Get data-set
-    n_bits = min(N_BITS_REGULAR_BUILDS)
     if get_model_name(model_class) == "KNeighborsClassifier":
-        n_bits = min(n_bits, 2)
-        default_configuration.parameter_selection_strategy = ParameterSelectionStrategy.MONO
+        # KNN works only for smaller quantization bits
+        # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979
+        n_bits = min([2] + N_BITS_REGULAR_BUILDS)
+    else:
+        n_bits = min(N_BITS_REGULAR_BUILDS)
 
-    # Initialize and fit the model
+    # Get data-set, initialize and fit the model
     model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option)
 
     # Check if model is linear
@@ -1720,9 +1719,10 @@ def test_mono_parameter_warnings(
     if is_model_class_in_a_list(model_class, get_sklearn_linear_models()):
         return
 
-    # KNN works only for ParameterSelectionStrategy.MULTI
+    # KNN is manually forced to use mono-parameter
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
     if is_model_class_in_a_list(model_class, get_sklearn_neighbors_models()):
-        pytest.skip("Skipping predict_proba for KNN, doesn't work for now")
+        return
 
     n_bits = min(N_BITS_REGULAR_BUILDS)
 
diff --git a/use_case_examples/credit_scoring/CreditScoring.ipynb b/use_case_examples/credit_scoring/CreditScoring.ipynb
index b5af7d35c1..c4ce77f6cc 100644
--- a/use_case_examples/credit_scoring/CreditScoring.ipynb
+++ b/use_case_examples/credit_scoring/CreditScoring.ipynb
@@ -20,11 +20,7 @@
     "from functools import partial\n",
     "\n",
     "import numpy as np\n",
-    "import pandas as pd\n",
-    "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.pipeline import Pipeline\n",
-    "from sklearn.preprocessing import StandardScaler"
+    "import pandas as pd"
    ]
   },
   {
@@ -36,6 +32,10 @@
     "# Importing the models, from both scikit-learn and Concrete ML\n",
     "from sklearn.ensemble import RandomForestClassifier as SklearnRandomForestClassifier\n",
     "from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression\n",
+    "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.preprocessing import StandardScaler\n",
     "from sklearn.tree import DecisionTreeClassifier as SklearnDecisionTreeClassifier\n",
     "from xgboost import XGBClassifier as SklearnXGBoostClassifier\n",
     "\n",