From 84dee0c678ec04163ca055c4572d216fd5ecdadc Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 18 Sep 2023 10:56:08 +0200 Subject: [PATCH] chore: force the configuration of KNN to run under MONO settings --- src/concrete/ml/sklearn/base.py | 32 +++++++++++++---- .../test_pbs_error_probability_settings.py | 14 -------- tests/deployment/test_client_server.py | 15 +------- tests/sklearn/test_dump_onnx.py | 9 +++-- tests/sklearn/test_sklearn_models.py | 36 +++++++++---------- .../credit_scoring/CreditScoring.ipynb | 10 +++--- 6 files changed, 53 insertions(+), 63 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 83f40bf1f1..fc001f929e 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1426,7 +1426,7 @@ class BaseTreeClassifierMixin( """ -# pylint: disable=invalid-name,too-many-instance-attributes +# pylint: disable-next=invalid-name,too-many-instance-attributes class SklearnLinearModelMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): """A Mixin class for sklearn linear models with FHE. @@ -1697,7 +1697,7 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> return y_proba -# pylint: disable=invalid-name,too-many-instance-attributes +# pylint: disable-next=invalid-name,too-many-instance-attributes class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC): """A Mixin class for sklearn KNeighbors models with FHE. @@ -1748,7 +1748,7 @@ def _set_onnx_model(self, test_input: numpy.ndarray) -> None: test_input=test_input, extra_config={ "onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT, - # pylint: disable=protected-access, no-member + # pylint: disable-next=protected-access, no-member constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0], }, ).model @@ -1796,7 +1796,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Quantize the _X_fit and store the associated quantizer # Weights in KNN algorithms are the train data points - # pylint: disable=protected-access + # pylint: disable-next=protected-access _X_fit = self.sklearn_model._fit_X q_X_fit = QuantizedArray( n_bits=n_bits["op_weights"], @@ -1969,6 +1969,7 @@ def mul_tlu(a, b): n, k = x.size, self.n_neighbors ln2n = int(numpy.ceil(numpy.log2(n))) + # Number of stages for t in range(ln2n - 1, -1, -1): p = 2**t r = 0 @@ -1979,8 +1980,6 @@ def mul_tlu(a, b): range_i = numpy.array( [i for i in range(0, n - d) if i & p == r and comparisons[i] < k] ) - if len(range_i) == 0: - continue a = gather1d(x, range_i) # x[range_i] a_i = gather1d(idx, range_i) # idx[range_i] @@ -2031,6 +2030,25 @@ def mul_tlu(a, b): return numpy.expand_dims(sorted_args, axis=0) + # KNN works only for MONO in the latest concrete Python version + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978 + def compile(self, *args, **kwargs) -> Circuit: + # If a configuration instance is given as a positional parameter, set the strategy to + # multi-parameter + if len(args) >= 2: + configuration = force_mono_parameter_in_configuration(args[1]) + args_list = list(args) + args_list[1] = configuration + args = tuple(args_list) + + # Else, retrieve the configuration in kwargs if it exists, or create a new one, and set the + # strategy to multi-parameter + else: + configuration = kwargs.get("configuration", None) + kwargs["configuration"] = force_mono_parameter_in_configuration(configuration) + + return BaseEstimator.compile(self, *args, **kwargs) + def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: X = check_array_and_assert(X) @@ -2040,7 +2058,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. # Argsort arg_sort = super().predict(query[None], fhe) # Majority vote - # pylint: disable=protected-access + # pylint: disable-next=protected-access label_indices = self._y[arg_sort.flatten()] y_pred = self.majority_vote(label_indices) y_preds.append(y_pred) diff --git a/tests/common/test_pbs_error_probability_settings.py b/tests/common/test_pbs_error_probability_settings.py index 4066119eb9..55062dd52a 100644 --- a/tests/common/test_pbs_error_probability_settings.py +++ b/tests/common/test_pbs_error_probability_settings.py @@ -4,12 +4,9 @@ import numpy import pytest -from concrete.fhe.compilation import Configuration from sklearn.exceptions import ConvergenceWarning from torch import nn -from concrete import fhe -from concrete.ml.common.utils import get_model_name from concrete.ml.pytest.torch_models import FCSmall from concrete.ml.pytest.utils import sklearn_models_and_datasets from concrete.ml.torch.compile import compile_torch_model @@ -41,17 +38,6 @@ def test_config_sklearn(model_class, parameters, kwargs, load_data, default_conf # Fit the model model.fit(x, y) - if get_model_name(model_class) == "KNeighborsClassifier": - - default_configuration = Configuration( - dump_artifacts_on_unexpected_failures=False, - enable_unsafe_features=True, - use_insecure_key_cache=True, - insecure_key_cache_location="ConcreteNumpyKeyCache", - parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO, - single_precision=True, - ) - if kwargs.get("p_error", None) is not None and kwargs.get("global_p_error", None) is not None: with pytest.raises(ValueError) as excinfo: model.compile(x, default_configuration, verbose=True, **kwargs) diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py index f5e4a8e438..7df681a1a5 100644 --- a/tests/deployment/test_client_server.py +++ b/tests/deployment/test_client_server.py @@ -9,12 +9,9 @@ import numpy import pytest -from concrete.fhe.compilation import Configuration from sklearn.exceptions import ConvergenceWarning from torch import nn -from concrete import fhe -from concrete.ml.common.utils import get_model_name from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer from concrete.ml.pytest.torch_models import FCSmall from concrete.ml.pytest.utils import instantiate_model_generic, sklearn_models_and_datasets @@ -98,20 +95,10 @@ def test_client_server_sklearn( # Compile extra_params = {"global_p_error": 1 / 100_000} - if get_model_name(model_class) == "KNeighborsClassifier": - - default_configuration = Configuration( - dump_artifacts_on_unexpected_failures=False, - enable_unsafe_features=True, - use_insecure_key_cache=True, - insecure_key_cache_location="ConcreteNumpyKeyCache", - parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO, - single_precision=True, - ) - # Running the simulation using a model that is not compiled should not be possible with pytest.raises(AttributeError, match=".* model is not compiled.*"): client_server_simulation(x_train, x_test, model, default_configuration) + # With n_bits = 3, KNN is not compilable fhe_circuit = model.compile( x_train, default_configuration, **extra_params, show_mlir=(n_bits <= 8) diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index f1949a6ca3..16646bc47f 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -9,7 +9,6 @@ import pytest from sklearn.exceptions import ConvergenceWarning -from concrete import fhe from concrete.ml.common.utils import is_model_class_in_a_list from concrete.ml.pytest.utils import get_model_name, sklearn_models_and_datasets from concrete.ml.sklearn import get_sklearn_tree_models @@ -37,9 +36,9 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau model.set_params(**model_params) if get_model_name(model) == "KNeighborsClassifier": - model.n_bits = 4 - default_configuration.parameter_selection_strategy = fhe.ParameterSelectionStrategy.MONO - default_configuration.single_precision = True + # KNN works only for small quantization bits + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 + model.n_bits = 2 with warnings.catch_warnings(): # Sometimes, we miss convergence, which is not a problem for our test @@ -423,7 +422,7 @@ def test_dump( return %variable }""", "KNeighborsClassifier": """graph torch_jit ( - %input_0[DOUBLE, symx3] + %input_0[DOUBLE, symx2] ) { %/_operators.0/Constant_output_0 = Constant[value = ]() %/_operators.0/Unsqueeze_output_0 = Unsqueeze(%input_0, %/_operators.0/Constant_output_0) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 931d0c3222..15e858269a 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -656,8 +656,8 @@ def check_pipeline(model_class, x, y): param_grid = { "model__n_bits": [2, 3], } - - grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=3) + # Since the data-set is really small for KNN, we have to decrease the number of splits + grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=2) # Sometimes, we miss convergence, which is not a problem for our test with warnings.catch_warnings(): @@ -686,9 +686,7 @@ def check_grid_search(model_class, x, y, scoring): "n_jobs": [1], } elif model_class in get_sklearn_neighbors_models(): - param_grid = { - "n_bits": [3], - } + param_grid = {"n_bits": [2], "n_neighbors": [2]} else: param_grid = { "n_bits": [20], @@ -706,8 +704,11 @@ def check_grid_search(model_class, x, y, scoring): # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 pytest.skip("Skipping predict_proba for KNN, doesn't work for now") + # pylint: disable=invalid-name + cv = 2 if get_model_name(model_class) == "KNeighborsClassifier" else 5 + _ = GridSearchCV( - model_class(), param_grid, cv=5, scoring=scoring, error_score="raise", n_jobs=1 + model_class(), param_grid, cv=cv, scoring=scoring, error_score="raise", n_jobs=1 ).fit(x, y) @@ -807,7 +808,8 @@ def get_hyper_param_combinations(model_class): "base_score": [0.5, None], } elif model_class in get_sklearn_neighbors_models(): - hyper_param_combinations = {"n_neighbors": [2, 4]} + # Use small `n_neighbors` values for KNN, because the data-set is too small for now + hyper_param_combinations = {"n_neighbors": [2]} else: assert is_model_class_in_a_list( @@ -1476,10 +1478,6 @@ def test_predict_correctness( with warnings.catch_warnings(): - if get_model_name(model) == "KNeighborsClassifier": - default_configuration.parameter_selection_strategy = ( - ParameterSelectionStrategy.MONO - ) fhe_circuit = model.compile( x, default_configuration, @@ -1567,13 +1565,14 @@ def test_p_error_global_p_error_simulation( if "global_p_error" in error_param: pytest.skip("global_p_error behave very differently depending on the type of model.") - # Get data-set - n_bits = min(N_BITS_REGULAR_BUILDS) if get_model_name(model_class) == "KNeighborsClassifier": - n_bits = min(n_bits, 2) - default_configuration.parameter_selection_strategy = ParameterSelectionStrategy.MONO + # KNN works only for smaller quantization bits + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 + n_bits = min([2] + N_BITS_REGULAR_BUILDS) + else: + n_bits = min(N_BITS_REGULAR_BUILDS) - # Initialize and fit the model + # Get data-set, initialize and fit the model model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) # Check if model is linear @@ -1720,9 +1719,10 @@ def test_mono_parameter_warnings( if is_model_class_in_a_list(model_class, get_sklearn_linear_models()): return - # KNN works only for ParameterSelectionStrategy.MULTI + # KNN is manually forced to use mono-parameter + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978 if is_model_class_in_a_list(model_class, get_sklearn_neighbors_models()): - pytest.skip("Skipping predict_proba for KNN, doesn't work for now") + return n_bits = min(N_BITS_REGULAR_BUILDS) diff --git a/use_case_examples/credit_scoring/CreditScoring.ipynb b/use_case_examples/credit_scoring/CreditScoring.ipynb index b5af7d35c1..c4ce77f6cc 100644 --- a/use_case_examples/credit_scoring/CreditScoring.ipynb +++ b/use_case_examples/credit_scoring/CreditScoring.ipynb @@ -20,11 +20,7 @@ "from functools import partial\n", "\n", "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import StandardScaler" + "import pandas as pd" ] }, { @@ -36,6 +32,10 @@ "# Importing the models, from both scikit-learn and Concrete ML\n", "from sklearn.ensemble import RandomForestClassifier as SklearnRandomForestClassifier\n", "from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression\n", + "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", "from sklearn.tree import DecisionTreeClassifier as SklearnDecisionTreeClassifier\n", "from xgboost import XGBClassifier as SklearnXGBoostClassifier\n", "\n",