Skip to content

Commit

Permalink
chore: force the configuration of KNN to run under MONO settings
Browse files Browse the repository at this point in the history
  • Loading branch information
kcelia committed Sep 18, 2023
1 parent ca03c3c commit 60e9688
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 61 deletions.
29 changes: 24 additions & 5 deletions src/concrete/ml/sklearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,7 @@ class BaseTreeClassifierMixin(
"""


# pylint: disable=invalid-name,too-many-instance-attributes
# pylint: disable-next=invalid-name,too-many-instance-attributes
class SklearnLinearModelMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC):
"""A Mixin class for sklearn linear models with FHE.
Expand Down Expand Up @@ -1697,7 +1697,7 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) ->
return y_proba


# pylint: disable=invalid-name,too-many-instance-attributes
# pylint: disable-next=invalid-name,too-many-instance-attributes
class SklearnKNeighborsMixin(BaseEstimator, sklearn.base.BaseEstimator, ABC):
"""A Mixin class for sklearn KNeighbors models with FHE.
Expand Down Expand Up @@ -1748,7 +1748,7 @@ def _set_onnx_model(self, test_input: numpy.ndarray) -> None:
test_input=test_input,
extra_config={
"onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT,
# pylint: disable=protected-access, no-member
# pylint: disable-next=protected-access, no-member
constants.BATCH_SIZE: self.sklearn_model._fit_X.shape[0],
},
).model
Expand Down Expand Up @@ -1796,7 +1796,7 @@ def fit(self, X: Data, y: Target, **fit_parameters):

# Quantize the _X_fit and store the associated quantizer
# Weights in KNN algorithms are the train data points
# pylint: disable=protected-access
# pylint: disable-next=protected-access
_X_fit = self.sklearn_model._fit_X
q_X_fit = QuantizedArray(
n_bits=n_bits["op_weights"],
Expand Down Expand Up @@ -2031,6 +2031,25 @@ def mul_tlu(a, b):

return numpy.expand_dims(sorted_args, axis=0)

# KNN works only for MONO in the latest concrete Python version
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
def compile(self, *args, **kwargs) -> Circuit:
# If a configuration instance is given as a positional parameter, set the strategy to
# multi-parameter
if len(args) >= 2:
configuration = force_mono_parameter_in_configuration(args[1])
args_list = list(args)
args_list[1] = configuration
args = tuple(args_list)

# Else, retrieve the configuration in kwargs if it exists, or create a new one, and set the
# strategy to multi-parameter
else:
configuration = kwargs.get("configuration", None)
kwargs["configuration"] = force_mono_parameter_in_configuration(configuration)

return BaseEstimator.compile(self, *args, **kwargs)

def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:

X = check_array_and_assert(X)
Expand All @@ -2040,7 +2059,7 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.
# Argsort
arg_sort = super().predict(query[None], fhe)
# Majority vote
# pylint: disable=protected-access
# pylint: disable-next=protected-access
label_indices = self._y[arg_sort.flatten()]
y_pred = self.majority_vote(label_indices)
y_preds.append(y_pred)
Expand Down
14 changes: 0 additions & 14 deletions tests/common/test_pbs_error_probability_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@

import numpy
import pytest
from concrete.fhe.compilation import Configuration
from sklearn.exceptions import ConvergenceWarning
from torch import nn

from concrete import fhe
from concrete.ml.common.utils import get_model_name
from concrete.ml.pytest.torch_models import FCSmall
from concrete.ml.pytest.utils import sklearn_models_and_datasets
from concrete.ml.torch.compile import compile_torch_model
Expand Down Expand Up @@ -41,17 +38,6 @@ def test_config_sklearn(model_class, parameters, kwargs, load_data, default_conf
# Fit the model
model.fit(x, y)

if get_model_name(model_class) == "KNeighborsClassifier":

default_configuration = Configuration(
dump_artifacts_on_unexpected_failures=False,
enable_unsafe_features=True,
use_insecure_key_cache=True,
insecure_key_cache_location="ConcreteNumpyKeyCache",
parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO,
single_precision=True,
)

if kwargs.get("p_error", None) is not None and kwargs.get("global_p_error", None) is not None:
with pytest.raises(ValueError) as excinfo:
model.compile(x, default_configuration, verbose=True, **kwargs)
Expand Down
15 changes: 1 addition & 14 deletions tests/deployment/test_client_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@

import numpy
import pytest
from concrete.fhe.compilation import Configuration
from sklearn.exceptions import ConvergenceWarning
from torch import nn

from concrete import fhe
from concrete.ml.common.utils import get_model_name
from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer
from concrete.ml.pytest.torch_models import FCSmall
from concrete.ml.pytest.utils import instantiate_model_generic, sklearn_models_and_datasets
Expand Down Expand Up @@ -98,20 +95,10 @@ def test_client_server_sklearn(
# Compile
extra_params = {"global_p_error": 1 / 100_000}

if get_model_name(model_class) == "KNeighborsClassifier":

default_configuration = Configuration(
dump_artifacts_on_unexpected_failures=False,
enable_unsafe_features=True,
use_insecure_key_cache=True,
insecure_key_cache_location="ConcreteNumpyKeyCache",
parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO,
single_precision=True,
)

# Running the simulation using a model that is not compiled should not be possible
with pytest.raises(AttributeError, match=".* model is not compiled.*"):
client_server_simulation(x_train, x_test, model, default_configuration)

# With n_bits = 3, KNN is not compilable
fhe_circuit = model.compile(
x_train, default_configuration, **extra_params, show_mlir=(n_bits <= 8)
Expand Down
9 changes: 4 additions & 5 deletions tests/sklearn/test_dump_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import pytest
from sklearn.exceptions import ConvergenceWarning

from concrete import fhe
from concrete.ml.common.utils import is_model_class_in_a_list
from concrete.ml.pytest.utils import get_model_name, sklearn_models_and_datasets
from concrete.ml.sklearn import get_sklearn_tree_models
Expand Down Expand Up @@ -37,9 +36,9 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau
model.set_params(**model_params)

if get_model_name(model) == "KNeighborsClassifier":
model.n_bits = 4
default_configuration.parameter_selection_strategy = fhe.ParameterSelectionStrategy.MONO
default_configuration.single_precision = True
# KNN works only for small quantization bits
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979
model.n_bits = 2

with warnings.catch_warnings():
# Sometimes, we miss convergence, which is not a problem for our test
Expand Down Expand Up @@ -423,7 +422,7 @@ def test_dump(
return %variable
}""",
"KNeighborsClassifier": """graph torch_jit (
%input_0[DOUBLE, symx3]
%input_0[DOUBLE, symx2]
) {
%/_operators.0/Constant_output_0 = Constant[value = <Tensor>]()
%/_operators.0/Unsqueeze_output_0 = Unsqueeze(%input_0, %/_operators.0/Constant_output_0)
Expand Down
36 changes: 18 additions & 18 deletions tests/sklearn/test_sklearn_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,8 +656,8 @@ def check_pipeline(model_class, x, y):
param_grid = {
"model__n_bits": [2, 3],
}

grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=3)
# Since the data-set is really small for KNN, we have to decrease the number of splits
grid_search = GridSearchCV(pipe_cv, param_grid, error_score="raise", cv=2)

# Sometimes, we miss convergence, which is not a problem for our test
with warnings.catch_warnings():
Expand Down Expand Up @@ -686,9 +686,7 @@ def check_grid_search(model_class, x, y, scoring):
"n_jobs": [1],
}
elif model_class in get_sklearn_neighbors_models():
param_grid = {
"n_bits": [3],
}
param_grid = {"n_bits": [2], "n_neighbors": [2]}
else:
param_grid = {
"n_bits": [20],
Expand All @@ -706,8 +704,11 @@ def check_grid_search(model_class, x, y, scoring):
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962
pytest.skip("Skipping predict_proba for KNN, doesn't work for now")

# pylint: disable=invalid-name
cv = 2 if get_model_name(model_class) == "KNeighborsClassifier" else 5

_ = GridSearchCV(
model_class(), param_grid, cv=5, scoring=scoring, error_score="raise", n_jobs=1
model_class(), param_grid, cv=cv, scoring=scoring, error_score="raise", n_jobs=1
).fit(x, y)


Expand Down Expand Up @@ -807,7 +808,8 @@ def get_hyper_param_combinations(model_class):
"base_score": [0.5, None],
}
elif model_class in get_sklearn_neighbors_models():
hyper_param_combinations = {"n_neighbors": [2, 4]}
# Use small `n_neighbors` values for KNN, because the data-set is too small for now
hyper_param_combinations = {"n_neighbors": [2]}
else:

assert is_model_class_in_a_list(
Expand Down Expand Up @@ -1476,10 +1478,6 @@ def test_predict_correctness(

with warnings.catch_warnings():

if get_model_name(model) == "KNeighborsClassifier":
default_configuration.parameter_selection_strategy = (
ParameterSelectionStrategy.MONO
)
fhe_circuit = model.compile(
x,
default_configuration,
Expand Down Expand Up @@ -1567,13 +1565,14 @@ def test_p_error_global_p_error_simulation(
if "global_p_error" in error_param:
pytest.skip("global_p_error behave very differently depending on the type of model.")

# Get data-set
n_bits = min(N_BITS_REGULAR_BUILDS)
if get_model_name(model_class) == "KNeighborsClassifier":
n_bits = min(n_bits, 2)
default_configuration.parameter_selection_strategy = ParameterSelectionStrategy.MONO
# KNN works only for smaller quantization bits
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979
n_bits = min([2] + N_BITS_REGULAR_BUILDS)
else:
n_bits = min(N_BITS_REGULAR_BUILDS)

# Initialize and fit the model
# Get data-set, initialize and fit the model
model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option)

# Check if model is linear
Expand Down Expand Up @@ -1720,9 +1719,10 @@ def test_mono_parameter_warnings(
if is_model_class_in_a_list(model_class, get_sklearn_linear_models()):
return

# KNN works only for ParameterSelectionStrategy.MULTI
# KNN is manually forced to use mono-parameter
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
if is_model_class_in_a_list(model_class, get_sklearn_neighbors_models()):
pytest.skip("Skipping predict_proba for KNN, doesn't work for now")
return

n_bits = min(N_BITS_REGULAR_BUILDS)

Expand Down
10 changes: 5 additions & 5 deletions use_case_examples/credit_scoring/CreditScoring.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,7 @@
"from functools import partial\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import StandardScaler"
"import pandas as pd"
]
},
{
Expand All @@ -36,6 +32,10 @@
"# Importing the models, from both scikit-learn and Concrete ML\n",
"from sklearn.ensemble import RandomForestClassifier as SklearnRandomForestClassifier\n",
"from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression\n",
"from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.tree import DecisionTreeClassifier as SklearnDecisionTreeClassifier\n",
"from xgboost import XGBClassifier as SklearnXGBoostClassifier\n",
"\n",
Expand Down

0 comments on commit 60e9688

Please sign in to comment.