From 152a2e2e5a7b93a779e0f683499d48d1bf4802ec Mon Sep 17 00:00:00 2001 From: Roman <56846628+RomanBredehoft@users.noreply.github.com> Date: Wed, 4 Oct 2023 16:16:09 +0200 Subject: [PATCH] chore: improve inference tests (decision_function, predict, predict_proba) + post_processing --- conftest.py | 59 +- src/concrete/ml/sklearn/base.py | 23 +- src/concrete/ml/sklearn/neighbors.py | 28 +- tests/deployment/test_client_server.py | 242 ++++---- tests/quantization/test_quantized_ops.py | 24 +- tests/quantization/test_quantizers.py | 4 +- tests/sklearn/test_dump_onnx.py | 2 +- tests/sklearn/test_sklearn_models.py | 680 ++++++++++++----------- 8 files changed, 561 insertions(+), 501 deletions(-) diff --git a/conftest.py b/conftest.py index b073cc3bb..fbd86bc27 100644 --- a/conftest.py +++ b/conftest.py @@ -288,41 +288,40 @@ def check_circuit_precision(): return check_circuit_precision_impl -def check_array_equality_impl(actual: Any, expected: Any, verbose: bool = True): - """Assert that `actual` is equal to `expected`.""" - - assert numpy.array_equal(actual, expected), ( - "" - if not verbose - else f""" +@pytest.fixture +def check_array_equal(): + """Fixture to check array equality.""" -Expected Output -=============== -{expected} + def check_array_equal_impl(actual: Any, expected: Any, verbose: bool = True): + """Assert that `actual` is equal to `expected`.""" -Actual Output -============= -{actual} + assert numpy.array_equal(actual, expected), ( + "" + if not verbose + else f""" - """ - ) + Expected Output + =============== + {expected} + Actual Output + ============= + {actual} -@pytest.fixture -def check_array_equality(): - """Fixture to check array equality.""" + """ + ) - return check_array_equality_impl + return check_array_equal_impl @pytest.fixture -def check_float_arrays_equal(): +def check_float_array_equal(): """Fixture to check if two float arrays are equal with epsilon precision tolerance.""" - def check_float_arrays_equal_impl(a, b): + def check_float_array_equal_impl(a, b): assert numpy.all(numpy.isclose(a, b, rtol=0, atol=0.001)) - return check_float_arrays_equal_impl + return check_float_array_equal_impl @pytest.fixture @@ -492,15 +491,13 @@ def check_is_good_execution_for_cml_vs_circuit_impl( # as much post-processing steps in the clear (that could lead to more flaky # tests), especially since these results are tested in other tests such as the # `check_subfunctions_in_fhe` - if is_classifier_or_partial_classifier(model): - if isinstance(model, SklearnKNeighborsMixin): - # For KNN `predict_proba` is not supported for now - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 - results_cnp_circuit = model.predict(*inputs, fhe=fhe_mode) - results_model = model.predict(*inputs, fhe="disable") - else: - results_cnp_circuit = model.predict_proba(*inputs, fhe=fhe_mode) - results_model = model.predict_proba(*inputs, fhe="disable") + # For KNN `predict_proba` is not supported for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 + if is_classifier_or_partial_classifier(model) and not isinstance( + model, SklearnKNeighborsMixin + ): + results_cnp_circuit = model.predict_proba(*inputs, fhe=fhe_mode) + results_model = model.predict_proba(*inputs, fhe="disable") else: results_cnp_circuit = model.predict(*inputs, fhe=fhe_mode) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 1978f0fdf..4d7ae7251 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -260,15 +260,6 @@ def 
fhe_circuit(self) -> Optional[Circuit]: assert isinstance(self.fhe_circuit_, Circuit) or self.fhe_circuit_ is None return self.fhe_circuit_ - @fhe_circuit.setter - def fhe_circuit(self, value: Circuit) -> None: - """Set the FHE circuit. - - Args: - value (Circuit): The FHE circuit to set. - """ - self.fhe_circuit_ = value - def _sklearn_model_is_not_fitted_error_message(self) -> str: return ( f"The underlying model (class: {self.sklearn_model_class}) is not fitted and thus " @@ -556,7 +547,7 @@ def compile( # Jit compiler is now deprecated and will soon be removed, it is thus forced to False # by default - self.fhe_circuit = module_to_compile.compile( + self.fhe_circuit_ = module_to_compile.compile( inputset, configuration=configuration, artifacts=artifacts, @@ -570,6 +561,9 @@ def compile( jit=False, ) + # For mypy + assert isinstance(self.fhe_circuit, Circuit) + # CRT simulation is not supported yet # TODO: https://github.com/zama-ai/concrete-ml-internal/issues/3841 if not USE_OLD_VL: @@ -577,7 +571,6 @@ def compile( self._is_compiled = True - assert isinstance(self.fhe_circuit, Circuit) return self.fhe_circuit @abstractmethod @@ -883,10 +876,6 @@ def output_quantizers(self, value: List[UniformQuantizer]) -> None: def fhe_circuit(self) -> Circuit: return self.quantized_module_.fhe_circuit - @fhe_circuit.setter - def fhe_circuit(self, value: Circuit) -> None: - self.quantized_module_.fhe_circuit = value - def get_params(self, deep: bool = True) -> dict: """Get parameters for this estimator. @@ -2093,11 +2082,11 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. topk_labels = [] for query in X: - topk_labels.append(super().predict(query[None], fhe)) + topk_labels.append(BaseEstimator.predict(self, query[None], fhe=fhe)) y_preds = self.post_processing(numpy.array(topk_labels)) - return numpy.array(y_preds) + return y_preds class SklearnKNeighborsClassifierMixin(SklearnKNeighborsMixin, sklearn.base.ClassifierMixin, ABC): diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 368c9690b..c17180964 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -1,11 +1,12 @@ """Implement sklearn linear model.""" -from typing import Any, Dict +from typing import Any, Dict, Union import numpy import sklearn.linear_model from ..common.debugging.custom_assert import assert_true -from .base import SklearnKNeighborsClassifierMixin +from ..common.utils import FheMode +from .base import Data, SklearnKNeighborsClassifierMixin # pylint: disable=invalid-name,too-many-instance-attributes @@ -123,3 +124,26 @@ def load_dict(cls, metadata: Dict): obj.metric_params = metadata["metric_params"] obj.n_jobs = metadata["n_jobs"] return obj + + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 + def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray: + """Predict class probabilities. + + Args: + X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame + or List. + fhe (Union[FheMode, str]): The mode to use for prediction. + Can be FheMode.DISABLE for Concrete ML Python inference, + FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution. + Can also be the string representation of any of these values. + Default to FheMode.DISABLE. + + Raises: + NotImplementedError: The method is not implemented for now. 
+ """ + + raise NotImplementedError( + "The `predict_proba` method is not implemented for KNeighborsClassifier. Please " + "call `predict` instead." + ) diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py index ab3b2d2f4..b0507bfc5 100644 --- a/tests/deployment/test_client_server.py +++ b/tests/deployment/test_client_server.py @@ -22,7 +22,7 @@ class OnDiskNetwork: - """Simulate a network on disk.""" + """A network interaction on disk.""" def __init__(self): # Create 3 temporary folder for server, client and dev with tempfile @@ -75,8 +75,10 @@ def test_client_server_sklearn( n_bits, load_data, check_is_good_execution_for_cml_vs_circuit, + check_array_equal, + check_float_array_equal, ): - """Tests the encrypt decrypt api.""" + """Test the client-server interface for built-in models.""" if get_model_name(model_class) == "KNeighborsClassifier": # Skipping KNN for this test @@ -97,172 +99,192 @@ def test_client_server_sklearn( warnings.simplefilter("ignore", category=ConvergenceWarning) model.fit(x_train, y_train) - # Compile - extra_params = {"global_p_error": 1 / 100_000} + key_dir = default_configuration.insecure_key_cache_location # Running the simulation using a model that is not compiled should not be possible with pytest.raises(AttributeError, match=".* model is not compiled.*"): - client_server_simulation(x_train, x_test, model, default_configuration) + check_client_server_execution( + x_test, model, key_dir, check_array_equal, check_float_array_equal + ) + + # Compile the model + fhe_circuit = model.compile(x_train, configuration=default_configuration) - fhe_circuit = model.compile(x_train, default_configuration, **extra_params, show_mlir=False) + # Check that client and server files are properly generated + check_client_server_files(model) max_bit_width = fhe_circuit.graph.maximum_integer_bit_width() print(f"Max width {max_bit_width}") - # Compare the FHE predictions with the clear ones. - # Note that: - # - With a global_p_error of 1/100_000 we only allow one run. - # - The simulated predictions are not considered in this test. + # Compare the FHE predictions with the clear ones. Simulated predictions are not considered in + # this test. 
check_is_good_execution_for_cml_vs_circuit(x_test, model, simulate=False, n_allowed_runs=1) # Check client/server FHE predictions vs the FHE predictions of the dev model - client_server_simulation(x_train, x_test, model, default_configuration) + check_client_server_execution( + x_test, model, key_dir, check_array_equal, check_float_array_equal + ) def test_client_server_custom_model( - default_configuration, check_is_good_execution_for_cml_vs_circuit + default_configuration, + check_is_good_execution_for_cml_vs_circuit, + check_array_equal, + check_float_array_equal, ): - """Tests the client server custom model.""" + """Test the client-server interface for a custom model (through a quantized module).""" # Generate random data x_train, x_test = numpy.random.rand(100, 2), numpy.random.rand(1, 2) + key_dir = default_configuration.insecure_key_cache_location + # Running the simulation using a QuantizedModule that is not compiled should not be possible with pytest.raises(AttributeError, match=".* quantized module is not compiled.*"): # Instantiate an empty QuantizedModule object quantized_module = QuantizedModule() - client_server_simulation(x_train, x_test, quantized_module, default_configuration) + check_client_server_execution( + x_test, quantized_module, key_dir, check_array_equal, check_float_array_equal + ) torch_model = FCSmall(2, nn.ReLU) - n_bits = 2 - # Get the quantized module from the model + # Get the quantized module from the model and compile it quantized_numpy_module = compile_torch_model( torch_model, x_train, configuration=default_configuration, - n_bits=n_bits, - global_p_error=1 / 100_000, + n_bits=2, ) + # Check that client and server files are properly generated + check_client_server_files(quantized_numpy_module) + # Check that the FHE execution is correct. - # With a global_p_error of 1/100_000 we only allow one run. check_is_good_execution_for_cml_vs_circuit( x_test, quantized_numpy_module, simulate=False, n_allowed_runs=1 ) - client_server_simulation(x_train, x_test, quantized_numpy_module, default_configuration) - - -def client_server_simulation(x_train, x_test, model, default_configuration): - """Simulate the client server interaction.""" - # Model has been trained and compiled on the server. - # Now we use the fhe api to go into production. - - # Set up the fake network - network = OnDiskNetwork() - - # Instantiate the dev client and server FHEModel client server API - fhemodel_dev = FHEModelDev(path_dir=network.dev_dir.name, model=model) - fhemodel_dev.save() - - # Check that the processing json file is in the client.zip file - with zipfile.ZipFile(Path(network.dev_dir.name) / "client.zip") as client_zip: - with client_zip.open("serialized_processing.json", "r") as file: - assert isinstance(json.load(file), dict) - - # Send necessary files to server and client - network.dev_send_clientspecs_and_modelspecs_to_client() - network.dev_send_model_to_server() - - # Make sure the save fails now that the folder is populated - err_msg = ( - f"path_dir: {network.dev_dir.name} is not empty." - "Please delete it before saving a new model." 
- ) - with pytest.raises(Exception, match=err_msg): - fhemodel_dev.save() - - fhemodel_client = FHEModelClient( - path_dir=network.client_dir.name, - key_dir=default_configuration.insecure_key_cache_location, + check_client_server_execution( + x_test, quantized_numpy_module, key_dir, check_array_equal, check_float_array_equal ) - fhemodel_client.load() - # Grab the model and save it again - # No user is expected to load a FHEModelDev instance from a FHEModelClient's model. This is - # only made a testing for making sure the model has the expected attributes - client_model = fhemodel_client.model - client_model.fhe_circuit = model.fhe_circuit - # pylint: disable-next=protected-access - client_model._is_compiled = True - - network.cleanup() +def check_client_server_files(model): + """Test the client server interface API generates the expected file. + This test expects that the given model has been trained and compiled in development. + """ # Create a new network - network = OnDiskNetwork() + disk_network = OnDiskNetwork() # And try to save it again - fhemodel_dev_ = FHEModelDev(path_dir=network.dev_dir.name, model=client_model) - fhemodel_dev_.save() - - # Send necessary files to server and client - network.dev_send_clientspecs_and_modelspecs_to_client() - network.dev_send_model_to_server() + fhe_model_dev = FHEModelDev(path_dir=disk_network.dev_dir.name, model=model) + fhe_model_dev.save() + + # Check that re-saving the dev model fails + with pytest.raises( + Exception, + match=( + f"path_dir: {disk_network.dev_dir.name} is not empty." + "Please delete it before saving a new model." + ), + ): + fhe_model_dev.save() + + client_zip_path = Path(disk_network.dev_dir.name) / "client.zip" + server_zip_path = Path(disk_network.dev_dir.name) / "server.zip" + + # Check that client and server zip files has been generated + assert ( + client_zip_path.is_file() + ), f"Client files were not properly generated. Expected {client_zip_path} to be a file." + assert ( + server_zip_path.is_file() + ), f"Server files were not properly generated. Expected {server_zip_path} to be a file." + + processing_file_name = "serialized_processing.json" + versions_file_name = "versions.json" + + # Check that the client.zip file has the processing and versions json files + with zipfile.ZipFile(client_zip_path) as client_zip: + with client_zip.open(processing_file_name, "r") as file: + assert isinstance( + json.load(file), dict + ), f"{client_zip_path} does not contain a '{processing_file_name}' file." + + with client_zip.open(versions_file_name, "r") as file: + assert isinstance( + json.load(file), dict + ), f"{client_zip_path} does not contain a '{versions_file_name}' file." + + # Check that the server.zip file has the versions json file + with zipfile.ZipFile(server_zip_path) as server_zip: + with server_zip.open("versions.json", "r") as file: + assert isinstance( + json.load(file), dict + ), f"{server_zip_path} does not contain a '{versions_file_name}' file." 
-    # And try to load it again
-    fhemodel_client_ = FHEModelClient(
-        path_dir=network.client_dir.name,
-        key_dir=default_configuration.insecure_key_cache_location,
-    )
-    fhemodel_client_.load()
+    # Clean up
+    disk_network.cleanup()
-    # Now we can also load the server part
-    fhemodel_server = FHEModelServer(path_dir=network.server_dir.name)
-    fhemodel_server.load()
-    # Make sure the client has the exact same quantization as the server
-    qx_ref_model = model.quantize_input(x_train)
-    qx_client = fhemodel_client.model.quantize_input(x_train)
-    qx_dev = fhemodel_dev.model.quantize_input(x_train)
-    numpy.testing.assert_array_equal(qx_ref_model, qx_client)
-    numpy.testing.assert_array_equal(qx_ref_model, qx_dev)
+def check_client_server_execution(
+    x_test, model, key_dir, check_array_equal, check_float_array_equal
+):
+    """Test the client server interface API.
-    # Create evaluation keys for the server
-    fhemodel_client.generate_private_and_evaluation_keys()
+    This test expects that the given model has been trained and compiled in development. It
+    basically replicates a production-like interaction and checks that the results match the
+    development model.
+    """
+    # Create a new network
+    disk_network = OnDiskNetwork()
-    # Get the server evaluation key
-    serialized_evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()
+    # Save development files
+    fhe_model_dev = FHEModelDev(path_dir=disk_network.dev_dir.name, model=model)
+    fhe_model_dev.save()
-    # Encrypt new data
-    serialized_qx_new_encrypted = fhemodel_client.quantize_encrypt_serialize(x_test)
+    # Send necessary files to server and client
+    disk_network.dev_send_clientspecs_and_modelspecs_to_client()
+    disk_network.dev_send_model_to_server()
-    # Here data can be saved, sent over the network, etc.
+    # Load the client
+    fhe_model_client = FHEModelClient(
+        path_dir=disk_network.client_dir.name,
+        key_dir=key_dir,
+    )
+    fhe_model_client.load()
-    # Now back to the server
+    # Load the server
+    fhe_model_server = FHEModelServer(path_dir=disk_network.server_dir.name)
+    fhe_model_server.load()
-    # Run the model over encrypted data
-    serialized_result = fhemodel_server.run(serialized_qx_new_encrypted, serialized_evaluation_keys)
+    # Client side : Generate all keys and serialize the evaluation keys for the server
+    fhe_model_client.generate_private_and_evaluation_keys()
+    evaluation_keys = fhe_model_client.get_serialized_evaluation_keys()
-    # Back to the client
+    # Client side : Encrypt the data
+    q_x_encrypted_serialized = fhe_model_client.quantize_encrypt_serialize(x_test)
-    # Decrypt, de-quantize and post-processed the result
-    y_pred_on_client_quantized = fhemodel_client.deserialize_decrypt(serialized_result)
-    y_pred_on_client_dequantized = fhemodel_client.deserialize_decrypt_dequantize(serialized_result)
+    # Server side: Run the model over encrypted data
+    q_y_pred_encrypted_serialized = fhe_model_server.run(q_x_encrypted_serialized, evaluation_keys)
-    # Get the y_pred_model_server_clear
+    # Client side : Decrypt, de-quantize and post-process the result
+    q_y_pred = fhe_model_client.deserialize_decrypt(q_y_pred_encrypted_serialized)
+    y_pred = fhe_model_client.deserialize_decrypt_dequantize(q_y_pred_encrypted_serialized)
-    # Predict based on the model we are testing
-    qtest = model.quantize_input(x_test)
-    y_pred_model_dev_quantized = model.fhe_circuit.encrypt_run_decrypt(qtest)
-    y_pred_model_dev_dequantized = model.dequantize_output(y_pred_model_dev_quantized)
-    y_pred_model_dev_dequantized = model.post_processing(y_pred_model_dev_dequantized)
+    # Dev side: Predict using the model and circuit from development
+    q_x_test = model.quantize_input(x_test)
+    q_y_pred_dev = model.fhe_circuit.encrypt_run_decrypt(q_x_test)
+    y_pred_dev = model.dequantize_output(q_y_pred_dev)
+    y_pred_dev = model.post_processing(y_pred_dev)
-    # Make sure the quantized predictions are the same for the client model and the dev model
-    numpy.testing.assert_array_equal(y_pred_on_client_quantized, y_pred_model_dev_quantized)
-    numpy.testing.assert_array_equal(y_pred_on_client_dequantized, y_pred_model_dev_dequantized)
+    # Check that both the quantized and de-quantized (+ post-processed) results from the server
+    # match the ones from the dev model
+    check_float_array_equal(y_pred, y_pred_dev)
+    check_array_equal(q_y_pred, q_y_pred_dev)
     # Clean up
-    network.cleanup()
+    disk_network.cleanup()
diff --git a/tests/quantization/test_quantized_ops.py b/tests/quantization/test_quantized_ops.py
index d3e7a15ee..fd4023850 100644
--- a/tests/quantization/test_quantized_ops.py
+++ b/tests/quantization/test_quantized_ops.py
@@ -325,7 +325,7 @@ def test_all_arith_ops(
     n_dims: int,
     generator: Callable,
     check_r2_score: Callable,
-    check_float_arrays_equal: Callable,
+    check_float_array_equal: Callable,
 ):
     """Test all quantized arithmetic ops"""
@@ -407,11 +407,11 @@ def test_all_arith_ops(
     # Check that we get the same fp32 results in V+V (if supported), V+C and C+V modes
     if supports_enc_with_enc:
-        check_float_arrays_equal(raw_output_vv, raw_output_vc)
-        check_float_arrays_equal(raw_output_cv, raw_output_vc)
+        check_float_array_equal(raw_output_vv, raw_output_vc)
+        check_float_array_equal(raw_output_cv, raw_output_vc)
     # Check that V+C and C+V is symmetric (int+float mode)
-    check_float_arrays_equal(quantized_output_cv,
quantized_output_vc) + check_float_array_equal(quantized_output_cv, quantized_output_vc) # As V+C and C+V work on float values they will not be exactly equal to # the V+V case which works in quantized, we only check R2 for a high bit-width in this case @@ -454,7 +454,7 @@ def test_all_gemm_ops( n_neurons: int, generator: Callable, check_r2_score: Callable, - check_array_equality: Callable, + check_array_equal: Callable, ): """Test for gemm style ops.""" @@ -547,7 +547,7 @@ def test_all_gemm_ops( check_r2_score(expected_gemm_outputs, actual_gemm_output) # Without a bias, MatMul and Gemm should give the same output - check_array_equality(actual_mm_output, actual_gemm_output) + check_array_equal(actual_mm_output, actual_gemm_output) # Test the serialization of QuantizedGemm with (alpha, beta) = (1, 0) check_serialization( @@ -672,7 +672,7 @@ def test_identity_op(x, n_bits): ) @pytest.mark.parametrize("produces_output", [True, False]) # pylint: disable-next=too-many-locals -def test_quantized_conv(params, n_bits, produces_output, check_r2_score, check_float_arrays_equal): +def test_quantized_conv(params, n_bits, produces_output, check_r2_score, check_float_array_equal): """Test the quantized convolution operator.""" # Retrieve arguments @@ -733,7 +733,7 @@ def test_quantized_conv(params, n_bits, produces_output, check_r2_score, check_f strides, groups=group, ).numpy() - check_float_arrays_equal(torch_res, expected_result) + check_float_array_equal(torch_res, expected_result) # Compute the quantized result result = q_op(q_input).dequant() @@ -810,7 +810,7 @@ def test_quantized_conv(params, n_bits, produces_output, check_r2_score, check_f ], ) @pytest.mark.parametrize("is_signed", [True, False]) -def test_quantized_avg_pool(params, n_bits, is_signed, check_r2_score, check_float_arrays_equal): +def test_quantized_avg_pool(params, n_bits, is_signed, check_r2_score, check_float_array_equal): """Test the quantized average pool operator.""" # Retrieve arguments @@ -842,7 +842,7 @@ def test_quantized_avg_pool(params, n_bits, is_signed, check_r2_score, check_flo # Compute the torch average pool bceil_mode = bool(ceil_mode) torch_res = torch.nn.functional.avg_pool2d(tx_pad, kernel_shape, strides, 0, bceil_mode).numpy() - check_float_arrays_equal(torch_res, expected_result) + check_float_array_equal(torch_res, expected_result) # Compute the quantized result result = q_op(q_input).dequant() @@ -929,7 +929,7 @@ def test_quantized_avg_pool(params, n_bits, is_signed, check_r2_score, check_flo ], ) @pytest.mark.parametrize("is_signed", [True, False]) -def test_quantized_max_pool(params, n_bits, is_signed, check_r2_score, check_float_arrays_equal): +def test_quantized_max_pool(params, n_bits, is_signed, check_r2_score, check_float_array_equal): """Test the quantized max pool operator.""" # Retrieve arguments @@ -974,7 +974,7 @@ def test_quantized_max_pool(params, n_bits, is_signed, check_r2_score, check_flo print("Expected") print(expected_result) - check_float_arrays_equal(torch_res, expected_result) + check_float_array_equal(torch_res, expected_result) # Compute the quantized result result = q_op(q_input).dequant() diff --git a/tests/quantization/test_quantizers.py b/tests/quantization/test_quantizers.py index fcbbb6bf9..62d8b675a 100644 --- a/tests/quantization/test_quantizers.py +++ b/tests/quantization/test_quantizers.py @@ -22,7 +22,7 @@ [pytest.param(True, True), pytest.param(True, False), pytest.param(False, False)], ) @pytest.mark.parametrize("values", [pytest.param(numpy.random.randn(2000))]) -def 
test_quant_dequant_update(values, n_bits, is_signed, is_symmetric, check_array_equality): +def test_quant_dequant_update(values, n_bits, is_signed, is_symmetric, check_array_equal): """Test the quant and de-quant function.""" quant_array = QuantizedArray(n_bits, values, is_signed=is_signed, is_symmetric=is_symmetric) @@ -72,7 +72,7 @@ def test_quant_dequant_update(values, n_bits, is_signed, is_symmetric, check_arr assert not numpy.array_equal(new_values, new_values_updated) # Check that the __call__ returns also the qvalues. - check_array_equality(quant_array(), new_qvalues) + check_array_equal(quant_array(), new_qvalues) @pytest.mark.parametrize( diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index 94e688622..3f72bfea6 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -36,7 +36,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau model.set_params(**model_params) if get_model_name(model) == "KNeighborsClassifier": - # KNN works only for small quantization bits + # KNN can only be compiled with small quantization bit numbers for now # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 model.n_bits = 2 diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index b95e616be..ab6b6bbff 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -19,12 +19,6 @@ - pipeline - calls to predict_proba - calls to decision_function - -Are currently missing - - check of predict_proba - - check of decision_function - -More information in https://github.com/zama-ai/concrete-ml-internal/issues/2682 """ import copy @@ -81,20 +75,11 @@ # sufficiently number of bits for precision N_BITS_THRESHOLD_FOR_SKLEARN_CORRECTNESS_TESTS = 26 -# We check correctness with check_is_good_execution_for_cml_vs_circuit or predict in -# fhe="disable" only if n_bits >= N_BITS_THRESHOLD_FOR_PREDICT_CORRECTNESS_TESTS. This is -# because we need sufficiently number of bits for precision -N_BITS_THRESHOLD_FOR_PREDICT_CORRECTNESS_TESTS = 6 - # We never do checks with check_is_good_execution_for_cml_vs_circuit if # n_bits >= N_BITS_THRESHOLD_TO_FORCE_EXECUTION_NOT_IN_FHE. This is because computations are very # slow N_BITS_THRESHOLD_TO_FORCE_EXECUTION_NOT_IN_FHE = 17 -assert ( - N_BITS_THRESHOLD_FOR_PREDICT_CORRECTNESS_TESTS <= N_BITS_THRESHOLD_TO_FORCE_EXECUTION_NOT_IN_FHE -) - # If n_bits >= N_BITS_THRESHOLD_FOR_SKLEARN_EQUIVALENCE_TESTS, we check that the two models # returned by fit_benchmark (the Concrete ML model and the scikit-learn model) are equivalent N_BITS_THRESHOLD_FOR_SKLEARN_EQUIVALENCE_TESTS = 16 @@ -105,9 +90,9 @@ N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS = 11 # n_bits that we test, either in regular builds or just in weekly builds. 6 is to do tests in -# FHE which are not too long (relation with N_BITS_THRESHOLD_FOR_PREDICT_CORRECTNESS_TESTS and -# N_BITS_THRESHOLD_TO_FORCE_EXECUTION_NOT_IN_FHE). 26 is in relation with -# N_BITS_THRESHOLD_FOR_SKLEARN_CORRECTNESS_TESTS, to do tests with check_correctness_with_sklearn +# FHE which are not too long (relation with N_BITS_THRESHOLD_TO_FORCE_EXECUTION_NOT_IN_FHE). 
+# 26 is in relation with N_BITS_THRESHOLD_FOR_SKLEARN_CORRECTNESS_TESTS, to do tests with +# check_correctness_with_sklearn N_BITS_REGULAR_BUILDS = [6, 26] N_BITS_WEEKLY_ONLY_BUILDS = [2, 8, 16] @@ -125,7 +110,7 @@ def get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option): model_class, _get_sklearn_linear_models() + _get_sklearn_neighbors_models() ): if n_bits in N_BITS_WEEKLY_ONLY_BUILDS and not is_weekly_option: - pytest.skip("Skipping some tests in non-weekly builds, except for linear models") + pytest.skip("Skipping some tests in non-weekly builds") # Get the data-set. The data generation is seeded in load_data. x, y = load_data(model_class, **parameters) @@ -154,6 +139,19 @@ def preamble(model_class, parameters, n_bits, load_data, is_weekly_option): return model, x +def get_n_bits_non_correctness(model_class): + """Get the number of bits to use for non correctness related tests.""" + + if get_model_name(model_class) == "KNeighborsClassifier": + # KNN can only be compiled with small quantization bit numbers for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 + n_bits = 2 + else: + n_bits = min(N_BITS_REGULAR_BUILDS) + + return n_bits + + def check_correctness_with_sklearn( model_class, x, @@ -176,19 +174,11 @@ def check_correctness_with_sklearn( warnings.simplefilter("ignore", category=ConvergenceWarning) model, sklearn_model = model.fit_benchmark(x, y) - y_pred = model.predict(x) - - y_pred_sklearn = sklearn_model.predict(x) - y_pred_cml = model.predict(x, fhe=fhe) - - # Check that the output shapes are correct - assert y_pred.shape == y_pred_cml.shape, "Outputs have different shapes" - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2604 # Generic tests look to show issues in accuracy / R2 score, even for high n_bits - # For regressions - acceptance_r2score_dic = { + # For R2 score measures + acceptance_r2scores = { "TweedieRegressor": 0.9, "GammaRegressor": 0.9, "LinearRegression": 0.9, @@ -198,33 +188,79 @@ def check_correctness_with_sklearn( "Ridge": 0.9, "ElasticNet": 0.9, "XGBRegressor": -0.2, - "NeuralNetRegressor": -10, } - # For classifiers - threshold_accuracy_dic = { + # For accuracy measures + threshold_accuracies = { "LogisticRegression": 0.9, "LinearSVC": 0.9, "XGBClassifier": 0.7, "RandomForestClassifier": 0.8, - "NeuralNetClassifier": 0.7, "KNeighborsClassifier": 0.9, } model_name = get_model_name(model_class) - acceptance_r2score = acceptance_r2score_dic.get(model_name, 0.9) - threshold_accuracy = threshold_accuracy_dic.get(model_name, 0.9) + acceptance_r2score = acceptance_r2scores.get(model_name, 0.9) + threshold_accuracy = threshold_accuracies.get(model_name, 0.9) + + # If the model is a classifier + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 + if ( + is_classifier_or_partial_classifier(model) + and get_model_name(model_class) != "KNeighborsClassifier" + ): + if is_model_class_in_a_list(model, _get_sklearn_linear_models()): + + # Check outputs from the 'decision_function' method (for linear classifiers) + y_scores_sklearn = sklearn_model.decision_function(x) + y_scores_fhe = model.decision_function(x, fhe=fhe) + + # Currently, for single target data sets, Concrete models' outputs have shape (n, 1) + # while scikit-learn models' outputs have shape (n, ) + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4029 + # assert y_scores_sklearn.shape == y_scores_fhe.shape, ( + # "Method 
'decision_function' outputs different shapes between scikit-learn and "
+            #     f"Concrete ML in FHE (fhe={fhe})"
+            # )
+            check_r2_score(y_scores_sklearn, y_scores_fhe, acceptance_score=acceptance_r2score)
+
+            # LinearSVC models from scikit-learn do not provide a 'predict_proba' method
+            if get_model_name(model_class) != "LinearSVC":
+
+                # Check outputs from the 'predict_proba' method (for all classifiers,
+                # except KNeighborsClassifier)
+                y_proba_sklearn = sklearn_model.predict_proba(x)
+                y_proba_fhe = model.predict_proba(x, fhe=fhe)
+
+                assert y_proba_sklearn.shape == y_proba_fhe.shape, (
+                    "Method 'predict_proba' outputs different shapes between scikit-learn and "
+                    f"Concrete ML in FHE (fhe={fhe})"
+                )
+                check_r2_score(y_proba_sklearn, y_proba_fhe, acceptance_score=acceptance_r2score)
+
+    # Check outputs from the 'predict' method (for all models)
+    y_pred_sklearn = sklearn_model.predict(x)
+    y_pred_fhe = model.predict(x, fhe=fhe)
+
+    # Currently, for single target data sets, Concrete models' outputs have shape (n, 1) while
+    # scikit-learn models' outputs have shape (n, )
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4029
+    # assert y_pred_sklearn.shape == y_pred_fhe.shape, (
+    #     "Method 'predict' outputs different shapes between scikit-learn and "
+    #     f"Concrete ML in FHE (fhe={fhe})"
+    # )

     # If the model is a classifier, check that accuracies are similar
     if is_classifier_or_partial_classifier(model):
-        check_accuracy(y_pred_sklearn, y_pred_cml, threshold=threshold_accuracy)
+        check_accuracy(y_pred_sklearn, y_pred_fhe, threshold=threshold_accuracy)

     # If the model is a regressor, check that R2 scores are similar
+    elif is_regressor_or_partial_regressor(model):
+        check_r2_score(y_pred_sklearn, y_pred_fhe, acceptance_score=acceptance_r2score)
+
     else:
-        assert is_regressor_or_partial_regressor(
-            model
-        ), "not a regressor, not a classifier, really?"
-        check_r2_score(y_pred_sklearn, y_pred_cml, acceptance_score=acceptance_r2score)
+        raise AssertionError(f"Model {model_name} is neither a classifier nor a regressor.")


 def check_double_fit(model_class, n_bits, x_1, x_2, y_1, y_2):
@@ -462,10 +498,8 @@ def check_offset(model_class, n_bits, x, y):
         model.fit(x, y)


-def check_subfunctions(fitted_model, model_class, x):
-    """Check subfunctions."""
-
-    fitted_model.predict(x[:1])
+def check_inference_methods(model, model_class, x, check_float_array_equal):
+    """Check that all inference methods provided are coherent between clear and FHE executions."""

     # skorch provides a predict_proba method for neural network regressors while Scikit-Learn does
     # not. We decided to follow Scikit-Learn's API as we build most of our tools on this library.
@@ -474,7 +508,7 @@ def check_subfunctions(fitted_model, model_class, x):
     # confusion, a NotImplementedError is raised. This issue could be fixed by making these classes
     # not inherit from skorch.
     # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3373
-    if get_model_name(fitted_model) == "NeuralNetRegressor":
+    if get_model_name(model) == "NeuralNetRegressor":
         with pytest.raises(
             NotImplementedError,
             match=(
                 "The `predict_proba` method is not implemented for the regressor models. "
                 "Please call `predict` instead."
), ): - fitted_model.predict_proba(x) + model.predict_proba(x) - if get_model_name(fitted_model) == "KNeighborsClassifier": - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 - pytest.skip("Skipping subfunctions test for KNN, doesn't work for now") - - if is_classifier_or_partial_classifier(model_class): + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 + elif get_model_name(model) == "KNeighborsClassifier": + with pytest.raises( + NotImplementedError, + match=( + "The `predict_proba` method is not implemented for KNeighborsClassifier. " + "Please call `predict` instead." + ), + ): + model.predict_proba(x) - fitted_model.predict_proba(x) + # Only check 'predict_proba' and not 'predict' as some issues were found with the argmax not + # being consistent because of precision errors with epsilon magnitude. This argmax should be + # done in the clear the same way for both anyway. Ultimately, we would want to only compare the + # circuit's quantized outputs against the ones computed in the clear but built-in models do not + # currently provide the necessary API for that + elif is_classifier_or_partial_classifier(model_class): - # Only linear classifiers have a decision function method if is_model_class_in_a_list(model_class, _get_sklearn_linear_models()): - fitted_model.decision_function(x) + # Check outputs from the 'decision_function' method (for all linear classifiers) + y_scores_clear = model.decision_function(x) + y_scores_simulated = model.decision_function(x, fhe="simulate") + + assert y_scores_clear.shape == y_scores_simulated.shape, ( + "Method 'decision_function' from Concrete ML outputs different shapes when executed" + "in the clear and with simulation." + ) + check_float_array_equal(y_scores_clear, y_scores_simulated) -def check_subfunctions_in_fhe(model, fhe_circuit, x): - """Check subfunctions in FHE: calls and correctness.""" + else: + # Check outputs from the 'predict_proba' method (for all non-linear classifiers, + # except KNeighborsClassifier) + y_proba_clear = model.predict_proba(x) + y_proba_simulated = model.predict_proba(x, fhe="simulate") + + assert y_proba_clear.shape == y_proba_simulated.shape, ( + "Method 'predict_proba' from Concrete ML outputs different shapes when executed" + "in the clear and with simulation." + ) + check_float_array_equal(y_proba_clear, y_proba_simulated) + + else: + # Check outputs from the 'predict' method (for all regressors and KNeighborsClassifier) + y_pred_clear = model.predict(x) + y_pred_simulated = model.predict(x, fhe="simulate") + + assert y_pred_clear.shape == y_pred_simulated.shape, ( + "Method 'predict' from Concrete ML outputs different shapes when executed in the clear " + "and with simulation." 
+ ) + check_float_array_equal(y_pred_clear, y_pred_simulated) + + +def check_separated_inference(model, fhe_circuit, x, check_float_array_equal): + """Run inference methods in separated steps and check their correctness.""" # Generate the keys fhe_circuit.keygen() - y_pred_fhe = [] + # Quantize an input (float) + q_x = model.quantize_input(x) - for _ in range(N_ALLOWED_FHE_RUN): - for f_input in x: - # Quantize an input (float) - q_input = model.quantize_input(f_input.reshape(1, -1)) + # Encrypt the input + q_x_encrypted = fhe_circuit.encrypt(q_x) - # Encrypt the input - q_input_enc = fhe_circuit.encrypt(q_input) + # Execute the linear product in FHE + q_y_pred_encrypted = fhe_circuit.run(q_x_encrypted) - # Execute the linear product in FHE - q_y_enc = fhe_circuit.run(q_input_enc) + # Decrypt the result (integer) + q_y_pred = fhe_circuit.decrypt(q_y_pred_encrypted) - # Decrypt the result (integer) - q_y = fhe_circuit.decrypt(q_y_enc) + # De-quantize the result + y_pred = model.dequantize_output(q_y_pred) - # De-quantize the result - y = model.dequantize_output(q_y) + if is_model_class_in_a_list( + model, _get_sklearn_linear_models(classifier=True, regressor=False) + ): + y_scores = model.decision_function(x, fhe="simulate") + + # For linear classifiers, the circuit's de-quantized outputs should be the same as the ones + # from the `decision_function` built-in method + check_float_array_equal(y_pred, y_scores) + + # Apply post-processing step (in the clear) + # This includes (non-exhaustive): + # - sigmoid or softmax function for classifiers + # - final sum for tree-based models + # - link function for GLMs + y_pred = model.post_processing(y_pred) + + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 + if ( + is_classifier_or_partial_classifier(model) + and get_model_name(model) != "KNeighborsClassifier" + ): + y_proba = model.predict_proba(x, fhe="simulate") + else: + y_proba = model.predict(x, fhe="simulate") - # Apply either the sigmoid if it is a binary classification task, - # which is the case in this example, or a softmax function in order - # to get the probabilities (in the clear) - y_proba = model.post_processing(y) + # The circuit's de-quantized outputs followed by `post_processing` should be the same as the + # ones from the `predict_proba` built-in method for classifiers, and from the `predict` + # built-in method for regressors + check_float_array_equal(y_pred, y_proba) - # Apply the argmax to get the class predictions (in the clear) - if is_classifier_or_partial_classifier(model): - y_class = numpy.argmax(y_proba, axis=-1) - y_pred_fhe += list(y_class) - else: - y_pred_fhe += list(y_proba) + # KNeighborsClassifier does not apply a final argmax for computing prediction + if ( + is_classifier_or_partial_classifier(model) + and get_model_name(model) != "KNeighborsClassifier" + ): + y_pred = numpy.argmax(y_pred, axis=-1) - # Compare with the FHE simulation mode - y_pred_expected_in_simulation = model.predict(x, fhe="simulate") - if numpy.isclose(numpy.array(y_pred_fhe), y_pred_expected_in_simulation).all(): - break + y_pred_class = model.predict(x, fhe="simulate") - assert numpy.isclose(numpy.array(y_pred_fhe), y_pred_expected_in_simulation).all(), ( - "computations are not the same between individual functions (in FHE) " - "and predict function (in FHE simulation mode)" - ) + # For classifiers (other than KNeighborsClassifier), the circuit's de-quantized outputs + # followed by 
`post_processing` as well as an argmax should be the same as the ones from + # the `predict` built-in method + check_float_array_equal(y_pred, y_pred_class) def check_input_support(model_class, n_bits, default_configuration, x, y, input_type): @@ -582,24 +677,26 @@ def cast_input(x, y, input_type): model.predict(x) # Similarly, we test `predict_proba` for classifiers - if is_classifier_or_partial_classifier(model): - if get_model_name(model_class) == "KNeighborsClassifier": - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 - pytest.skip("Skipping predict_proba for KNN, doesn't work for now") + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 + if ( + is_classifier_or_partial_classifier(model) + and get_model_name(model_class) != "KNeighborsClassifier" + ): model.predict_proba(x) - # If n_bits is above N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS, do not compile the model - # as there won't be any crypto parameters - if n_bits >= N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS: - return - model.compile(x, default_configuration) # Make sure `predict` is working when FHE is disabled model.predict(x, fhe="simulate") # Similarly, we test `predict_proba` for classifiers - if is_classifier_or_partial_classifier(model): + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 + if ( + is_classifier_or_partial_classifier(model) + and get_model_name(model_class) != "KNeighborsClassifier" + ): model.predict_proba(x, fhe="simulate") @@ -678,11 +775,12 @@ def check_grid_search(model_class, x, y, scoring): warnings.simplefilter("ignore", category=ConvergenceWarning) warnings.simplefilter("ignore", category=UndefinedMetricWarning) + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 if get_model_name(model_class) == "KNeighborsClassifier" and scoring in [ "roc_auc", "average_precision", ]: - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 pytest.skip("Skipping predict_proba for KNN, doesn't work for now") _ = GridSearchCV( @@ -690,57 +788,6 @@ def check_grid_search(model_class, x, y, scoring): ).fit(x, y) -def check_sklearn_equivalence(model_class, n_bits, x, y, check_accuracy, check_r2_score): - """Check equivalence between the two models returned by fit_benchmark: the Concrete ML model and - the scikit-learn model.""" - model = instantiate_model_generic(model_class, n_bits=n_bits) - - # Sometimes, we miss convergence, which is not a problem for our test - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=ConvergenceWarning) - - # Random state should be taken from the method parameter - model, sklearn_model = model.fit_benchmark(x, y) - - # If the model is a classifier - if is_classifier_or_partial_classifier(model): - - # Check that accuracies are similar - y_pred_cml = model.predict(x) - y_pred_sklearn = sklearn_model.predict(x) - check_accuracy(y_pred_sklearn, y_pred_cml) - - # If the model is a LinearSVC model, compute its predicted confidence score - # This is done separately as scikit-learn doesn't provide a predict_proba method for - # LinearSVC models - if get_model_name(model_class) == "LinearSVC": - y_pred_cml = model.decision_function(x) - y_pred_sklearn = sklearn_model.decision_function(x) - - # Else, compute the model's predicted probabilities - # predict_proba not 
implemented for KNeighborsClassifier for now - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 - elif get_model_name(model_class) != "KNeighborsClassifier": - y_pred_cml = model.predict_proba(x) - y_pred_sklearn = sklearn_model.predict_proba(x) - - # If the model is a regressor, compute its predictions - else: - y_pred_cml = model.predict(x) - y_pred_sklearn = sklearn_model.predict(x) - - # Check that predictions, probabilities or confidence scores are similar using the R2 score - check_r2_score(y_pred_sklearn, y_pred_cml) - - -def check_properties_of_circuit(model_class, fhe_circuit, check_circuit_has_no_tlu): - """Check some properties of circuit, depending on the model class""" - - if is_model_class_in_a_list(model_class, _get_sklearn_linear_models()): - # Check that no TLUs are found within the MLIR - check_circuit_has_no_tlu(fhe_circuit) - - def get_hyper_param_combinations(model_class): """Return the hyper_param_combinations, depending on the model class""" hyper_param_combinations: Dict[str, List[Any]] @@ -799,7 +846,6 @@ def check_hyper_parameters( n_bits, x, y, - test_correctness_in_clear, check_r2_score, check_accuracy, ): @@ -815,11 +861,6 @@ def check_hyper_parameters( model = instantiate_model_generic(model_class, n_bits=n_bits, **hyper_parameters) - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2450 - # does not work for now, issue in HummingBird - if get_model_name(model_class) == "RandomForestClassifier" and n_bits == 2: - continue - # Also fit with these hyper parameters to check it works fine with warnings.catch_warnings(): # Sometimes, we miss convergence, which is not a problem for our test @@ -828,18 +869,17 @@ def check_hyper_parameters( # Here, we really need to fit, to take into account hyper parameters model.fit(x, y) - # Check correctness with sklearn (if we have sufficiently bits of precision) - if test_correctness_in_clear and n_bits >= N_BITS_THRESHOLD_FOR_SKLEARN_CORRECTNESS_TESTS: - check_correctness_with_sklearn( - model_class, - x, - y, - n_bits, - check_r2_score, - check_accuracy, - fhe="disable", - hyper_parameters=hyper_parameters, - ) + # Check correctness with sklearn + check_correctness_with_sklearn( + model_class, + x, + y, + n_bits, + check_r2_score, + check_accuracy, + fhe="disable", + hyper_parameters=hyper_parameters, + ) def check_fitted_compiled_error_raises(model_class, n_bits, x, y): @@ -869,9 +909,13 @@ def check_fitted_compiled_error_raises(model_class, n_bits, x, y): with pytest.raises(AttributeError, match=".* model is not fitted.*"): model.predict(x) - if is_classifier_or_partial_classifier(model_class): - if get_model_name(model) == "KNeighborsClassifier": - pytest.skip("predict_proba not implement for KNN") + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 + if ( + is_classifier_or_partial_classifier(model_class) + and get_model_name(model) != "KNeighborsClassifier" + ): + # Predicting probabilities using an untrained linear or tree-based classifier should not # be possible if not is_model_class_in_a_list(model_class, _get_sklearn_neural_net_models()): @@ -1086,49 +1130,9 @@ def check_load_fitted_sklearn_linear_models(model_class, n_bits, x, y): + get_sklearn_tree_models_and_datasets() + get_sklearn_neighbors_models_and_datasets(), ) -@pytest.mark.parametrize( - "n_bits", - [ - n - for n in N_BITS_WEEKLY_ONLY_BUILDS + N_BITS_REGULAR_BUILDS - if n >= N_BITS_THRESHOLD_FOR_SKLEARN_EQUIVALENCE_TESTS - ], -) 
-def test_quantization( - model_class, - parameters, - n_bits, - load_data, - check_r2_score, - check_accuracy, - is_weekly_option, - verbose=True, -): - """Test quantization.""" - x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) - - if verbose: - print("Run check_sklearn_equivalence") - - check_sklearn_equivalence(model_class, n_bits, x, y, check_accuracy, check_r2_score) - - -# This test is a known flaky -# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3661 -@pytest.mark.flaky -@pytest.mark.parametrize("model_class, parameters", MODELS_AND_DATASETS) -@pytest.mark.parametrize( - "n_bits", - [ - n - for n in N_BITS_WEEKLY_ONLY_BUILDS + N_BITS_REGULAR_BUILDS - if n >= N_BITS_THRESHOLD_FOR_SKLEARN_CORRECTNESS_TESTS - ], -) def test_correctness_with_sklearn( model_class, parameters, - n_bits, load_data, check_r2_score, check_accuracy, @@ -1136,9 +1140,11 @@ def test_correctness_with_sklearn( verbose=True, ): """Test that Concrete ML and scikit-learn models are 'equivalent'.""" + + n_bits = N_BITS_THRESHOLD_FOR_SKLEARN_CORRECTNESS_TESTS + x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) - # Check correctness with sklearn (if we have sufficiently bits of precision) if verbose: print("Run check_correctness_with_sklearn with fhe='disable'") @@ -1153,15 +1159,16 @@ def test_correctness_with_sklearn( ) -@pytest.mark.parametrize("model_class, parameters", MODELS_AND_DATASETS) +# Neural network hyper-parameters are not tested @pytest.mark.parametrize( - "n_bits", - N_BITS_WEEKLY_ONLY_BUILDS + N_BITS_REGULAR_BUILDS, + "model_class, parameters", + get_sklearn_linear_models_and_datasets() + + get_sklearn_tree_models_and_datasets() + + get_sklearn_neighbors_models_and_datasets(), ) def test_hyper_parameters( model_class, parameters, - n_bits, load_data, check_r2_score, check_accuracy, @@ -1169,19 +1176,19 @@ def test_hyper_parameters( verbose=True, ): """Testing hyper parameters.""" + + n_bits = N_BITS_THRESHOLD_FOR_SKLEARN_CORRECTNESS_TESTS + x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) if verbose: print("Run check_hyper_parameters") - test_correctness_in_clear = True - check_hyper_parameters( model_class, n_bits, x, y, - test_correctness_in_clear, check_r2_score, check_accuracy, ) @@ -1264,9 +1271,7 @@ def test_serialization( verbose=True, ): """Test Serialization.""" - # This test only checks the serialization's functionalities, so there is no need to test it - # over several n_bits - n_bits = min(N_BITS_REGULAR_BUILDS) + n_bits = get_n_bits_non_correctness(model_class) model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) @@ -1338,15 +1343,10 @@ def test_offset( @pytest.mark.parametrize("model_class, parameters", UNIQUE_MODELS_AND_DATASETS) -@pytest.mark.parametrize( - "n_bits", - N_BITS_WEEKLY_ONLY_BUILDS + N_BITS_REGULAR_BUILDS, -) @pytest.mark.parametrize("input_type", ["numpy", "torch", "pandas", "list"]) def test_input_support( model_class, parameters, - n_bits, load_data, input_type, default_configuration, @@ -1354,6 +1354,8 @@ def test_input_support( verbose=True, ): """Test all models with Pandas, List or Torch inputs.""" + n_bits = get_n_bits_non_correctness(model_class) + x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) if verbose: @@ -1362,26 +1364,27 @@ def test_input_support( check_input_support(model_class, n_bits, default_configuration, x, y, input_type) -@pytest.mark.parametrize("model_class, parameters", 
UNIQUE_MODELS_AND_DATASETS) -@pytest.mark.parametrize( - "n_bits", - N_BITS_WEEKLY_ONLY_BUILDS + N_BITS_REGULAR_BUILDS, -) -def test_subfunctions( +@pytest.mark.parametrize("model_class, parameters", MODELS_AND_DATASETS) +def test_inference_methods( model_class, parameters, - n_bits, load_data, is_weekly_option, + check_float_array_equal, + default_configuration, verbose=True, ): - """Test subfunctions.""" + """Test inference methods.""" + n_bits = get_n_bits_non_correctness(model_class) + model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) + model.compile(x, default_configuration) + if verbose: - print("Run check_subfunctions") + print("Run check_inference_methods") - check_subfunctions(model, model_class, x) + check_inference_methods(model, model_class, x, check_float_array_equal) # Pipeline test sometimes fails with RandomForest models. This bug may come from Hummingbird @@ -1419,12 +1422,17 @@ def test_pipeline( pytest.param(True, id="simulate"), ], ) +# N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS bits is currently the +# limit to find crypto parameters for linear models +# make sure we only compile below that bit-width. +# Additionally, prevent computations in FHE with too many bits @pytest.mark.parametrize( "n_bits", [ - n - for n in N_BITS_WEEKLY_ONLY_BUILDS + N_BITS_REGULAR_BUILDS - if n >= N_BITS_THRESHOLD_FOR_PREDICT_CORRECTNESS_TESTS + n_bits + for n_bits in N_BITS_WEEKLY_ONLY_BUILDS + N_BITS_REGULAR_BUILDS + if n_bits + < min(N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS, N_BITS_THRESHOLD_TO_FORCE_EXECUTION_NOT_IN_FHE) ], ) # pylint: disable=too-many-branches @@ -1436,98 +1444,96 @@ def test_predict_correctness( load_data, default_configuration, check_is_good_execution_for_cml_vs_circuit, - check_circuit_has_no_tlu, is_weekly_option, - test_subfunctions_in_fhe=True, verbose=True, ): - """Test correct execution, if there is sufficiently n_bits.""" + """Test prediction correctness between clear quantized and FHE simulation or execution.""" + + # KNN can only be compiled with small quantization bit numbers for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 + if n_bits > 5 and get_model_name(model_class) == "KNeighborsClassifier": + pytest.skip("KNeighborsClassifier models can only run with 5 bits at most.") model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) - # How many samples for tests in FHE (i.e., predict with fhe = "execute" or "simulate") + # Run the test with more samples during weekly CIs or when using FHE simulation if is_weekly_option or simulate: - number_of_tests_in_fhe = 5 + fhe_samples = 5 else: - number_of_tests_in_fhe = 1 + fhe_samples = 1 - # How many samples for tests in quantized module (i.e., predict with fhe = "disable") - if is_weekly_option: - number_of_tests_in_non_fhe = 50 - else: - number_of_tests_in_non_fhe = 10 - - # Do some inferences in clear if verbose: - print( - "Inference in the clear (with " - f"number_of_tests_in_non_fhe = {number_of_tests_in_non_fhe})" - ) - # KNN works only for smaller quantization bits - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 - if n_bits > 5 and get_model_name(model) == "KNeighborsClassifier": - pytest.skip("Use less than 5 bits with KNN.") - - y_pred = model.predict(x[:number_of_tests_in_non_fhe]) - - list_of_possibilities = [False, True] - - # Prevent computations in FHE if too many bits - if n_bits >= N_BITS_THRESHOLD_TO_FORCE_EXECUTION_NOT_IN_FHE: - list_of_possibilities = [False] + print("Compile the model") - for 
test_with_execute_in_fhe in list_of_possibilities: - - # N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS bits is currently the - # limit to find crypto parameters for linear models - # make sure we only compile below that bit-width. - if test_with_execute_in_fhe and not n_bits >= N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS: + model.compile(x, default_configuration) - if verbose: - print("Compile the model") + if verbose: + print(f"Check prediction correctness for {fhe_samples} samples.") - with warnings.catch_warnings(): - fhe_circuit = model.compile( - x, - default_configuration, - show_mlir=verbose and (n_bits <= 8), - ) + # Check prediction correctness between quantized clear and FHE simulation or execution + check_is_good_execution_for_cml_vs_circuit(x[:fhe_samples], model=model, simulate=simulate) - check_properties_of_circuit(model_class, fhe_circuit, check_circuit_has_no_tlu) - if verbose: - print("Compilation done") +@pytest.mark.parametrize("model_class, parameters", MODELS_AND_DATASETS) +# Test separated inference steps with new simulation once Concrete Python provides the feature +# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4025 +@pytest.mark.parametrize( + "simulate", + [ + pytest.param(False, id="fhe"), + ], +) +# N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS bits is currently the +# limit to find crypto parameters for linear models +# make sure we only compile below that bit-width. +# Additionally, prevent computations in FHE with too many bits +@pytest.mark.parametrize( + "n_bits", + [ + n_bits + for n_bits in N_BITS_WEEKLY_ONLY_BUILDS + N_BITS_REGULAR_BUILDS + if n_bits + < min(N_BITS_LINEAR_MODEL_CRYPTO_PARAMETERS, N_BITS_THRESHOLD_TO_FORCE_EXECUTION_NOT_IN_FHE) + ], +) +# pylint: disable=too-many-branches +def test_separated_inference( + model_class, + parameters, + simulate, + n_bits, + load_data, + default_configuration, + is_weekly_option, + check_float_array_equal, + verbose=True, +): + """Test prediction correctness between clear quantized and FHE simulation or execution.""" - if verbose: - print( - "Run check_is_good_execution_for_cml_vs_circuit " - + f"(with number_of_tests_in_fhe = {number_of_tests_in_fhe})" - ) + # KNN can only be compiled with small quantization bit numbers for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 + if n_bits > 5 and get_model_name(model_class) == "KNeighborsClassifier": + pytest.skip("KNeighborsClassifier models can only run with 5 bits at most.") - # Check the `predict` method - check_is_good_execution_for_cml_vs_circuit( - x[:number_of_tests_in_fhe], model=model, simulate=simulate - ) + model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) - if test_subfunctions_in_fhe and (not simulate): - if verbose: - print("Testing subfunctions in FHE") + # Run the test with more samples during weekly CIs or when using FHE simulation + if is_weekly_option or simulate: + fhe_samples = 5 + else: + fhe_samples = 1 - check_subfunctions_in_fhe(model, fhe_circuit, x[:number_of_tests_in_fhe]) + if verbose: + print("Compile the model") - else: - if verbose: - print( - "Run predict in fhe='disable' " - f"(with number_of_tests_in_non_fhe = {number_of_tests_in_non_fhe})" - ) + fhe_circuit = model.compile(x, default_configuration) - # At least, check in clear mode - y_pred_fhe = model.predict(x[:number_of_tests_in_non_fhe], fhe="disable") + if verbose: + print("Run check_separated_inference") - # Check that the output shape is correct - assert y_pred_fhe.shape == y_pred.shape - assert 
numpy.array_equal(y_pred_fhe, y_pred) + # Check that separated inference steps (encrypt, run, decrypt, post_processing, ...) are + # equivalent to built-in methods (predict, predict_proba, ...) + check_separated_inference(model, fhe_circuit, x[:fhe_samples], check_float_array_equal) @pytest.mark.parametrize("model_class, parameters", UNIQUE_MODELS_AND_DATASETS) @@ -1539,7 +1545,7 @@ def test_fitted_compiled_error_raises( verbose=True, ): """Test Fit and Compile error raises.""" - n_bits = min(N_BITS_REGULAR_BUILDS) + n_bits = get_n_bits_non_correctness(model_class) x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) @@ -1550,6 +1556,8 @@ def test_fitted_compiled_error_raises( @pytest.mark.parametrize("model_class, parameters", MODELS_AND_DATASETS) +# Enable support for global_p_error testing if possible +# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3297 @pytest.mark.parametrize( "error_param", [{"p_error": 0.9999999999990905}], # 1 - 2**-40 @@ -1564,21 +1572,10 @@ def test_p_error_global_p_error_simulation( ): """Test p_error and global_p_error simulation. - Description: - A model is compiled with a large p_error. The test then checks the predictions for - simulated and fully homomorphic encryption (FHE) inference, and asserts - that the predictions for both are different from the expected predictions. + The test checks that models compiled with a large p_error value predicts very different results + with simulation or in FHE compared to the expected clear quantized ones. """ - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3297 - if "global_p_error" in error_param: - pytest.skip("global_p_error behave very differently depending on the type of model.") - - if get_model_name(model_class) == "KNeighborsClassifier": - # KNN works only for smaller quantization bits - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 - n_bits = min([2] + N_BITS_REGULAR_BUILDS) - else: - n_bits = min(N_BITS_REGULAR_BUILDS) + n_bits = get_n_bits_non_correctness(model_class) # Get data-set, initialize and fit the model model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) @@ -1591,14 +1588,16 @@ def test_p_error_global_p_error_simulation( def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_RUN): """Detect divergence between simulated/FHE execution and clear run.""" + + # KNeighborsClassifier does not provide a predict_proba method for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 predict_function = ( model.predict_proba if is_classifier_or_partial_classifier(model) - # `predict_prob` not implemented yet for KNeighborsClassifier - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962 and get_model_name(model) != "KNeighborsClassifier" else model.predict ) + y_expected = predict_function(x, fhe="disable") for i in range(max_iterations): y_pred = predict_function(x[i : i + 1], fhe=fhe).ravel() @@ -1716,3 +1715,32 @@ def test_load_fitted_sklearn_linear_models( print("Run check_load_pre_trained_sklearn_models") check_load_fitted_sklearn_linear_models(model_class, n_bits, x, y) + + +# Only circuits from linear models do not have any TLUs +@pytest.mark.parametrize("model_class, parameters", get_sklearn_linear_models_and_datasets()) +def test_linear_models_have_no_tlu( + model_class, + parameters, + load_data, + is_weekly_option, + check_circuit_has_no_tlu, + default_configuration, + verbose=True, +): + """Test that circuits from linear 
models have no TLUs.""" + + n_bits = min(N_BITS_REGULAR_BUILDS) + + model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) + + if verbose: + print("Compile the model") + + fhe_circuit = model.compile(x, default_configuration) + + if verbose: + print("Run check_circuit_has_no_tlu") + + # Check that no TLUs are found within the MLIR + check_circuit_has_no_tlu(fhe_circuit)
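
Note (not part of the patch): for readers unfamiliar with the deployment API that the reworked check_client_server_execution helper exercises, here is a minimal sketch of the same client/server round trip outside pytest. It only uses calls that appear in the diff above; the concrete.ml import paths, the LogisticRegression model, the random data and the "deployment"/"keys" directory names are illustrative assumptions, not something this patch introduces.

import numpy
from concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer
from concrete.ml.sklearn import LogisticRegression

# Dev side: train and compile a built-in model (illustrative data)
x_train = numpy.random.rand(100, 2)
y_train = (x_train[:, 0] > 0.5).astype(int)
x_test = numpy.random.rand(5, 2)
model = LogisticRegression(n_bits=8).fit(x_train, y_train)
model.compile(x_train)

# Dev side: save client.zip and server.zip ("deployment" must be an empty directory)
FHEModelDev(path_dir="deployment", model=model).save()

# Client side: load the specs, generate keys and encrypt the input
client = FHEModelClient(path_dir="deployment", key_dir="keys")
client.load()
client.generate_private_and_evaluation_keys()
evaluation_keys = client.get_serialized_evaluation_keys()
encrypted_input = client.quantize_encrypt_serialize(x_test)

# Server side: run the FHE circuit on the encrypted, serialized input
server = FHEModelServer(path_dir="deployment")
server.load()
encrypted_output = server.run(encrypted_input, evaluation_keys)

# Client side: decrypt, de-quantize and post-process the result
y_pred = client.deserialize_decrypt_dequantize(encrypted_output)

The test then uses the check_array_equal and check_float_array_equal fixtures to compare y_pred (and its quantized counterpart from deserialize_decrypt) against the development model's own predictions.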