chore: update

change the conftest.default_configuration code such that we can choose Parameter_selection_strategy mode. further reduce the dataset size for KNN, it impacts pipeline and grid_search tests, where we had to reduce the n_split
zama-ai · Sep 14, 2023 · a4255bb · a4255bb
1 parent aeb9196
commit a4255bb
Show file tree

Hide file tree

Showing 17 changed files with 120 additions and 136 deletions.
diff --git a/conftest.py b/conftest.py
@@ -147,19 +147,32 @@ def pytest_sessionfinish(session: pytest.Session, exitstatus):  # pylint: disabl
 def default_configuration():
     """Return the default test compilation configuration."""
 
-    # Remove parameter_selection_strategy once it is set to multi-parameter in Concrete Python
-    # by default
-    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3860
-    # Parameter `enable_unsafe_features` and `use_insecure_key_cache` are needed in order to be
-    # able to cache generated keys through `insecure_key_cache_location`. As the name suggests,
-    # these parameters are unsafe and should only be used for debugging in development
-    return Configuration(
-        dump_artifacts_on_unexpected_failures=False,
-        enable_unsafe_features=True,
-        use_insecure_key_cache=True,
-        insecure_key_cache_location="ConcreteNumpyKeyCache",
-        parameter_selection_strategy=ParameterSelectionStrategy.MULTI,
-    )
+    def default_configuration_impl(mode: Optional[str] = "MULTI"):
+        """Return the default test compilation configuration for a given strategy.
+
+        Args:
+            mode (Optional[str]): Whether we execute in a MONO or MULTI precision settings.
+
+        Returns:
+            The configuration.
+        """
+        if mode == "MULTI":
+            parameter_selection_strategy = ParameterSelectionStrategy.MULTI
+            single_precision = False
+        else:
+            parameter_selection_strategy = ParameterSelectionStrategy.MONO
+            single_precision = True
+
+        return Configuration(
+            dump_artifacts_on_unexpected_failures=False,
+            enable_unsafe_features=True,
+            use_insecure_key_cache=True,
+            insecure_key_cache_location="ConcreteNumpyKeyCache",
+            parameter_selection_strategy=parameter_selection_strategy,
+            single_precision=single_precision,
+        )
+
+    return default_configuration_impl
 
 
 REMOVE_COLOR_CODES_RE = re.compile(r"\x1b[^m]*m")

diff --git a/src/concrete/ml/pytest/utils.py b/src/concrete/ml/pytest/utils.py
@@ -103,8 +103,8 @@
     pytest.param(
         model,
         {
-            "n_samples": 10,
-            "n_features": 3,
+            "n_samples": 4,
+            "n_features": 2,
             "n_classes": n_classes,
             "n_informative": 2,
             "n_redundant": 0,

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
@@ -1875,23 +1875,6 @@ def inference_to_compile(q_X: numpy.ndarray) -> numpy.ndarray:
 
         return compiler
 
-    @staticmethod
-    def top_k_indices(distance_matrix: numpy.ndarray, k: int) -> numpy.ndarray:
-        """Get the indices of the top-k smallest distances for each point.
-
-        Args:
-            distance_matrix (numpy.ndarray): Represents the pairwise euclidean distance between
-                the query and other points
-            k (int): The top nearest neighbors to consider
-
-        Returns:
-            numpy.ndarray: The k nearest neighbors for the corresponding query, sorted in
-            ascending order.
-        """
-
-        # Sort the distances in an ascending order and select the k smallest distanes
-        return numpy.argsort(distance_matrix, axis=1)[:, :k]
-
     @staticmethod
     def majority_vote(nearest_classes: numpy.ndarray):
         """Determine the most common class among nearest neighborsfor each query.

diff --git a/tests/common/test_pbs_error_probability_settings.py b/tests/common/test_pbs_error_probability_settings.py
@@ -4,11 +4,9 @@
 
 import numpy
 import pytest
-from concrete.fhe.compilation import Configuration
 from sklearn.exceptions import ConvergenceWarning
 from torch import nn
 
-from concrete import fhe
 from concrete.ml.common.utils import get_model_name
 from concrete.ml.pytest.torch_models import FCSmall
 from concrete.ml.pytest.utils import sklearn_models_and_datasets
@@ -41,24 +39,17 @@ def test_config_sklearn(model_class, parameters, kwargs, load_data, default_conf
         # Fit the model
         model.fit(x, y)
 
-    if get_model_name(model_class) == "KNeighborsClassifier":
-
-        default_configuration = Configuration(
-            dump_artifacts_on_unexpected_failures=False,
-            enable_unsafe_features=True,
-            use_insecure_key_cache=True,
-            insecure_key_cache_location="ConcreteNumpyKeyCache",
-            parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO,
-            single_precision=True,
-        )
+    # KNN works only for MONO in the latest concrete Python version
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
+    mode = "MONO" if get_model_name(model) == "KNeighborsClassifier" else "MULTI"
 
     if kwargs.get("p_error", None) is not None and kwargs.get("global_p_error", None) is not None:
         with pytest.raises(ValueError) as excinfo:
-            model.compile(x, default_configuration, verbose=True, **kwargs)
+            model.compile(x, default_configuration(mode), verbose=True, **kwargs)
         assert "Please only set one of (p_error, global_p_error) values" in str(excinfo.value)
     else:
 
-        model.compile(x, default_configuration, verbose=True, **kwargs)
+        model.compile(x, default_configuration(mode), verbose=True, **kwargs)
 
     # We still need to check that we have the expected probabilities
     # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2206

diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py
@@ -9,11 +9,9 @@
 
 import numpy
 import pytest
-from concrete.fhe.compilation import Configuration
 from sklearn.exceptions import ConvergenceWarning
 from torch import nn
 
-from concrete import fhe
 from concrete.ml.common.utils import get_model_name
 from concrete.ml.deployment.fhe_client_server import FHEModelClient, FHEModelDev, FHEModelServer
 from concrete.ml.pytest.torch_models import FCSmall
@@ -97,32 +95,25 @@ def test_client_server_sklearn(
 
     # Compile
     extra_params = {"global_p_error": 1 / 100_000}
-
-    if get_model_name(model_class) == "KNeighborsClassifier":
-
-        default_configuration = Configuration(
-            dump_artifacts_on_unexpected_failures=False,
-            enable_unsafe_features=True,
-            use_insecure_key_cache=True,
-            insecure_key_cache_location="ConcreteNumpyKeyCache",
-            parameter_selection_strategy=fhe.ParameterSelectionStrategy.MONO,
-            single_precision=True,
-        )
+    # KNN works only for MONO in the latest concrete Python version
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
+    mode = "MONO" if get_model_name(model) == "KNeighborsClassifier" else "MULTI"
 
     # Running the simulation using a model that is not compiled should not be possible
     with pytest.raises(AttributeError, match=".* model is not compiled.*"):
-        client_server_simulation(x_train, x_test, model, default_configuration)
+        client_server_simulation(x_train, x_test, model, default_configuration(mode))
+
     # With n_bits = 3, KNN is not compilable
     fhe_circuit = model.compile(
-        x_train, default_configuration, **extra_params, show_mlir=(n_bits <= 8)
+        x_train, default_configuration(mode), **extra_params, show_mlir=(n_bits <= 8)
     )
     max_bit_width = fhe_circuit.graph.maximum_integer_bit_width()
     print(f"Max width {max_bit_width}")
 
     # Check that the FHE execution is correct.
     # With a global_p_error of 1/100_000 we only allow one run.
     check_is_good_execution_for_cml_vs_circuit(x_test, model, simulate=False, n_allowed_runs=1)
-    client_server_simulation(x_train, x_test, model, default_configuration)
+    client_server_simulation(x_train, x_test, model, default_configuration(mode))
 
 
 def test_client_server_custom_model(
@@ -138,7 +129,7 @@ def test_client_server_custom_model(
         # Instantiate an empty QuantizedModule object
         quantized_module = QuantizedModule()
 
-        client_server_simulation(x_train, x_test, quantized_module, default_configuration)
+        client_server_simulation(x_train, x_test, quantized_module, default_configuration("MULTI"))
 
     torch_model = FCSmall(2, nn.ReLU)
     n_bits = 2
@@ -147,7 +138,7 @@ def test_client_server_custom_model(
     quantized_numpy_module = compile_torch_model(
         torch_model,
         x_train,
-        configuration=default_configuration,
+        configuration=default_configuration("MULTI"),
         n_bits=n_bits,
         global_p_error=1 / 100_000,
     )
@@ -158,7 +149,7 @@ def test_client_server_custom_model(
         x_test, quantized_numpy_module, simulate=False, n_allowed_runs=1
     )
 
-    client_server_simulation(x_train, x_test, quantized_numpy_module, default_configuration)
+    client_server_simulation(x_train, x_test, quantized_numpy_module, default_configuration())
 
 
 def client_server_simulation(x_train, x_test, model, default_configuration):

diff --git a/tests/parameter_search/test_p_error_binary_search.py b/tests/parameter_search/test_p_error_binary_search.py
@@ -312,6 +312,7 @@ def test_binary_search_for_built_in_models(model_class, parameters, threshold, p
     # Skorch but since Scikit-Learn does not, we don't as well. This issue could be fixed by making
     # neural networks not inherit from Skorch.
     # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3373
+
     # Skipping predict_proba for KNN, doesn't work for now.
     # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3962
 

diff --git a/tests/quantization/test_compilation.py b/tests/quantization/test_compilation.py
@@ -78,7 +78,7 @@ def test_quantized_module_compilation(
     # Compile
     quantized_model.compile(
         numpy_input,
-        default_configuration,
+        default_configuration(),
         verbose=verbose,
     )
 
@@ -138,7 +138,7 @@ def test_quantized_cnn_compilation(
     # Compile
     quantized_model.compile(
         numpy_input,
-        default_configuration,
+        default_configuration(),
         verbose=verbose,
     )
     check_is_good_execution_for_cml_vs_circuit(numpy_input, quantized_model, simulate=simulate)
@@ -317,7 +317,7 @@ def test_compile_multi_input_nn_with_input_tlus(
     # Compile
     quantized_model.compile(
         numpy_input,
-        default_configuration,
+        default_configuration(),
     )
     check_is_good_execution_for_cml_vs_circuit(numpy_input, quantized_model, simulate=True)
 

diff --git a/tests/quantization/test_quantized_module.py b/tests/quantization/test_quantized_module.py
@@ -246,7 +246,7 @@ def test_bitwidth_report(model_class, input_shape, activation_function, default_
         torch_fc_model,
         torch_input,
         False,
-        default_configuration,
+        default_configuration(),
         n_bits=2,
         p_error=0.01,
     )
@@ -336,7 +336,7 @@ def test_quantized_module_rounding_fhe(model_class, input_shape, default_configu
         torch_fc_model,
         torch_input,
         False,
-        default_configuration,
+        default_configuration(),
         n_bits=2,
         p_error=0.01,
         rounding_threshold_bits=6,

diff --git a/tests/seeding/test_seeding.py b/tests/seeding/test_seeding.py
@@ -106,7 +106,7 @@ def test_seed_sklearn(model_class, parameters, load_data, default_configuration)
 
     # Test the determinism of our package (even if the bit-width may be too large)
     try:
-        model.compile(x, configuration=default_configuration, show_mlir=True)
+        model.compile(x, configuration=default_configuration("MULTI"), show_mlir=True)
     except RuntimeError as err:
         print(err)
     except AssertionError as err:

diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py
@@ -9,7 +9,6 @@
 import pytest
 from sklearn.exceptions import ConvergenceWarning
 
-from concrete import fhe
 from concrete.ml.common.utils import is_model_class_in_a_list
 from concrete.ml.pytest.utils import get_model_name, sklearn_models_and_datasets
 from concrete.ml.sklearn import get_sklearn_tree_models
@@ -37,9 +36,15 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau
         model.set_params(**model_params)
 
     if get_model_name(model) == "KNeighborsClassifier":
-        model.n_bits = 4
-        default_configuration.parameter_selection_strategy = fhe.ParameterSelectionStrategy.MONO
-        default_configuration.single_precision = True
+        # KNN works only for MONO in the latest concrete Python version
+        # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3978
+        mode = "MONO"
+        # KNN works only for small quantization bits
+        # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979
+        model.n_bits = 2
+
+    else:
+        mode = "MULTI"
 
     with warnings.catch_warnings():
         # Sometimes, we miss convergence, which is not a problem for our test
@@ -49,7 +54,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau
 
     with warnings.catch_warnings():
         # Use FHE simulation to not have issues with precision
-        model.compile(x, default_configuration)
+        model.compile(x, default_configuration(mode))
     # Get ONNX model
     onnx_model = model.onnx_model
 
@@ -423,7 +428,7 @@ def test_dump(
   return %variable
 }""",
         "KNeighborsClassifier": """graph torch_jit (
-  %input_0[DOUBLE, symx3]
+  %input_0[DOUBLE, symx2]
 ) {
   %/_operators.0/Constant_output_0 = Constant[value = <Tensor>]()
   %/_operators.0/Unsqueeze_output_0 = Unsqueeze(%input_0, %/_operators.0/Constant_output_0)

diff --git a/tests/sklearn/test_qnn.py b/tests/sklearn/test_qnn.py
@@ -224,7 +224,7 @@ def test_compile_and_calib(
     # Compile the model
     model.compile(
         x_train,
-        configuration=default_configuration,
+        configuration=default_configuration("MULTI"),
     )
 
     # Execute in FHE, but don't check the value.
@@ -406,7 +406,7 @@ def _get_number_of_neurons(module: SparseQuantNeuralNetwork):
     # Compile the model
     model.compile(
         x_train,
-        configuration=default_configuration,
+        configuration=default_configuration("MULTI"),
     )
 
     pruned_model = model.prune(x_train, y_train, 0.5)
@@ -415,7 +415,7 @@ def _get_number_of_neurons(module: SparseQuantNeuralNetwork):
     # Compile the pruned model, this will also perform ONNX export and calibration
     pruned_model.compile(
         x_train,
-        configuration=default_configuration,
+        configuration=default_configuration("MULTI"),
     )
 
     with pytest.raises(
@@ -592,7 +592,7 @@ def test_power_of_two_scaling(
         # in compilation
         model.compile(
             x_train,
-            configuration=default_configuration,
+            configuration=default_configuration("MULTI"),
         )
 
         # Compute the results with simulation, which uses the actual