From e284bd2ffa24c82bcd4619247bce623c4457b950 Mon Sep 17 00:00:00 2001 From: jfrery Date: Mon, 18 Dec 2023 16:49:54 +0100 Subject: [PATCH 01/38] chore: run ensemble model aggregation in FHE closes https://github.com/zama-ai/concrete-ml-internal/issues/451 --- src/concrete/ml/sklearn/base.py | 9 --------- src/concrete/ml/sklearn/tree_to_numpy.py | 12 ++++++------ tests/sklearn/test_dump_onnx.py | 13 +++++++++---- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index d1275c130..f63f63902 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1408,15 +1408,6 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. y_pred = self.post_processing(y_pred) return y_pred - def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: - # Sum all tree outputs - # Remove the sum once we handle multi-precision circuits - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 - y_preds = numpy.sum(y_preds, axis=-1) - - assert_true(y_preds.ndim == 2, "y_preds should be a 2D array") - return y_preds - class BaseTreeRegressorMixin(BaseTreeEstimatorMixin, sklearn.base.RegressorMixin, ABC): """Mixin class for tree-based regressors. diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index 15940e0ce..42c241fe7 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -17,7 +17,7 @@ OPSET_VERSION_FOR_ONNX_EXPORT, get_equivalent_numpy_forward_from_onnx_tree, ) -from ..onnx.onnx_model_manipulations import clean_graph_at_node_op_type, remove_node_types +from ..onnx.onnx_model_manipulations import clean_graph_after_node_op_type, remove_node_types from ..onnx.onnx_utils import get_op_type from ..quantization import QuantizedArray from ..quantization.quantizers import UniformQuantizer @@ -141,12 +141,12 @@ def add_transpose_after_last_node(onnx_model: onnx.ModelProto): # Get the output node output_node = onnx_model.graph.output[0] - # Create the node with perm attribute equal to (2, 1, 0) + # Create the node with perm attribute equal to (1, 0) transpose_node = onnx.helper.make_node( "Transpose", inputs=[output_node.name], outputs=["transposed_output"], - perm=[2, 1, 0], + perm=[1, 0], ) onnx_model.graph.node.append(transpose_node) @@ -237,9 +237,9 @@ def tree_onnx_graph_preprocessing( if len(onnx_model.graph.output) == 1: assert_add_node_and_constant_in_xgboost_regressor_graph(onnx_model) - # Cut the graph at the ReduceSum node as large sum are not yet supported. - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 - clean_graph_at_node_op_type(onnx_model, "ReduceSum") + # Cut the graph after the ReduceSum node to remove + # argmax, sigmoid, softmax from the graph. 
+ clean_graph_after_node_op_type(onnx_model, "ReduceSum") if framework == "xgboost": # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2778 diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index ecfafc879..637a25ac8 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -222,7 +222,8 @@ def test_dump( %/_operators.0/Reshape_2_output_0 = Reshape[allowzero = 0](%/_operators.0/Equal_output_0, %/_operators.0/Constant_2_output_0) %/_operators.0/MatMul_1_output_0 = MatMul(%_operators.0.weight_3, %/_operators.0/Reshape_2_output_0) %/_operators.0/Reshape_3_output_0 = Reshape[allowzero = 0](%/_operators.0/MatMul_1_output_0, %/_operators.0/Constant_3_output_0) - %transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0) + %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) + %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) return %transposed_output }""", "RandomForestClassifier": """graph torch_jit ( @@ -294,7 +295,8 @@ def test_dump( %/_operators.0/Reshape_2_output_0 = Reshape[allowzero = 0](%/_operators.0/Equal_output_0, %/_operators.0/Constant_2_output_0) %/_operators.0/MatMul_1_output_0 = MatMul(%_operators.0.weight_3, %/_operators.0/Reshape_2_output_0) %/_operators.0/Reshape_3_output_0 = Reshape[allowzero = 0](%/_operators.0/MatMul_1_output_0, %/_operators.0/Constant_3_output_0) - %transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0) + %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) + %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) return %transposed_output }""", "GammaRegressor": """graph torch_jit ( @@ -339,7 +341,8 @@ def test_dump( %/_operators.0/Squeeze_output_0 = Squeeze(%/_operators.0/Reshape_3_output_0, %axes_squeeze) %/_operators.0/Transpose_output_0 = Transpose[perm = [1, 0]](%/_operators.0/Squeeze_output_0) %/_operators.0/Reshape_4_output_0 = Reshape[allowzero = 0](%/_operators.0/Transpose_output_0, %/_operators.0/Constant_4_output_0) - return %/_operators.0/Reshape_4_output_0 + %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_4_output_0, %onnx::ReduceSum_26) + return %/_operators.0/ReduceSum_output_0 }""", "RandomForestRegressor": """graph torch_jit ( %input_0[DOUBLE, symx10] @@ -374,6 +377,7 @@ def test_dump( %/_operators.0/Constant_2_output_0[INT64, 3] %/_operators.0/Constant_3_output_0[INT64, 3] %/_operators.0/Constant_4_output_0[INT64, 3] + %onnx::ReduceSum_27[INT64, 1] ) { %/_operators.0/Gemm_output_0 = Gemm[alpha = 1, beta = 0, transB = 1](%_operators.0.weight_1, %input_0) %/_operators.0/Less_output_0 = Less(%/_operators.0/Gemm_output_0, %_operators.0.bias_1) @@ -387,7 +391,8 @@ def test_dump( %/_operators.0/Squeeze_output_0 = Squeeze(%/_operators.0/Reshape_3_output_0, %axes_squeeze) %/_operators.0/Transpose_output_0 = Transpose[perm = [1, 0]](%/_operators.0/Squeeze_output_0) %/_operators.0/Reshape_4_output_0 = Reshape[allowzero = 0](%/_operators.0/Transpose_output_0, %/_operators.0/Constant_4_output_0) - return %/_operators.0/Reshape_4_output_0 + %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_4_output_0, %onnx::ReduceSum_27) + return %/_operators.0/ReduceSum_output_0 }""", "LinearRegression": """graph torch_jit ( %input_0[DOUBLE, symx10] From 8c1e99a0dd0ace387c94cc59f31184ad2459c6b9 Mon 
Sep 17 00:00:00 2001 From: jfrery Date: Mon, 18 Dec 2023 14:35:50 +0100 Subject: [PATCH 02/38] chore: refresh notebooks --- .../ExperimentPrivacyTreePaper.ipynb | 42 +++++++++---------- .../advanced_examples/KNearestNeighbors.ipynb | 3 ++ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb b/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb index 388454993..23ed43935 100644 --- a/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb +++ b/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb @@ -494,7 +494,7 @@ " 87.4\\% ± 1.2\\%\n", " 82.4\\% ± 1.8\\%\n", " -\n", - " 0.003\n", + " 0.004\n", " -\n", " \n", " \n", @@ -518,7 +518,7 @@ " \n", " FHE-RF\n", " 90.9\\% ± 1.1\\%\n", - " 87.5\\% ± 1.5\\%\n", + " 87.5\\% ± 1.6\\%\n", " 84.6\\% ± 1.7\\%\n", " 750.000\n", " 1.623\n", @@ -554,7 +554,7 @@ " \n", " \n", " FHE-XGB\n", - " 97.0\\% ± 2.4\\%\n", + " 96.8\\% ± 2.5\\%\n", " -\n", " -\n", " 900.000\n", @@ -792,9 +792,9 @@ " \n", " \n", " FHE-RF\n", - " 96.9\\% ± 1.2\\%\n", + " 96.8\\% ± 1.3\\%\n", " 95.4\\% ± 1.8\\%\n", - " 93.6\\% ± 2.2\\%\n", + " 93.5\\% ± 2.3\\%\n", " 700.000\n", " 1.477\n", " 576x\n", @@ -805,7 +805,7 @@ " 93.9\\% ± 1.5\\%\n", " 91.4\\% ± 2.3\\%\n", " -\n", - " 0.003\n", + " 0.002\n", " -\n", " \n", " \n", @@ -818,11 +818,11 @@ " FP32-DT 90.3\\% ± 1.0\\% 87.4\\% ± 1.2\\% \n", " FHE-XGB 94.5\\% ± 0.8\\% 92.9\\% ± 1.1\\% \n", " FP32-XGB 95.0\\% ± 0.7\\% 93.6\\% ± 0.9\\% \n", - " FHE-RF 90.9\\% ± 1.1\\% 87.5\\% ± 1.5\\% \n", + " FHE-RF 90.9\\% ± 1.1\\% 87.5\\% ± 1.6\\% \n", " FP32-RF 91.8\\% ± 1.1\\% 89.0\\% ± 1.4\\% \n", "wine (#features: 13) FHE-DT 90.8\\% ± 5.2\\% - \n", " FP32-DT 90.5\\% ± 5.0\\% - \n", - " FHE-XGB 97.0\\% ± 2.4\\% - \n", + " FHE-XGB 96.8\\% ± 2.5\\% - \n", " FP32-XGB 96.2\\% ± 2.9\\% - \n", " FHE-RF 98.5\\% ± 1.4\\% - \n", " FP32-RF 98.1\\% ± 2.0\\% - \n", @@ -848,7 +848,7 @@ " FP32-DT 97.2\\% ± 0.7\\% 96.1\\% ± 0.9\\% \n", " FHE-XGB 100.0\\% ± 0.0\\% 100.0\\% ± 0.0\\% \n", " FP32-XGB 100.0\\% ± 0.0\\% 100.0\\% ± 0.0\\% \n", - " FHE-RF 96.9\\% ± 1.2\\% 95.4\\% ± 1.8\\% \n", + " FHE-RF 96.8\\% ± 1.3\\% 95.4\\% ± 1.8\\% \n", " FP32-RF 95.9\\% ± 1.1\\% 93.9\\% ± 1.5\\% \n", "\n", " AP nodes Time (s) \\\n", @@ -1610,19 +1610,19 @@ "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.\n" + "ap relative: [0.49626943 0.70187731 0.82640876 0.89067066 0.98315255 1.02264581\n", + " 1.02436888 1.01090038 1.01268386], f1_relative: [0.06488922 0.65490682 0.87590196 0.90861806 0.97920588 1.00604989\n", + " 1.00914511 1.00274636 1.00389957]\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "ap relative: [0.49626943 0.70187731 0.82640876 0.89067066 0.98315255 1.02264581\n", - " 1.02436888 1.01090038 1.01268386], f1_relative: [0.06488922 0.65490682 0.87590196 0.90861806 0.97920588 1.00604989\n", - " 1.00914511 1.00274636 1.00389957]\n" + "The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.\n" ] }, { @@ -1646,9 +1646,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "ap relative: [0.43556747 0.69054787 0.8789863 0.94180188 0.97097036 0.99094624\n", - " 0.99348364 0.99626825 0.99932372], f1_relative: [0. 
0.65970362 0.91412713 0.95762445 0.97789164 0.99281277\n", - " 0.99447789 0.99697611 0.99969255]\n" + "ap relative: [0.43556747 0.69054787 0.8789863 0.94213852 0.97097036 0.99083622\n", + " 0.99365961 0.99626825 0.99920411], f1_relative: [0. 0.65970362 0.91412713 0.95780357 0.97789164 0.99271147\n", + " 0.99456864 0.99697611 0.99959059]\n" ] }, { @@ -1672,9 +1672,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "ap relative: [0.45810941 0.66176353 0.85701522 0.93668402 0.96541385 0.98353791\n", - " 0.99091316 0.99133601 0.99740638], f1_relative: [0. 0.57332946 0.87035559 0.9402579 0.96505021 0.983713\n", - " 0.99082334 0.99224022 0.99758998]\n" + "ap relative: [0.45810941 0.65828111 0.85617664 0.93660034 0.96541385 0.98342004\n", + " 0.99091316 0.9911998 0.99740638], f1_relative: [0. 0.56676488 0.86901886 0.93986022 0.96505021 0.98359134\n", + " 0.99082334 0.99211045 0.99758998]\n" ] }, { diff --git a/docs/advanced_examples/KNearestNeighbors.ipynb b/docs/advanced_examples/KNearestNeighbors.ipynb index d7ae1b8c1..9b4b7ed7e 100644 --- a/docs/advanced_examples/KNearestNeighbors.ipynb +++ b/docs/advanced_examples/KNearestNeighbors.ipynb @@ -287,6 +287,9 @@ "data": { "text/html": [ "\n", "\n", " \n", From 70f1775fac8e217e5c53863feab0e0939f2fce4e Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 25 Dec 2023 20:28:47 +0100 Subject: [PATCH 03/38] chore: update celia --- src/concrete/ml/quantization/post_training.py | 3 ++- src/concrete/ml/sklearn/base.py | 3 ++- src/concrete/ml/sklearn/tree_to_numpy.py | 9 +++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index 9389ab05f..c22ac8b85 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -50,7 +50,7 @@ def get_n_bits_dict(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: or ( isinstance(n_bits, Dict) and set(n_bits.keys()).issubset( - {"model_inputs", "op_weights", "model_outputs", "op_inputs"} + {"model_inputs", "op_weights", "model_outputs", "op_inputs", "leaves"} ) and {"op_weights", "op_inputs"}.issubset(set(n_bits.keys())) ), @@ -69,6 +69,7 @@ def get_n_bits_dict(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: "op_weights": n_bits, "op_inputs": n_bits, "model_outputs": max(DEFAULT_MODEL_BITS, n_bits), + "leaves": n_bits, } # If model_inputs or model_outputs are not given, we consider a default value diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index f63f63902..587968ff8 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1292,6 +1292,7 @@ def __init__(self, n_bits: int): #: The model's inference function. Is None if the model is not fitted. 
self._tree_inference: Optional[Callable] = None + BaseEstimator.__init__(self) def fit(self, X: Data, y: Target, **fit_parameters): @@ -1306,7 +1307,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Quantization of each feature in X for i in range(X.shape[1]): - input_quantizer = QuantizedArray(n_bits=self.n_bits, values=X[:, i]).quantizer + input_quantizer = QuantizedArray(n_bits=self.n_bits["op_inputs"], values=X[:, i]).quantizer self.input_quantizers.append(input_quantizer) q_X[:, i] = input_quantizer.quant(X[:, i]) diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index 42c241fe7..5983819aa 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -261,7 +261,7 @@ def tree_onnx_graph_preprocessing( def tree_values_preprocessing( onnx_model: onnx.ModelProto, framework: str, - output_n_bits: int, + n_bits: int, ) -> QuantizedArray: """Pre-process tree values. @@ -277,18 +277,23 @@ def tree_values_preprocessing( # Modify ONNX graph to fit in FHE for i, initializer in enumerate(onnx_model.graph.initializer): + + # All constants in our tree should be integers. # Tree thresholds can be rounded up or down (depending on the tree implementation) # while the final probabilities/regression values must be quantized. # We extract the value stored in each initializer node into the init_tensor. init_tensor = numpy_helper.to_array(initializer) + #print(initializer.name, init_tensor.shape) if "weight_3" in initializer.name: + #print(init_tensor) # weight_3 is the prediction tensor, apply the required pre-processing - q_y = preprocess_tree_predictions(init_tensor, output_n_bits) + q_y = preprocess_tree_predictions(init_tensor, n_bits["leaves"]) # Get the preprocessed tree predictions to replace the current (non-quantized) # values in the onnx_model. init_tensor = q_y.qvalues + elif "bias_1" in initializer.name: if framework == "xgboost": # xgboost uses "<" (Less) operator thus we must round up. 
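Patch 03 above splits the tree bit-widths between inputs and leaves. Below is a minimal standalone sketch of the per-feature input quantization loop that the base.py hunk touches; QuantizedArray, .quantizer and .quant() are taken from the diff itself, while the concrete bit-widths and the random data are illustrative assumptions.

import numpy

from concrete.ml.quantization import QuantizedArray

n_bits = {"op_inputs": 6, "leaves": 6}
X = numpy.random.uniform(-1, 1, size=(100, 3))
q_X = numpy.zeros_like(X)

# One quantizer per feature, each calibrated on its own column with the
# "op_inputs" bit-width introduced by this patch
input_quantizers = []
for i in range(X.shape[1]):
    input_quantizer = QuantizedArray(n_bits=n_bits["op_inputs"], values=X[:, i]).quantizer
    input_quantizers.append(input_quantizer)
    q_X[:, i] = input_quantizer.quant(X[:, i])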
From 6262f12b9f7c73705561b749957562193a1cfea0 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 11 Jan 2024 11:35:40 +0100 Subject: [PATCH 04/38] chore: add op_input and op_leaves --- src/concrete/ml/quantization/__init__.py | 2 +- src/concrete/ml/quantization/post_training.py | 43 ++++++++++++++++++- src/concrete/ml/sklearn/base.py | 33 +++++++++++--- src/concrete/ml/sklearn/tree_to_numpy.py | 11 +++-- 4 files changed, 73 insertions(+), 16 deletions(-) diff --git a/src/concrete/ml/quantization/__init__.py b/src/concrete/ml/quantization/__init__.py index 845b5dc11..58f90f916 100644 --- a/src/concrete/ml/quantization/__init__.py +++ b/src/concrete/ml/quantization/__init__.py @@ -1,6 +1,6 @@ """Modules for quantization.""" from .base_quantized_op import QuantizedOp -from .post_training import PostTrainingAffineQuantization, PostTrainingQATImporter, get_n_bits_dict +from .post_training import PostTrainingAffineQuantization, PostTrainingQATImporter, get_n_bits_dict, get_n_bits_dict_trees from .quantized_module import QuantizedModule from .quantized_ops import ( QuantizedAbs, diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index c22ac8b85..ffa259fc2 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -24,6 +24,46 @@ from .quantizers import QuantizationOptions, QuantizedArray, UniformQuantizer +def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: + """Convert the n_bits parameter into a proper dictionary for tree based-models. + + Args: + n_bits (int, Dict[str, int]): number of bits for quantization, can be a single value or + a dictionary with the following keys : + - "op_inputs" (mandatory) + - "op_leaves" (optional) + TODO + + Returns: + n_bits_dict (Dict[str, int]): TODO + """ + + assert_true( + isinstance(n_bits, int) + or (isinstance(n_bits, Dict) and set(n_bits.keys()).issubset({"op_inputs", "op_leaves"})), + "Invalid n_bits, either pass an integer or a dictionary containing integer values for " + "the following keys:\n" + "- `op_inputs` and `op_leaves` (mandatory)", + ) + + # If a single integer is passed, we use a default value for the model's input and + # output bits + if isinstance(n_bits, int): + n_bits_dict = { + "op_inputs": n_bits, + "op_leaves": n_bits, + } + # If model_inputs or model_outputs are not given, we consider a default value + elif isinstance(n_bits, Dict): + n_bits_dict = { + "model_inputs": n_bits, + "model_outputs": n_bits, + } + + n_bits_dict.update(n_bits) + return n_bits_dict + + def get_n_bits_dict(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: """Convert the n_bits parameter into a proper dictionary. 
@@ -50,7 +90,7 @@ def get_n_bits_dict(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: or ( isinstance(n_bits, Dict) and set(n_bits.keys()).issubset( - {"model_inputs", "op_weights", "model_outputs", "op_inputs", "leaves"} + {"model_inputs", "op_weights", "model_outputs", "op_inputs"} ) and {"op_weights", "op_inputs"}.issubset(set(n_bits.keys())) ), @@ -69,7 +109,6 @@ def get_n_bits_dict(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: "op_weights": n_bits, "op_inputs": n_bits, "model_outputs": max(DEFAULT_MODEL_BITS, n_bits), - "leaves": n_bits, } # If model_inputs or model_outputs are not given, we consider a default value diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 587968ff8..1935f25f7 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -49,7 +49,12 @@ # The sigmoid and softmax functions are already defined in the ONNX module and thus are imported # here in order to avoid duplicating them. from ..onnx.ops_impl import numpy_sigmoid, numpy_softmax -from ..quantization import PostTrainingQATImporter, QuantizedArray, get_n_bits_dict +from ..quantization import ( + PostTrainingQATImporter, + QuantizedArray, + get_n_bits_dict, + get_n_bits_dict_trees, +) from ..quantization.quantized_module import QuantizedModule, _get_inputset_generator from ..quantization.quantizers import ( QuantizationOptions, @@ -98,6 +103,9 @@ # However, for internal testing purposes, we retain the capability to disable this feature os.environ["TREES_USE_ROUNDING"] = "1" +# TODO +os.environ["TREES_USE_FHE_SUM"] = "0" + # pylint: disable=too-many-public-methods @@ -1285,14 +1293,22 @@ def __init__(self, n_bits: int): """Initialize the TreeBasedEstimatorMixin. Args: - n_bits (int): The number of bits used for quantization. + n_bits (int, Dict[str, int]): Number of bits to quantize the model. If an int is passed + for n_bits, the value will be used for quantizing inputs and leaves. If a dict is + passed, then it should contain "op_inputs" and "op_leaves" as keys with + corresponding number of quantization bits so that: + - op_inputs : number of bits to quantize the input values + - op_leaves: number of bits to quantize the leaves + Default to 6. """ - self.n_bits: int = n_bits + self.n_bits: Union[int, Dict[str, int]] = n_bits + + # Convert the n_bits attribute into a proper dictionary + self.n_bits = get_n_bits_dict_trees(self.n_bits) #: The model's inference function. Is None if the model is not fitted. 
self._tree_inference: Optional[Callable] = None - BaseEstimator.__init__(self) def fit(self, X: Data, y: Target, **fit_parameters): @@ -1307,7 +1323,9 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Quantization of each feature in X for i in range(X.shape[1]): - input_quantizer = QuantizedArray(n_bits=self.n_bits["op_inputs"], values=X[:, i]).quantizer + input_quantizer = QuantizedArray( + n_bits=self.n_bits["op_inputs"], values=X[:, i] + ).quantizer self.input_quantizers.append(input_quantizer) q_X[:, i] = input_quantizer.quant(X[:, i]) @@ -1320,7 +1338,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Check that the underlying sklearn model has been set and fit assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() - # Convert the tree inference with Numpy operators + # Enable rounding feature enable_rounding = os.environ.get("TREES_USE_ROUNDING", "1") == "1" if not enable_rounding: @@ -1333,12 +1351,13 @@ def fit(self, X: Data, y: Target, **fit_parameters): stacklevel=2, ) + # Convert the tree inference with Numpy operators self._tree_inference, self.output_quantizers, self.onnx_model_ = tree_to_numpy( self.sklearn_model, q_X, use_rounding=enable_rounding, framework=self.framework, - output_n_bits=self.n_bits, + output_n_bits=self.n_bits["op_leaves"], ) self._is_fitted = True diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index 5983819aa..3d1025a1f 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -277,23 +277,22 @@ def tree_values_preprocessing( # Modify ONNX graph to fit in FHE for i, initializer in enumerate(onnx_model.graph.initializer): - - + # All constants in our tree should be integers. # Tree thresholds can be rounded up or down (depending on the tree implementation) # while the final probabilities/regression values must be quantized. # We extract the value stored in each initializer node into the init_tensor. init_tensor = numpy_helper.to_array(initializer) - #print(initializer.name, init_tensor.shape) + # print(initializer.name, init_tensor.shape) if "weight_3" in initializer.name: - #print(init_tensor) + # print(init_tensor) # weight_3 is the prediction tensor, apply the required pre-processing - q_y = preprocess_tree_predictions(init_tensor, n_bits["leaves"]) + q_y = preprocess_tree_predictions(init_tensor, n_bits) # Get the preprocessed tree predictions to replace the current (non-quantized) # values in the onnx_model. init_tensor = q_y.qvalues - + elif "bias_1" in initializer.name: if framework == "xgboost": # xgboost uses "<" (Less) operator thus we must round up. 
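The helper added in patch 04 normalizes n_bits for tree-based models. A usage sketch of the behavior the series converges on (the dictionary branch is only finalized by patch 08, which also asserts that op_inputs is at least op_leaves):

from concrete.ml.quantization import get_n_bits_dict_trees

# A single integer is broadcast to both fields
assert get_n_bits_dict_trees(6) == {"op_inputs": 6, "op_leaves": 6}

# A dictionary is taken as-is, with op_inputs >= op_leaves enforced
assert get_n_bits_dict_trees({"op_inputs": 8, "op_leaves": 4}) == {"op_inputs": 8, "op_leaves": 4}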
From 67e3572df80bea5a686251ee0b04cfed4709bada Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 11 Jan 2024 15:10:25 +0100 Subject: [PATCH 05/38] chore: restore non fhe computation --- src/concrete/ml/quantization/__init__.py | 7 ++++++- src/concrete/ml/sklearn/base.py | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/concrete/ml/quantization/__init__.py b/src/concrete/ml/quantization/__init__.py index 58f90f916..6669d76a7 100644 --- a/src/concrete/ml/quantization/__init__.py +++ b/src/concrete/ml/quantization/__init__.py @@ -1,6 +1,11 @@ """Modules for quantization.""" from .base_quantized_op import QuantizedOp -from .post_training import PostTrainingAffineQuantization, PostTrainingQATImporter, get_n_bits_dict, get_n_bits_dict_trees +from .post_training import ( + PostTrainingAffineQuantization, + PostTrainingQATImporter, + get_n_bits_dict, + get_n_bits_dict_trees, +) from .quantized_module import QuantizedModule from .quantized_ops import ( QuantizedAbs, diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 1935f25f7..510a9d2db 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -104,7 +104,7 @@ os.environ["TREES_USE_ROUNDING"] = "1" # TODO -os.environ["TREES_USE_FHE_SUM"] = "0" +os.environ["TREES_USE_FHE_SUM"] = "1" # pylint: disable=too-many-public-methods @@ -1428,6 +1428,20 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. y_pred = self.post_processing(y_pred) return y_pred + def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: + # Sum all tree outputs + # Remove the sum once we handle multi-precision circuits + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 + if os.getenv("TREES_USE_FHE_SUM") == "0": + print("post_processing: Non FHE SUM") + y_preds = numpy.sum(y_preds, axis=-1) + + assert_true(y_preds.ndim == 2, "y_preds should be a 2D array") + return y_preds + else: + print("post_processing: FHE SUM") + return super().post_processing(y_preds) + class BaseTreeRegressorMixin(BaseTreeEstimatorMixin, sklearn.base.RegressorMixin, ABC): """Mixin class for tree-based regressors. 
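Patch 05 gates the ensemble aggregation on the TREES_USE_FHE_SUM variable: with "1" the ReduceSum stays inside the FHE circuit, with "0" the per-tree outputs are summed in the clear during post-processing. A standalone sketch of that dispatch; the (n_samples, n_classes, n_trees) layout is an assumption inferred from the axis=-1 sum in the hunk above.

import os

import numpy

def aggregate(y_preds: numpy.ndarray) -> numpy.ndarray:
    if os.getenv("TREES_USE_FHE_SUM") == "0":
        # Sum over the trailing per-tree axis in the clear
        return numpy.sum(y_preds, axis=-1)
    # Otherwise the circuit already summed the trees via ReduceSum
    return y_preds

os.environ["TREES_USE_FHE_SUM"] = "0"
per_tree = numpy.ones((10, 2, 5))  # assumed (n_samples, n_classes, n_trees)
assert aggregate(per_tree).shape == (10, 2)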
From 8e2e056a221c69a926c47b99b55d9d181d7fbb32 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 11 Jan 2024 15:10:59 +0100 Subject: [PATCH 06/38] chore: update dump test --- tests/sklearn/test_dump_onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index 637a25ac8..a6727139a 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -1,6 +1,6 @@ """Tests for the sklearn decision trees.""" - +import os import warnings from functools import partial From 3e2289a23da3dfb874b701da49d630364df61e50 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 11 Jan 2024 16:30:27 +0100 Subject: [PATCH 07/38] chore: update test dump --- tests/sklearn/test_dump_onnx.py | 54 ++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index a6727139a..a5fe6497a 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -222,9 +222,15 @@ def test_dump( %/_operators.0/Reshape_2_output_0 = Reshape[allowzero = 0](%/_operators.0/Equal_output_0, %/_operators.0/Constant_2_output_0) %/_operators.0/MatMul_1_output_0 = MatMul(%_operators.0.weight_3, %/_operators.0/Reshape_2_output_0) %/_operators.0/Reshape_3_output_0 = Reshape[allowzero = 0](%/_operators.0/MatMul_1_output_0, %/_operators.0/Constant_3_output_0) - %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) + """ + + ( + """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) - return %transposed_output + """ + if os.getenv("TREES_USE_FHE_SUM") == "1" + else "%transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0)" + ) + + """return %transposed_output }""", "RandomForestClassifier": """graph torch_jit ( %input_0[DOUBLE, symx10] @@ -295,9 +301,15 @@ def test_dump( %/_operators.0/Reshape_2_output_0 = Reshape[allowzero = 0](%/_operators.0/Equal_output_0, %/_operators.0/Constant_2_output_0) %/_operators.0/MatMul_1_output_0 = MatMul(%_operators.0.weight_3, %/_operators.0/Reshape_2_output_0) %/_operators.0/Reshape_3_output_0 = Reshape[allowzero = 0](%/_operators.0/MatMul_1_output_0, %/_operators.0/Constant_3_output_0) - %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) + """ + + ( + """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) - return %transposed_output + """ + if os.getenv("TREES_USE_FHE_SUM") == "1" + else "" + ) + + """return %transposed_output }""", "GammaRegressor": """graph torch_jit ( %input_0[DOUBLE, symx10] @@ -341,9 +353,15 @@ def test_dump( %/_operators.0/Squeeze_output_0 = Squeeze(%/_operators.0/Reshape_3_output_0, %axes_squeeze) %/_operators.0/Transpose_output_0 = Transpose[perm = [1, 0]](%/_operators.0/Squeeze_output_0) %/_operators.0/Reshape_4_output_0 = Reshape[allowzero = 0](%/_operators.0/Transpose_output_0, %/_operators.0/Constant_4_output_0) - %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_4_output_0, %onnx::ReduceSum_26) + """ + + ( + """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 
0](%/_operators.0/Reshape_4_output_0, %onnx::ReduceSum_26) return %/_operators.0/ReduceSum_output_0 -}""", +}""" + if os.getenv("TREES_USE_FHE_SUM") == "1" + else """return %/_operators.0/Reshape_4_output_0 + }""" + ), "RandomForestRegressor": """graph torch_jit ( %input_0[DOUBLE, symx10] ) { @@ -360,8 +378,15 @@ def test_dump( %/_operators.0/MatMul_1_output_0 = MatMul(%_operators.0.weight_3, %/_operators.0/Reshape_2_output_0) %/_operators.0/Constant_3_output_0 = Constant[value = ]() %/_operators.0/Reshape_3_output_0 = Reshape[allowzero = 0](%/_operators.0/MatMul_1_output_0, %/_operators.0/Constant_3_output_0) - %transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0) - return %transposed_output + """ + + ( + """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) + %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) + """ + if os.getenv("TREES_USE_FHE_SUM") == "1" + else "%transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0)" + ) + + """return %transposed_output }""", "XGBRegressor": """graph torch_jit ( %input_0[DOUBLE, symx10] @@ -377,7 +402,9 @@ def test_dump( %/_operators.0/Constant_2_output_0[INT64, 3] %/_operators.0/Constant_3_output_0[INT64, 3] %/_operators.0/Constant_4_output_0[INT64, 3] - %onnx::ReduceSum_27[INT64, 1] + """ + + ("%onnx::ReduceSum_27[INT64, 1]" if os.getenv("TREES_USE_FHE_SUM") == "1" else "") + + """ ) { %/_operators.0/Gemm_output_0 = Gemm[alpha = 1, beta = 0, transB = 1](%_operators.0.weight_1, %input_0) %/_operators.0/Less_output_0 = Less(%/_operators.0/Gemm_output_0, %_operators.0.bias_1) @@ -391,9 +418,14 @@ def test_dump( %/_operators.0/Squeeze_output_0 = Squeeze(%/_operators.0/Reshape_3_output_0, %axes_squeeze) %/_operators.0/Transpose_output_0 = Transpose[perm = [1, 0]](%/_operators.0/Squeeze_output_0) %/_operators.0/Reshape_4_output_0 = Reshape[allowzero = 0](%/_operators.0/Transpose_output_0, %/_operators.0/Constant_4_output_0) - %/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_4_output_0, %onnx::ReduceSum_27) + """ + + ( + """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_4_output_0, %onnx::ReduceSum_27) return %/_operators.0/ReduceSum_output_0 -}""", +}""" + if os.getenv("TREES_USE_FHE_SUM") == "1" + else "return %/_operators.0/Reshape_4_output_0" + ), "LinearRegression": """graph torch_jit ( %input_0[DOUBLE, symx10] ) initializers ( From a9c8385dee5f45e90ce77aab5c92d41d018795e6 Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 12 Jan 2024 10:54:07 +0100 Subject: [PATCH 08/38] chore: fix pipeline test --- src/concrete/ml/quantization/post_training.py | 13 +++++++++++-- src/concrete/ml/sklearn/base.py | 10 +++++++--- tests/sklearn/test_sklearn_models.py | 5 +++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index ffa259fc2..3ba0168c7 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -53,14 +53,23 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: "op_inputs": n_bits, "op_leaves": n_bits, } + # If model_inputs or model_outputs are not given, we consider a default value elif isinstance(n_bits, Dict): n_bits_dict = { - "model_inputs": n_bits, - "model_outputs": n_bits, + "model_inputs": DEFAULT_MODEL_BITS, + "model_outputs": 
max(DEFAULT_MODEL_BITS, n_bits["op_inputs"]), # TODO } n_bits_dict.update(n_bits) + + assert_true( + n_bits_dict["op_inputs"] >= n_bits_dict["op_leaves"], + "Using fewer bits to represent the model_outputs than the op inputs is not " + f"recommended. Got op_leaves: {n_bits_dict['op_leaves']} and op_inputs: " + f"{n_bits_dict['op_inputs']}", + ) + return n_bits_dict diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 510a9d2db..77e3dfca2 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1303,9 +1303,6 @@ def __init__(self, n_bits: int): """ self.n_bits: Union[int, Dict[str, int]] = n_bits - # Convert the n_bits attribute into a proper dictionary - self.n_bits = get_n_bits_dict_trees(self.n_bits) - #: The model's inference function. Is None if the model is not fitted. self._tree_inference: Optional[Callable] = None @@ -1321,6 +1318,10 @@ def fit(self, X: Data, y: Target, **fit_parameters): q_X = numpy.zeros_like(X) + # Convert the n_bits attribute into a proper dictionary + self.n_bits = get_n_bits_dict_trees(self.n_bits) + print(f"{self.n_bits=}") + # Quantization of each feature in X for i in range(X.shape[1]): input_quantizer = QuantizedArray( @@ -1338,6 +1339,9 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Check that the underlying sklearn model has been set and fit assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() + # Convert the n_bits attribute into a proper dictionary + self.n_bits = get_n_bits_dict_trees(self.n_bits) + # Enable rounding feature enable_rounding = os.environ.get("TREES_USE_ROUNDING", "1") == "1" diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index d3e68f731..aac20fb90 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -726,12 +726,16 @@ def check_pipeline(model_class, x, y): {key: value} for key, values in hyper_param_combinations.items() for value in values ] + print(f"{hyperparameters_list=}") + # Take one of the hyper_parameters randomly (testing everything would be too long) if len(hyperparameters_list) == 0: hyper_parameters = {} else: hyper_parameters = hyperparameters_list[numpy.random.randint(0, len(hyperparameters_list))] + print(f"{hyperparameters_list=}") + pipe_cv = Pipeline( [ ("pca", PCA(n_components=2, random_state=numpy.random.randint(0, 2**15))), @@ -748,6 +752,7 @@ def check_pipeline(model_class, x, y): } else: + print("ELSE") param_grid = { "model__n_bits": [2, 3], } From a43f04c11125f041389e4b09cc7d759a2ee0f553 Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 12 Jan 2024 11:27:12 +0100 Subject: [PATCH 09/38] chore: fix rounding test by decreasing the n_bits value because no crypto params are found and fix serialization function --- src/concrete/ml/quantization/post_training.py | 8 +++----- src/concrete/ml/sklearn/rf.py | 4 ++-- src/concrete/ml/sklearn/tree.py | 4 ++-- src/concrete/ml/sklearn/xgb.py | 4 ++-- tests/sklearn/test_sklearn_models.py | 2 +- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index 3ba0168c7..70f198fa9 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -46,6 +46,8 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: "- `op_inputs` and `op_leaves` (mandatory)", ) + n_bits_dict = {} + # If a single integer is passed, we use 
a default value for the model's input and # output bits if isinstance(n_bits, int): @@ -55,12 +57,8 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: } # If model_inputs or model_outputs are not given, we consider a default value - elif isinstance(n_bits, Dict): - n_bits_dict = { - "model_inputs": DEFAULT_MODEL_BITS, - "model_outputs": max(DEFAULT_MODEL_BITS, n_bits["op_inputs"]), # TODO - } + elif isinstance(n_bits, Dict): n_bits_dict.update(n_bits) assert_true( diff --git a/src/concrete/ml/sklearn/rf.py b/src/concrete/ml/sklearn/rf.py index e5f756664..14aabf060 100644 --- a/src/concrete/ml/sklearn/rf.py +++ b/src/concrete/ml/sklearn/rf.py @@ -124,7 +124,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits, + output_n_bits=obj.n_bits["op_leaves"], )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -259,7 +259,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits, + output_n_bits=obj.n_bits["op_leaves"], )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/src/concrete/ml/sklearn/tree.py b/src/concrete/ml/sklearn/tree.py index 1ea972cfd..c870d2804 100644 --- a/src/concrete/ml/sklearn/tree.py +++ b/src/concrete/ml/sklearn/tree.py @@ -119,7 +119,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits, + output_n_bits=obj.n_bits["op_leaves"], )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -242,7 +242,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits, + output_n_bits=obj.n_bits["op_leaves"], )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/src/concrete/ml/sklearn/xgb.py b/src/concrete/ml/sklearn/xgb.py index a10b1400b..a2b121f4d 100644 --- a/src/concrete/ml/sklearn/xgb.py +++ b/src/concrete/ml/sklearn/xgb.py @@ -178,7 +178,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits, + output_n_bits=obj.n_bits["op_leaves"], )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -407,7 +407,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits, + output_n_bits=obj.n_bits["op_leaves"], )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index aac20fb90..26fac9d03 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1839,7 +1839,7 @@ def test_linear_models_have_no_tlu( # Additional tests for this purpose should be added in future updates # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4179 @pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets()) -@pytest.mark.parametrize("n_bits", [2, 5, 11]) +@pytest.mark.parametrize("n_bits", [2, 5, 10]) def test_rounding_consistency_for_regular_models( model_class, parameters, From ecf5c668ef37b684f8840ecdecaac64c4054f41b Mon Sep 17 00:00:00 2001 From: 
kcelia Date: Fri, 12 Jan 2024 13:09:43 +0100 Subject: [PATCH 10/38] chore: reduce n_bits in simulation test to 4 bits otherwise OOM --- tests/sklearn/test_sklearn_models.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 26fac9d03..3427792b9 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1663,7 +1663,11 @@ def test_p_error_simulation( The test checks that models compiled with a large p_error value predicts very different results with simulation or in FHE compared to the expected clear quantized ones. """ - n_bits = get_n_bits_non_correctness(model_class) + + if os.getenv('TREES_USE_FHE_SUM') == "1": + n_bits = 4 + else: + n_bits = get_n_bits_non_correctness(model_class) # Get data-set, initialize and fit the model model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) @@ -1672,7 +1676,7 @@ def test_p_error_simulation( is_linear_model = is_model_class_in_a_list(model_class, _get_sklearn_linear_models()) # Compile with a large p_error to be sure the result is random. - model.compile(x, **error_param) + c = model.compile(x, **error_param) def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_RUN): """Detect divergence between simulated/FHE execution and clear run.""" @@ -1693,7 +1697,11 @@ def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_ return True return False + print("Start simulation") + print(model) + simulation_diff_found = check_for_divergent_predictions(x, model, fhe="simulate") + print("execution") fhe_diff_found = check_for_divergent_predictions(x, model, fhe="execute") # Check for differences in predictions From 9007362d274be734cac3561857ead659a3bdaa2f Mon Sep 17 00:00:00 2001 From: kcelia Date: Fri, 12 Jan 2024 14:07:54 +0100 Subject: [PATCH 11/38] chore: add a test for fhe sum --- src/concrete/ml/sklearn/base.py | 14 ---- src/concrete/ml/sklearn/tree_to_numpy.py | 7 +- tests/sklearn/test_sklearn_models.py | 94 +++++++++++++++++++++++- 3 files changed, 98 insertions(+), 17 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 77e3dfca2..539cfd74e 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1432,20 +1432,6 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. y_pred = self.post_processing(y_pred) return y_pred - def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: - # Sum all tree outputs - # Remove the sum once we handle multi-precision circuits - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 - if os.getenv("TREES_USE_FHE_SUM") == "0": - print("post_processing: Non FHE SUM") - y_preds = numpy.sum(y_preds, axis=-1) - - assert_true(y_preds.ndim == 2, "y_preds should be a 2D array") - return y_preds - else: - print("post_processing: FHE SUM") - return super().post_processing(y_preds) - class BaseTreeRegressorMixin(BaseTreeEstimatorMixin, sklearn.base.RegressorMixin, ABC): """Mixin class for tree-based regressors. 
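The tree_to_numpy.py hunk below switches the Transpose permutation depending on whether the ReduceSum stayed in the graph. A pure-numpy sketch of why the two perms differ; the axis names are assumptions, only the dimensionalities come from the diff.

import numpy

# With the FHE sum, ReduceSum has consumed the tree axis: 2-D output, perm = [1, 0]
summed = numpy.zeros((3, 10))  # assumed (n_classes, n_samples)
assert numpy.transpose(summed, axes=[1, 0]).shape == (10, 3)

# Without it, the per-tree axis survives: 3-D output, perm = [2, 1, 0]
per_tree = numpy.zeros((5, 3, 10))  # assumed (n_trees, n_classes, n_samples)
assert numpy.transpose(per_tree, axes=[2, 1, 0]).shape == (10, 3, 5)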
diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index 3d1025a1f..a748197b9 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -1,5 +1,6 @@ """Implements the conversion of a tree model to a numpy function.""" import math +import os import warnings from typing import Callable, List, Optional, Tuple @@ -141,12 +142,16 @@ def add_transpose_after_last_node(onnx_model: onnx.ModelProto): # Get the output node output_node = onnx_model.graph.output[0] + if os.getenv("TREES_USE_FHE_SUM") == "1": + perm = [1, 0] + else: + perm = [2, 1, 0] # Create the node with perm attribute equal to (1, 0) transpose_node = onnx.helper.make_node( "Transpose", inputs=[output_node.name], outputs=["transposed_output"], - perm=[1, 0], + perm=perm, ) onnx_model.graph.node.append(transpose_node) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 3427792b9..beeceed4e 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1664,9 +1664,9 @@ def test_p_error_simulation( with simulation or in FHE compared to the expected clear quantized ones. """ - if os.getenv('TREES_USE_FHE_SUM') == "1": + if os.getenv("TREES_USE_FHE_SUM") == "1": n_bits = 4 - else: + else: n_bits = get_n_bits_non_correctness(model_class) # Get data-set, initialize and fit the model @@ -1884,3 +1884,93 @@ def test_rounding_consistency_for_regular_models( metric, is_weekly_option, ) + + +def check_fhe_sum_consistency( + x, + predict_method, + metric, + is_weekly_option, +): + """Test that Concrete ML without and with rounding are 'equivalent'.""" + + # Run the test with more samples during weekly CIs + if is_weekly_option: + fhe_test = get_random_samples(x, n_sample=5) + + # By default, FHE_SUM is disabled + fhe_sum_disabled = os.getenv("TREES_USE_FHE_SUM") == "1" + assert fhe_sum_disabled + + non_fhe_sume_predict_quantized = predict_method(x, fhe="disable") + non_fhe_sume_predict_simulate = predict_method(x, fhe="simulate") + + # Compute the FHE predictions only during weekly CIs + if is_weekly_option: + rounded_predict_fhe = predict_method(fhe_test, fhe="execute") + + print("ROUNGING ENABLED") + + with pytest.MonkeyPatch.context() as mp_context: + + # Enable FHE sum + mp_context.setenv("TREES_USE_FHE_SUM", "0") + + # Check that rounding is disabled + fhe_sum_enbled = os.environ.get("TREES_USE_FHE_SUM") == "0" + assert fhe_sum_enbled + + fhe_sum_predict_quantized = predict_method(x, fhe="disable") + fhe_sum_predict_simulate = predict_method(x, fhe="simulate") + + metric(non_fhe_sume_predict_quantized, fhe_sum_predict_quantized) + metric(non_fhe_sume_predict_simulate, fhe_sum_predict_simulate) + + # Compute the FHE predictions only during weekly CIs + if is_weekly_option: + not_rounded_predict_fhe = predict_method(fhe_test, fhe="execute") + metric(rounded_predict_fhe, not_rounded_predict_fhe) + + # Check that the maximum bit-width of the circuit with rounding is at most: + # maximum bit-width (of the circuit without rounding) + 2 + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4178 + + +@pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets()) +@pytest.mark.parametrize("n_bits", [2, 5, 10]) +def test_fhe_sum_for_tree_based_models( + model_class, + parameters, + n_bits, + load_data, + check_r2_score, + check_accuracy, + is_weekly_option, + default_configuration, + verbose=True, +): + """Test that Concrete ML without and with 
rounding are 'equivalent'.""" + + if verbose: + print("Run check_rounding_consistency") + + model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) + + # Compile the model to make sure we consider all possible attributes during the serialization + model.compile(x, default_configuration) + + # Check `predict_proba` for classifiers + if is_classifier_or_partial_classifier(model): + predict_method = model.predict_proba + metric = check_r2_score + else: + # Check `predict` for regressors + predict_method = model.predict + metric = check_accuracy + + check_fhe_sum_consistency( + x, + predict_method, + metric, + is_weekly_option, + ) From 8fab929ad3c6b611f31474b9c2e370c06edc497d Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 15 Jan 2024 11:17:33 +0100 Subject: [PATCH 12/38] chore: update --- src/concrete/ml/quantization/post_training.py | 12 ++- src/concrete/ml/sklearn/base.py | 16 +++- src/concrete/ml/sklearn/rf.py | 10 +- src/concrete/ml/sklearn/tree.py | 10 +- src/concrete/ml/sklearn/tree_to_numpy.py | 8 +- src/concrete/ml/sklearn/xgb.py | 8 +- tests/sklearn/test_sklearn_models.py | 96 +++++++++---------- 7 files changed, 84 insertions(+), 76 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index 70f198fa9..22d1a3cb5 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -23,6 +23,8 @@ from .quantized_ops import QuantizedBrevitasQuant from .quantizers import QuantizationOptions, QuantizedArray, UniformQuantizer +# pylint: disable=too-many-lines + def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: """Convert the n_bits parameter into a proper dictionary for tree based-models. @@ -32,10 +34,12 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: a dictionary with the following keys : - "op_inputs" (mandatory) - "op_leaves" (optional) - TODO + When using a single integer for n_bits, its value is assigned to "op_inputs" and + "op_leaves" bits. Returns: - n_bits_dict (Dict[str, int]): TODO + n_bits_dict (Dict[str, int]): A dictionary properly representing the number of bits to use + for quantization. 
""" assert_true( @@ -46,7 +50,7 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: "- `op_inputs` and `op_leaves` (mandatory)", ) - n_bits_dict = {} + n_bits_dict: Dict = {} # If a single integer is passed, we use a default value for the model's input and # output bits @@ -56,8 +60,6 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: "op_leaves": n_bits, } - # If model_inputs or model_outputs are not given, we consider a default value - elif isinstance(n_bits, Dict): n_bits_dict.update(n_bits) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 539cfd74e..aacf301a2 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -103,8 +103,8 @@ # However, for internal testing purposes, we retain the capability to disable this feature os.environ["TREES_USE_ROUNDING"] = "1" -# TODO -os.environ["TREES_USE_FHE_SUM"] = "1" +# By default, the decision of the tree ensembles is made in clear +os.environ["TREES_USE_FHE_SUM"] = "0" # pylint: disable=too-many-public-methods @@ -1289,7 +1289,7 @@ def __init_subclass__(cls): _TREE_MODELS.add(cls) _ALL_SKLEARN_MODELS.add(cls) - def __init__(self, n_bits: int): + def __init__(self, n_bits: Union[int, Dict[str, int]]): """Initialize the TreeBasedEstimatorMixin. Args: @@ -1432,6 +1432,16 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. y_pred = self.post_processing(y_pred) return y_pred + # def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: + + + # # Sum all tree outputs + # # Remove the sum once we handle multi-precision circuits + # # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 + # y_preds = numpy.sum(y_preds, axis=-1) + + # assert_true(y_preds.ndim == 2, "y_preds should be a 2D array") + # return y_preds class BaseTreeRegressorMixin(BaseTreeEstimatorMixin, sklearn.base.RegressorMixin, ABC): """Mixin class for tree-based regressors. 
diff --git a/src/concrete/ml/sklearn/rf.py b/src/concrete/ml/sklearn/rf.py index 14aabf060..2ebca55b8 100644 --- a/src/concrete/ml/sklearn/rf.py +++ b/src/concrete/ml/sklearn/rf.py @@ -1,5 +1,5 @@ """Implement RandomForest models.""" -from typing import Any, Dict +from typing import Any, Dict, Union import numpy import sklearn.ensemble @@ -19,7 +19,7 @@ class RandomForestClassifier(BaseTreeClassifierMixin): # pylint: disable-next=too-many-arguments def __init__( self, - n_bits: int = 6, + n_bits: Union[int, Dict[str, int]] = 6, n_estimators=20, criterion="gini", max_depth=4, @@ -124,7 +124,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits["op_leaves"], + output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -162,7 +162,7 @@ class RandomForestRegressor(BaseTreeRegressorMixin): # pylint: disable-next=too-many-arguments def __init__( self, - n_bits: int = 6, + n_bits: Union[int, Dict[str, int]] = 6, n_estimators=20, criterion="squared_error", max_depth=4, @@ -259,7 +259,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits["op_leaves"], + output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/src/concrete/ml/sklearn/tree.py b/src/concrete/ml/sklearn/tree.py index c870d2804..5ba1f8cff 100644 --- a/src/concrete/ml/sklearn/tree.py +++ b/src/concrete/ml/sklearn/tree.py @@ -1,5 +1,5 @@ """Implement DecisionTree models.""" -from typing import Any, Dict +from typing import Any, Dict, Union import numpy import sklearn.tree @@ -31,7 +31,7 @@ def __init__( min_impurity_decrease=0.0, class_weight=None, ccp_alpha: float = 0.0, - n_bits: int = 6, + n_bits: Union[int, Dict[str, int]] = 6, ): """Initialize the DecisionTreeClassifier. @@ -119,7 +119,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits["op_leaves"], + output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -162,7 +162,7 @@ def __init__( max_leaf_nodes=None, min_impurity_decrease=0.0, ccp_alpha=0.0, - n_bits: int = 6, + n_bits: Union[int, Dict[str, int]] = 6, ): """Initialize the DecisionTreeRegressor. 
@@ -242,7 +242,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits["op_leaves"], + output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index a748197b9..49b86705e 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -143,10 +143,12 @@ def add_transpose_after_last_node(onnx_model: onnx.ModelProto): output_node = onnx_model.graph.output[0] if os.getenv("TREES_USE_FHE_SUM") == "1": + # Create the node with perm attribute equal to (1, 0) perm = [1, 0] else: + # Create the node with perm attribute equal to (2, 1, 0) perm = [2, 1, 0] - # Create the node with perm attribute equal to (1, 0) + transpose_node = onnx.helper.make_node( "Transpose", inputs=[output_node.name], @@ -266,7 +268,7 @@ def tree_onnx_graph_preprocessing( def tree_values_preprocessing( onnx_model: onnx.ModelProto, framework: str, - n_bits: int, + output_n_bits: int, ) -> QuantizedArray: """Pre-process tree values. @@ -292,7 +294,7 @@ def tree_values_preprocessing( if "weight_3" in initializer.name: # print(init_tensor) # weight_3 is the prediction tensor, apply the required pre-processing - q_y = preprocess_tree_predictions(init_tensor, n_bits) + q_y = preprocess_tree_predictions(init_tensor, output_n_bits) # Get the preprocessed tree predictions to replace the current (non-quantized) # values in the onnx_model. diff --git a/src/concrete/ml/sklearn/xgb.py b/src/concrete/ml/sklearn/xgb.py index a2b121f4d..28722b706 100644 --- a/src/concrete/ml/sklearn/xgb.py +++ b/src/concrete/ml/sklearn/xgb.py @@ -27,7 +27,7 @@ class XGBClassifier(BaseTreeClassifierMixin): # pylint: disable=too-many-arguments,too-many-locals def __init__( self, - n_bits: int = 6, + n_bits: Union[int, Dict[str, int]] = 6, max_depth: Optional[int] = 3, learning_rate: Optional[float] = None, n_estimators: Optional[int] = 20, @@ -178,7 +178,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits["op_leaves"], + output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -233,7 +233,7 @@ class XGBRegressor(BaseTreeRegressorMixin): # pylint: disable=too-many-arguments,too-many-locals def __init__( self, - n_bits: int = 6, + n_bits: Union[int, Dict[str, int]] = 6, max_depth: Optional[int] = 3, learning_rate: Optional[float] = None, n_estimators: Optional[int] = 20, @@ -407,7 +407,7 @@ def load_dict(cls, metadata: Dict): obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, - output_n_bits=obj.n_bits["op_leaves"], + output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index beeceed4e..1910d15b4 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1205,6 +1205,50 @@ def check_rounding_consistency( # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4178 +def check_fhe_sum_consistency( + x, + predict_method, + metric, + 
is_weekly_option, +): + """Test that Concrete ML without and with rounding are 'equivalent'.""" + + # Run the test with more samples during weekly CIs + if is_weekly_option: + fhe_test = get_random_samples(x, n_sample=5) + + # By default, FHE_SUM is disabled + fhe_sum_disabled = os.getenv("TREES_USE_FHE_SUM") == "1" + assert fhe_sum_disabled + + non_fhe_sum_predict_quantized = predict_method(x, fhe="disable") + non_fhe_sum_predict_simulate = predict_method(x, fhe="simulate") + + # Compute the FHE predictions only during weekly CIs + if is_weekly_option: + rounded_predict_fhe = predict_method(fhe_test, fhe="execute") + + with pytest.MonkeyPatch.context() as mp_context: + + # Enable FHE sum + mp_context.setenv("TREES_USE_FHE_SUM", "0") + + # Check that rounding is disabled + fhe_sum_enbled = os.environ.get("TREES_USE_FHE_SUM") == "0" + assert fhe_sum_enbled + + fhe_sum_predict_quantized = predict_method(x, fhe="disable") + fhe_sum_predict_simulate = predict_method(x, fhe="simulate") + + metric(non_fhe_sum_predict_quantized, fhe_sum_predict_quantized) + metric(non_fhe_sum_predict_simulate, fhe_sum_predict_simulate) + + # Compute the FHE predictions only during weekly CIs + if is_weekly_option: + not_rounded_predict_fhe = predict_method(fhe_test, fhe="execute") + metric(rounded_predict_fhe, not_rounded_predict_fhe) + + # Neural network models are skipped for this test # The `fit_benchmark` function of QNNs returns a QAT model and a FP32 model that is similar # in structure but trained from scratch. Furthermore, the `n_bits` setting @@ -1676,7 +1720,7 @@ def test_p_error_simulation( is_linear_model = is_model_class_in_a_list(model_class, _get_sklearn_linear_models()) # Compile with a large p_error to be sure the result is random. - c = model.compile(x, **error_param) + model.compile(x, **error_param) def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_RUN): """Detect divergence between simulated/FHE execution and clear run.""" @@ -1886,56 +1930,6 @@ def test_rounding_consistency_for_regular_models( ) -def check_fhe_sum_consistency( - x, - predict_method, - metric, - is_weekly_option, -): - """Test that Concrete ML without and with rounding are 'equivalent'.""" - - # Run the test with more samples during weekly CIs - if is_weekly_option: - fhe_test = get_random_samples(x, n_sample=5) - - # By default, FHE_SUM is disabled - fhe_sum_disabled = os.getenv("TREES_USE_FHE_SUM") == "1" - assert fhe_sum_disabled - - non_fhe_sume_predict_quantized = predict_method(x, fhe="disable") - non_fhe_sume_predict_simulate = predict_method(x, fhe="simulate") - - # Compute the FHE predictions only during weekly CIs - if is_weekly_option: - rounded_predict_fhe = predict_method(fhe_test, fhe="execute") - - print("ROUNGING ENABLED") - - with pytest.MonkeyPatch.context() as mp_context: - - # Enable FHE sum - mp_context.setenv("TREES_USE_FHE_SUM", "0") - - # Check that rounding is disabled - fhe_sum_enbled = os.environ.get("TREES_USE_FHE_SUM") == "0" - assert fhe_sum_enbled - - fhe_sum_predict_quantized = predict_method(x, fhe="disable") - fhe_sum_predict_simulate = predict_method(x, fhe="simulate") - - metric(non_fhe_sume_predict_quantized, fhe_sum_predict_quantized) - metric(non_fhe_sume_predict_simulate, fhe_sum_predict_simulate) - - # Compute the FHE predictions only during weekly CIs - if is_weekly_option: - not_rounded_predict_fhe = predict_method(fhe_test, fhe="execute") - metric(rounded_predict_fhe, not_rounded_predict_fhe) - - # Check that the maximum bit-width of the circuit with 
rounding is at most: - # maximum bit-width (of the circuit without rounding) + 2 - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4178 - - @pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets()) @pytest.mark.parametrize("n_bits", [2, 5, 10]) def test_fhe_sum_for_tree_based_models( From ba26a5cc06bebe2114f7e85e7d030f9e40671b4a Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 15 Jan 2024 15:21:54 +0100 Subject: [PATCH 13/38] chore: update --- src/concrete/ml/sklearn/base.py | 17 +++-- src/concrete/ml/sklearn/tree_to_numpy.py | 18 +++-- tests/sklearn/test_dump_onnx.py | 15 ++-- tests/sklearn/test_sklearn_models.py | 94 ++++++++++++------------ 4 files changed, 77 insertions(+), 67 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index aacf301a2..179e5a5c4 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1320,7 +1320,6 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Convert the n_bits attribute into a proper dictionary self.n_bits = get_n_bits_dict_trees(self.n_bits) - print(f"{self.n_bits=}") # Quantization of each feature in X for i in range(X.shape[1]): @@ -1432,16 +1431,18 @@ def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy. y_pred = self.post_processing(y_pred) return y_pred - # def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: + def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: + # Sum all tree outputs + # Remove the sum once we handle multi-precision circuits + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 + if os.getenv("TREES_USE_FHE_SUM") == "0": + y_preds = numpy.sum(y_preds, axis=-1) + assert_true(y_preds.ndim == 2, "y_preds should be a 2D array") + return y_preds - # # Sum all tree outputs - # # Remove the sum once we handle multi-precision circuits - # # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 - # y_preds = numpy.sum(y_preds, axis=-1) + return super().post_processing(y_preds) - # assert_true(y_preds.ndim == 2, "y_preds should be a 2D array") - # return y_preds class BaseTreeRegressorMixin(BaseTreeEstimatorMixin, sklearn.base.RegressorMixin, ABC): """Mixin class for tree-based regressors. 
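[Editor's note: illustrative sketch, not part of the patch series] The `post_processing` hunk above is the crux of this change: with `TREES_USE_FHE_SUM` left at its default of "0", the compiled circuit returns one de-quantized output per tree and the aggregation happens in the clear, while setting it to "1" keeps the ReduceSum inside the encrypted circuit so `post_processing` already receives aggregated values. A minimal sketch of the clear-aggregation path, assuming a hypothetical (n_samples, n_classes, n_trees) output layout (the shapes are illustrative assumptions, not taken from the patch):

    import numpy

    # Hypothetical de-quantized circuit output: one value per (sample, class, tree).
    y_preds = numpy.zeros((8, 2, 5))

    # Clear aggregation (TREES_USE_FHE_SUM == "0"): sum over the last (tree) axis,
    # leaving a 2D (n_samples, n_classes) array, which is what the ndim == 2
    # assertion in post_processing expects.
    y_preds = numpy.sum(y_preds, axis=-1)
    assert y_preds.ndim == 2

When the FHE sum is enabled, the same reduction is instead performed by the ReduceSum node kept in the ONNX graph (see the tree_to_numpy.py hunk below), so the Python-side sum must be skipped.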
diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index 49b86705e..65a115759 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -18,7 +18,11 @@ OPSET_VERSION_FOR_ONNX_EXPORT, get_equivalent_numpy_forward_from_onnx_tree, ) -from ..onnx.onnx_model_manipulations import clean_graph_after_node_op_type, remove_node_types +from ..onnx.onnx_model_manipulations import ( + clean_graph_after_node_op_type, + clean_graph_at_node_op_type, + remove_node_types, +) from ..onnx.onnx_utils import get_op_type from ..quantization import QuantizedArray from ..quantization.quantizers import UniformQuantizer @@ -142,13 +146,14 @@ def add_transpose_after_last_node(onnx_model: onnx.ModelProto): # Get the output node output_node = onnx_model.graph.output[0] + # When using FHE sum for tree ensembles, create the node with perm attribute equal to (1, 0) if os.getenv("TREES_USE_FHE_SUM") == "1": - # Create the node with perm attribute equal to (1, 0) perm = [1, 0] + + # Otherwise, create the node with perm attribute equal to (2, 1, 0) else: - # Create the node with perm attribute equal to (2, 1, 0) perm = [2, 1, 0] - + transpose_node = onnx.helper.make_node( "Transpose", inputs=[output_node.name], @@ -246,7 +251,10 @@ def tree_onnx_graph_preprocessing( # Cut the graph after the ReduceSum node to remove # argmax, sigmoid, softmax from the graph. - clean_graph_after_node_op_type(onnx_model, "ReduceSum") + if os.getenv("TREES_USE_FHE_SUM") == "1": + clean_graph_after_node_op_type(onnx_model, "ReduceSum") + else: + clean_graph_at_node_op_type(onnx_model, "ReduceSum") if framework == "xgboost": # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2778 diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index a5fe6497a..484e2c0d7 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -69,7 +69,6 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau str_model = onnx.helper.printable_graph(onnx_model.graph) print(f"{model_name}:") print(str_model) - # Test equality when it does not depend on seeds # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3266 if not is_model_class_in_a_list(model_class, _get_sklearn_tree_models(select="RandomForest")): @@ -228,7 +227,7 @@ def test_dump( %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) """ if os.getenv("TREES_USE_FHE_SUM") == "1" - else "%transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0)" + else "%transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0)\n " ) + """return %transposed_output }""", @@ -307,7 +306,7 @@ def test_dump( %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) """ if os.getenv("TREES_USE_FHE_SUM") == "1" - else "" + else "%transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0)\n " ) + """return %transposed_output }""", @@ -359,8 +358,7 @@ def test_dump( return %/_operators.0/ReduceSum_output_0 }""" if os.getenv("TREES_USE_FHE_SUM") == "1" - else """return %/_operators.0/Reshape_4_output_0 - }""" + else "return %/_operators.0/Reshape_4_output_0\n}" ), "RandomForestRegressor": """graph torch_jit ( %input_0[DOUBLE, symx10] @@ -401,9 +399,8 @@ def test_dump( %/_operators.0/Constant_1_output_0[INT64, 2] %/_operators.0/Constant_2_output_0[INT64, 3] %/_operators.0/Constant_3_output_0[INT64, 3] - 
%/_operators.0/Constant_4_output_0[INT64, 3] - """ - + ("%onnx::ReduceSum_27[INT64, 1]" if os.getenv("TREES_USE_FHE_SUM") == "1" else "") + %/_operators.0/Constant_4_output_0[INT64, 3]""" + + ("\n %onnx::ReduceSum_27[INT64, 1]" if os.getenv("TREES_USE_FHE_SUM") == "1" else "") + """ ) { %/_operators.0/Gemm_output_0 = Gemm[alpha = 1, beta = 0, transB = 1](%_operators.0.weight_1, %input_0) @@ -424,7 +421,7 @@ def test_dump( return %/_operators.0/ReduceSum_output_0 }""" if os.getenv("TREES_USE_FHE_SUM") == "1" - else "return %/_operators.0/Reshape_4_output_0" + else """return %/_operators.0/Reshape_4_output_0\n}""" ), "LinearRegression": """graph torch_jit ( %input_0[DOUBLE, symx10] diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 1910d15b4..8fcc086dd 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -46,6 +46,7 @@ from concrete.ml.common.serialization.loaders import load, loads from concrete.ml.common.utils import ( USE_OLD_VL, + array_allclose_and_same_shape, get_model_class, get_model_name, is_classifier_or_partial_classifier, @@ -726,16 +727,12 @@ def check_pipeline(model_class, x, y): {key: value} for key, values in hyper_param_combinations.items() for value in values ] - print(f"{hyperparameters_list=}") - # Take one of the hyper_parameters randomly (testing everything would be too long) if len(hyperparameters_list) == 0: hyper_parameters = {} else: hyper_parameters = hyperparameters_list[numpy.random.randint(0, len(hyperparameters_list))] - print(f"{hyperparameters_list=}") - pipe_cv = Pipeline( [ ("pca", PCA(n_components=2, random_state=numpy.random.randint(0, 2**15))), @@ -752,7 +749,6 @@ def check_pipeline(model_class, x, y): } else: - print("ELSE") param_grid = { "model__n_bits": [2, 3], } @@ -1206,47 +1202,73 @@ def check_rounding_consistency( def check_fhe_sum_consistency( + model_class, x, - predict_method, - metric, + y, + n_bits, is_weekly_option, ): - """Test that Concrete ML without and with rounding are 'equivalent'.""" + """Test that Concrete ML without and with FHE sum are 'equivalent'.""" # Run the test with more samples during weekly CIs if is_weekly_option: fhe_test = get_random_samples(x, n_sample=5) - # By default, FHE_SUM is disabled - fhe_sum_disabled = os.getenv("TREES_USE_FHE_SUM") == "1" + # By default, the summation of tree ensemble outputs is done in clear + fhe_sum_disabled = os.getenv("TREES_USE_FHE_SUM") == "0" assert fhe_sum_disabled + model_ref = instantiate_model_generic(model_class, n_bits=n_bits) + fit_and_compile(model_ref, x, y) + + # Check `predict_proba` for classifiers and `predict` for regressors + predict_method = ( + model_ref.predict_proba + if is_classifier_or_partial_classifier(model_class) + else model_ref.predict + ) + non_fhe_sum_predict_quantized = predict_method(x, fhe="disable") non_fhe_sum_predict_simulate = predict_method(x, fhe="simulate") + # Sanity check + array_allclose_and_same_shape(non_fhe_sum_predict_quantized, non_fhe_sum_predict_simulate) + # Compute the FHE predictions only during weekly CIs if is_weekly_option: - rounded_predict_fhe = predict_method(fhe_test, fhe="execute") + non_fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") with pytest.MonkeyPatch.context() as mp_context: - # Enable FHE sum - mp_context.setenv("TREES_USE_FHE_SUM", "0") + # Enable the FHE summation of tree ensemble outputs + mp_context.setenv("TREES_USE_FHE_SUM", "1") - # Check that rounding is disabled - fhe_sum_enbled = 
os.environ.get("TREES_USE_FHE_SUM") == "0" - assert fhe_sum_enbled + # Check that the summation of tree ensemble outputs is enabled + fhe_sum_enabled = os.environ.get("TREES_USE_FHE_SUM") == "1" + assert fhe_sum_enabled + + model = model_class(**model_ref.get_params()) + fit_and_compile(model, x, y) + + # Check `predict_proba` for classifiers and `predict` for regressors + predict_method = ( + model.predict_proba + if is_classifier_or_partial_classifier(model_class) + else model.predict + ) fhe_sum_predict_quantized = predict_method(x, fhe="disable") fhe_sum_predict_simulate = predict_method(x, fhe="simulate") - metric(non_fhe_sum_predict_quantized, fhe_sum_predict_quantized) - metric(non_fhe_sum_predict_simulate, fhe_sum_predict_simulate) + # Sanity check + array_allclose_and_same_shape(fhe_sum_predict_quantized, fhe_sum_predict_simulate) - # Compute the FHE predictions only during weekly CIs - if is_weekly_option: - not_rounded_predict_fhe = predict_method(fhe_test, fhe="execute") - metric(rounded_predict_fhe, not_rounded_predict_fhe) + # Check that we have the exact same predictions + array_allclose_and_same_shape(fhe_sum_predict_quantized, non_fhe_sum_predict_quantized) + array_allclose_and_same_shape(fhe_sum_predict_simulate, non_fhe_sum_predict_simulate) + if is_weekly_option: + fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") + array_allclose_and_same_shape(fhe_sum_predict_fhe, non_fhe_sum_predict_fhe) # Neural network models are skipped for this test @@ -1741,11 +1763,7 @@ def check_for_divergent_predictions(x, model, fhe, max_iterations=N_ALLOWED_FHE_ return True return False - print("Start simulation") - print(model) - simulation_diff_found = check_for_divergent_predictions(x, model, fhe="simulate") - print("execution") fhe_diff_found = check_for_divergent_predictions(x, model, fhe="execute") # Check for differences in predictions @@ -1937,34 +1955,20 @@ def test_fhe_sum_for_tree_based_models( parameters, n_bits, load_data, - check_r2_score, - check_accuracy, is_weekly_option, - default_configuration, verbose=True, ): """Test that Concrete ML without and with rounding are 'equivalent'.""" if verbose: - print("Run check_rounding_consistency") - - model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) - - # Compile the model to make sure we consider all possible attributes during the serialization - model.compile(x, default_configuration) + print("Run check_fhe_sum_consistency") - # Check `predict_proba` for classifiers - if is_classifier_or_partial_classifier(model): - predict_method = model.predict_proba - metric = check_r2_score - else: - # Check `predict` for regressors - predict_method = model.predict - metric = check_accuracy + x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) check_fhe_sum_consistency( + model_class, x, - predict_method, - metric, + y, + n_bits, is_weekly_option, ) From 05256b212263a0d17ee0a612f03fe475454ed844 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 16 Jan 2024 12:43:04 +0100 Subject: [PATCH 14/38] chore: update --- src/concrete/ml/quantization/post_training.py | 11 +++++++---- src/concrete/ml/sklearn/base.py | 8 ++++---- tests/sklearn/test_sklearn_models.py | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index 22d1a3cb5..a8c453330 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -32,8 +32,10 @@ def 
get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: Args: n_bits (int, Dict[str, int]): number of bits for quantization, can be a single value or a dictionary with the following keys : - - "op_inputs" (mandatory) - - "op_leaves" (optional) + - "op_inputs" (mandatory): number of bits to quantize the input values + - "op_leaves" (optional): number of bits to quantize the leaves, defaults to the value + of "op_inputs" if not specified. + When using a single integer for n_bits, its value is assigned to "op_inputs" and "op_leaves" bits. @@ -44,10 +46,11 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: assert_true( isinstance(n_bits, int) - or (isinstance(n_bits, Dict) and set(n_bits.keys()).issubset({"op_inputs", "op_leaves"})), + or (isinstance(n_bits, Dict) and not set(n_bits.keys()) - set(("op_leaves", "op_input"))), "Invalid n_bits, either pass an integer or a dictionary containing integer values for " "the following keys:\n" - "- `op_inputs` and `op_leaves` (mandatory)", + "- `op_inputs` (mandatory)\n" + "- `op_leaves` (optional)", ) n_bits_dict: Dict = {} diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 179e5a5c4..792d37c98 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -101,10 +101,10 @@ # Enable rounding feature for all tree-based models by default # Note: This setting is fixed and cannot be altered by users # However, for internal testing purposes, we retain the capability to disable this feature -os.environ["TREES_USE_ROUNDING"] = "1" +os.environ["TREES_USE_ROUNDING"] = os.environ.get("TREES_USE_ROUNDING", "1") # By default, the decision of the tree ensembles is made in clear -os.environ["TREES_USE_FHE_SUM"] = "0" +os.environ["TREES_USE_FHE_SUM"] = os.environ.get("TREES_USE_FHE_SUM", "0") # pylint: disable=too-many-public-methods @@ -1297,8 +1297,8 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]): for n_bits, the value will be used for quantizing inputs and leaves. If a dict is passed, then it should contain "op_inputs" and "op_leaves" as keys with corresponding number of quantization bits so that: - - op_inputs : number of bits to quantize the input values - - op_leaves: number of bits to quantize the leaves + - op_inputs (mandatory): number of bits to quantize the input values + - op_leaves (optional): number of bits to quantize the leaves Default to 6. 
""" self.n_bits: Union[int, Dict[str, int]] = n_bits diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 8fcc086dd..d95024098 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1958,7 +1958,7 @@ def test_fhe_sum_for_tree_based_models( is_weekly_option, verbose=True, ): - """Test that Concrete ML without and with rounding are 'equivalent'.""" + """Test that the tree ensembles' output are the same with and without the sum in FHE.""" if verbose: print("Run check_fhe_sum_consistency") From 03d498b0822fab1793aaa7f25731c36dffd90c87 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 16 Jan 2024 13:46:08 +0100 Subject: [PATCH 15/38] chore: remove useless prints --- src/concrete/ml/sklearn/tree_to_numpy.py | 2 -- tests/sklearn/test_sklearn_models.py | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index 65a115759..4536f6a9c 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -298,9 +298,7 @@ def tree_values_preprocessing( # while the final probabilities/regression values must be quantized. # We extract the value stored in each initializer node into the init_tensor. init_tensor = numpy_helper.to_array(initializer) - # print(initializer.name, init_tensor.shape) if "weight_3" in initializer.name: - # print(init_tensor) # weight_3 is the prediction tensor, apply the required pre-processing q_y = preprocess_tree_predictions(init_tensor, output_n_bits) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index d95024098..c47c592f3 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1201,7 +1201,7 @@ def check_rounding_consistency( # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4178 -def check_fhe_sum_consistency( +def check_fhe_sum_for_tree_based_models( model_class, x, y, @@ -1961,11 +1961,11 @@ def test_fhe_sum_for_tree_based_models( """Test that the tree ensembles' output are the same with and without the sum in FHE.""" if verbose: - print("Run check_fhe_sum_consistency") + print("Run check_fhe_sum_for_tree_based_models") x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) - check_fhe_sum_consistency( + check_fhe_sum_for_tree_based_models( model_class, x, y, From cf879d392b1b7308f810cd9764afcb3d00f12299 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 17 Jan 2024 10:29:59 +0100 Subject: [PATCH 16/38] chore: update get_n_bits_dict_trees --- src/concrete/ml/quantization/post_training.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index a8c453330..e62c1a261 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -44,14 +44,13 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: for quantization. 
""" - assert_true( - isinstance(n_bits, int) - or (isinstance(n_bits, Dict) and not set(n_bits.keys()) - set(("op_leaves", "op_input"))), - "Invalid n_bits, either pass an integer or a dictionary containing integer values for " - "the following keys:\n" - "- `op_inputs` (mandatory)\n" - "- `op_leaves` (optional)", - ) + if not isinstance(n_bits, int) and not(isinstance(n_bits, Dict) and not set(n_bits.keys()) - set(("op_leaves", "op_inputs"))): + raise ValueError( + "Invalid n_bits, either pass an integer or a dictionary containing integer values for " + "the following keys:\n" + "- `op_inputs` (mandatory)\n" + "- `op_leaves` (optional)" + ) n_bits_dict: Dict = {} @@ -66,12 +65,13 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: elif isinstance(n_bits, Dict): n_bits_dict.update(n_bits) - assert_true( - n_bits_dict["op_inputs"] >= n_bits_dict["op_leaves"], - "Using fewer bits to represent the model_outputs than the op inputs is not " - f"recommended. Got op_leaves: {n_bits_dict['op_leaves']} and op_inputs: " - f"{n_bits_dict['op_inputs']}", - ) + if n_bits_dict["op_inputs"] < n_bits_dict["op_leaves"]: + + raise ValueError( + "Using fewer bits to represent the model_outputs than the op inputs is not " + f"recommended. Got op_leaves: {n_bits_dict['op_leaves']} and op_inputs: " + f"{n_bits_dict['op_inputs']}", + ) return n_bits_dict From ff1c6b14d85d8d8a1cd70cb529bf69290d91eef4 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 17 Jan 2024 15:18:17 +0100 Subject: [PATCH 17/38] chore: update add a test to check valid n_bits for trees make get_n_bits_trees and inspect_tree_n_bits private functions --- src/concrete/ml/quantization/__init__.py | 3 +- src/concrete/ml/quantization/post_training.py | 97 +++++++++++++------ src/concrete/ml/sklearn/base.py | 15 ++- tests/sklearn/test_sklearn_models.py | 34 +++++++ 4 files changed, 114 insertions(+), 35 deletions(-) diff --git a/src/concrete/ml/quantization/__init__.py b/src/concrete/ml/quantization/__init__.py index 6669d76a7..20fba6653 100644 --- a/src/concrete/ml/quantization/__init__.py +++ b/src/concrete/ml/quantization/__init__.py @@ -4,7 +4,8 @@ PostTrainingAffineQuantization, PostTrainingQATImporter, get_n_bits_dict, - get_n_bits_dict_trees, + _get_n_bits_dict_trees, + _inspect_tree_n_bits, ) from .quantized_module import QuantizedModule from .quantized_ops import ( diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index e62c1a261..89aad3aa0 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -26,15 +26,73 @@ # pylint: disable=too-many-lines -def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: +def _inspect_tree_n_bits(n_bits): + """Validate the 'n_bits' parameter for tree-based models. + + This function checks whether 'n_bits' is a valid integer or dictionary. + - If 'n_bits' is an integer, it must be a non-null positive, its value is assigned to + "op_inputs" and "op_leaves" bits + - If it is a dictionary, it should contain integer values for keys 'op_leaves' and 'op_inputs', + where 'op_leaves' should not exceed 'op_inputs'. + + The function raises a ValueError with a descriptive message if 'n_bits' does not meet + these criteria. 
+ + Args: + n_bits (int, Dict[str, int]): number of bits for quantization, can be a single value or + a dictionary with the following keys : + - "op_inputs" (mandatory): number of bits to quantize the input values + - "op_leaves" (optional): number of bits to quantize the leaves, must be less than or + equal to `op_inputs`. defaults to the value of "op_inputs" if not specified. + + Raises: + ValueError: If 'n_bits' does not conform to the required format or value constraints. + """ + + detailed_message = ( + "Invalid `n_bits`, either pass a non-null positive integer or a dictionary containing " + "integer values for the following keys:\n" + "- `op_inputs` (mandatory): number of bits to quantize the input values\n" + "- `op_leaves` (optional): number of bits to quantize the leaves, must be less than or " + "equal to `op_inputs`. Defaults to the value of `op_inputs` if not specified." + "When using a single integer for n_bits, its value is assigned to `op_inputs` and " + "`op_leaves` bits.\n" + ) + + error_message = "" + + if isinstance(n_bits, int): + if n_bits <= 0: + error_message = "n_bits must be a non-null, positive integer" + elif isinstance(n_bits, dict): + if "op_inputs" not in n_bits.keys() or set(n_bits.keys()) - {"op_leaves", "op_inputs"}: + error_message = ( + "Invalid keys in `n_bits` dictionary. Only 'op_inputs' (mandatory) and " + "'op_leaves' (optional) are allowed" + ) + elif not all(isinstance(value, int) and value > 0 for value in n_bits.values()): + error_message = "All values in `n_bits` dictionary must be non-null, positive integers" + + elif n_bits.get("op_leaves", 0) > n_bits.get("op_inputs", 0): + error_message = "`op_leaves` must be less than or equal to `op_inputs`" + else: + error_message = "n_bits must be either an integer or a dictionary" + + if len(error_message) > 0: + raise ValueError( + f"{error_message}. Got `{type(n_bits)}` and `{n_bits}` value.\n{detailed_message}" + ) + + +def _get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: """Convert the n_bits parameter into a proper dictionary for tree based-models. Args: n_bits (int, Dict[str, int]): number of bits for quantization, can be a single value or a dictionary with the following keys : - "op_inputs" (mandatory): number of bits to quantize the input values - - "op_leaves" (optional): number of bits to quantize the leaves, defaults to the value - of "op_inputs" if not specified. + - "op_leaves" (optional): number of bits to quantize the leaves, must be less than or + equal to `op_inputs`. defaults to the value of "op_inputs" if not specified. When using a single integer for n_bits, its value is assigned to "op_inputs" and "op_leaves" bits. @@ -44,36 +102,17 @@ def get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: for quantization. 
""" - if not isinstance(n_bits, int) and not(isinstance(n_bits, Dict) and not set(n_bits.keys()) - set(("op_leaves", "op_inputs"))): - raise ValueError( - "Invalid n_bits, either pass an integer or a dictionary containing integer values for " - "the following keys:\n" - "- `op_inputs` (mandatory)\n" - "- `op_leaves` (optional)" - ) + _inspect_tree_n_bits(n_bits) - n_bits_dict: Dict = {} - - # If a single integer is passed, we use a default value for the model's input and - # output bits + # If a single integer is passed, we use a default value for the model's input and leaves if isinstance(n_bits, int): - n_bits_dict = { - "op_inputs": n_bits, - "op_leaves": n_bits, - } - - elif isinstance(n_bits, Dict): - n_bits_dict.update(n_bits) + return {"op_inputs": n_bits, "op_leaves": n_bits} - if n_bits_dict["op_inputs"] < n_bits_dict["op_leaves"]: + # Default `op_leaves` to `op_inputs` if not specified + if "op_leaves" not in n_bits: + n_bits["op_leaves"] = n_bits["op_inputs"] - raise ValueError( - "Using fewer bits to represent the model_outputs than the op inputs is not " - f"recommended. Got op_leaves: {n_bits_dict['op_leaves']} and op_inputs: " - f"{n_bits_dict['op_inputs']}", - ) - - return n_bits_dict + return n_bits def get_n_bits_dict(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 792d37c98..62ff03cbe 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -53,7 +53,8 @@ PostTrainingQATImporter, QuantizedArray, get_n_bits_dict, - get_n_bits_dict_trees, + _get_n_bits_dict_trees, + _inspect_tree_n_bits, ) from ..quantization.quantized_module import QuantizedModule, _get_inputset_generator from ..quantization.quantizers import ( @@ -1301,6 +1302,10 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]): - op_leaves (optional): number of bits to quantize the leaves Default to 6. """ + + # Check if 'n_bits' is a valid value + _inspect_tree_n_bits(n_bits) + self.n_bits: Union[int, Dict[str, int]] = n_bits #: The model's inference function. Is None if the model is not fitted. @@ -1319,7 +1324,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): q_X = numpy.zeros_like(X) # Convert the n_bits attribute into a proper dictionary - self.n_bits = get_n_bits_dict_trees(self.n_bits) + self.n_bits = _get_n_bits_dict_trees(self.n_bits) # Quantization of each feature in X for i in range(X.shape[1]): @@ -1338,9 +1343,6 @@ def fit(self, X: Data, y: Target, **fit_parameters): # Check that the underlying sklearn model has been set and fit assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message() - # Convert the n_bits attribute into a proper dictionary - self.n_bits = get_n_bits_dict_trees(self.n_bits) - # Enable rounding feature enable_rounding = os.environ.get("TREES_USE_ROUNDING", "1") == "1" @@ -1867,12 +1869,15 @@ def __init__(self, n_bits: int = 3): quantizing inputs and X_fit. Default to 3. """ self.n_bits: int = n_bits + # _q_fit_X: In distance metric algorithms, `_q_fit_X` stores the training set to compute # the similarity or distance measures. 
There is no `weights` attribute because there isn't # a training phase self._q_fit_X: numpy.ndarray + # _y: Labels of `_q_fit_X` self._y: numpy.ndarray + # _q_fit_X_quantizer: The quantizer to use for quantizing the model's training set self._q_fit_X_quantizer: Optional[UniformQuantizer] = None diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index c47c592f3..560edde27 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1972,3 +1972,37 @@ def test_fhe_sum_for_tree_based_models( n_bits, is_weekly_option, ) + + +@pytest.mark.parametrize( + "n_bits, error_message", + [ + (0, "n_bits must be a non-null, positive integer"), + (-1, "n_bits must be a non-null, positive integer"), + # ( + # {"op_inputs": 4, "op_leaves": 2, "op_weights": 2}, + # "Invalid keys in `n_bits` dictionary. Only 'op_inputs' (mandatory) and 'op_leaves' " + # "(optional) are allowed", + # ), + ( + {"op_inputs": -2, "op_leaves": -5}, + "All values in `n_bits` dictionary must be non-null, positive integers", + ), + ({"op_inputs": 2, "op_leaves": 5}, "`op_leaves` must be less than or equal to `op_inputs`"), + (0.5, "n_bits must be either an integer or a dictionary"), + ], +) +@pytest.mark.parametrize("model_class", _get_sklearn_tree_models()) +def test_invalid_n_bits_setting(model_class, n_bits, error_message): + """Check if the model instantiation raises an exception with invalid 'n_bits' settings.""" + + with pytest.raises(ValueError, match=f"{error_message}. Got `{type(n_bits)}` and `{n_bits}`.*"): + instantiate_model_generic(model_class, n_bits=n_bits) + + +@pytest.mark.parametrize("n_bits", [5, {"op_inputs": 5}, {"op_inputs": 2, "op_leaves": 1}]) +@pytest.mark.parametrize("model_class", _get_sklearn_tree_models()) +def test_valid_n_bits_setting(model_class, n_bits): + """Check valid `n_bits' settings.""" + + instantiate_model_generic(model_class, n_bits=n_bits) From d4ca14081672bf171361f0f89fb87a723c6b94fc Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 17 Jan 2024 15:41:36 +0100 Subject: [PATCH 18/38] chore: update comment --- src/concrete/ml/quantization/__init__.py | 2 +- src/concrete/ml/sklearn/base.py | 2 +- src/concrete/ml/sklearn/tree_to_numpy.py | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/concrete/ml/quantization/__init__.py b/src/concrete/ml/quantization/__init__.py index 20fba6653..f9c94793e 100644 --- a/src/concrete/ml/quantization/__init__.py +++ b/src/concrete/ml/quantization/__init__.py @@ -3,9 +3,9 @@ from .post_training import ( PostTrainingAffineQuantization, PostTrainingQATImporter, - get_n_bits_dict, _get_n_bits_dict_trees, _inspect_tree_n_bits, + get_n_bits_dict, ) from .quantized_module import QuantizedModule from .quantized_ops import ( diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 62ff03cbe..b951d2224 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -52,9 +52,9 @@ from ..quantization import ( PostTrainingQATImporter, QuantizedArray, - get_n_bits_dict, _get_n_bits_dict_trees, _inspect_tree_n_bits, + get_n_bits_dict, ) from ..quantization.quantized_module import QuantizedModule, _get_inputset_generator from ..quantization.quantizers import ( diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index 4536f6a9c..e9896f5e7 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -146,6 +146,12 @@ def 
add_transpose_after_last_node(onnx_model: onnx.ModelProto): # Get the output node output_node = onnx_model.graph.output[0] + # The state of the `TREES_USE_FHE_SUM` variable affects the structure of the model's ONNX graph. + # When the option is enabled, the graph is cut after the ReduceSum node. + # On the other hand, when it is disabled, the graph is cut at the ReduceSum node, + # which alters the output shape. + # Therefore, it is necessary to adjust this shape with the correct permutation. + # When using FHE sum for tree ensembles, create the node with perm attribute equal to (1, 0) if os.getenv("TREES_USE_FHE_SUM") == "1": perm = [1, 0] From f96333bc1dfbbb11e37da9275bdb2eb0ef576816 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 17 Jan 2024 16:49:22 +0100 Subject: [PATCH 19/38] chore: update simulated p_error test --- tests/sklearn/test_sklearn_models.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 560edde27..1b6b2a58b 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -144,10 +144,16 @@ def preamble(model_class, parameters, n_bits, load_data, is_weekly_option): def get_n_bits_non_correctness(model_class): """Get the number of bits to use for non correctness related tests.""" + # KNN can only be compiled with small quantization bit numbers for now + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 if get_model_name(model_class) == "KNeighborsClassifier": - # KNN can only be compiled with small quantization bit numbers for now - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 n_bits = 2 + + # Adjust the quantization precision for tree-based model based on `TREES_USE_FHE_SUM` setting. + # When enabled, the circuit's bitwidth increases, potentially leading to Out-of-Memory issues. + # Therefore, the maximum quantization precision is 4 bits in this case. + elif model_class in _get_sklearn_tree_models() and os.environ.get("TREES_USE_FHE_SUM") == "1": + n_bits = min(min(N_BITS_REGULAR_BUILDS), 4) else: n_bits = min(N_BITS_REGULAR_BUILDS) @@ -1730,10 +1736,7 @@ def test_p_error_simulation( with simulation or in FHE compared to the expected clear quantized ones. """ - if os.getenv("TREES_USE_FHE_SUM") == "1": - n_bits = 4 - else: - n_bits = get_n_bits_non_correctness(model_class) + n_bits = get_n_bits_non_correctness(model_class) # Get data-set, initialize and fit the model model, x = preamble(model_class, parameters, n_bits, load_data, is_weekly_option) From cc4781f85b8505eb4469a432c62e16f88adebaac Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 17 Jan 2024 17:59:12 +0100 Subject: [PATCH 20/38] chore: update coverage --- src/concrete/ml/quantization/post_training.py | 6 +++--- tests/sklearn/test_sklearn_models.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index 89aad3aa0..34bfe8dee 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -67,8 +67,8 @@ def _inspect_tree_n_bits(n_bits): elif isinstance(n_bits, dict): if "op_inputs" not in n_bits.keys() or set(n_bits.keys()) - {"op_leaves", "op_inputs"}: error_message = ( - "Invalid keys in `n_bits` dictionary. Only 'op_inputs' (mandatory) and " - "'op_leaves' (optional) are allowed" + "Invalid keys in `n_bits` dictionary. 
Only 'op_inputs' (mandatory) and 'op_leaves' " + "(optional) are allowed" ) elif not all(isinstance(value, int) and value > 0 for value in n_bits.values()): error_message = "All values in `n_bits` dictionary must be non-null, positive integers" @@ -110,7 +110,7 @@ def _get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int] # Default `op_leaves` to `op_inputs` if not specified if "op_leaves" not in n_bits: - n_bits["op_leaves"] = n_bits["op_inputs"] + n_bits["op_leaves"] = n_bits["op_inputs"] # pragma: no cover return n_bits diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 1b6b2a58b..94f098d72 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1982,11 +1982,11 @@ def test_fhe_sum_for_tree_based_models( [ (0, "n_bits must be a non-null, positive integer"), (-1, "n_bits must be a non-null, positive integer"), - # ( - # {"op_inputs": 4, "op_leaves": 2, "op_weights": 2}, - # "Invalid keys in `n_bits` dictionary. Only 'op_inputs' (mandatory) and 'op_leaves' " - # "(optional) are allowed", - # ), + ( + {"op_inputs": 4, "op_leaves": 2, "op_weights": 2}, + "Invalid keys in `n_bits` dictionary. Only 'op_inputs' \\(mandatory\\) and 'op_leaves' " + "\\(optional\\) are allowed", + ), ( {"op_inputs": -2, "op_leaves": -5}, "All values in `n_bits` dictionary must be non-null, positive integers", From a65200169b018ee9adcf15051c5a593c614eb3d2 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 18 Jan 2024 12:32:25 +0100 Subject: [PATCH 21/38] chore: update tests --- src/concrete/ml/quantization/post_training.py | 34 ++++++++++--------- tests/sklearn/test_sklearn_models.py | 32 +++++++++++++---- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index 34bfe8dee..621697f14 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -31,7 +31,7 @@ def _inspect_tree_n_bits(n_bits): This function checks whether 'n_bits' is a valid integer or dictionary. - If 'n_bits' is an integer, it must be a non-null positive, its value is assigned to - "op_inputs" and "op_leaves" bits + 'op_inputs' and 'op_leaves' bits - If it is a dictionary, it should contain integer values for keys 'op_leaves' and 'op_inputs', where 'op_leaves' should not exceed 'op_inputs'. @@ -43,20 +43,20 @@ def _inspect_tree_n_bits(n_bits): a dictionary with the following keys : - "op_inputs" (mandatory): number of bits to quantize the input values - "op_leaves" (optional): number of bits to quantize the leaves, must be less than or - equal to `op_inputs`. defaults to the value of "op_inputs" if not specified. + equal to 'op_inputs. defaults to the value of 'op_inputs if not specified. Raises: ValueError: If 'n_bits' does not conform to the required format or value constraints. """ detailed_message = ( - "Invalid `n_bits`, either pass a non-null positive integer or a dictionary containing " + "Invalid 'n_bits', either pass a non-null positive integer or a dictionary containing " "integer values for the following keys:\n" - "- `op_inputs` (mandatory): number of bits to quantize the input values\n" - "- `op_leaves` (optional): number of bits to quantize the leaves, must be less than or " - "equal to `op_inputs`. Defaults to the value of `op_inputs` if not specified." 
- "When using a single integer for n_bits, its value is assigned to `op_inputs` and " - "`op_leaves` bits.\n" + "- 'op_inputs' (mandatory): number of bits to quantize the input values\n" + "- 'op_leaves' (optional): number of bits to quantize the leaves, must be less than or " + "equal to 'op_inputs'. Defaults to the value of 'op_inputs' if not specified." + "When using a single integer for n_bits, its value is assigned to 'op_inputs' and " + "'op_leaves' bits.\n" ) error_message = "" @@ -65,22 +65,24 @@ def _inspect_tree_n_bits(n_bits): if n_bits <= 0: error_message = "n_bits must be a non-null, positive integer" elif isinstance(n_bits, dict): - if "op_inputs" not in n_bits.keys() or set(n_bits.keys()) - {"op_leaves", "op_inputs"}: + if "op_inputs" not in n_bits.keys(): + error_message = "Invalid keys in `n_bits` dictionary. The key 'op_inputs' is mandatory" + elif set(n_bits.keys()) - {"op_leaves", "op_inputs"}: error_message = ( - "Invalid keys in `n_bits` dictionary. Only 'op_inputs' (mandatory) and 'op_leaves' " + "Invalid keys in 'n_bits' dictionary. Only 'op_inputs' (mandatory) and 'op_leaves' " "(optional) are allowed" ) elif not all(isinstance(value, int) and value > 0 for value in n_bits.values()): - error_message = "All values in `n_bits` dictionary must be non-null, positive integers" + error_message = "All values in 'n_bits' dictionary must be non-null, positive integers" elif n_bits.get("op_leaves", 0) > n_bits.get("op_inputs", 0): - error_message = "`op_leaves` must be less than or equal to `op_inputs`" + error_message = "'op_leaves' must be less than or equal to 'op_inputs'" else: error_message = "n_bits must be either an integer or a dictionary" if len(error_message) > 0: raise ValueError( - f"{error_message}. Got `{type(n_bits)}` and `{n_bits}` value.\n{detailed_message}" + f"{error_message}. Got '{type(n_bits)}' and '{n_bits}' value.\n{detailed_message}" ) @@ -92,7 +94,7 @@ def _get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int] a dictionary with the following keys : - "op_inputs" (mandatory): number of bits to quantize the input values - "op_leaves" (optional): number of bits to quantize the leaves, must be less than or - equal to `op_inputs`. defaults to the value of "op_inputs" if not specified. + equal to 'op_inputs'. defaults to the value of "op_inputs" if not specified. When using a single integer for n_bits, its value is assigned to "op_inputs" and "op_leaves" bits. @@ -108,9 +110,9 @@ def _get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int] if isinstance(n_bits, int): return {"op_inputs": n_bits, "op_leaves": n_bits} - # Default `op_leaves` to `op_inputs` if not specified + # Default 'op_leaves' to 'op_inputs' if not specified if "op_leaves" not in n_bits: - n_bits["op_leaves"] = n_bits["op_inputs"] # pragma: no cover + n_bits["op_leaves"] = n_bits["op_inputs"] return n_bits diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 94f098d72..1d6dbe406 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1982,16 +1982,17 @@ def test_fhe_sum_for_tree_based_models( [ (0, "n_bits must be a non-null, positive integer"), (-1, "n_bits must be a non-null, positive integer"), + ({"op_leaves": 2}, "The key 'op_inputs' is mandatory"), ( {"op_inputs": 4, "op_leaves": 2, "op_weights": 2}, - "Invalid keys in `n_bits` dictionary. Only 'op_inputs' \\(mandatory\\) and 'op_leaves' " + "Invalid keys in 'n_bits' dictionary. 
Only 'op_inputs' \\(mandatory\\) and 'op_leaves' " "\\(optional\\) are allowed", ), ( {"op_inputs": -2, "op_leaves": -5}, - "All values in `n_bits` dictionary must be non-null, positive integers", + "All values in 'n_bits' dictionary must be non-null, positive integers", ), - ({"op_inputs": 2, "op_leaves": 5}, "`op_leaves` must be less than or equal to `op_inputs`"), + ({"op_inputs": 2, "op_leaves": 5}, "'op_leaves' must be less than or equal to 'op_inputs'"), (0.5, "n_bits must be either an integer or a dictionary"), ], ) @@ -1999,13 +2000,30 @@ def test_fhe_sum_for_tree_based_models( def test_invalid_n_bits_setting(model_class, n_bits, error_message): """Check if the model instantiation raises an exception with invalid 'n_bits' settings.""" - with pytest.raises(ValueError, match=f"{error_message}. Got `{type(n_bits)}` and `{n_bits}`.*"): + with pytest.raises(ValueError, match=f"{error_message}. Got '{type(n_bits)}' and '{n_bits}'.*"): instantiate_model_generic(model_class, n_bits=n_bits) @pytest.mark.parametrize("n_bits", [5, {"op_inputs": 5}, {"op_inputs": 2, "op_leaves": 1}]) -@pytest.mark.parametrize("model_class", _get_sklearn_tree_models()) -def test_valid_n_bits_setting(model_class, n_bits): +@pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets()) +def test_valid_n_bits_setting( + model_class, + n_bits, + parameters, + load_data, + is_weekly_option, + verbose=True, +): """Check valid `n_bits' settings.""" - instantiate_model_generic(model_class, n_bits=n_bits) + if verbose: + print("Run test_valid_n_bits_setting") + + x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) + + model = instantiate_model_generic(model_class, n_bits=n_bits) + + with warnings.catch_warnings(): + # Sometimes, we miss convergence, which is not a problem for our test + warnings.simplefilter("ignore", category=ConvergenceWarning) + model.fit(x, y) From 9839ce9623c4cca4776e72ca9d2e2477970bec43 Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 18 Jan 2024 17:08:06 +0100 Subject: [PATCH 22/38] chore: update assert --- tests/sklearn/test_sklearn_models.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 1d6dbe406..1ec4d7c98 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1159,8 +1159,7 @@ def check_rounding_consistency( fhe_test = get_random_samples(x, n_sample=5) # Check that rounding is enabled - rounding_enabled = os.getenv("TREES_USE_ROUNDING") == "1" - assert rounding_enabled + assert os.environ.get("TREES_USE_ROUNDING") == "1", "'TREES_USE_ROUNDING' is not enabled" # Fit and compile with rounding enabled fit_and_compile(model, x, y) @@ -1178,8 +1177,7 @@ def check_rounding_consistency( mp_context.setenv("TREES_USE_ROUNDING", "0") # Check that rounding is disabled - rounding_disabled = os.environ.get("TREES_USE_ROUNDING") == "0" - assert rounding_disabled + assert os.environ.get("TREES_USE_ROUNDING") == "0", "'TREES_USE_ROUNDING' is not disabled" with pytest.warns( DeprecationWarning, @@ -1221,8 +1219,7 @@ def check_fhe_sum_for_tree_based_models( fhe_test = get_random_samples(x, n_sample=5) # By default, the summation of tree ensemble outputs is done in clear - fhe_sum_disabled = os.getenv("TREES_USE_FHE_SUM") == "0" - assert fhe_sum_disabled + assert os.getenv("TREES_USE_FHE_SUM") == "0", "'TREES_USE_FHE_SUM' is not disabled" model_ref = instantiate_model_generic(model_class, 
n_bits=n_bits) fit_and_compile(model_ref, x, y) @@ -1250,8 +1247,7 @@ def check_fhe_sum_for_tree_based_models( mp_context.setenv("TREES_USE_FHE_SUM", "1") # Check that the summation of tree ensemble outputs is enabled - fhe_sum_enabled = os.environ.get("TREES_USE_FHE_SUM") == "1" - assert fhe_sum_enabled + assert os.getenv("TREES_USE_FHE_SUM") == "1", "'TREES_USE_FHE_SUM' is not enabled" model = model_class(**model_ref.get_params()) fit_and_compile(model, x, y) From 7cb13e08d064c3daef91ba1ae317477c87b50816 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 22 Jan 2024 12:15:56 +0100 Subject: [PATCH 23/38] chore: update comment --- src/concrete/ml/sklearn/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index b951d2224..d732d664f 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1303,9 +1303,10 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]): Default to 6. """ - # Check if 'n_bits' is a valid value + # Check if 'n_bits' is a valid value. _inspect_tree_n_bits(n_bits) + #: The number of bits to quantize the model. self.n_bits: Union[int, Dict[str, int]] = n_bits #: The model's inference function. Is None if the model is not fitted. From 7d935754c3e397cd7a82d13bbdeb7fe7b42c2c2d Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 22 Jan 2024 12:44:47 +0100 Subject: [PATCH 24/38] chore: update comment --- tests/sklearn/test_dump_onnx.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index 484e2c0d7..554b24d28 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -67,8 +67,9 @@ def check_onnx_file_dump(model_class, parameters, load_data, str_expected, defau del onnx_model.graph.initializer[0] str_model = onnx.helper.printable_graph(onnx_model.graph) - print(f"{model_name}:") - print(str_model) + print(f"\nCurrent {model_name=}:\n{str_model}") + print(f"\nExpected {model_name=}:\n{str_expected}") + # Test equality when it does not depend on seeds # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3266 if not is_model_class_in_a_list(model_class, _get_sklearn_tree_models(select="RandomForest")): From 70adfd5e11834ab8fff608493dd9345066a440a6 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 22 Jan 2024 13:14:17 +0100 Subject: [PATCH 25/38] chore: test dump in both cases (sum_fhe enabled and disabled) --- tests/sklearn/test_dump_onnx.py | 211 +++++++++++++++++--------------- 1 file changed, 112 insertions(+), 99 deletions(-) diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index 554b24d28..58925f6f0 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -20,107 +20,10 @@ # pylint: disable=line-too-long -def check_onnx_file_dump(model_class, parameters, load_data, str_expected, default_configuration): +def check_onnx_file_dump(model_class, parameters, load_data, default_configuration): """Fit the model and dump the corresponding ONNX.""" - # Get the data-set. The data generation is seeded in load_data. 
- x, y = load_data(model_class, **parameters) - - # Set the model - model = model_class() - - model_params = model.get_params() - if "random_state" in model_params: - model_params["random_state"] = numpy.random.randint(0, 2**15) - - model.set_params(**model_params) - - if get_model_name(model) == "KNeighborsClassifier": - # KNN can only be compiled with small quantization bit numbers for now - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 - model.n_bits = 2 - - with warnings.catch_warnings(): - # Sometimes, we miss convergence, which is not a problem for our test - warnings.simplefilter("ignore", category=ConvergenceWarning) - - model.fit(x, y) - - with warnings.catch_warnings(): - # Use FHE simulation to not have issues with precision - model.compile(x, default_configuration) - - # Get ONNX model - onnx_model = model.onnx_model - - # Remove initializers, since they change from one seed to the other - model_name = get_model_name(model_class) - if model_name in [ - "DecisionTreeRegressor", - "DecisionTreeClassifier", - "RandomForestClassifier", - "RandomForestRegressor", - "XGBClassifier", - "KNeighborsClassifier", - ]: - while len(onnx_model.graph.initializer) > 0: - del onnx_model.graph.initializer[0] - - str_model = onnx.helper.printable_graph(onnx_model.graph) - print(f"\nCurrent {model_name=}:\n{str_model}") - print(f"\nExpected {model_name=}:\n{str_expected}") - - # Test equality when it does not depend on seeds - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3266 - if not is_model_class_in_a_list(model_class, _get_sklearn_tree_models(select="RandomForest")): - # The expected graph is usually a string and we therefore directly test if it is equal to - # the retrieved graph's string. However, in some cases such as for TweedieRegressor models, - # this graph can slightly changed depending on some input's values. We then expected the - # string to match as least one of them expected strings (as a list) - if isinstance(str_expected, str): - assert str_model == str_expected - else: - assert str_model in str_expected - - -@pytest.mark.parametrize("model_class, parameters", UNIQUE_MODELS_AND_DATASETS) -def test_dump( - model_class, - parameters, - load_data, - default_configuration, -): - """Tests dump.""" - model_name = get_model_name(model_class) - - # Some models have been done with different n_classes which create different ONNX - if parameters.get("n_classes", 2) != 2 and model_name in ["LinearSVC", "LogisticRegression"]: - return - - if model_name == "NeuralNetClassifier": - model_class = partial( - NeuralNetClassifier, - module__n_layers=3, - module__power_of_two_scaling=False, - max_epochs=1, - verbose=0, - callbacks="disable", - ) - elif model_name == "NeuralNetRegressor": - model_class = partial( - NeuralNetRegressor, - module__n_layers=3, - module__n_w_bits=2, - module__n_a_bits=2, - module__n_accum_bits=7, # Stay with 7 bits for test exec time - module__n_hidden_neurons_multiplier=1, - module__power_of_two_scaling=False, - max_epochs=1, - verbose=0, - callbacks="disable", - ) - n_classes = parameters.get("n_classes", 2) # Ignore long lines here @@ -492,4 +395,114 @@ def test_dump( } str_expected = expected_strings.get(model_name, "") - check_onnx_file_dump(model_class, parameters, load_data, str_expected, default_configuration) + + # Get the data-set. The data generation is seeded in load_data. 
+    x, y = load_data(model_class, **parameters)
+
+    # Set the model
+    model = model_class()
+
+    model_params = model.get_params()
+    if "random_state" in model_params:
+        model_params["random_state"] = numpy.random.randint(0, 2**15)
+
+    model.set_params(**model_params)
+
+    if model_name == "KNeighborsClassifier":
+        # KNN can only be compiled with small quantization bit numbers for now
+        # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979
+        model.n_bits = 2
+
+    with warnings.catch_warnings():
+        # Sometimes, we miss convergence, which is not a problem for our test
+        warnings.simplefilter("ignore", category=ConvergenceWarning)
+
+        model.fit(x, y)
+
+    with warnings.catch_warnings():
+        # Use FHE simulation to avoid issues with precision
+        model.compile(x, default_configuration)
+
+    # Get ONNX model
+    onnx_model = model.onnx_model
+
+    # Remove initializers, since they change from one seed to the other
+    model_name = get_model_name(model_class)
+    if model_name in [
+        "DecisionTreeRegressor",
+        "DecisionTreeClassifier",
+        "RandomForestClassifier",
+        "RandomForestRegressor",
+        "XGBClassifier",
+        "KNeighborsClassifier",
+    ]:
+        while len(onnx_model.graph.initializer) > 0:
+            del onnx_model.graph.initializer[0]
+
+    str_model = onnx.helper.printable_graph(onnx_model.graph)
+    print(f"\nCurrent {model_name=}:\n{str_model}")
+    print(f"\nExpected {model_name=}:\n{str_expected}")
+
+    # Test equality when it does not depend on seeds
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3266
+    if not is_model_class_in_a_list(model_class, _get_sklearn_tree_models(select="RandomForest")):
+        # The expected graph is usually a string and we therefore directly test if it is equal to
+        # the retrieved graph's string. However, in some cases such as for TweedieRegressor models,
+        # this graph can slightly change depending on some input's values. We then expect the
+        # string to match at least one of the expected strings (as a list)
+        if isinstance(str_expected, str):
+            assert str_model == str_expected
+        else:
+            assert str_model in str_expected
+
+
+@pytest.mark.parametrize("model_class, parameters", UNIQUE_MODELS_AND_DATASETS)
+def test_dump(
+    model_class,
+    parameters,
+    load_data,
+    default_configuration,
+):
+    """Tests dump."""
+
+    model_name = get_model_name(model_class)
+
+    # Some models are built with different n_classes, which creates a different ONNX graph
+    if parameters.get("n_classes", 2) != 2 and model_name in ["LinearSVC", "LogisticRegression"]:
+        return
+
+    if model_name == "NeuralNetClassifier":
+        model_class = partial(
+            NeuralNetClassifier,
+            module__n_layers=3,
+            module__power_of_two_scaling=False,
+            max_epochs=1,
+            verbose=0,
+            callbacks="disable",
+        )
+    elif model_name == "NeuralNetRegressor":
+        model_class = partial(
+            NeuralNetRegressor,
+            module__n_layers=3,
+            module__n_w_bits=2,
+            module__n_a_bits=2,
+            module__n_accum_bits=7,  # Stay with 7 bits for test exec time
+            module__n_hidden_neurons_multiplier=1,
+            module__power_of_two_scaling=False,
+            max_epochs=1,
+            verbose=0,
+            callbacks="disable",
+        )
+
+    # Check with 'TREES_USE_FHE_SUM' disabled (the default)
+    assert os.environ.get("TREES_USE_FHE_SUM") == "0", "'TREES_USE_FHE_SUM' is not disabled"
+    check_onnx_file_dump(model_class, parameters, load_data, default_configuration)
+
+    with pytest.MonkeyPatch.context() as mp_context:
+
+        # Enable the FHE sum of the tree ensembles' outputs
+        mp_context.setenv("TREES_USE_FHE_SUM", "1")
+
+        # Check that the FHE sum is enabled
+        assert os.environ.get("TREES_USE_FHE_SUM") == "1", "'TREES_USE_FHE_SUM' is not enabled"
+        check_onnx_file_dump(model_class, parameters, load_data, default_configuration)

From 783e7af01b7aca338d73fb805b677e41ea6ba4ca Mon Sep 17 00:00:00 2001
From: kcelia
Date: Tue, 23 Jan 2024 00:43:32 +0100
Subject: [PATCH 26/38] chore: remove env var

---
 src/concrete/ml/sklearn/base.py          | 40 +++++++++++++++++-
 src/concrete/ml/sklearn/tree_to_numpy.py | 32 ++++++++------
 tests/sklearn/test_dump_onnx.py          | 44 +++++++++----------
 tests/sklearn/test_sklearn_models.py     | 54 ++++++++-----------------
 4 files changed, 95 insertions(+), 75 deletions(-)

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index d732d664f..dbd8ca4a8 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -105,7 +105,7 @@
 os.environ["TREES_USE_ROUNDING"] = os.environ.get("TREES_USE_ROUNDING", "1")
 
 # By default, the decision of the tree ensembles is made in clear
-os.environ["TREES_USE_FHE_SUM"] = os.environ.get("TREES_USE_FHE_SUM", "0")
+TREES_USE_FHE_SUM = False
 
 # pylint: disable=too-many-public-methods
@@ -1312,8 +1312,43 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]):
 
         #: The model's inference function. Is None if the model is not fitted.
         self._tree_inference: Optional[Callable] = None
 
+        #: Whether to perform the sum of the tree ensembles' outputs in FHE or not.
+        self._use_fhe_sum = False
+
         BaseEstimator.__init__(self)
 
+    @property
+    def use_fhe_sum(self) -> bool:
+        """Property getter for `use_fhe_sum`.
+
+        Returns:
+            bool: The current setting of the `_use_fhe_sum` attribute.
+        """
+        return self._use_fhe_sum
+
+    @use_fhe_sum.setter
+    def use_fhe_sum(self, value) -> None:
+        """Property setter for `use_fhe_sum`.
+
+        Args:
+            value (bool): Whether to enable or disable the feature.
+ """ + + assert isinstance(value, bool), "Value must be a boolean type" + + if value is True: + warnings.simplefilter("always") + warnings.warn( + "Enabling `use_fhe_sum` computes the sum of the ouputs of tree ensembles in FHE.\n" + "This may slow down the computation and increase the maximum bitwidth.\n" + "To optimize performance, consider reducing the quantization leaf precision.\n" + "Additionally, the model must be refitted for these changes to take effect.", + category=UserWarning, + stacklevel=2, + ) + + self._use_fhe_sum = value + def fit(self, X: Data, y: Target, **fit_parameters): # Reset for double fit self._is_fitted = False @@ -1362,6 +1397,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): self.sklearn_model, q_X, use_rounding=enable_rounding, + use_fhe_sum=self._use_fhe_sum, framework=self.framework, output_n_bits=self.n_bits["op_leaves"], ) @@ -1438,7 +1474,7 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: # Sum all tree outputs # Remove the sum once we handle multi-precision circuits # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 - if os.getenv("TREES_USE_FHE_SUM") == "0": + if not self._use_fhe_sum: y_preds = numpy.sum(y_preds, axis=-1) assert_true(y_preds.ndim == 2, "y_preds should be a 2D array") diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index e9896f5e7..c61ea9d1c 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -1,6 +1,5 @@ """Implements the conversion of a tree model to a numpy function.""" import math -import os import warnings from typing import Callable, List, Optional, Tuple @@ -137,28 +136,25 @@ def assert_add_node_and_constant_in_xgboost_regressor_graph(onnx_model: onnx.Mod ) -def add_transpose_after_last_node(onnx_model: onnx.ModelProto): +def add_transpose_after_last_node(onnx_model: onnx.ModelProto, use_fhe_sum: bool): """Add transpose after last node. Args: onnx_model (onnx.ModelProto): The ONNX model. + use_fhe_sum (bool): This parameter is exclusively used to tree-based models. + It determines whether the sum of the trees' outputs is computed in FHE. """ # Get the output node output_node = onnx_model.graph.output[0] - # The state of the `TREES_USE_FHE_SUM` variable affects the structure of the model's ONNX graph. + # The state of the 'use_fhe_sum' variable affects the structure of the model's ONNX graph. # When the option is enabled, the graph is cut after the ReduceSum node. - # On the other hand, when it is disabled, the graph is cut at the ReduceSum node, - # which alters the output shape. + # When it is disabled, the graph is cut at the ReduceSum node, which alters the output shape. # Therefore, it is necessary to adjust this shape with the correct permutation. # When using FHE sum for tree ensembles, create the node with perm attribute equal to (1, 0) - if os.getenv("TREES_USE_FHE_SUM") == "1": - perm = [1, 0] - # Otherwise, create the node with perm attribute equal to (2, 1, 0) - else: - perm = [2, 1, 0] + perm = [1, 0] if use_fhe_sum else [2, 1, 0] transpose_node = onnx.helper.make_node( "Transpose", @@ -222,7 +218,10 @@ def preprocess_tree_predictions( def tree_onnx_graph_preprocessing( - onnx_model: onnx.ModelProto, framework: str, expected_number_of_outputs: int + onnx_model: onnx.ModelProto, + framework: str, + expected_number_of_outputs: int, + use_fhe_sum: bool = False, ): """Apply pre-processing onto the ONNX graph. 
@@ -231,6 +230,8 @@ def tree_onnx_graph_preprocessing( framework (str): The framework from which the ONNX model is generated. (options: 'xgboost', 'sklearn') expected_number_of_outputs (int): The expected number of outputs in the ONNX model. + use_fhe_sum (bool): This parameter is exclusively used to tree-based models. + It determines whether the sum of the trees' outputs is computed in FHE. """ # Make sure the ONNX version returned by Hummingbird is OPSET_VERSION_FOR_ONNX_EXPORT onnx_version = get_onnx_opset_version(onnx_model) @@ -257,7 +258,7 @@ def tree_onnx_graph_preprocessing( # Cut the graph after the ReduceSum node to remove # argmax, sigmoid, softmax from the graph. - if os.getenv("TREES_USE_FHE_SUM") == "1": + if use_fhe_sum: clean_graph_after_node_op_type(onnx_model, "ReduceSum") else: clean_graph_at_node_op_type(onnx_model, "ReduceSum") @@ -273,7 +274,7 @@ def tree_onnx_graph_preprocessing( # sklearn models apply the reduce sum before the transpose. # To have equivalent output between xgboost in sklearn, # apply the transpose before returning the output. - add_transpose_after_last_node(onnx_model) + add_transpose_after_last_node(onnx_model, use_fhe_sum) # Cast nodes are not necessary so remove them. remove_node_types(onnx_model, op_types_to_remove=["Cast"]) @@ -330,6 +331,7 @@ def tree_to_numpy( x: numpy.ndarray, framework: str, use_rounding: bool = True, + use_fhe_sum: bool = False, output_n_bits: int = MAX_BITWIDTH_BACKWARD_COMPATIBLE, ) -> Tuple[Callable, List[UniformQuantizer], onnx.ModelProto]: """Convert the tree inference to a numpy functions using Hummingbird. @@ -339,6 +341,8 @@ def tree_to_numpy( x (numpy.ndarray): The input data. use_rounding (bool): This parameter is exclusively used to tree-based models. It determines whether the rounding feature is enabled or disabled. + use_fhe_sum (bool): This parameter is exclusively used to tree-based models. + It determines whether the sum of the trees' outputs is computed in FHE. framework (str): The framework from which the ONNX model is generated. (options: 'xgboost', 'sklearn') output_n_bits (int): The number of bits of the output. Default to 8. 
@@ -375,7 +379,7 @@ def tree_to_numpy( # ONNX graph pre-processing to make the model FHE friendly # i.e., delete irrelevant nodes and cut the graph before the final ensemble sum) - tree_onnx_graph_preprocessing(onnx_model, framework, expected_number_of_outputs) + tree_onnx_graph_preprocessing(onnx_model, framework, expected_number_of_outputs, use_fhe_sum) # Tree values pre-processing # i.e., mainly predictions quantization diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index 58925f6f0..fa398cd7e 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -1,6 +1,5 @@ """Tests for the sklearn decision trees.""" -import os import warnings from functools import partial @@ -20,12 +19,20 @@ # pylint: disable=line-too-long -def check_onnx_file_dump(model_class, parameters, load_data, default_configuration): +def check_onnx_file_dump(model_class, parameters, load_data, default_configuration, use_fhe_sum): """Fit the model and dump the corresponding ONNX.""" model_name = get_model_name(model_class) n_classes = parameters.get("n_classes", 2) + # Set the model + model = model_class() + + # Set `use_fhe_sum` + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=UserWarning) + model.use_fhe_sum = use_fhe_sum + # Ignore long lines here # ruff: noqa: E501 expected_strings = { @@ -130,7 +137,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, default_configurati """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) """ - if os.getenv("TREES_USE_FHE_SUM") == "1" + if use_fhe_sum else "%transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0)\n " ) + """return %transposed_output @@ -209,7 +216,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, default_configurati """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) """ - if os.getenv("TREES_USE_FHE_SUM") == "1" + if use_fhe_sum is True else "%transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0)\n " ) + """return %transposed_output @@ -261,7 +268,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, default_configurati """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_4_output_0, %onnx::ReduceSum_26) return %/_operators.0/ReduceSum_output_0 }""" - if os.getenv("TREES_USE_FHE_SUM") == "1" + if use_fhe_sum is True else "return %/_operators.0/Reshape_4_output_0\n}" ), "RandomForestRegressor": """graph torch_jit ( @@ -285,7 +292,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, default_configurati """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_3_output_0, %onnx::ReduceSum_22) %transposed_output = Transpose[perm = [1, 0]](%/_operators.0/ReduceSum_output_0) """ - if os.getenv("TREES_USE_FHE_SUM") == "1" + if use_fhe_sum is True else "%transposed_output = Transpose[perm = [2, 1, 0]](%/_operators.0/Reshape_3_output_0)" ) + """return %transposed_output @@ -304,7 +311,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, default_configurati %/_operators.0/Constant_2_output_0[INT64, 3] %/_operators.0/Constant_3_output_0[INT64, 3] %/_operators.0/Constant_4_output_0[INT64, 3]""" - + ("\n 
%onnx::ReduceSum_27[INT64, 1]" if os.getenv("TREES_USE_FHE_SUM") == "1" else "") + + ("\n %onnx::ReduceSum_27[INT64, 1]" if use_fhe_sum is True else "") + """ ) { %/_operators.0/Gemm_output_0 = Gemm[alpha = 1, beta = 0, transB = 1](%_operators.0.weight_1, %input_0) @@ -324,7 +331,7 @@ def check_onnx_file_dump(model_class, parameters, load_data, default_configurati """%/_operators.0/ReduceSum_output_0 = ReduceSum[keepdims = 0](%/_operators.0/Reshape_4_output_0, %onnx::ReduceSum_27) return %/_operators.0/ReduceSum_output_0 }""" - if os.getenv("TREES_USE_FHE_SUM") == "1" + if use_fhe_sum is True else """return %/_operators.0/Reshape_4_output_0\n}""" ), "LinearRegression": """graph torch_jit ( @@ -399,9 +406,6 @@ def check_onnx_file_dump(model_class, parameters, load_data, default_configurati # Get the data-set. The data generation is seeded in load_data. x, y = load_data(model_class, **parameters) - # Set the model - model = model_class() - model_params = model.get_params() if "random_state" in model_params: model_params["random_state"] = numpy.random.randint(0, 2**15) @@ -494,15 +498,9 @@ def test_dump( callbacks="disable", ) - # Check with 'TREES_USE_ROUNDING' disabled - assert os.environ.get("TREES_USE_FHE_SUM") == "0", "'TREES_USE_FHE_SUM' is not disabled" - check_onnx_file_dump(model_class, parameters, load_data, default_configuration) - - with pytest.MonkeyPatch.context() as mp_context: - - # Disable rounding - mp_context.setenv("TREES_USE_FHE_SUM", "1") - - # Check that rounding is disabled - assert os.environ.get("TREES_USE_FHE_SUM") == "1", "'TREES_USE_FHE_SUM' is enabled" - check_onnx_file_dump(model_class, parameters, load_data, default_configuration) + check_onnx_file_dump( + model_class, parameters, load_data, default_configuration, use_fhe_sum=False + ) + check_onnx_file_dump( + model_class, parameters, load_data, default_configuration, use_fhe_sum=True + ) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 1ec4d7c98..50128e3ef 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1206,10 +1206,10 @@ def check_rounding_consistency( def check_fhe_sum_for_tree_based_models( - model_class, + model, x, y, - n_bits, + predict_method, is_weekly_option, ): """Test that Concrete ML without and with FHE sum are 'equivalent'.""" @@ -1218,18 +1218,8 @@ def check_fhe_sum_for_tree_based_models( if is_weekly_option: fhe_test = get_random_samples(x, n_sample=5) - # By default, the summation of tree ensemble outputs is done in clear - assert os.getenv("TREES_USE_FHE_SUM") == "0", "'TREES_USE_FHE_SUM' is not disabled" - - model_ref = instantiate_model_generic(model_class, n_bits=n_bits) - fit_and_compile(model_ref, x, y) - - # Check `predict_proba` for classifiers and `predict` for regressors - predict_method = ( - model_ref.predict_proba - if is_classifier_or_partial_classifier(model_class) - else model_ref.predict - ) + assert not model.use_fhe_sum, "`use_fhe_sum` is disabled by default." 
+ fit_and_compile(model, x, y) non_fhe_sum_predict_quantized = predict_method(x, fhe="disable") non_fhe_sum_predict_simulate = predict_method(x, fhe="simulate") @@ -1241,29 +1231,15 @@ def check_fhe_sum_for_tree_based_models( if is_weekly_option: non_fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") - with pytest.MonkeyPatch.context() as mp_context: - - # Enable the FHE summation of tree ensemble outputs - mp_context.setenv("TREES_USE_FHE_SUM", "1") + model.use_fhe_sum = True - # Check that the summation of tree ensemble outputs is enabled - assert os.getenv("TREES_USE_FHE_SUM") == "1", "'TREES_USE_FHE_SUM' is not enabled" - - model = model_class(**model_ref.get_params()) - fit_and_compile(model, x, y) - - # Check `predict_proba` for classifiers and `predict` for regressors - predict_method = ( - model.predict_proba - if is_classifier_or_partial_classifier(model_class) - else model.predict - ) + fit_and_compile(model, x, y) - fhe_sum_predict_quantized = predict_method(x, fhe="disable") - fhe_sum_predict_simulate = predict_method(x, fhe="simulate") + fhe_sum_predict_quantized = predict_method(x, fhe="disable") + fhe_sum_predict_simulate = predict_method(x, fhe="simulate") - # Sanity check - array_allclose_and_same_shape(fhe_sum_predict_quantized, fhe_sum_predict_simulate) + # Sanity check + array_allclose_and_same_shape(fhe_sum_predict_quantized, fhe_sum_predict_simulate) # Check that we have the exact same predictions array_allclose_and_same_shape(fhe_sum_predict_quantized, non_fhe_sum_predict_quantized) @@ -1962,13 +1938,19 @@ def test_fhe_sum_for_tree_based_models( if verbose: print("Run check_fhe_sum_for_tree_based_models") + model = instantiate_model_generic(model_class, n_bits=n_bits) + x, y = get_dataset(model_class, parameters, n_bits, load_data, is_weekly_option) + predict_method = ( + model.predict_proba if is_classifier_or_partial_classifier(model) else model.predict + ) + check_fhe_sum_for_tree_based_models( - model_class, + model, x, y, - n_bits, + predict_method, is_weekly_option, ) From 39b2972a356d920b638a61ee09da50963afe7b57 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 23 Jan 2024 10:19:52 +0100 Subject: [PATCH 27/38] chore: restore knn notebook --- docs/advanced_examples/KNearestNeighbors.ipynb | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/advanced_examples/KNearestNeighbors.ipynb b/docs/advanced_examples/KNearestNeighbors.ipynb index 9b4b7ed7e..d7ae1b8c1 100644 --- a/docs/advanced_examples/KNearestNeighbors.ipynb +++ b/docs/advanced_examples/KNearestNeighbors.ipynb @@ -287,9 +287,6 @@ "data": { "text/html": [ "\n", "
\n", " \n", From 07b2f2a531d48b5124ccd11803d32c6e33c75e68 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 23 Jan 2024 10:22:26 +0100 Subject: [PATCH 28/38] chore: restore exp notebotebook --- .../ExperimentPrivacyTreePaper.ipynb | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb b/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb index 23ed43935..388454993 100644 --- a/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb +++ b/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb @@ -494,7 +494,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -518,7 +518,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -554,7 +554,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -792,9 +792,9 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -805,7 +805,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -818,11 +818,11 @@ " FP32-DT 90.3\\% ± 1.0\\% 87.4\\% ± 1.2\\% \n", " FHE-XGB 94.5\\% ± 0.8\\% 92.9\\% ± 1.1\\% \n", " FP32-XGB 95.0\\% ± 0.7\\% 93.6\\% ± 0.9\\% \n", - " FHE-RF 90.9\\% ± 1.1\\% 87.5\\% ± 1.6\\% \n", + " FHE-RF 90.9\\% ± 1.1\\% 87.5\\% ± 1.5\\% \n", " FP32-RF 91.8\\% ± 1.1\\% 89.0\\% ± 1.4\\% \n", "wine (#features: 13) FHE-DT 90.8\\% ± 5.2\\% - \n", " FP32-DT 90.5\\% ± 5.0\\% - \n", - " FHE-XGB 96.8\\% ± 2.5\\% - \n", + " FHE-XGB 97.0\\% ± 2.4\\% - \n", " FP32-XGB 96.2\\% ± 2.9\\% - \n", " FHE-RF 98.5\\% ± 1.4\\% - \n", " FP32-RF 98.1\\% ± 2.0\\% - \n", @@ -848,7 +848,7 @@ " FP32-DT 97.2\\% ± 0.7\\% 96.1\\% ± 0.9\\% \n", " FHE-XGB 100.0\\% ± 0.0\\% 100.0\\% ± 0.0\\% \n", " FP32-XGB 100.0\\% ± 0.0\\% 100.0\\% ± 0.0\\% \n", - " FHE-RF 96.8\\% ± 1.3\\% 95.4\\% ± 1.8\\% \n", + " FHE-RF 96.9\\% ± 1.2\\% 95.4\\% ± 1.8\\% \n", " FP32-RF 95.9\\% ± 1.1\\% 93.9\\% ± 1.5\\% \n", "\n", " AP nodes Time (s) \\\n", @@ -1610,19 +1610,19 @@ "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "ap relative: [0.49626943 0.70187731 0.82640876 0.89067066 0.98315255 1.02264581\n", - " 1.02436888 1.01090038 1.01268386], f1_relative: [0.06488922 0.65490682 0.87590196 0.90861806 0.97920588 1.00604989\n", - " 1.00914511 1.00274636 1.00389957]\n" + "The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.\n" ] }, { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.\n" + "ap relative: [0.49626943 0.70187731 0.82640876 0.89067066 0.98315255 1.02264581\n", + " 1.02436888 1.01090038 1.01268386], f1_relative: [0.06488922 0.65490682 0.87590196 0.90861806 0.97920588 1.00604989\n", + " 1.00914511 1.00274636 1.00389957]\n" ] }, { @@ -1646,9 +1646,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "ap relative: [0.43556747 0.69054787 0.8789863 0.94213852 0.97097036 0.99083622\n", - " 0.99365961 0.99626825 0.99920411], f1_relative: [0. 0.65970362 0.91412713 0.95780357 0.97789164 0.99271147\n", - " 0.99456864 0.99697611 0.99959059]\n" + "ap relative: [0.43556747 0.69054787 0.8789863 0.94180188 0.97097036 0.99094624\n", + " 0.99348364 0.99626825 0.99932372], f1_relative: [0. 
0.65970362 0.91412713 0.95762445 0.97789164 0.99281277\n",
+     " 0.99447789 0.99697611 0.99969255]\n"
     ]
    },
    {
@@ -1672,9 +1672,9 @@
    "name": "stdout",
    "output_type": "stream",
    "text": [
-     "ap relative: [0.45810941 0.65828111 0.85617664 0.93660034 0.96541385 0.98342004\n",
-     " 0.99091316 0.9911998 0.99740638], f1_relative: [0. 0.56676488 0.86901886 0.93986022 0.96505021 0.98359134\n",
-     " 0.99082334 0.99211045 0.99758998]\n"
+     "ap relative: [0.45810941 0.66176353 0.85701522 0.93668402 0.96541385 0.98353791\n",
+     " 0.99091316 0.99133601 0.99740638], f1_relative: [0. 0.57332946 0.87035559 0.9402579 0.96505021 0.983713\n",
+     " 0.99082334 0.99224022 0.99758998]\n"
     ]
    },
    {

From 7fddeceb5661d737969f2b816d14e6ef2a691a2d Mon Sep 17 00:00:00 2001
From: kcelia
Date: Tue, 23 Jan 2024 16:50:56 +0100
Subject: [PATCH 29/38] chore: update v1

---
 src/concrete/ml/sklearn/base.py          |  8 +++-----
 src/concrete/ml/sklearn/tree_to_numpy.py | 16 ++++++++--------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index dbd8ca4a8..380ee1625 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -104,9 +104,6 @@
 # However, for internal testing purposes, we retain the capability to disable this feature
 os.environ["TREES_USE_ROUNDING"] = os.environ.get("TREES_USE_ROUNDING", "1")
 
-# By default, the decision of the tree ensembles is made in clear
-TREES_USE_FHE_SUM = False
-
 # pylint: disable=too-many-public-methods
 
 
@@ -1313,6 +1310,7 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]):
         self._tree_inference: Optional[Callable] = None
 
         #: Whether to perform the sum of the tree ensembles' outputs in FHE or not.
+        # By default, the decision of the tree ensembles is made in clear.
         self._use_fhe_sum = False
 
         BaseEstimator.__init__(self)
@@ -1327,11 +1325,11 @@ def use_fhe_sum(self) -> bool:
         return self._use_fhe_sum
 
     @use_fhe_sum.setter
-    def use_fhe_sum(self, value) -> None:
+    def use_fhe_sum(self, value: bool) -> None:
         """Property setter for `use_fhe_sum`.
 
         Args:
-            value (int): Whether to enable or disable the feature.
+            value (bool): Whether to enable or disable the feature.
diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py
index c61ea9d1c..8f6beda61 100644
--- a/src/concrete/ml/sklearn/tree_to_numpy.py
+++ b/src/concrete/ml/sklearn/tree_to_numpy.py
@@ -141,8 +141,8 @@ def add_transpose_after_last_node(onnx_model: onnx.ModelProto, use_fhe_sum: bool
 
     Args:
         onnx_model (onnx.ModelProto): The ONNX model.
-        use_fhe_sum (bool): This parameter is exclusively used to tree-based models.
-            It determines whether the sum of the trees' outputs is computed in FHE.
+        use_fhe_sum (bool): Determines whether the sum of the trees' outputs is computed in FHE.
+            Default to False.
     """
     # Get the output node
     output_node = onnx_model.graph.output[0]
@@ -230,8 +230,8 @@ def tree_onnx_graph_preprocessing(
     framework (str): The framework from which the ONNX model is generated.
         (options: 'xgboost', 'sklearn')
     expected_number_of_outputs (int): The expected number of outputs in the ONNX model.
-    use_fhe_sum (bool): This parameter is exclusively used to tree-based models.
-        It determines whether the sum of the trees' outputs is computed in FHE.
+    use_fhe_sum (bool): Determines whether the sum of the trees' outputs is computed in FHE.
+        Default to False. 
""" # Make sure the ONNX version returned by Hummingbird is OPSET_VERSION_FOR_ONNX_EXPORT onnx_version = get_onnx_opset_version(onnx_model) @@ -339,10 +339,10 @@ def tree_to_numpy( Args: model (Callable): The tree model to convert. x (numpy.ndarray): The input data. - use_rounding (bool): This parameter is exclusively used to tree-based models. - It determines whether the rounding feature is enabled or disabled. - use_fhe_sum (bool): This parameter is exclusively used to tree-based models. - It determines whether the sum of the trees' outputs is computed in FHE. + use_rounding (bool): Determines whether the rounding feature is enabled or disabled. + Default to True. + use_fhe_sum (bool): Determines whether the sum of the trees' outputs is computed in FHE. + Default to False. framework (str): The framework from which the ONNX model is generated. (options: 'xgboost', 'sklearn') output_n_bits (int): The number of bits of the output. Default to 8. From 14d9dc0b0bca32982a99db216792ac8a794085ad Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 23 Jan 2024 18:12:44 +0100 Subject: [PATCH 30/38] chore: update v2 --- src/concrete/ml/sklearn/base.py | 25 ++++++++++++------------ src/concrete/ml/sklearn/tree_to_numpy.py | 6 +++--- tests/sklearn/test_dump_onnx.py | 17 +++++++++------- tests/sklearn/test_sklearn_models.py | 17 ++++++++-------- 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 380ee1625..7308bd58f 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1311,22 +1311,22 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]): #: Wether to perform the sum of the output's tree ensembles in FHE or not. # By default, the decision of the tree ensembles is made in clear. - self._use_fhe_sum = False + self._fhe_ensembling = False BaseEstimator.__init__(self) @property - def use_fhe_sum(self) -> bool: - """Property getter for `use_fhe_sum`. + def fhe_ensembling(self) -> bool: + """Property getter for `_fhe_ensembling`. Returns: - bool: The current setting of the `_use_fhe_sum` attribute. + bool: The current setting of the `fhe_ensembling` attribute. """ - return self._use_fhe_sum + return self._fhe_ensembling - @use_fhe_sum.setter - def use_fhe_sum(self, value: bool) -> None: - """Property setter for `use_fhe_sum`. + @fhe_ensembling.setter + def fhe_ensembling(self, value: bool) -> None: + """Property setter for `fhe_ensembling`. Args: value (bool): Whether to enable or disable the feature. 
@@ -1335,9 +1335,10 @@ def use_fhe_sum(self, value: bool) -> None: assert isinstance(value, bool), "Value must be a boolean type" if value is True: + print("LAA") warnings.simplefilter("always") warnings.warn( - "Enabling `use_fhe_sum` computes the sum of the ouputs of tree ensembles in FHE.\n" + "Enabling `fhe_ensembling` computes the sum of the ouputs of tree ensembles in FHE.\n" "This may slow down the computation and increase the maximum bitwidth.\n" "To optimize performance, consider reducing the quantization leaf precision.\n" "Additionally, the model must be refitted for these changes to take effect.", @@ -1345,7 +1346,7 @@ def use_fhe_sum(self, value: bool) -> None: stacklevel=2, ) - self._use_fhe_sum = value + self._fhe_ensembling = value def fit(self, X: Data, y: Target, **fit_parameters): # Reset for double fit @@ -1395,7 +1396,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): self.sklearn_model, q_X, use_rounding=enable_rounding, - use_fhe_sum=self._use_fhe_sum, + fhe_ensembling=self.fhe_ensembling, framework=self.framework, output_n_bits=self.n_bits["op_leaves"], ) @@ -1472,7 +1473,7 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: # Sum all tree outputs # Remove the sum once we handle multi-precision circuits # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/451 - if not self._use_fhe_sum: + if not self._fhe_ensembling: y_preds = numpy.sum(y_preds, axis=-1) assert_true(y_preds.ndim == 2, "y_preds should be a 2D array") diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index 8f6beda61..d3da342fe 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -331,7 +331,7 @@ def tree_to_numpy( x: numpy.ndarray, framework: str, use_rounding: bool = True, - use_fhe_sum: bool = False, + fhe_ensembling: bool = False, output_n_bits: int = MAX_BITWIDTH_BACKWARD_COMPATIBLE, ) -> Tuple[Callable, List[UniformQuantizer], onnx.ModelProto]: """Convert the tree inference to a numpy functions using Hummingbird. @@ -341,7 +341,7 @@ def tree_to_numpy( x (numpy.ndarray): The input data. use_rounding (bool): Determines whether the rounding feature is enabled or disabled. Default to True. - use_fhe_sum (bool): Determines whether the sum of the trees' outputs is computed in FHE. + fhe_ensembling (bool): Determines whether the sum of the trees' outputs is computed in FHE. Default to False. framework (str): The framework from which the ONNX model is generated. 
(options: 'xgboost', 'sklearn') @@ -379,7 +379,7 @@ def tree_to_numpy( # ONNX graph pre-processing to make the model FHE friendly # i.e., delete irrelevant nodes and cut the graph before the final ensemble sum) - tree_onnx_graph_preprocessing(onnx_model, framework, expected_number_of_outputs, use_fhe_sum) + tree_onnx_graph_preprocessing(onnx_model, framework, expected_number_of_outputs, fhe_ensembling) # Tree values pre-processing # i.e., mainly predictions quantization diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index fa398cd7e..4e7589319 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -19,7 +19,9 @@ # pylint: disable=line-too-long -def check_onnx_file_dump(model_class, parameters, load_data, default_configuration, use_fhe_sum): +def check_onnx_file_dump( + model_class, parameters, load_data, default_configuration, use_fhe_sum=False +): """Fit the model and dump the corresponding ONNX.""" model_name = get_model_name(model_class) @@ -498,9 +500,10 @@ def test_dump( callbacks="disable", ) - check_onnx_file_dump( - model_class, parameters, load_data, default_configuration, use_fhe_sum=False - ) - check_onnx_file_dump( - model_class, parameters, load_data, default_configuration, use_fhe_sum=True - ) + check_onnx_file_dump(model_class, parameters, load_data, default_configuration) + + # Additional tests exclusively dedicated for tree ensemble models. + if model_class in _get_sklearn_tree_models()[2:]: + check_onnx_file_dump( + model_class, parameters, load_data, default_configuration, use_fhe_sum=True + ) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 50128e3ef..446ad3d11 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -149,13 +149,7 @@ def get_n_bits_non_correctness(model_class): if get_model_name(model_class) == "KNeighborsClassifier": n_bits = 2 - # Adjust the quantization precision for tree-based model based on `TREES_USE_FHE_SUM` setting. - # When enabled, the circuit's bitwidth increases, potentially leading to Out-of-Memory issues. - # Therefore, the maximum quantization precision is 4 bits in this case. - elif model_class in _get_sklearn_tree_models() and os.environ.get("TREES_USE_FHE_SUM") == "1": - n_bits = min(min(N_BITS_REGULAR_BUILDS), 4) - else: - n_bits = min(N_BITS_REGULAR_BUILDS) + n_bits = min(N_BITS_REGULAR_BUILDS) return n_bits @@ -1218,7 +1212,7 @@ def check_fhe_sum_for_tree_based_models( if is_weekly_option: fhe_test = get_random_samples(x, n_sample=5) - assert not model.use_fhe_sum, "`use_fhe_sum` is disabled by default." + assert not model.fhe_ensembling, "`fhe_ensembling` is disabled by default." fit_and_compile(model, x, y) non_fhe_sum_predict_quantized = predict_method(x, fhe="disable") @@ -1231,7 +1225,8 @@ def check_fhe_sum_for_tree_based_models( if is_weekly_option: non_fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") - model.use_fhe_sum = True + with pytest.warns(UserWarning, match="Enabling `fhe_ensembling` .*"): + model.fhe_ensembling = True fit_and_compile(model, x, y) @@ -1955,6 +1950,8 @@ def test_fhe_sum_for_tree_based_models( ) +# This test should be extended to all built-in models. 
+# FIXME: https://github.com/zama-ai/concrete-ml-internal#4234 @pytest.mark.parametrize( "n_bits, error_message", [ @@ -1982,6 +1979,8 @@ def test_invalid_n_bits_setting(model_class, n_bits, error_message): instantiate_model_generic(model_class, n_bits=n_bits) +# This test should be extended to all built-in models. +# FIXME: https://github.com/zama-ai/concrete-ml-internal#4234 @pytest.mark.parametrize("n_bits", [5, {"op_inputs": 5}, {"op_inputs": 2, "op_leaves": 1}]) @pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets()) def test_valid_n_bits_setting( From 324821652ec5cd8e2ba67478843da5afa12f09f3 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 23 Jan 2024 18:20:45 +0100 Subject: [PATCH 31/38] chore: update v3 --- src/concrete/ml/sklearn/tree_to_numpy.py | 16 ++++++++-------- tests/sklearn/test_sklearn_models.py | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index d3da342fe..d2723e925 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -136,25 +136,25 @@ def assert_add_node_and_constant_in_xgboost_regressor_graph(onnx_model: onnx.Mod ) -def add_transpose_after_last_node(onnx_model: onnx.ModelProto, use_fhe_sum: bool): +def add_transpose_after_last_node(onnx_model: onnx.ModelProto, fhe_ensembling: bool): """Add transpose after last node. Args: onnx_model (onnx.ModelProto): The ONNX model. - use_fhe_sum (bool): Determines whether the sum of the trees' outputs is computed in FHE. + fhe_ensembling (bool): Determines whether the sum of the trees' outputs is computed in FHE. Default to False. """ # Get the output node output_node = onnx_model.graph.output[0] - # The state of the 'use_fhe_sum' variable affects the structure of the model's ONNX graph. + # The state of the 'fhe_ensembling' variable affects the structure of the model's ONNX graph. # When the option is enabled, the graph is cut after the ReduceSum node. # When it is disabled, the graph is cut at the ReduceSum node, which alters the output shape. # Therefore, it is necessary to adjust this shape with the correct permutation. # When using FHE sum for tree ensembles, create the node with perm attribute equal to (1, 0) # Otherwise, create the node with perm attribute equal to (2, 1, 0) - perm = [1, 0] if use_fhe_sum else [2, 1, 0] + perm = [1, 0] if fhe_ensembling else [2, 1, 0] transpose_node = onnx.helper.make_node( "Transpose", @@ -221,7 +221,7 @@ def tree_onnx_graph_preprocessing( onnx_model: onnx.ModelProto, framework: str, expected_number_of_outputs: int, - use_fhe_sum: bool = False, + fhe_ensembling: bool = False, ): """Apply pre-processing onto the ONNX graph. @@ -230,7 +230,7 @@ def tree_onnx_graph_preprocessing( framework (str): The framework from which the ONNX model is generated. (options: 'xgboost', 'sklearn') expected_number_of_outputs (int): The expected number of outputs in the ONNX model. - use_fhe_sum (bool): Determines whether the sum of the trees' outputs is computed in FHE. + fhe_ensembling (bool): Determines whether the sum of the trees' outputs is computed in FHE. Default to False. """ # Make sure the ONNX version returned by Hummingbird is OPSET_VERSION_FOR_ONNX_EXPORT @@ -258,7 +258,7 @@ def tree_onnx_graph_preprocessing( # Cut the graph after the ReduceSum node to remove # argmax, sigmoid, softmax from the graph. 
- if use_fhe_sum: + if fhe_ensembling: clean_graph_after_node_op_type(onnx_model, "ReduceSum") else: clean_graph_at_node_op_type(onnx_model, "ReduceSum") @@ -274,7 +274,7 @@ def tree_onnx_graph_preprocessing( # sklearn models apply the reduce sum before the transpose. # To have equivalent output between xgboost in sklearn, # apply the transpose before returning the output. - add_transpose_after_last_node(onnx_model, use_fhe_sum) + add_transpose_after_last_node(onnx_model, fhe_ensembling) # Cast nodes are not necessary so remove them. remove_node_types(onnx_model, op_types_to_remove=["Cast"]) diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 446ad3d11..cd156f808 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -148,8 +148,8 @@ def get_n_bits_non_correctness(model_class): # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3979 if get_model_name(model_class) == "KNeighborsClassifier": n_bits = 2 - - n_bits = min(N_BITS_REGULAR_BUILDS) + else: + n_bits = min(N_BITS_REGULAR_BUILDS) return n_bits From ab45587539e5986bcbdd0cddc431161bffb72e92 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 23 Jan 2024 19:52:15 +0100 Subject: [PATCH 32/38] chore: update --- src/concrete/ml/sklearn/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index 7308bd58f..b1ce3d55d 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1338,8 +1338,8 @@ def fhe_ensembling(self, value: bool) -> None: print("LAA") warnings.simplefilter("always") warnings.warn( - "Enabling `fhe_ensembling` computes the sum of the ouputs of tree ensembles in FHE.\n" - "This may slow down the computation and increase the maximum bitwidth.\n" + "Enabling `fhe_ensembling` computes the sum of the ouputs of tree ensembles in " + "FHE.\nThis may slow down the computation and increase the maximum bitwidth.\n" "To optimize performance, consider reducing the quantization leaf precision.\n" "Additionally, the model must be refitted for these changes to take effect.", category=UserWarning, From 0ad0f6d1dad2014151cb839e6f18bb02878b7c44 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 24 Jan 2024 10:25:51 +0100 Subject: [PATCH 33/38] chore: update comments --- src/concrete/ml/quantization/post_training.py | 6 +++--- src/concrete/ml/sklearn/base.py | 1 - tests/sklearn/test_sklearn_models.py | 10 +++++----- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index 621697f14..46bed0214 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -50,7 +50,7 @@ def _inspect_tree_n_bits(n_bits): """ detailed_message = ( - "Invalid 'n_bits', either pass a non-null positive integer or a dictionary containing " + "Invalid 'n_bits', either pass a strictly positive integer or a dictionary containing " "integer values for the following keys:\n" "- 'op_inputs' (mandatory): number of bits to quantize the input values\n" "- 'op_leaves' (optional): number of bits to quantize the leaves, must be less than or " @@ -63,7 +63,7 @@ def _inspect_tree_n_bits(n_bits): if isinstance(n_bits, int): if n_bits <= 0: - error_message = "n_bits must be a non-null, positive integer" + error_message = "n_bits must be a strictly positive integer" elif isinstance(n_bits, dict): if "op_inputs" not in 
n_bits.keys(): error_message = "Invalid keys in `n_bits` dictionary. The key 'op_inputs' is mandatory" @@ -73,7 +73,7 @@ def _inspect_tree_n_bits(n_bits): "(optional) are allowed" ) elif not all(isinstance(value, int) and value > 0 for value in n_bits.values()): - error_message = "All values in 'n_bits' dictionary must be non-null, positive integers" + error_message = "All values in 'n_bits' dictionary must be strictly positive integers" elif n_bits.get("op_leaves", 0) > n_bits.get("op_inputs", 0): error_message = "'op_leaves' must be less than or equal to 'op_inputs'" diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index b1ce3d55d..8b947b163 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1335,7 +1335,6 @@ def fhe_ensembling(self, value: bool) -> None: assert isinstance(value, bool), "Value must be a boolean type" if value is True: - print("LAA") warnings.simplefilter("always") warnings.warn( "Enabling `fhe_ensembling` computes the sum of the ouputs of tree ensembles in " diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index cd156f808..6e5bd2a9e 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1955,8 +1955,8 @@ def test_fhe_sum_for_tree_based_models( @pytest.mark.parametrize( "n_bits, error_message", [ - (0, "n_bits must be a non-null, positive integer"), - (-1, "n_bits must be a non-null, positive integer"), + (0, "n_bits must be a strictly positive integer"), + (-1, "n_bits must be a strictly positive integer"), ({"op_leaves": 2}, "The key 'op_inputs' is mandatory"), ( {"op_inputs": 4, "op_leaves": 2, "op_weights": 2}, @@ -1965,7 +1965,7 @@ def test_fhe_sum_for_tree_based_models( ), ( {"op_inputs": -2, "op_leaves": -5}, - "All values in 'n_bits' dictionary must be non-null, positive integers", + "All values in 'n_bits' dictionary must be strictly positive integers", ), ({"op_inputs": 2, "op_leaves": 5}, "'op_leaves' must be less than or equal to 'op_inputs'"), (0.5, "n_bits must be either an integer or a dictionary"), @@ -1973,7 +1973,7 @@ def test_fhe_sum_for_tree_based_models( ) @pytest.mark.parametrize("model_class", _get_sklearn_tree_models()) def test_invalid_n_bits_setting(model_class, n_bits, error_message): - """Check if the model instantiation raises an exception with invalid 'n_bits' settings.""" + """Check if the model instantiation raises an exception with invalid `n_bits` settings.""" with pytest.raises(ValueError, match=f"{error_message}. 
Got '{type(n_bits)}' and '{n_bits}'.*"):
         instantiate_model_generic(model_class, n_bits=n_bits)
 
 
@@ -1991,7 +1991,7 @@ def test_valid_n_bits_setting(
     is_weekly_option,
     verbose=True,
 ):
-    """Check valid `n_bits' settings."""
+    """Check valid `n_bits` settings."""
 
     if verbose:
         print("Run test_valid_n_bits_setting")

From 9b58948c5c91f78eff534c32ae6f48bce2460e2e Mon Sep 17 00:00:00 2001
From: kcelia
Date: Wed, 24 Jan 2024 14:06:59 +0100
Subject: [PATCH 34/38] chore: update

---
 src/concrete/ml/sklearn/base.py      | 37 +++------------------------
 src/concrete/ml/sklearn/rf.py        |  6 +++++
 src/concrete/ml/sklearn/tree.py      |  7 ++++++
 src/concrete/ml/sklearn/xgb.py       |  6 +++++
 tests/sklearn/test_dump_onnx.py      |  9 ++++---
 tests/sklearn/test_sklearn_models.py |  7 +++---
 6 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index 8b947b163..b42cf5518 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -1310,43 +1310,12 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]):
         self._tree_inference: Optional[Callable] = None
 
         #: Whether to perform the sum of the tree ensembles' outputs in FHE or not.
-        # By default, the decision of the tree ensembles is made in clear.
+        # By default, the decision of the tree ensembles is made in clear (not in FHE).
+        # This attribute should not be modified by users.
         self._fhe_ensembling = False
 
         BaseEstimator.__init__(self)
 
-    @property
-    def fhe_ensembling(self) -> bool:
-        """Property getter for `_fhe_ensembling`.
-
-        Returns:
-            bool: The current setting of the `fhe_ensembling` attribute.
-        """
-        return self._fhe_ensembling
-
-    @fhe_ensembling.setter
-    def fhe_ensembling(self, value: bool) -> None:
-        """Property setter for `fhe_ensembling`.
-
-        Args:
-            value (bool): Whether to enable or disable the feature. 
- """ - - assert isinstance(value, bool), "Value must be a boolean type" - - if value is True: - warnings.simplefilter("always") - warnings.warn( - "Enabling `fhe_ensembling` computes the sum of the ouputs of tree ensembles in " - "FHE.\nThis may slow down the computation and increase the maximum bitwidth.\n" - "To optimize performance, consider reducing the quantization leaf precision.\n" - "Additionally, the model must be refitted for these changes to take effect.", - category=UserWarning, - stacklevel=2, - ) - - self._fhe_ensembling = value - def fit(self, X: Data, y: Target, **fit_parameters): # Reset for double fit self._is_fitted = False @@ -1395,7 +1364,7 @@ def fit(self, X: Data, y: Target, **fit_parameters): self.sklearn_model, q_X, use_rounding=enable_rounding, - fhe_ensembling=self.fhe_ensembling, + fhe_ensembling=self._fhe_ensembling, framework=self.framework, output_n_bits=self.n_bits["op_leaves"], ) diff --git a/src/concrete/ml/sklearn/rf.py b/src/concrete/ml/sklearn/rf.py index 2ebca55b8..f4521bf06 100644 --- a/src/concrete/ml/sklearn/rf.py +++ b/src/concrete/ml/sklearn/rf.py @@ -84,6 +84,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["onnx_model_"] = self.onnx_model_ metadata["framework"] = self.framework metadata["post_processing_params"] = self.post_processing_params + metadata["_fhe_ensembling"] = self._fhe_ensembling # Scikit-Learn metadata["n_estimators"] = self.n_estimators @@ -120,11 +121,13 @@ def load_dict(cls, metadata: Dict): obj.framework = metadata["framework"] obj.onnx_model_ = metadata["onnx_model_"] obj.output_quantizers = metadata["output_quantizers"] + obj._fhe_ensembling = metadata["_fhe_ensembling"] obj._tree_inference = tree_to_numpy( obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, + fhe_ensembling=obj._fhe_ensembling, )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -219,6 +222,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["onnx_model_"] = self.onnx_model_ metadata["framework"] = self.framework metadata["post_processing_params"] = self.post_processing_params + metadata["_fhe_ensembling"] = self._fhe_ensembling # Scikit-Learn metadata["n_estimators"] = self.n_estimators @@ -255,11 +259,13 @@ def load_dict(cls, metadata: Dict): obj.framework = metadata["framework"] obj.onnx_model_ = metadata["onnx_model_"] obj.output_quantizers = metadata["output_quantizers"] + obj._fhe_ensembling = metadata["_fhe_ensembling"] obj._tree_inference = tree_to_numpy( obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, + fhe_ensembling=obj._fhe_ensembling, )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/src/concrete/ml/sklearn/tree.py b/src/concrete/ml/sklearn/tree.py index 5ba1f8cff..b496d4e47 100644 --- a/src/concrete/ml/sklearn/tree.py +++ b/src/concrete/ml/sklearn/tree.py @@ -84,6 +84,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["onnx_model_"] = self.onnx_model_ metadata["framework"] = self.framework metadata["post_processing_params"] = self.post_processing_params + metadata["_fhe_ensembling"] = self._fhe_ensembling # Scikit-Learn metadata["criterion"] = self.criterion @@ -115,11 +116,13 @@ def load_dict(cls, metadata: Dict): obj.framework = metadata["framework"] obj.onnx_model_ = metadata["onnx_model_"] obj.output_quantizers = 
metadata["output_quantizers"] + obj._fhe_ensembling = metadata["_fhe_ensembling"] obj._tree_inference = tree_to_numpy( obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, + fhe_ensembling=obj._fhe_ensembling, )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -208,6 +211,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["onnx_model_"] = self.onnx_model_ metadata["framework"] = self.framework metadata["post_processing_params"] = self.post_processing_params + metadata["_fhe_ensembling"] = self._fhe_ensembling # Scikit-Learn metadata["criterion"] = self.criterion @@ -233,16 +237,19 @@ def load_dict(cls, metadata: Dict): # Concrete-ML obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] + obj._fhe_ensembling = metadata["_fhe_ensembling"] obj._is_compiled = metadata["_is_compiled"] obj.input_quantizers = metadata["input_quantizers"] obj.framework = metadata["framework"] obj.onnx_model_ = metadata["onnx_model_"] obj.output_quantizers = metadata["output_quantizers"] + obj._fhe_ensembling = metadata["_fhe_ensembling"] obj._tree_inference = tree_to_numpy( obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, + fhe_ensembling=obj._fhe_ensembling, )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/src/concrete/ml/sklearn/xgb.py b/src/concrete/ml/sklearn/xgb.py index 28722b706..8f3925fd7 100644 --- a/src/concrete/ml/sklearn/xgb.py +++ b/src/concrete/ml/sklearn/xgb.py @@ -125,6 +125,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["onnx_model_"] = self.onnx_model_ metadata["framework"] = self.framework metadata["post_processing_params"] = self.post_processing_params + metadata["_fhe_ensembling"] = self._fhe_ensembling # XGBoost metadata["max_depth"] = self.max_depth @@ -174,11 +175,13 @@ def load_dict(cls, metadata: Dict): obj.framework = metadata["framework"] obj.onnx_model_ = metadata["onnx_model_"] obj.output_quantizers = metadata["output_quantizers"] + obj._fhe_ensembling = metadata["_fhe_ensembling"] obj._tree_inference = tree_to_numpy( obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, + fhe_ensembling=obj._fhe_ensembling, )[0] obj.post_processing_params = metadata["post_processing_params"] @@ -354,6 +357,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["onnx_model_"] = self.onnx_model_ metadata["framework"] = self.framework metadata["post_processing_params"] = self.post_processing_params + metadata["_fhe_ensembling"] = self._fhe_ensembling # XGBoost metadata["max_depth"] = self.max_depth @@ -403,11 +407,13 @@ def load_dict(cls, metadata: Dict): obj.framework = metadata["framework"] obj.onnx_model_ = metadata["onnx_model_"] obj.output_quantizers = metadata["output_quantizers"] + obj._fhe_ensembling = metadata["_fhe_ensembling"] obj._tree_inference = tree_to_numpy( obj.sklearn_model, numpy.zeros((len(obj.input_quantizers),))[None, ...], framework=obj.framework, output_n_bits=obj.n_bits["op_leaves"] if isinstance(obj.n_bits, Dict) else obj.n_bits, + fhe_ensembling=obj._fhe_ensembling, )[0] obj.post_processing_params = metadata["post_processing_params"] diff --git a/tests/sklearn/test_dump_onnx.py 
b/tests/sklearn/test_dump_onnx.py index 4e7589319..7ff40b71e 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -30,10 +30,11 @@ def check_onnx_file_dump( # Set the model model = model_class() - # Set `use_fhe_sum` - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=UserWarning) - model.use_fhe_sum = use_fhe_sum + # Set `_fhe_ensembling` for tree based models only + if model_class in _get_sklearn_tree_models(): + + # pylint: disable=protected-access + model._fhe_ensembling = use_fhe_sum # Ignore long lines here # ruff: noqa: E501 diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 6e5bd2a9e..02d773dfc 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1212,7 +1212,8 @@ def check_fhe_sum_for_tree_based_models( if is_weekly_option: fhe_test = get_random_samples(x, n_sample=5) - assert not model.fhe_ensembling, "`fhe_ensembling` is disabled by default." + # pylint: disable=protected-access + assert not model._fhe_ensembling, "`_fhe_ensembling` is disabled by default." fit_and_compile(model, x, y) non_fhe_sum_predict_quantized = predict_method(x, fhe="disable") @@ -1225,8 +1226,8 @@ def check_fhe_sum_for_tree_based_models( if is_weekly_option: non_fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") - with pytest.warns(UserWarning, match="Enabling `fhe_ensembling` .*"): - model.fhe_ensembling = True + # pylint: disable=protected-access + model._fhe_ensembling = True fit_and_compile(model, x, y) From 25bf8399f8945abda6f386fa90e70ac62de8fcb1 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 24 Jan 2024 15:13:43 +0100 Subject: [PATCH 35/38] chore: fix test dump --- tests/sklearn/test_dump_onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sklearn/test_dump_onnx.py b/tests/sklearn/test_dump_onnx.py index 7ff40b71e..34e14d242 100644 --- a/tests/sklearn/test_dump_onnx.py +++ b/tests/sklearn/test_dump_onnx.py @@ -504,7 +504,7 @@ def test_dump( check_onnx_file_dump(model_class, parameters, load_data, default_configuration) # Additional tests exclusively dedicated for tree ensemble models. - if model_class in _get_sklearn_tree_models()[2:]: + if model_class in _get_sklearn_tree_models(): check_onnx_file_dump( model_class, parameters, load_data, default_configuration, use_fhe_sum=True ) From 5be7255c688afb73426464bc249b68f676ab4741 Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 24 Jan 2024 20:07:11 +0100 Subject: [PATCH 36/38] chore: update comments --- src/concrete/ml/sklearn/tree_to_numpy.py | 2 +- tests/sklearn/test_sklearn_models.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/concrete/ml/sklearn/tree_to_numpy.py b/src/concrete/ml/sklearn/tree_to_numpy.py index d2723e925..b50944319 100644 --- a/src/concrete/ml/sklearn/tree_to_numpy.py +++ b/src/concrete/ml/sklearn/tree_to_numpy.py @@ -136,7 +136,7 @@ def assert_add_node_and_constant_in_xgboost_regressor_graph(onnx_model: onnx.Mod ) -def add_transpose_after_last_node(onnx_model: onnx.ModelProto, fhe_ensembling: bool): +def add_transpose_after_last_node(onnx_model: onnx.ModelProto, fhe_ensembling: bool = False): """Add transpose after last node. 
Args: diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 02d773dfc..58423df78 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1952,7 +1952,7 @@ def test_fhe_sum_for_tree_based_models( # This test should be extended to all built-in models. -# FIXME: https://github.com/zama-ai/concrete-ml-internal#4234 +# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4234 @pytest.mark.parametrize( "n_bits, error_message", [ @@ -1981,7 +1981,7 @@ def test_invalid_n_bits_setting(model_class, n_bits, error_message): # This test should be extended to all built-in models. -# FIXME: https://github.com/zama-ai/concrete-ml-internal#4234 +# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4234 @pytest.mark.parametrize("n_bits", [5, {"op_inputs": 5}, {"op_inputs": 2, "op_leaves": 1}]) @pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets()) def test_valid_n_bits_setting( From ee696ece4f624184e868334f9e27b90e1d77b77c Mon Sep 17 00:00:00 2001 From: kcelia Date: Thu, 25 Jan 2024 14:51:13 +0100 Subject: [PATCH 37/38] chore: remove comment --- src/concrete/ml/sklearn/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py index b42cf5518..6ffdb8e2a 100644 --- a/src/concrete/ml/sklearn/base.py +++ b/src/concrete/ml/sklearn/base.py @@ -1303,7 +1303,6 @@ def __init__(self, n_bits: Union[int, Dict[str, int]]): # Check if 'n_bits' is a valid value. _inspect_tree_n_bits(n_bits) - #: The number of bits to quantize the model. self.n_bits: Union[int, Dict[str, int]] = n_bits #: The model's inference function. Is None if the model is not fitted. From 1c4ebc06d82f9f8a40562049322f758717e60b52 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 29 Jan 2024 16:30:02 +0100 Subject: [PATCH 38/38] chore: update --- src/concrete/ml/quantization/post_training.py | 2 ++ tests/sklearn/test_sklearn_models.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py index 46bed0214..022508507 100644 --- a/src/concrete/ml/quantization/post_training.py +++ b/src/concrete/ml/quantization/post_training.py @@ -86,6 +86,8 @@ def _inspect_tree_n_bits(n_bits): ) +# Find a better naming to describe leaf quantization in tree-based models +# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4258 def _get_n_bits_dict_trees(n_bits: Union[int, Dict[str, int]]) -> Dict[str, int]: """Convert the n_bits parameter into a proper dictionary for tree based-models. diff --git a/tests/sklearn/test_sklearn_models.py b/tests/sklearn/test_sklearn_models.py index 58423df78..6666087e1 100644 --- a/tests/sklearn/test_sklearn_models.py +++ b/tests/sklearn/test_sklearn_models.py @@ -1210,7 +1210,11 @@ def check_fhe_sum_for_tree_based_models( # Run the test with more samples during weekly CIs if is_weekly_option: - fhe_test = get_random_samples(x, n_sample=5) + fhe_samples = 5 + else: + fhe_samples = 1 + + fhe_test = get_random_samples(x, n_sample=fhe_samples) # pylint: disable=protected-access assert not model._fhe_ensembling, "`_fhe_ensembling` is disabled by default." 
@@ -1218,14 +1222,11 @@ def check_fhe_sum_for_tree_based_models( non_fhe_sum_predict_quantized = predict_method(x, fhe="disable") non_fhe_sum_predict_simulate = predict_method(x, fhe="simulate") + non_fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") # Sanity check array_allclose_and_same_shape(non_fhe_sum_predict_quantized, non_fhe_sum_predict_simulate) - # Compute the FHE predictions only during weekly CIs - if is_weekly_option: - non_fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") - # pylint: disable=protected-access model._fhe_ensembling = True @@ -1233,6 +1234,7 @@ def check_fhe_sum_for_tree_based_models( fhe_sum_predict_quantized = predict_method(x, fhe="disable") fhe_sum_predict_simulate = predict_method(x, fhe="simulate") + fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") # Sanity check array_allclose_and_same_shape(fhe_sum_predict_quantized, fhe_sum_predict_simulate) @@ -1240,9 +1242,7 @@ def check_fhe_sum_for_tree_based_models( # Check that we have the exact same predictions array_allclose_and_same_shape(fhe_sum_predict_quantized, non_fhe_sum_predict_quantized) array_allclose_and_same_shape(fhe_sum_predict_simulate, non_fhe_sum_predict_simulate) - if is_weekly_option: - fhe_sum_predict_fhe = predict_method(fhe_test, fhe="execute") - array_allclose_and_same_shape(fhe_sum_predict_fhe, non_fhe_sum_predict_fhe) + array_allclose_and_same_shape(fhe_sum_predict_fhe, non_fhe_sum_predict_fhe) # Neural network models are skipped for this test
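The end state of this series replaces the `TREES_USE_FHE_SUM` environment
variable with a private `_fhe_ensembling` attribute on tree-based estimators.
A minimal sketch of the resulting behavior is given below; it assumes a working
Concrete ML installation, and `XGBClassifier`, `make_classification` and the
chosen parameter values are illustrative, not taken from the diffs above:

    import numpy
    from sklearn.datasets import make_classification
    from concrete.ml.sklearn import XGBClassifier

    x, y = make_classification(n_samples=100, n_features=10, random_state=0)

    # Default behavior: each tree's output leaves the circuit separately and
    # post_processing() sums them in the clear
    model = XGBClassifier(n_bits=4, n_estimators=10)
    model.fit(x, y)
    model.compile(x)
    clear_sum = model.predict_proba(x, fhe="simulate")

    # Compute the ensemble sum inside the encrypted circuit instead; the model
    # must be refitted and recompiled for the flag to take effect
    model._fhe_ensembling = True  # pylint: disable=protected-access
    model.fit(x, y)
    model.compile(x)
    fhe_sum = model.predict_proba(x, fhe="simulate")

    # check_fhe_sum_for_tree_based_models asserts that both settings produce
    # the same predictions
    assert numpy.allclose(clear_sum, fhe_sum)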
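The other change threaded through these patches is the `n_bits` validation in
`_inspect_tree_n_bits`. A short sketch of the accepted and rejected settings
follows; it is again illustrative and assumes a Concrete ML installation, with
`DecisionTreeClassifier` standing in for any of the tree-based models the
tests parametrize over:

    import pytest
    from concrete.ml.sklearn import DecisionTreeClassifier

    # Valid: a strictly positive integer, or a dictionary with a mandatory
    # 'op_inputs' key and an optional 'op_leaves' key, where 'op_leaves' must
    # be less than or equal to 'op_inputs'
    DecisionTreeClassifier(n_bits=5)
    DecisionTreeClassifier(n_bits={"op_inputs": 5, "op_leaves": 3})

    # Invalid settings raise a ValueError at instantiation
    with pytest.raises(ValueError, match="'op_leaves' must be less than or equal to 'op_inputs'"):
        DecisionTreeClassifier(n_bits={"op_inputs": 2, "op_leaves": 5})

    with pytest.raises(ValueError, match="n_bits must be a strictly positive integer"):
        DecisionTreeClassifier(n_bits=0)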