From 0b57c71140cca7535d1f5bdc0c088b205e28cbb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jordan=20Fr=C3=A9ry?=
Date: Wed, 29 Nov 2023 15:44:56 +0100
Subject: [PATCH] feat: allow QuantizedAdd produces_graph_output

---
 .gitleaksignore                               |  1 +
 src/concrete/ml/pytest/torch_models.py        | 21 +++++++
 src/concrete/ml/quantization/post_training.py | 10 ++--
 src/concrete/ml/quantization/quantized_ops.py | 56 ++++++++++++-------
 tests/torch/test_compile_torch.py             |  2 +
 5 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/.gitleaksignore b/.gitleaksignore
index 66e5189fe..8f212aff2 100644
--- a/.gitleaksignore
+++ b/.gitleaksignore
@@ -4,3 +4,4 @@
 2d3b4ca188efb338c03d8d2c921ef39ffc5537e3:tests/deployment/test_deployment.py:generic-api-key:59
 198d3fef188aaf3e3a582b9f7943f7ac6e9b5186:tests/deployment/test_deployment.py:generic-api-key:59
 5abc7e86bb192e1f9f829bb2f22173c9d663e1d1:use_case_examples/credit_scoring/CreditScoringWithGraphics.ipynb:easypost-test-api-token:1414
+e2904473898ddd325f245f4faca526a0e9520f49:builders/Dockerfile.zamalang-env:generic-api-key:5
\ No newline at end of file
diff --git a/src/concrete/ml/pytest/torch_models.py b/src/concrete/ml/pytest/torch_models.py
index 2963ebe22..a9c14140f 100644
--- a/src/concrete/ml/pytest/torch_models.py
+++ b/src/concrete/ml/pytest/torch_models.py
@@ -1488,3 +1488,24 @@ def predict(x, weights, bias):
         outputs = torch.sigmoid(torch.bmm(x, weights_expanded) + bias_expanded)
 
         return outputs.squeeze()
+
+
+class AddNet(nn.Module):
+    """Torch model that performs a simple addition between two inputs."""
+
+    def __init__(self, use_conv, use_qat, input_output, n_bits):  # pylint: disable=unused-argument
+        super().__init__()
+        # No initialization needed for simple addition
+
+    @staticmethod
+    def forward(x, y):
+        """Forward pass.
+
+        Args:
+            x: First input tensor.
+            y: Second input tensor.
+
+        Returns:
+            Result of adding x and y.
+        """
+        return x + y
diff --git a/src/concrete/ml/quantization/post_training.py b/src/concrete/ml/quantization/post_training.py
index 8d15b41ac..9389ab05f 100644
--- a/src/concrete/ml/quantization/post_training.py
+++ b/src/concrete/ml/quantization/post_training.py
@@ -423,10 +423,6 @@ def _quantize_layers(self, *input_calibration_data: numpy.ndarray):
 
             quantized_op_class = ONNX_OPS_TO_QUANTIZED_IMPL[op_type]
 
-            # Add rounding_threshold_bits to the attributes if available in quantized_op_class
-            if issubclass(quantized_op_class, QuantizedMixingOp):
-                attributes.update({"rounding_threshold_bits": self.rounding_threshold_bits})
-
             # All inputs, allow optional constants (they become None)
             # Note that input of a node can be duplicated, e.g., (%a, %a, %b)
             curr_inputs = [
@@ -479,6 +475,12 @@ def _quantize_layers(self, *input_calibration_data: numpy.ndarray):
 
             # If we depend on a variable input use the quantized version of the operator
            if has_variable_inputs:
+                # Add rounding_threshold_bits to the attributes if available in quantized_op_class
+                # rounding_threshold_bits only applies to QuantizedOp for now, so we can't use it
+                # if we use the original operator on float (ops_impl.py)
+                if issubclass(quantized_op_class, QuantizedMixingOp):
+                    attributes.update({"rounding_threshold_bits": self.rounding_threshold_bits})
+
                 assert_true(
                     op_type in ONNX_OPS_TO_QUANTIZED_IMPL,
                     f"{op_type} can't be found in {ONNX_OPS_TO_QUANTIZED_IMPL}",
diff --git a/src/concrete/ml/quantization/quantized_ops.py b/src/concrete/ml/quantization/quantized_ops.py
index 67478ef72..09590550b 100644
--- a/src/concrete/ml/quantization/quantized_ops.py
+++ b/src/concrete/ml/quantization/quantized_ops.py
@@ -26,7 +26,12 @@
     QuantizedOp,
     QuantizedOpUnivariateOfEncrypted,
 )
-from .quantizers import QuantizationOptions, QuantizedArray, UniformQuantizationParameters
+from .quantizers import (
+    QuantizationOptions,
+    QuantizedArray,
+    UniformQuantizationParameters,
+    UniformQuantizer,
+)
 
 
 def _check_op_input_zero_point(zero_point: Any, op_name: Optional[str]):
@@ -492,7 +497,7 @@ class QuantizedMatMul(QuantizedGemm):
     _impl_for_op_named: str = "MatMul"
 
 
-class QuantizedAdd(QuantizedOp):
+class QuantizedAdd(QuantizedMixingOp):
     """Quantized Addition operator.
 
     Can add either two variables (both encrypted) or a variable and a constant
@@ -554,22 +559,32 @@ def q_impl(
         assert q_input_1.quantizer.scale is not None
         assert q_input_1.quantizer.zero_point is not None
 
-        # De-quantize with input params and re-quantize with output parameters
-        # This will use TLUs over each element of the two inputs
-        # We do the de-quantization directly, instead of q_inputs[0].dequant(),
-        # So that we do not lose precision in the computation
+        # Dequantize
+        input_0 = q_input_0.dequant()
+        input_1 = q_input_1.dequant()
 
-        rescale_q0 = numpy.rint(
-            q_input_0.quantizer.scale
-            / self.output_quant_params.scale
-            * (q_input_0.qvalues + (-q_input_0.quantizer.zero_point))
-        ).astype(numpy.int64)
+        # If this operator is the last one in the graph,
+        # we rescale using the smallest scale to keep all information
+        if self.produces_graph_output:
+            common_scale = min(q_input_0.quantizer.scale, q_input_1.quantizer.scale)
+        # Otherwise we use the output op quantization scale
+        else:
+            common_scale = self.output_quant_params.scale
 
-        rescale_q1 = numpy.rint(
-            q_input_1.quantizer.scale
-            / self.output_quant_params.scale
-            * (q_input_1.qvalues + (-q_input_1.quantizer.zero_point))
-        ).astype(numpy.int64)
+        common_zero_point = 0
+        offset = 0
+
+        output_quant_params = UniformQuantizationParameters(
+            scale=common_scale,
+            zero_point=common_zero_point,
+            offset=offset,
+        )
+
+        quantizer = UniformQuantizer(params=output_quant_params, no_clipping=True)
+
+        # Re-quantize using the common quantization parameters
+        q_input_0_rescaled = quantizer.quant(input_0)
+        q_input_1_rescaled = quantizer.quant(input_1)
 
         # The sum of quantized encrypted integer values
         # This sum has << max(in_bits0, in_bits1) + 1 >> bits
@@ -580,12 +595,15 @@
         # sum_q = rescale_q0 + self.b_sign * rescale_q1
         # when zama-ai/concrete-numpy-internal#1749 is done
         if self.b_sign == 1:
-            sum_q = rescale_q0 + rescale_q1
+            sum_q = q_input_0_rescaled + q_input_1_rescaled
         elif self.b_sign == -1:
-            sum_q = rescale_q0 - rescale_q1
+            sum_q = q_input_0_rescaled - q_input_1_rescaled
+
+        if self.produces_graph_output:
+            return self.make_output_quant_parameters(sum_q, common_scale, common_zero_point)
 
         # But we would like the output to have n_bits, so we de-quantize
-        dequant_sum = self.output_quant_params.scale * sum_q
+        dequant_sum = quantizer.dequant(sum_q)
 
         # Return the raw float values without re-quantizing them to the new scale, as any
         # following Gemm/Add/Conv will quantize them with _prepare_inputs_with_constants(...)
diff --git a/tests/torch/test_compile_torch.py b/tests/torch/test_compile_torch.py
index aad6f9199..592301208 100644
--- a/tests/torch/test_compile_torch.py
+++ b/tests/torch/test_compile_torch.py
@@ -20,6 +20,7 @@
 from concrete.ml.onnx.convert import OPSET_VERSION_FOR_ONNX_EXPORT
 from concrete.ml.pytest.torch_models import (
     FC,
+    AddNet,
     BranchingGemmModule,
     BranchingModule,
     CNNGrouped,
@@ -1038,6 +1039,7 @@ def __init__(self, input_output, activation_function):
         (MultiInputNNConfigurable, (1, 8, 8), 2, False),
         (DoubleQuantQATMixNet, (1, 8, 8), 1, False),
         (DoubleQuantQATMixNet, 10, 1, False),
+        (AddNet, 10, 2, False),
     ],
 )
 def test_net_has_no_tlu(
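
Usage note (not part of the patch): a minimal sketch of how the new AddNet model could be compiled so that the final QuantizedAdd produces the graph output. It assumes the concrete-ml compile_torch_model API as it exists around this change; the calibration data, bit-width, and parameter values below are illustrative only.

# Hedged sketch: compile AddNet, whose single Add node is the graph output.
# API usage (compile_torch_model, QuantizedModule.forward) is assumed, not
# taken from the patch itself.
import numpy

from concrete.ml.pytest.torch_models import AddNet
from concrete.ml.torch.compile import compile_torch_model

# AddNet ignores its constructor arguments (kept for test-harness compatibility)
model = AddNet(use_conv=False, use_qat=False, input_output=10, n_bits=4)

# Calibration data for the two encrypted inputs, shape (n_samples, 10)
inputset = (
    numpy.random.uniform(-1, 1, size=(100, 10)),
    numpy.random.uniform(-1, 1, size=(100, 10)),
)

quantized_module = compile_torch_model(model, inputset, n_bits=4)

# Run the quantized computation in the clear on one sample pair
x = numpy.random.uniform(-1, 1, size=(1, 10))
y = numpy.random.uniform(-1, 1, size=(1, 10))
print(quantized_module.forward(x, y))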