def numpy_unfold(
    x: numpy.ndarray,
    *,
    kernel_shape: Tuple[int, ...],
    pads: Optional[Tuple[int, ...]] = None,
    strides: Optional[Tuple[int, ...]] = None,
) -> Tuple[numpy.ndarray]:
    """Compute Unfold using a grouped convolution with one-hot kernels.

    Currently supports 2d Unfold with torch semantics. This function is ONNX compatible.

    See: https://github.com/onnx/onnx/blob/main/docs/Operators.md

    Args:
        x (numpy.ndarray): input data (many dtypes are supported). Shape is N x C x H x W for 2d
        kernel_shape (Tuple[int, ...]): shape of the sliding window. Must have 2 elements
        pads (Optional[Tuple[int, ...]]): padding in ONNX format (begin, end) on each axis.
            Must have 4 elements when given, no padding is used when None
        strides (Optional[Tuple[int, ...]]): stride of the sliding window on each axis.
            Must have 2 elements when given, a stride of 1 is used when None

    Returns:
        res (numpy.ndarray): a tensor of size
            (N x (C * kernel_height * kernel_width) x (OutHeight * OutWidth)).
            See https://pytorch.org/docs/stable/generated/torch.nn.Unfold.html

    Raises:
        AssertionError: if the unfold arguments are wrong
    """

    assert_true(len(kernel_shape) == 2, "The unfold operator currently supports only 2-d")
    assert_true(
        pads is None or len(pads) == 4,
        "The unfold operator requires 4 padding values (begin, end on each axis)",
    )
    assert_true(
        strides is None or len(strides) == 2,
        "The unfold operator requires one stride per kernel dimension",
    )

    # Use default values if the ONNX did not set these parameters
    pads = (0, 0, 0, 0) if pads is None else pads
    strides = (1, 1) if strides is None else strides

    # Compute the unfold using a grouped convolution (groups = input channels)
    # This means that each slice of the kernel is applied on each input channel respectively
    # We create kernels with a single one at each window position, which will redirect the
    # value found at that position to its own output channel
    n_in_channels = x.shape[1]
    kernel_height, kernel_width = kernel_shape
    n_positions = kernel_height * kernel_width

    # Row i of the identity matrix, reshaped to the kernel shape, has its single one at
    # (i // kernel_width, i % kernel_width): this enumerates positions in row-major order
    one_hot_kernels = numpy.eye(n_positions, dtype=numpy.int64).reshape(
        (n_positions, 1, kernel_height, kernel_width)
    )

    # The same set of kernels is repeated for every input channel (one group per channel)
    kernels = numpy.tile(one_hot_kernels, (n_in_channels, 1, 1, 1))

    # Pad the input tensor
    pool_pads = compute_onnx_pool_padding(x.shape, kernel_shape, pads, strides, ceil_mode=0)
    q_input_pad = numpy_onnx_pad(x, pool_pads)

    # Padding was already applied above, so the convolution itself is given zero padding
    res = fhe_conv(q_input_pad, kernels, None, [0, 0, 0, 0], strides, None, None, n_in_channels)

    # Flatten the spatial dimensions to fit the torch.nn.functional.unfold output shape
    res = res.reshape((res.shape[0], res.shape[1], -1))

    return (res,)
class QuantizedUnfold(QuantizedMixingOp):
    """Quantized Unfold op.

    Extracts sliding windows from a quantized N x C x H x W input by running a grouped
    convolution with one-hot kernels, then de-quantizes the result and flattens its spatial
    dimensions. Only 2d kernels are supported.
    """

    _impl_for_op_named: str = "Unfold"

    def __init__(
        self,
        n_bits_output: int,
        op_instance_name: str,
        int_input_names: Optional[Set[str]] = None,
        constant_inputs: Optional[Union[Dict[str, Any], Dict[int, Any]]] = None,
        input_quant_opts: Optional[QuantizationOptions] = None,
        **attrs,
    ) -> None:
        """Construct the quantized Unfold operator and validate its ONNX attributes.

        Args:
            n_bits_output (int): forwarded to the parent constructor
            op_instance_name (str): forwarded to the parent constructor, also used to tag
                the convolution in q_impl
            int_input_names (Optional[Set[str]]): forwarded to the parent constructor
            constant_inputs (Optional[Union[Dict[str, Any], Dict[int, Any]]]): forwarded to
                the parent constructor
            input_quant_opts (Optional[QuantizationOptions]): forwarded to the parent
                constructor
            attrs: ONNX attributes. "kernel_shape" is required, "pads", "dilations" and
                "strides" are optional
        """

        super().__init__(
            n_bits_output,
            op_instance_name,
            int_input_names,
            constant_inputs,
            input_quant_opts,
            **attrs,
        )

        # Get the ONNX parameters
        self.kernel_shape = attrs.get("kernel_shape", None)
        assert_true(
            self.kernel_shape is not None,
            "The Unfold operator requires the kernel_shape attribute",
        )
        n_kernel_dims = len(self.kernel_shape)

        # Attributes that are missing, or explicitly set to None, fall back to their defaults.
        # Padding needs a (begin, end) pair for every kernel dimension
        pads = attrs.get("pads", None)
        self.pads = tuple([0] * 2 * n_kernel_dims) if pads is None else pads

        # NOTE(review): dilations are stored but q_impl does not read them - confirm that
        # non-unit dilations are rejected upstream
        dilations = attrs.get("dilations", None)
        self.dilations = tuple([1] * n_kernel_dims) if dilations is None else dilations

        strides = attrs.get("strides", None)
        self.strides = tuple([1] * n_kernel_dims) if strides is None else strides

        # Validate the parameters
        assert_true(
            len(self.kernel_shape) == 2,
            "The Unfold operator currently supports only 2d",
        )
        assert_true(
            len(self.kernel_shape) == len(self.strides),
            "The Unfold operator requires the number of strides to "
            "be the same as the number of kernel dimensions",
        )
        assert_true(
            len(self.pads) == 2 * len(self.kernel_shape),
            "The Unfold operator in Concrete ML requires padding to be specified as "
            " (pad_left_dim1, pad_right_dim1, pad_left_dim2, pad_right_dim2, ...), following ONNX"
            " standard",
        )

        # NOTE(review): these two attributes are not read anywhere in this class
        self.kernel: Union[numpy.ndarray, None] = None
        self.norm_const: Union[float, None] = None

    def q_impl(
        self,
        *q_inputs: ONNXOpInputOutputType,
        **attrs,
    ) -> ONNXOpInputOutputType:
        """Compute the quantized Unfold.

        Args:
            q_inputs (ONNXOpInputOutputType): the inputs of the op, the first one is the
                quantized tensor to unfold
            attrs: unused in this implementation

        Returns:
            ONNXOpInputOutputType: a QuantizedArray built from the de-quantized unfolded
                values, with the spatial dimensions flattened into the last axis
        """

        # Retrieve the quantized inputs
        prepared_inputs = self._prepare_inputs_with_constants(
            *q_inputs, calibrate=False, quantize_actual_values=True
        )
        q_input: QuantizedArray = prepared_inputs[0]

        # Build one kernel per (input channel, window position), each containing a single one
        # at that position. Used in a grouped convolution (groups = input channels), every
        # kernel copies the input value found at its position to its own output channel
        n_in_channels = q_input.qvalues.shape[1]
        kernel_height, kernel_width = self.kernel_shape
        n_positions = kernel_height * kernel_width

        # Row i of the identity matrix, reshaped to the kernel shape, has its single one at
        # (i // kernel_width, i % kernel_width): this enumerates positions in row-major order
        one_hot_kernels = numpy.eye(n_positions, dtype=numpy.int64).reshape(
            (n_positions, 1, kernel_height, kernel_width)
        )
        kernels = numpy.tile(one_hot_kernels, (n_in_channels, 1, 1, 1))

        # for mypy: The Quantized ops can only run on QuantizedArray that have quantization
        # parameters (i.e., were fully constructed). This should always be the case, except
        # during the UniformQuantizer initialization when the zero_point can exist as None
        assert q_input.quantizer.zero_point is not None

        # Compute padding with floor and apply it to the input, pad with the input zero-point
        pool_pads = compute_onnx_pool_padding(
            q_input.qvalues.shape, self.kernel_shape, self.pads, self.strides, ceil_mode=0
        )

        # Can only pad with scalar zero-points, but zero-points can be float in special cases
        # for output layers
        _check_op_input_zero_point(q_input.quantizer.zero_point, self.op_instance_name)
        pad_value = int(q_input.quantizer.zero_point)
        q_input_pad = numpy_onnx_pad(q_input.qvalues, pool_pads, pad_value, int_only=True)

        # Remark that here, we are _not_ using Concrete pad, since it would pad with
        # 0's while we want to pad with zero-point's. So, instead, we have done the padding
        # on our side, with q_input_pad
        fake_pads = [0] * len(self.pads)

        with tag(self.op_instance_name + ".unfold"):
            sum_result = fhe_conv(
                q_input_pad, kernels, None, fake_pads, self.strides, None, None, n_in_channels
            )

        if self.debug_value_tracker is not None:
            # pylint: disable-next=unsubscriptable-object
            self.debug_value_tracker[self.op_instance_name][
                "output"
            ] = sum_result  # pragma: no cover

        # Each output value is a copy of a single input value, so the input quantization
        # parameters are used to de-quantize it
        result = (
            sum_result.astype(numpy.float64) - q_input.quantizer.zero_point
        ) * q_input.quantizer.scale

        # Reshape to fit the same shape output as unfold
        result = result.reshape((result.shape[0], result.shape[1], -1))

        return QuantizedArray(
            self.n_bits,
            result,
            value_is_float=True,
            options=self._get_output_quant_opts(),
            stats=self.output_quant_stats,
            params=self.output_quant_params,
        )
@pytest.mark.parametrize("n_bits", [16])
@pytest.mark.parametrize(
    "params",
    [
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(1, 1, 32, 32)),
            (3, 3),
            (2, 2),
            (0, 0, 0, 0),
        ),
        (
            numpy.random.uniform(low=-1.2, high=0.2, size=(10, 1, 16, 16)),
            (2, 2),
            (1, 1),
            (0, 0, 0, 0),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 32, 4, 4)),
            (2, 2),
            (1, 1),
            (0, 0, 0, 0),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 32, 4, 4)),
            (2, 4),
            (1, 1),
            (1, 2, 1, 2),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 32, 4, 4)),
            (2, 4),
            (1, 1),
            (0, 2, 0, 2),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 32, 5, 5)),
            (3, 3),
            (1, 1),
            (1, 1, 1, 1),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 1, 7, 5)),
            (5, 1),
            (1, 1),
            (1, 2, 0, 4),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(1, 1, 16, 16)),
            (2, 2),
            (4, 4),
            (1, 2, 0, 4),
        ),
    ],
)
@pytest.mark.parametrize("is_signed", [True, False])
def test_quantized_unfold(params, n_bits, is_signed, check_r2_score, check_float_array_equal):
    """Test the quantized unfold operator.

    The float (calibration) result is compared for equality with torch's unfold, then the
    quantized result is compared to the float one, and finally serialization is checked.
    """

    # Retrieve arguments: each parameter set is (input, kernel_shape, strides, pads)
    net_input, kernel_shape, strides, pads = params

    # Create quantized data
    q_input = QuantizedArray(n_bits, net_input, is_signed=is_signed)

    q_op = QuantizedUnfold(
        n_bits,
        OP_DEBUG_NAME + "QuantizedUnfold",
        strides=strides,
        pads=pads,
        kernel_shape=kernel_shape,
        input_quant_opts=q_input.quantizer.quant_options,
    )

    # Compute the result in floating point
    expected_result = q_op.calibrate(net_input)

    # Build the torch reference on a copy of the input
    tinputs = torch.Tensor(net_input.copy())

    # Torch uses padding (padding_left, padding_right, padding_top, padding_bottom)
    # While ONNX and Concrete ML use (padding_top, padding_left, padding_bottom, padding_right)
    tx_pad = torch.nn.functional.pad(tinputs, (pads[1], pads[3], pads[0], pads[2]))

    # Compute the torch unfold. The input is already padded, so no padding is requested here
    torch_res = torch.nn.functional.unfold(
        tx_pad, kernel_shape, dilation=1, padding=0, stride=strides
    ).numpy()

    check_float_array_equal(torch_res, expected_result)

    # Compute the quantized result
    result = q_op(q_input).dequant()

    # The fp32 and quantized results should be very similar when quantization precision is high
    check_r2_score(expected_result, result)

    # Test the serialization of QuantizedUnfold
    check_serialization(
        q_op,
        QuantizedUnfold,
        equal_method=partial(quantized_op_results_are_equal, q_input=q_input),
    )