Skip to content

Commit

Permalink
qlinearmatmul
Browse files Browse the repository at this point in the history
  • Loading branch information
chachaleo committed Oct 31, 2023
1 parent 5bd2405 commit 23aa0b3
Show file tree
Hide file tree
Showing 13 changed files with 567 additions and 1 deletion.
5 changes: 5 additions & 0 deletions src/operators/tensor.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ use orion::operators::tensor::implementations::tensor_fp8x23::{
FP8x23TensorPartialEq,
};

use orion::operators::tensor::implementations::tensor_fp32x32::{
FP32x32Tensor, FP32x32TensorAdd, FP32x32TensorSub, FP32x32TensorMul, FP32x32TensorDiv,
FP32x32TensorPartialEq,
};

use orion::operators::tensor::implementations::tensor_fp16x16::{
FP16x16Tensor, FP16x16TensorAdd, FP16x16TensorSub, FP16x16TensorMul, FP16x16TensorDiv,
FP16x16TensorPartialEq,
Expand Down
37 changes: 37 additions & 0 deletions src/operators/tensor/core.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -2360,6 +2360,43 @@ trait TensorTrait<T> {
fn dequantize_linear(
self: @Tensor<i8>, x_scale: @Tensor<T>, x_zero_point: @Tensor<T>
) -> Tensor::<T>;
/// # tensor.qlinear_matmul
///
/// ```rust
/// fn qlinear_matmul(self: @Tensor<i8>, a_scale: @Tensor<T>, a_zero_point: @Tensor<T>, b: @Tensor<i8>, b_scale: @Tensor<T>, b_zero_point: @Tensor<T>, y_scale: @Tensor<T>, y_zero_point: @Tensor<T>) -> Tensor::<i8>;
/// ```
///
/// Multiplies quantized tensors.
///
/// It consumes two quantized input tensors, their scales and zero points, and the scale and zero point of the output, then computes the quantized output.
/// The quantization formula is y = saturate((x / y_scale) + y_zero_point).
/// It performs the multiplication of the two tensors once dequantized. If either argument is N-D, N > 2, it is treated as a stack of matrices residing in the last two indexes.
/// It then returns the quantization of the result of the multiplication.
/// Scale and zero point must have the same shape and the same type. They must be either scalar (per tensor) or N-D tensor (per row for 'a' and per column for 'b').
/// Scalar refers to per-tensor quantization whereas N-D refers to per-row or per-column quantization.
///
/// ## Args
///
/// * `self`(`@Tensor<i8>`) - The first tensor to be multiplied (a).
/// * `a_scale`(`@Tensor<T>`) - Scale for input `a`.
/// * `a_zero_point`(`@Tensor<T>`) - Zero point for input `a`.
/// * `b`(`@Tensor<i8>`) - The second tensor to be multiplied (b).
/// * `b_scale`(`@Tensor<T>`) - Scale for input `b`.
/// * `b_zero_point`(`@Tensor<T>`) - Zero point for input `b`.
/// * `y_scale`(`@Tensor<T>`) - Scale for output.
/// * `y_zero_point`(`@Tensor<T>`) - Zero point for output.
///
/// ## Returns
///
/// A new `Tensor<i8>`, containing the quantized result of the multiplication of the dequantized inputs.
///
/// ## Type Constraints
///
/// u32 tensor, not supported.
///
fn qlinear_matmul(
    self: @Tensor<i8>, a_scale: @Tensor<T>, a_zero_point: @Tensor<T>, b: @Tensor<i8>, b_scale: @Tensor<T>, b_zero_point: @Tensor<T>, y_scale: @Tensor<T>, y_zero_point: @Tensor<T>
) -> Tensor::<i8>;
/// # tensor.slice
///
/// ```rust
Expand Down
17 changes: 17 additions & 0 deletions src/operators/tensor/implementations/tensor_fp16x16.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,23 @@ impl FP16x16Tensor of TensorTrait<FP16x16> {
quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
}

/// Quantized matrix multiplication with FP16x16 scales/zero points.
/// Delegates to the shared quantization kernel, supplying the i8 output
/// saturation bounds as the last two arguments.
fn qlinear_matmul(
    self: @Tensor<i8>,
    a_scale: @Tensor<FP16x16>,
    a_zero_point: @Tensor<FP16x16>,
    b: @Tensor<i8>,
    b_scale: @Tensor<FP16x16>,
    b_zero_point: @Tensor<FP16x16>,
    y_scale: @Tensor<FP16x16>,
    y_zero_point: @Tensor<FP16x16>
) -> Tensor::<i8> {
    // NOTE(review): new_unscaled(128, true) / new_unscaled(127, false) presumably
    // encode -128 and +127 (sign flag convention) — confirm against NumberTrait.
    quantization::qlinear_matmul::qlinear_matmul(
        self, a_scale, a_zero_point, b, b_scale, b_zero_point, y_scale, y_zero_point,
        NumberTrait::new_unscaled(128, true), NumberTrait::new_unscaled(127, false)
    )
}

fn slice(
self: @Tensor<FP16x16>,
starts: Span<usize>,
Expand Down
17 changes: 17 additions & 0 deletions src/operators/tensor/implementations/tensor_fp32x32.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,23 @@ impl FP32x32Tensor of TensorTrait<FP32x32> {
quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
}

/// Quantized matrix multiplication with FP32x32 scales/zero points.
/// Delegates to the shared quantization kernel, supplying the i8 output
/// saturation bounds as the last two arguments.
fn qlinear_matmul(
    self: @Tensor<i8>,
    a_scale: @Tensor<FP32x32>,
    a_zero_point: @Tensor<FP32x32>,
    b: @Tensor<i8>,
    b_scale: @Tensor<FP32x32>,
    b_zero_point: @Tensor<FP32x32>,
    y_scale: @Tensor<FP32x32>,
    y_zero_point: @Tensor<FP32x32>
) -> Tensor::<i8> {
    // NOTE(review): new_unscaled(128, true) / new_unscaled(127, false) presumably
    // encode -128 and +127 (sign flag convention) — confirm against NumberTrait.
    quantization::qlinear_matmul::qlinear_matmul(
        self, a_scale, a_zero_point, b, b_scale, b_zero_point, y_scale, y_zero_point,
        NumberTrait::new_unscaled(128, true), NumberTrait::new_unscaled(127, false)
    )
}

fn slice(
self: @Tensor<FP32x32>,
starts: Span<usize>,
Expand Down
17 changes: 17 additions & 0 deletions src/operators/tensor/implementations/tensor_fp64x64.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,23 @@ impl FP64x64Tensor of TensorTrait<FP64x64> {
quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
}

/// Quantized matrix multiplication with FP64x64 scales/zero points.
/// Delegates to the shared quantization kernel, supplying the i8 output
/// saturation bounds as the last two arguments.
fn qlinear_matmul(
    self: @Tensor<i8>,
    a_scale: @Tensor<FP64x64>,
    a_zero_point: @Tensor<FP64x64>,
    b: @Tensor<i8>,
    b_scale: @Tensor<FP64x64>,
    b_zero_point: @Tensor<FP64x64>,
    y_scale: @Tensor<FP64x64>,
    y_zero_point: @Tensor<FP64x64>
) -> Tensor::<i8> {
    // NOTE(review): new_unscaled(128, true) / new_unscaled(127, false) presumably
    // encode -128 and +127 (sign flag convention) — confirm against NumberTrait.
    quantization::qlinear_matmul::qlinear_matmul(
        self, a_scale, a_zero_point, b, b_scale, b_zero_point, y_scale, y_zero_point,
        NumberTrait::new_unscaled(128, true), NumberTrait::new_unscaled(127, false)
    )
}

fn slice(
self: @Tensor<FP64x64>,
starts: Span<usize>,
Expand Down
17 changes: 17 additions & 0 deletions src/operators/tensor/implementations/tensor_fp8x23.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,23 @@ impl FP8x23Tensor of TensorTrait<FP8x23> {
quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
}

/// Quantized matrix multiplication with FP8x23 scales/zero points.
/// Delegates to the shared quantization kernel, supplying the i8 output
/// saturation bounds as the last two arguments.
fn qlinear_matmul(
    self: @Tensor<i8>,
    a_scale: @Tensor<FP8x23>,
    a_zero_point: @Tensor<FP8x23>,
    b: @Tensor<i8>,
    b_scale: @Tensor<FP8x23>,
    b_zero_point: @Tensor<FP8x23>,
    y_scale: @Tensor<FP8x23>,
    y_zero_point: @Tensor<FP8x23>
) -> Tensor::<i8> {
    // NOTE(review): new_unscaled(128, true) / new_unscaled(127, false) presumably
    // encode -128 and +127 (sign flag convention) — confirm against NumberTrait.
    quantization::qlinear_matmul::qlinear_matmul(
        self, a_scale, a_zero_point, b, b_scale, b_zero_point, y_scale, y_zero_point,
        NumberTrait::new_unscaled(128, true), NumberTrait::new_unscaled(127, false)
    )
}

fn slice(
self: @Tensor<FP8x23>,
starts: Span<usize>,
Expand Down
18 changes: 18 additions & 0 deletions src/operators/tensor/implementations/tensor_i32.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,24 @@ impl I32Tensor of TensorTrait<i32> {
quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
}

/// Quantized matrix multiplication with i32 scales/zero points.
/// Delegates to the shared quantization kernel, supplying the i8 output
/// saturation bounds as the last two arguments.
fn qlinear_matmul(
    self: @Tensor<i8>,
    a_scale: @Tensor<i32>,
    a_zero_point: @Tensor<i32>,
    b: @Tensor<i8>,
    b_scale: @Tensor<i32>,
    b_zero_point: @Tensor<i32>,
    y_scale: @Tensor<i32>,
    y_zero_point: @Tensor<i32>
) -> Tensor::<i8> {
    // NOTE(review): new_unscaled(128, true) / new_unscaled(127, false) presumably
    // encode -128 and +127 (sign flag convention) — confirm against NumberTrait.
    quantization::qlinear_matmul::qlinear_matmul(
        self, a_scale, a_zero_point, b, b_scale, b_zero_point, y_scale, y_zero_point,
        NumberTrait::new_unscaled(128, true), NumberTrait::new_unscaled(127, false)
    )
}


fn slice(
self: @Tensor<i32>,
starts: Span<usize>,
Expand Down
18 changes: 18 additions & 0 deletions src/operators/tensor/implementations/tensor_i8.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,26 @@ impl I8Tensor of TensorTrait<i8> {
self: @Tensor<i8>, x_scale: @Tensor<i8>, x_zero_point: @Tensor<i8>
) -> Tensor::<i8> {
quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
}

/// Quantized matrix multiplication with i8 scales/zero points.
/// Delegates to the shared quantization kernel, supplying the i8 output
/// saturation bounds as the last two arguments.
fn qlinear_matmul(
    self: @Tensor<i8>,
    a_scale: @Tensor<i8>,
    a_zero_point: @Tensor<i8>,
    b: @Tensor<i8>,
    b_scale: @Tensor<i8>,
    b_zero_point: @Tensor<i8>,
    y_scale: @Tensor<i8>,
    y_zero_point: @Tensor<i8>
) -> Tensor::<i8> {
    // NOTE(review): new_unscaled(128, true) / new_unscaled(127, false) presumably
    // encode -128 and +127 (sign flag convention) — confirm against NumberTrait.
    quantization::qlinear_matmul::qlinear_matmul(
        self, a_scale, a_zero_point, b, b_scale, b_zero_point, y_scale, y_zero_point,
        NumberTrait::new_unscaled(128, true), NumberTrait::new_unscaled(127, false)
    )
}


fn slice(
self: @Tensor<i8>,
starts: Span<usize>,
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_u32.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ impl U32Tensor of TensorTrait<u32> {
panic(array!['not supported!'])
}

/// Quantized matmul is not implemented for u32 tensors
/// (see the trait's Type Constraints); always panics.
fn qlinear_matmul(
    self: @Tensor<i8>,
    a_scale: @Tensor<u32>,
    a_zero_point: @Tensor<u32>,
    b: @Tensor<i8>,
    b_scale: @Tensor<u32>,
    b_zero_point: @Tensor<u32>,
    y_scale: @Tensor<u32>,
    y_zero_point: @Tensor<u32>
) -> Tensor::<i8> {
    panic(array!['not supported!'])
}

fn slice(
self: @Tensor<u32>,
starts: Span<usize>,
Expand Down
1 change: 1 addition & 0 deletions src/operators/tensor/quantization.cairo
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
mod quantize_linear;
mod dequantize_linear;
mod qlinear_matmul;
Loading

0 comments on commit 23aa0b3

Please sign in to comment.