From 23aa0b34e4e5196f4f0e5c49637b545882556363 Mon Sep 17 00:00:00 2001
From: chachaleo <charlotte.leo@hotmail.com>
Date: Tue, 31 Oct 2023 16:53:52 +0700
Subject: [PATCH] qlinearmatmul

---
 src/operators/tensor.cairo                    |   5 +
 src/operators/tensor/core.cairo               |  37 +++
 .../implementations/tensor_fp16x16.cairo      |  17 ++
 .../implementations/tensor_fp32x32.cairo      |  17 ++
 .../implementations/tensor_fp64x64.cairo      |  17 ++
 .../implementations/tensor_fp8x23.cairo       |  17 ++
 .../tensor/implementations/tensor_i32.cairo   |  18 ++
 .../tensor/implementations/tensor_i8.cairo    |  18 ++
 .../tensor/implementations/tensor_u32.cairo   |   6 +
 src/operators/tensor/quantization.cairo       |   1 +
 .../tensor/quantization/qlinear_matmul.cairo  | 217 ++++++++++++++++++
 tests/src/operators.cairo                     |   3 +-
 tests/src/operators/qlinearmatmul_test.cairo  | 195 ++++++++++++++++
 13 files changed, 567 insertions(+), 1 deletion(-)
 create mode 100644 src/operators/tensor/quantization/qlinear_matmul.cairo
 create mode 100644 tests/src/operators/qlinearmatmul_test.cairo
diff --git a/src/operators/tensor.cairo b/src/operators/tensor.cairo
index 976102b52..30403831f 100644
--- a/src/operators/tensor.cairo
+++ b/src/operators/tensor.cairo
@@ -12,6 +12,11 @@ use orion::operators::tensor::implementations::tensor_fp8x23::{
     FP8x23TensorPartialEq,
 };
 
+use orion::operators::tensor::implementations::tensor_fp32x32::{
+    FP32x32Tensor, FP32x32TensorAdd, FP32x32TensorSub, FP32x32TensorMul, FP32x32TensorDiv,
+    FP32x32TensorPartialEq,
+};
+
 use orion::operators::tensor::implementations::tensor_fp16x16::{
     FP16x16Tensor, FP16x16TensorAdd, FP16x16TensorSub, FP16x16TensorMul, FP16x16TensorDiv,
     FP16x16TensorPartialEq,
diff --git a/src/operators/tensor/core.cairo b/src/operators/tensor/core.cairo
index 535ed699c..0fef25def 100644
--- a/src/operators/tensor/core.cairo
+++ b/src/operators/tensor/core.cairo
@@ -2360,6 +2360,43 @@ trait TensorTrait<T> {
     fn dequantize_linear(
         self: @Tensor<i8>, x_scale: @Tensor<T>, x_zero_point: @Tensor<T>
     ) -> Tensor::<T>;
+    /// # tensor.qlinear_matmul
+    ///
+    /// ```rust
+    ///     fn qlinear_matmul(self: @Tensor<i8>, a_scale: @Tensor<T>, a_zero_point: @Tensor<T>, b: @Tensor<i8>, b_scale: @Tensor<T>, b_zero_point: @Tensor<T>, y_scale: @Tensor<T>, y_zero_point: @Tensor<T>) -> Tensor::<i8>;
+    /// ```
+    ///
+    /// Multiplies quantized Tensors
+    ///
+    /// It consumes two quantized input tensors, their scales and zero points, scale and zero point of output, and computes the quantized output.
+    /// The quantization formula is y = saturate((x / y_scale) + y_zero_point).
+    /// It performs the multiplication of the two tensors once dequantized. If either argument is N-D, N > 2, it is treated as a stack of matrices residing in the last two indexes.
+    /// It then returns the quantization of the result of the multiplication.
+    /// Scale and zero point must have same shape and the same type. They must be either scalar (per tensor) or N-D tensor (per row for 'a' and per column for 'b').
+    /// Scalar refers to per tensor quantization whereas N-D refers to per row or per column quantization.
+    ///
+    /// ## Args
+    ///
+    /// * `self`(`@Tensor<i8>`) - The first tensor to be multiplied (a).
+    /// * `a_scale`(`@Tensor<T>`) - Scale for input `a`.
+    /// * `a_zero_point`(`@Tensor<T>`) - Zero point for input `a`.
+    /// * `b`(`@Tensor<i8>`) - The second tensor to be multiplied.
+    /// * `b_scale`(`@Tensor<T>`) - Scale for input `b`.
+    /// * `b_zero_point`(`@Tensor<T>`) - Zero point for input `b`.
+    /// * `y_scale`(`@Tensor<T>`) - Scale for output.
+    /// * `y_zero_point`(`@Tensor<T>`) - Zero point for output.
+    ///
+    /// ## Returns
+    ///
+    /// A new `Tensor<i8>`, containing the quantized result of the multiplication of the dequantized inputs.
+    ///
+    /// ## Type Constraints
+    ///
+    /// u32 tensor, not supported.
+    ///
+    fn qlinear_matmul(
+        self: @Tensor<i8>, a_scale: @Tensor<T>, a_zero_point: @Tensor<T>, b: @Tensor<i8>, b_scale: @Tensor<T>, b_zero_point: @Tensor<T>, y_scale: @Tensor<T>, y_zero_point: @Tensor<T>
+    ) -> Tensor::<i8>;
     /// # tensor.slice
     ///
     /// ```rust 
diff --git a/src/operators/tensor/implementations/tensor_fp16x16.cairo b/src/operators/tensor/implementations/tensor_fp16x16.cairo
index ac803904e..87c6b1daf 100644
--- a/src/operators/tensor/implementations/tensor_fp16x16.cairo
+++ b/src/operators/tensor/implementations/tensor_fp16x16.cairo
@@ -194,6 +194,23 @@ impl FP16x16Tensor of TensorTrait<FP16x16> {
         quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
     }
 
+    fn qlinear_matmul(
+        self: @Tensor<i8>, a_scale: @Tensor<FP16x16>, a_zero_point: @Tensor<FP16x16>, b: @Tensor<i8>, b_scale: @Tensor<FP16x16>, b_zero_point: @Tensor<FP16x16>, y_scale: @Tensor<FP16x16>, y_zero_point: @Tensor<FP16x16>
+    ) -> Tensor::<i8> { // Thin wrapper: forwards every argument to the generic quantization routine.
+        quantization::qlinear_matmul::qlinear_matmul(
+            self,
+            a_scale,
+            a_zero_point,
+            b,
+            b_scale,
+            b_zero_point,
+            y_scale,
+            y_zero_point,
+            NumberTrait::new_unscaled(128, true), // `min` saturation bound; presumably -128 (sign flag set) — confirm
+            NumberTrait::new_unscaled(127, false) // `max` saturation bound; presumably +127 — confirm
+        )
+    }
+
     fn slice(
         self: @Tensor<FP16x16>,
         starts: Span<usize>,
diff --git a/src/operators/tensor/implementations/tensor_fp32x32.cairo b/src/operators/tensor/implementations/tensor_fp32x32.cairo
index 19557eba1..b4ef4be78 100644
--- a/src/operators/tensor/implementations/tensor_fp32x32.cairo
+++ b/src/operators/tensor/implementations/tensor_fp32x32.cairo
@@ -195,6 +195,23 @@ impl FP32x32Tensor of TensorTrait<FP32x32> {
         quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
     }
 
+    fn qlinear_matmul(
+        self: @Tensor<i8>, a_scale: @Tensor<FP32x32>, a_zero_point: @Tensor<FP32x32>, b: @Tensor<i8>, b_scale: @Tensor<FP32x32>, b_zero_point: @Tensor<FP32x32>, y_scale: @Tensor<FP32x32>, y_zero_point: @Tensor<FP32x32>
+    ) -> Tensor::<i8> { // Thin wrapper: forwards every argument to the generic quantization routine.
+        quantization::qlinear_matmul::qlinear_matmul(
+            self,
+            a_scale,
+            a_zero_point,
+            b,
+            b_scale,
+            b_zero_point,
+            y_scale,
+            y_zero_point,
+            NumberTrait::new_unscaled(128, true), // `min` saturation bound; presumably -128 (sign flag set) — confirm
+            NumberTrait::new_unscaled(127, false) // `max` saturation bound; presumably +127 — confirm
+        )
+    }
+
     fn slice(
         self: @Tensor<FP32x32>,
         starts: Span<usize>,
diff --git a/src/operators/tensor/implementations/tensor_fp64x64.cairo b/src/operators/tensor/implementations/tensor_fp64x64.cairo
index 686d319b1..c565e6e5a 100644
--- a/src/operators/tensor/implementations/tensor_fp64x64.cairo
+++ b/src/operators/tensor/implementations/tensor_fp64x64.cairo
@@ -195,6 +195,23 @@ impl FP64x64Tensor of TensorTrait<FP64x64> {
         quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
     }
 
+    fn qlinear_matmul(
+        self: @Tensor<i8>, a_scale: @Tensor<FP64x64>, a_zero_point: @Tensor<FP64x64>, b: @Tensor<i8>, b_scale: @Tensor<FP64x64>, b_zero_point: @Tensor<FP64x64>, y_scale: @Tensor<FP64x64>, y_zero_point: @Tensor<FP64x64>
+    ) -> Tensor::<i8> { // Thin wrapper: forwards every argument to the generic quantization routine.
+        quantization::qlinear_matmul::qlinear_matmul(
+            self,
+            a_scale,
+            a_zero_point,
+            b,
+            b_scale,
+            b_zero_point,
+            y_scale,
+            y_zero_point,
+            NumberTrait::new_unscaled(128, true), // `min` saturation bound; presumably -128 (sign flag set) — confirm
+            NumberTrait::new_unscaled(127, false) // `max` saturation bound; presumably +127 — confirm
+        )
+    }
+    
     fn slice(
         self: @Tensor<FP64x64>,
         starts: Span<usize>,
diff --git a/src/operators/tensor/implementations/tensor_fp8x23.cairo b/src/operators/tensor/implementations/tensor_fp8x23.cairo
index f46e96fb5..c17fdb062 100644
--- a/src/operators/tensor/implementations/tensor_fp8x23.cairo
+++ b/src/operators/tensor/implementations/tensor_fp8x23.cairo
@@ -194,6 +194,23 @@ impl FP8x23Tensor of TensorTrait<FP8x23> {
         quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
     }
 
+    fn qlinear_matmul(
+        self: @Tensor<i8>, a_scale: @Tensor<FP8x23>, a_zero_point: @Tensor<FP8x23>, b: @Tensor<i8>, b_scale: @Tensor<FP8x23>, b_zero_point: @Tensor<FP8x23>, y_scale: @Tensor<FP8x23>, y_zero_point: @Tensor<FP8x23>
+    ) -> Tensor::<i8> { // Thin wrapper: forwards every argument to the generic quantization routine.
+        quantization::qlinear_matmul::qlinear_matmul(
+            self,
+            a_scale,
+            a_zero_point,
+            b,
+            b_scale,
+            b_zero_point,
+            y_scale,
+            y_zero_point,
+            NumberTrait::new_unscaled(128, true), // `min` saturation bound; presumably -128 (sign flag set) — confirm
+            NumberTrait::new_unscaled(127, false) // `max` saturation bound; presumably +127 — confirm
+        )
+    }
+
     fn slice(
         self: @Tensor<FP8x23>,
         starts: Span<usize>,
diff --git a/src/operators/tensor/implementations/tensor_i32.cairo b/src/operators/tensor/implementations/tensor_i32.cairo
index dd6ae6f59..e53b456df 100644
--- a/src/operators/tensor/implementations/tensor_i32.cairo
+++ b/src/operators/tensor/implementations/tensor_i32.cairo
@@ -195,6 +195,24 @@ impl I32Tensor of TensorTrait<i32> {
         quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
     }
 
+    fn qlinear_matmul(
+        self: @Tensor<i8>, a_scale: @Tensor<i32>, a_zero_point: @Tensor<i32>, b: @Tensor<i8>, b_scale: @Tensor<i32>, b_zero_point: @Tensor<i32>, y_scale: @Tensor<i32>, y_zero_point: @Tensor<i32>
+    ) -> Tensor::<i8> { // Thin wrapper: forwards every argument to the generic quantization routine.
+        quantization::qlinear_matmul::qlinear_matmul(
+            self,
+            a_scale,
+            a_zero_point,
+            b,
+            b_scale,
+            b_zero_point,
+            y_scale,
+            y_zero_point,
+            NumberTrait::new_unscaled(128, true), // `min` saturation bound; presumably -128 (sign flag set) — confirm
+            NumberTrait::new_unscaled(127, false) // `max` saturation bound; presumably +127 — confirm
+        )
+    }
+
+
     fn slice(
         self: @Tensor<i32>,
         starts: Span<usize>,
diff --git a/src/operators/tensor/implementations/tensor_i8.cairo b/src/operators/tensor/implementations/tensor_i8.cairo
index a4518cefb..df1210413 100644
--- a/src/operators/tensor/implementations/tensor_i8.cairo
+++ b/src/operators/tensor/implementations/tensor_i8.cairo
@@ -192,8 +192,26 @@ impl I8Tensor of TensorTrait<i8> {
         self: @Tensor<i8>, x_scale: @Tensor<i8>, x_zero_point: @Tensor<i8>
     ) -> Tensor::<i8> {
         quantization::dequantize_linear::dequantize_linear(self, x_scale, x_zero_point)
+    }    
+    
+    fn qlinear_matmul(
+        self: @Tensor<i8>, a_scale: @Tensor<i8>, a_zero_point: @Tensor<i8>, b: @Tensor<i8>, b_scale: @Tensor<i8>, b_zero_point: @Tensor<i8>, y_scale: @Tensor<i8>, y_zero_point: @Tensor<i8>
+    ) -> Tensor::<i8> { // Thin wrapper: forwards every argument to the generic quantization routine.
+        quantization::qlinear_matmul::qlinear_matmul(
+            self,
+            a_scale,
+            a_zero_point,
+            b,
+            b_scale,
+            b_zero_point,
+            y_scale,
+            y_zero_point,
+            NumberTrait::new_unscaled(128, true), // `min` saturation bound; presumably -128 (sign flag set) — confirm
+            NumberTrait::new_unscaled(127, false) // `max` saturation bound; presumably +127 — confirm
+        )
     }
 
+
     fn slice(
         self: @Tensor<i8>,
         starts: Span<usize>,
diff --git a/src/operators/tensor/implementations/tensor_u32.cairo b/src/operators/tensor/implementations/tensor_u32.cairo
index 4d26a8634..f87cc4177 100644
--- a/src/operators/tensor/implementations/tensor_u32.cairo
+++ b/src/operators/tensor/implementations/tensor_u32.cairo
@@ -188,6 +188,12 @@ impl U32Tensor of TensorTrait<u32> {
         panic(array!['not supported!'])
     }
 
+    fn qlinear_matmul(
+        self: @Tensor<i8>, a_scale: @Tensor<u32>, a_zero_point: @Tensor<u32>, b: @Tensor<i8>, b_scale: @Tensor<u32>, b_zero_point: @Tensor<u32>, y_scale: @Tensor<u32>, y_zero_point: @Tensor<u32>
+    ) -> Tensor::<i8> {
+        panic(array!['not supported!']) // u32 scales/zero points are unsupported: always panics
+    }
+
     fn slice(
         self: @Tensor<u32>,
         starts: Span<usize>,
diff --git a/src/operators/tensor/quantization.cairo b/src/operators/tensor/quantization.cairo
index 0ed0c12d8..62b428766 100644
--- a/src/operators/tensor/quantization.cairo
+++ b/src/operators/tensor/quantization.cairo
@@ -1,2 +1,3 @@
 mod quantize_linear;
 mod dequantize_linear;
+mod qlinear_matmul;
\ No newline at end of file
diff --git a/src/operators/tensor/quantization/qlinear_matmul.cairo b/src/operators/tensor/quantization/qlinear_matmul.cairo
new file mode 100644
index 000000000..5409b8a11
--- /dev/null
+++ b/src/operators/tensor/quantization/qlinear_matmul.cairo
@@ -0,0 +1,217 @@
+use array::ArrayTrait;
+use array::SpanTrait;
+use option::OptionTrait;
+
+use orion::numbers::{NumberTrait};
+use orion::operators::tensor::quantization::dequantize_linear::dequantize_linear;
+use orion::operators::tensor::quantization::quantize_linear::quantize_linear;
+use orion::operators::tensor::{TensorTrait, Tensor};
+
+
+/// Shared implementation behind `TensorTrait::qlinear_matmul` (see that docstring for the contract).
+/// Dequantizes `a` and `b`, multiplies them (2-D: `matmul`; N-D: one `(m, k) x (k, n)` product per
+/// stacked matrix), then re-quantizes with `y_scale`, `y_zero_point`, `min` and `max`.
+/// Panics on rank mismatch ('dim missmatch') or inner-dimension mismatch in the N-D case.
+fn qlinear_matmul<
+    T,
+    MAG,
+    Q,
+    impl TTensor: TensorTrait<T>,
+    impl QTensor: TensorTrait<Q>,
+    impl QIntoT: Into<Q, T>,
+    impl QTensorIntoTTensor: Into<Tensor<Q>, Tensor<T>>,
+    impl TAdd: Add<T>,
+    impl TSub: Sub<T>,
+    impl TDiv: Div<T>,
+    impl TMul: Mul<T>,
+    impl TTensorSub: Sub<Tensor<T>>,
+    impl TTensorMul: Mul<Tensor<T>>,
+    impl TTensorDiv: Div<Tensor<T>>,
+    impl TPartialOrd: PartialOrd<T>,
+    impl TNumber: NumberTrait<T, MAG>,
+    impl TTryInto: TryInto<T, Q>,
+    impl TAddEq: AddEq<T>,
+    impl TCopy: Copy<T>,
+    impl TDrop: Drop<T>,
+    impl QCopy: Copy<Q>,
+    impl QDrop: Drop<Q>,
+>(
+    a: @Tensor<Q>,
+    a_scale: @Tensor<T>,
+    a_zero_point: @Tensor<T>,
+    b: @Tensor<Q>,
+    b_scale: @Tensor<T>,
+    b_zero_point: @Tensor<T>,
+    y_scale: @Tensor<T>,
+    y_zero_point: @Tensor<T>,
+    min: T,
+    max: T
+) -> Tensor<Q> {
+    let a_shape = *a.shape;
+    let b_shape = *b.shape;
+    let a_ndim = (a_shape).len();
+    let b_ndim = (b_shape).len();
+
+    // Case: both tensors are at most 2-dimensional, so a single matmul is enough.
+    if a_ndim <= 2 && b_ndim <= 2 {
+        let dequantized_a = dequantize_linear(@(*a), a_scale, a_zero_point);
+        let dequantized_b = dequantize_linear(@(*b), b_scale, b_zero_point);
+        let x = dequantized_a.matmul(@dequantized_b);
+        return quantize_linear(@x, y_scale, y_zero_point, min, max);
+    }
+
+    // (D1, D2, M, K) * (D1, D2, K, N) -> (D1, D2, M, N)
+    // NOTE(review): only the rank and the inner dimension are asserted; the leading (batch)
+    // dimensions of `b` are never compared with those of `a` — confirm callers guarantee it.
+    assert(a_ndim == b_ndim, 'dim missmatch');
+    let dequantized_a = dequantize_linear(@(*a), a_scale, a_zero_point);
+    let dequantized_b = dequantize_linear(@(*b), b_scale, b_zero_point);
+    let mut x_shape = ArrayTrait::<usize>::new();
+    let mut x_data = ArrayTrait::<T>::new();
+    assert(a_shape[a_ndim - 1] == b_shape[b_ndim - 2], 'incompatible dim for matmul');
+    let m = *a_shape[a_ndim - 2];
+    let k = *a_shape[a_ndim - 1];
+    let n = *b_shape[b_ndim - 1];
+
+    let mut a_shape_reduced = ArrayTrait::<usize>::new();
+    a_shape_reduced.append(m);
+    a_shape_reduced.append(k);
+
+    let mut b_shape_reduced = ArrayTrait::<usize>::new();
+    b_shape_reduced.append(k);
+    b_shape_reduced.append(n);
+
+    // Number of stacked matrices; loop-invariant, so it is computed once up front.
+    let batch_count = stride(a_shape) / (m * k);
+    let mut i = 0;
+    loop {
+        if i == batch_count {
+            break;
+        };
+        result_updates(
+            @subtensor(@dequantized_a, i * (m * k), a_shape_reduced.span()),
+            @subtensor(@dequantized_b, i * (k * n), b_shape_reduced.span()),
+            ref x_data
+        );
+        i += 1;
+    };
+    x_shape(ref x_shape, a_shape, m, n);
+    let x = TensorTrait::new(x_shape.span(), x_data.span());
+    return quantize_linear(@x, y_scale, y_zero_point, min, max);
+}
+
+/// Appends the output shape to `x_data`: every entry of `shape` ahead of its last two (all of
+/// them when `shape` holds fewer than two), followed by `m` and `n`.
+fn x_shape(ref x_data: Array<usize>, mut shape: Span<usize>, m: usize, n: usize) {
+    // Copy leading dimensions until only the trailing two (the matrix dims) remain.
+    loop {
+        if shape.len() == 2 {
+            break;
+        }
+        match shape.pop_front() {
+            Option::Some(leading_dim) => { x_data.append(*leading_dim); },
+            Option::None(_) => { break; }
+        };
+    };
+
+    x_data.append(m);
+    x_data.append(n);
+}
+
+/// Returns the product of all dimensions in `shape`, i.e. the number of elements it describes.
+/// Panics with 'shape cannot be empty' when `shape` has no dimensions.
+fn stride(mut shape: Span<usize>) -> usize {
+    assert(shape.len() > 0, 'shape cannot be empty');
+    let mut product: usize = 1;
+    loop {
+        match shape.pop_back() {
+            Option::Some(dim) => {
+                product *= *dim;
+            },
+            Option::None(_) => {
+                break;
+            }
+        };
+    };
+    product
+}
+
+/// Copies `stride(shape)` consecutive elements of `x.data`, from `start`, into a new `shape` tensor.
+fn subtensor<
+    T,
+    impl TTensor: TensorTrait<T>,
+    impl TCopy: Copy<T>,
+    impl TDrop: Drop<T>
+>(
+    x: @Tensor<T>, start: usize, shape: Span<usize>
+) -> Tensor::<T> {
+    let len = stride(shape);
+    let mut values = ArrayTrait::<T>::new();
+    let mut offset = 0;
+    loop {
+        if offset == len {
+            break;
+        }
+        values.append(*x.data[start + offset]);
+        offset += 1;
+    };
+    TensorTrait::new(shape, values.span())
+}
+
+
+/// Appends the row-major product of two 2-D tensors to `result_data`.
+///
+/// `mat1` is read as an `(m, n)` matrix and `mat2` as `(n, p)`; the `m * p` sums are pushed in
+/// row-major order, so repeated calls accumulate a stack of result matrices in one flat array.
+fn result_updates<
+    T,
+    MAG,
+    impl TTensor: TensorTrait<T>,
+    impl TNumber: NumberTrait<T, MAG>,
+    impl TMul: Mul<T>,
+    impl TAddEq: AddEq<T>,
+    impl TCopy: Copy<T>,
+    impl TDrop: Drop<T>
+>(
+    mat1: @Tensor<T>, mat2: @Tensor<T>, ref result_data: Array<T>
+) {
+    let m = *mat1.shape[0];
+    let n = *mat1.shape[1];
+    let p = *mat2.shape[1];
+
+    let mat1 = *mat1.data;
+    let mat2 = *mat2.data;
+
+    let mut i = 0_usize;
+    loop {
+        if i == m {
+            break ();
+        }
+
+        let mut j = 0_usize;
+        loop {
+            if j == p {
+                break ();
+            }
+
+            let mut sum: T = NumberTrait::zero();
+            let mut k = 0_usize;
+            loop {
+                if k == n {
+                    break ();
+                }
+
+                let mat1_index = i * n + k;
+                let mat2_index = k * p + j;
+                sum += *mat1[mat1_index] * *mat2[mat2_index];
+
+                k += 1;
+            };
+
+            result_data.append(sum);
+            j += 1;
+        };
+
+        i += 1;
+    };
+}
diff --git a/tests/src/operators.cairo b/tests/src/operators.cairo
index 3c2ffc47b..c57bfef3a 100644
--- a/tests/src/operators.cairo
+++ b/tests/src/operators.cairo
@@ -1 +1,2 @@
-mod transpose_test;
\ No newline at end of file
+mod transpose_test;
+mod qlinearmatmul_test;
\ No newline at end of file
diff --git a/tests/src/operators/qlinearmatmul_test.cairo b/tests/src/operators/qlinearmatmul_test.cairo
new file mode 100644
index 000000000..4229de666
--- /dev/null
+++ b/tests/src/operators/qlinearmatmul_test.cairo
@@ -0,0 +1,195 @@
+use debug::PrintTrait;
+use array::{ArrayTrait, SpanTrait};
+
+use orion::operators::tensor::{
+    TensorTrait, Tensor, I8Tensor, I32Tensor, U32Tensor, FP16x16Tensor, FP32x32Tensor
+};
+use orion::numbers::{FP16x16, FP16x16Impl, FP32x32, FP32x32Impl, FixedTrait};
+use orion::numbers::{NumberTrait, IntegerTrait};
+use orion::numbers::{i8, i32};
+
+
+#[test]
+#[available_gas(200000000000)]
+fn qlinearmatmul_2D_test() { // (2, 4) x (4, 3) inputs with single-element FP16x16 scales and zero points
+    let a = TensorTrait::<i8>::new(
+        shape: array![2, 4].span(),
+        data: array![
+            IntegerTrait::<i8>::new(1_u8, true),
+            IntegerTrait::<i8>::new(2_u8, true),
+            IntegerTrait::<i8>::new(3_u8, true),
+            IntegerTrait::<i8>::new(4_u8, true),
+            IntegerTrait::<i8>::new(5_u8, true),
+            IntegerTrait::<i8>::new(6_u8, true),
+            IntegerTrait::<i8>::new(7_u8, true),
+            IntegerTrait::<i8>::new(8_u8, true)
+        ]
+            .span(),
+    );
+    let b = TensorTrait::<i8>::new(
+        shape: array![4, 3].span(),
+        data: array![
+            IntegerTrait::<i8>::new(2_u8, true),
+            IntegerTrait::<i8>::new(4_u8, true),
+            IntegerTrait::<i8>::new(6_u8, true),
+            IntegerTrait::<i8>::new(8_u8, true),
+            IntegerTrait::<i8>::new(10_u8, true),
+            IntegerTrait::<i8>::new(12_u8, true),
+            IntegerTrait::<i8>::new(14_u8, true),
+            IntegerTrait::<i8>::new(16_u8, true),
+            IntegerTrait::<i8>::new(18_u8, true),
+            IntegerTrait::<i8>::new(20_u8, true),
+            IntegerTrait::<i8>::new(22_u8, true),
+            IntegerTrait::<i8>::new(24_u8, true)
+        ]
+            .span(),
+    );
+
+    let a_scale = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(2000, false)].span(),
+    );
+    let a_zero_point = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(0, false)].span(),
+    );
+    let b_scale = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(2500, false)].span(),
+    );
+    let b_zero_point = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(0, false)].span(),
+    );
+
+    let y_scale = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(3000, false)].span(),
+    );
+    let y_zero_point = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(0, false)].span(),
+    );
+
+    let actual_output = a
+        .qlinear_matmul(
+            @a_scale, @a_zero_point, @b, @b_scale, @b_zero_point, @y_scale, @y_zero_point
+        );
+    let expected_output = TensorTrait::<i8>::new( // NOTE(review): built but never compared with `actual_output`
+        shape: array![2, 3].span(),
+        data: array![
+            IntegerTrait::<i8>::new(3_u8, true),
+            IntegerTrait::<i8>::new(4_u8, true),
+            IntegerTrait::<i8>::new(4_u8, true),
+            IntegerTrait::<i8>::new(8_u8, true),
+            IntegerTrait::<i8>::new(9_u8, true),
+            IntegerTrait::<i8>::new(10_u8, true)
+        ]
+            .span(),
+    );
+    // NOTE(review): the fixture above passes `true` (presumably the sign flag) while the checks below expect positive literals — confirm intent.
+    assert((*actual_output.data[0]).into() == 3, '*result[0] == 3');
+    assert((*actual_output.data[1]).into() == 4, '*result[1] == 4');
+    assert((*actual_output.data[2]).into() == 4, '*result[2] == 4');
+    assert((*actual_output.data[3]).into() == 8, '*result[3] == 8');
+    assert((*actual_output.data[4]).into() == 9, '*result[4] == 9');
+    assert((*actual_output.data[5]).into() == 10, '*result[5] == 10');
+
+}
+
+
+#[test]
+#[available_gas(200000000000)]
+fn qlinearmatmul_3D_test() { // (2, 2, 3) x (2, 3, 2) inputs: exercises the stacked-matrix (N-D) path
+    let a = TensorTrait::<i8>::new(
+        shape: array![2, 2, 3].span(),
+        data: array![
+            IntegerTrait::<i8>::new(1_u8, true),
+            IntegerTrait::<i8>::new(2_u8, true),
+            IntegerTrait::<i8>::new(2_u8, true),
+            IntegerTrait::<i8>::new(3_u8, true),
+            IntegerTrait::<i8>::new(4_u8, true),
+            IntegerTrait::<i8>::new(4_u8, true),
+            IntegerTrait::<i8>::new(5_u8, true),
+            IntegerTrait::<i8>::new(6_u8, true),
+            IntegerTrait::<i8>::new(6_u8, true),
+            IntegerTrait::<i8>::new(7_u8, true),
+            IntegerTrait::<i8>::new(8_u8, true),
+            IntegerTrait::<i8>::new(8_u8, true)
+        ]
+            .span(),
+    );
+    let b = TensorTrait::<i8>::new(
+        shape: array![2, 3, 2].span(),
+        data: array![
+            IntegerTrait::<i8>::new(2_u8, true),
+            IntegerTrait::<i8>::new(4_u8, true),
+            IntegerTrait::<i8>::new(6_u8, true),
+            IntegerTrait::<i8>::new(8_u8, true),
+            IntegerTrait::<i8>::new(10_u8, true),
+            IntegerTrait::<i8>::new(12_u8, true),
+            IntegerTrait::<i8>::new(2_u8, true),
+            IntegerTrait::<i8>::new(4_u8, true),
+            IntegerTrait::<i8>::new(6_u8, true),
+            IntegerTrait::<i8>::new(8_u8, true),
+            IntegerTrait::<i8>::new(10_u8, true),
+            IntegerTrait::<i8>::new(12_u8, true)
+        ]
+            .span(),
+    );
+
+    let a_scale = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(20000, false)].span(),
+    );
+    let a_zero_point = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(0, false)].span(),
+    );
+    let b_scale = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(25000, false)].span(),
+    );
+    let b_zero_point = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(0, false)].span(),
+    );
+
+    let y_scale = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(30000, false)].span(),
+    );
+    let y_zero_point = TensorTrait::<FP16x16>::new(
+        shape: array![1].span(), data: array![FixedTrait::<FP16x16>::new(0, false)].span(),
+    );
+
+    let actual_output = a
+        .qlinear_matmul(
+            @a_scale, @a_zero_point, @b, @b_scale, @b_zero_point, @y_scale, @y_zero_point
+        );
+    let expected_output = TensorTrait::<i8>::new( // NOTE(review): built but never compared with `actual_output`
+        shape: array![2, 2, 2].span(),
+        data: array![
+            IntegerTrait::<i8>::new(8_u8, true),
+            IntegerTrait::<i8>::new(11_u8, true),
+            IntegerTrait::<i8>::new(17_u8, true),
+            IntegerTrait::<i8>::new(23_u8, true),
+            IntegerTrait::<i8>::new(26_u8, true),
+            IntegerTrait::<i8>::new(35_u8, true),
+            IntegerTrait::<i8>::new(36_u8, true),
+            IntegerTrait::<i8>::new(47_u8, true)
+        ]
+            .span(),
+    );
+    assert((*actual_output.data[0]).into() == 8, '*result[0] == 8'); // flat row-major element checks
+    assert((*actual_output.data[1]).into() == 11, '*result[1] == 11');
+    assert((*actual_output.data[2]).into() == 17, '*result[2] == 17');
+    assert((*actual_output.data[3]).into() == 23, '*result[3] == 23');
+    assert((*actual_output.data[4]).into() == 26, '*result[4] == 26');
+    assert((*actual_output.data[5]).into() == 35, '*result[5] == 35');
+    assert((*actual_output.data[6]).into() == 36, '*result[6] == 36');
+    assert((*actual_output.data[7]).into() == 47, '*result[7] == 47');
+
+}
+
+fn print_span(mut span: Span<i8>) { // Debug helper: prints each element's `mag` field, front to back. NOTE(review): not called in this file.
+    loop {
+        match span.pop_front() {
+            Option::Some(i) => {
+                (*i.mag).print(); // only the magnitude is printed; the sign is not shown
+            },
+            Option::None(_) => {
+                break;
+            }
+        };
+    };
+}