From f9de712776a2b50e797be00c827debe85c40f418 Mon Sep 17 00:00:00 2001
From: Avimitin <dev@avimit.in>
Date: Thu, 8 Aug 2024 18:37:09 +0800
Subject: [PATCH] [pytorch] add matmul sample

---
 tests/pytorch/default.nix       |  2 +-
 tests/pytorch/matmul/config.nix | 52 +++++++++++++++++++++++++++++++++
 tests/pytorch/matmul/matmul.cc  | 22 ++++++++++++++
 tests/pytorch/matmul/matmul.py  | 26 +++++++++++++++++
 4 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 tests/pytorch/matmul/config.nix
 create mode 100644 tests/pytorch/matmul/matmul.cc
 create mode 100644 tests/pytorch/matmul/matmul.py

diff --git a/tests/pytorch/default.nix b/tests/pytorch/default.nix
index 6b54203d4..eb232fc2b 100644
--- a/tests/pytorch/default.nix
+++ b/tests/pytorch/default.nix
@@ -9,7 +9,7 @@
 
 let
 
-  builder = makeBuilder { casePrefix = "mlir"; };
+  builder = makeBuilder { casePrefix = "pytorch"; };
   build = { caseName, sourcePath }:
     let
       buddyBuildConfig = import (sourcePath + "/config.nix");
diff --git a/tests/pytorch/matmul/config.nix b/tests/pytorch/matmul/config.nix
new file mode 100644
index 000000000..663aff59d
--- /dev/null
+++ b/tests/pytorch/matmul/config.nix
@@ -0,0 +1,52 @@
+{
+  includes = [ # resolves to tests/pytorch/memref.hpp, the header matmul.cc includes
+    ../memref.hpp
+  ];
+
+  buddyOptArgs = [ # three argument groups; presumably one buddy-opt run each, in order - confirm in ../default.nix
+    [ # group 1: TOSA to linalg/tensor/arith, followed by bufferization passes
+      "--pass-pipeline"
+      "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize), func-bufferize)"
+    ]
+    [ # group 2: linalg to loops, and request C wrappers for every func.func
+      "--pass-pipeline"
+      "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), eliminate-empty-tensors, func.func(llvm-request-c-wrappers))"
+    ]
+    [ # group 3: individual flags, ending with the conversions to the LLVM dialect
+      "--arith-expand"
+      "--eliminate-empty-tensors"
+      "--empty-tensor-to-alloc-tensor"
+      "--one-shot-bufferize"
+      "--matmul-paralell-vectorization-optimize" # NOTE(review): "paralell" presumably matches the flag name buddy-opt registers - confirm before correcting
+      "--batchmatmul-optimize"
+      "--convert-linalg-to-affine-loops"
+      "--affine-loop-fusion"
+      "--affine-parallelize"
+      "--lower-affine"
+      "--convert-scf-to-openmp"
+      "--func-bufferize-dynamic-offset"
+      "--tensor-bufferize"
+      "--arith-bufferize"
+      "--buffer-deallocation"
+      "--finalizing-bufferize"
+      "--convert-vector-to-scf"
+      "--expand-strided-metadata"
+      "--cse"
+      "--lower-vector-exp"
+      "--lower-rvv=rv32" # RVV lowering with the rv32 option selected
+      "--convert-vector-to-llvm"
+      "--memref-expand"
+      "--arith-expand" # NOTE(review): second occurrence in this group (also first entry) - confirm it is intentional
+      "--convert-arith-to-llvm"
+      "--finalize-memref-to-llvm"
+      "--convert-scf-to-cf"
+      "--llvm-request-c-wrappers"
+      "--convert-openmp-to-llvm"
+      "--convert-arith-to-llvm" # NOTE(review): repeated from five entries above - confirm it is intentional
+      "--convert-math-to-llvm"
+      "--convert-math-to-libm"
+      "--convert-func-to-llvm"
+      "--reconcile-unrealized-casts"
+    ]
+  ];
+}
diff --git a/tests/pytorch/matmul/matmul.cc b/tests/pytorch/matmul/matmul.cc
new file mode 100644
index 000000000..b523f0626
--- /dev/null
+++ b/tests/pytorch/matmul/matmul.cc
@@ -0,0 +1,22 @@
+#include "memref.hpp"
+// C-interface wrapper of the compiled `forward`; the output descriptor is the first argument.
+extern "C" void _mlir_ciface_forward(MemRef<float, 3> *output,
+                                     MemRef<float, 3> *arg1,
+                                     MemRef<float, 3> *arg2);
+
+// Rank-3 shape 8x8x8 (512 elements each), matching the tensors traced in matmul.py.
+static const int32_t sizes[3] = {8, 8, 8};
+
+__attribute((section(".vdata"))) float input_float_1[512];
+MemRef<float, 3> input1(input_float_1, sizes);
+
+__attribute((section(".vdata"))) float input_float_2[512];
+MemRef<float, 3> input2(input_float_2, sizes);
+
+__attribute((section(".vdata"))) float output_float_1[512];
+MemRef<float, 3> output(output_float_1, sizes);
+
+extern "C" int test() { // unmangled symbol; presumably the entry the test harness calls - confirm
+  _mlir_ciface_forward(&output, &input1, &input2);
+  return 0;
+}
diff --git a/tests/pytorch/matmul/matmul.py b/tests/pytorch/matmul/matmul.py
new file mode 100644
index 000000000..267fe1339
--- /dev/null
+++ b/tests/pytorch/matmul/matmul.py
@@ -0,0 +1,26 @@
+import torch
+import torch._dynamo as dynamo
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import tosa
+
+# Two random float32 operands of shape (8, 8, 8) used to trace torch.matmul.
+lhs = torch.randn(8, 8, 8).to(torch.float32)
+rhs = torch.randn(8, 8, 8).to(torch.float32)
+
+# Build the Dynamo front end from the TOSA op registry and the inductor
+# decomposition table.
+compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+
+# Import torch.matmul with the sample operands, keep the first graph the
+# importer returns, and lower it to top-level IR (tosa, linalg, etc.).
+matmul_graph = compiler.importer(torch.matmul, lhs, rhs)[0]
+matmul_graph.lower_to_top_level_ir()
+
+# Write the textual form of the imported module to forward.mlir.
+with open("forward.mlir", "w") as out_file:
+    out_file.write(str(matmul_graph._imported_module) + "\n")