[PkgCI] Add recipe for correctness on CPU (iree-org#15131)
This patch adds a recipe for checking the correctness of llama_7b_i4 on CPU.
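The recipe compiles the model for the host CPU and replays two recorded forward calls, handing golden outputs to iree-run-module via --expected_output so the tool itself performs the comparison. At the numpy level the check amounts to something like the sketch below (illustrative only; the real comparison and its tolerances live inside iree-run-module, and the atol here is an assumed placeholder):

import numpy as np

def matches_golden(actual: np.ndarray, golden_path: str, atol: float = 1e-3) -> bool:
    # Load the pregenerated golden tensor and compare shape and values.
    golden = np.load(golden_path)
    return actual.shape == golden.shape and bool(np.allclose(actual, golden, atol=atol))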
Groverkss authored Oct 12, 2023
1 parent 9c424c4 commit dd26475
Showing 1 changed file with 70 additions and 1 deletion.

experimental/regression_suite/tests/pregenerated/test_llama2.py (+70 −1)
@@ -55,6 +55,19 @@ def llama2_7b_f16qi4_stripped_host_cpu_vmfb(llama2_7b_f16qi4_stripped_source):
    )


@pytest.fixture
def llama2_7b_f16qi4_host_cpu_vmfb(llama2_7b_f16qi4_source):
    return iree_compile(
        llama2_7b_f16qi4_source,
        "host_cpu",
        flags=COMMON_FLAGS
        + [
            "--iree-hal-target-backends=llvm-cpu",
            "--iree-llvmcpu-target-cpu-features=host",
        ],
    )
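# Compiling with --iree-llvmcpu-target-cpu-features=host pins codegen to the
# ISA of the machine running the test, so the resulting .vmfb is host-specific
# rather than portable across CI workers.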


@pytest.fixture
def llama2_7b_f16qi4_a100_vulkan_vmfb(llama2_7b_f16qi4_stripped_source):
    return iree_compile(
@@ -95,7 +108,7 @@ def llama2_7b_f16qi4_sm80_cuda_vmfb(llama2_7b_f16qi4_source):


###############################################################################
-# Tests
+# Performance
###############################################################################


@@ -191,6 +204,62 @@ def test_step_a100_vulkan_stripped(llama2_7b_f16qi4_a100_vulkan_vmfb):
    )


###############################################################################
# Correctness
###############################################################################


llama2_7b_f16qi4_first_input_cpu = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/first_vicuna_forward_input.npy",
    group="llama2_7b_f16qi4_first_input_cpu",
)

llama2_7b_f16qi4_first_output_cpu = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/first_vicuna_forward_output.npy",
    group="llama2_7b_f16qi4_first_output_cpu",
)

llama2_7b_f16qi4_second_input_cpu = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/second_vicuna_forward_input.npy",
    group="llama2_7b_f16qi4_second_input_cpu",
)

llama2_7b_f16qi4_second_output_cpu = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/second_vicuna_forward_output.npy",
    group="llama2_7b_f16qi4_second_output_cpu",
)
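# The four fixtures above pin golden inputs and outputs for the module's two
# entry points (first_vicuna_forward and second_vicuna_forward); the test
# below replays each against the compiled module and lets iree-run-module
# compare results.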


@pytest.mark.postsubmit
@pytest.mark.unstable_linalg
@pytest.mark.plat_host_cpu
def test_correctness_host_cpu(
    llama2_7b_f16qi4_host_cpu_vmfb,
    llama2_7b_f16qi4_first_input_cpu,
    llama2_7b_f16qi4_first_output_cpu,
    llama2_7b_f16qi4_second_input_cpu,
    llama2_7b_f16qi4_second_output_cpu,
):
    iree_run_module(
        llama2_7b_f16qi4_host_cpu_vmfb,
        device="local-task",
        function="first_vicuna_forward",
        args=[
            f"--input=@{llama2_7b_f16qi4_first_input_cpu.path}",
            f"--expected_output=@{llama2_7b_f16qi4_first_output_cpu.path}",
        ],
    )
    iree_run_module(
        llama2_7b_f16qi4_host_cpu_vmfb,
        device="local-task",
        function="second_vicuna_forward",
        args=[
            f"--input=@{llama2_7b_f16qi4_second_input_cpu.path}",
            f"--expected_output=@{llama2_7b_f16qi4_second_output_cpu.path}",
        ],
    )


llama2_7b_f16qi4_first_input_cuda = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cuda/first_vicuna_forward_input.npy",
    group="llama2_7b_f16qi4_first_input_cuda",
…

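To reproduce the new check locally, one option is to drive pytest by the markers the test declares, along the lines of the sketch below (assuming the markers are registered in the suite's pytest configuration and the fetched artifacts are reachable):

import pytest

# Select only the new host-CPU correctness test from the regression suite.
pytest.main([
    "experimental/regression_suite/tests/pregenerated/test_llama2.py",
    "-m", "postsubmit and plat_host_cpu",
    "-k", "test_correctness_host_cpu",
])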