diff --git a/experimental/regression_suite/tests/pregenerated/test_llama2.py b/experimental/regression_suite/tests/pregenerated/test_llama2.py
index 42ab7e7d5998..fff7bd505795 100644
--- a/experimental/regression_suite/tests/pregenerated/test_llama2.py
+++ b/experimental/regression_suite/tests/pregenerated/test_llama2.py
@@ -55,6 +55,19 @@ def llama2_7b_f16qi4_stripped_host_cpu_vmfb(llama2_7b_f16qi4_stripped_source):
     )
 
 
+@pytest.fixture
+def llama2_7b_f16qi4_host_cpu_vmfb(llama2_7b_f16qi4_source):
+    return iree_compile(
+        llama2_7b_f16qi4_source,
+        "host_cpu",
+        flags=COMMON_FLAGS
+        + [
+            "--iree-hal-target-backends=llvm-cpu",
+            "--iree-llvmcpu-target-cpu-features=host",
+        ],
+    )
+
+
 @pytest.fixture
 def llama2_7b_f16qi4_a100_vulkan_vmfb(llama2_7b_f16qi4_stripped_source):
     return iree_compile(
@@ -95,7 +108,7 @@ def llama2_7b_f16qi4_sm80_cuda_vmfb(llama2_7b_f16qi4_source):
 
 
 ###############################################################################
-# Tests
+# Performance
 ###############################################################################
 
 
@@ -191,6 +204,62 @@ def test_step_a100_vulkan_stripped(llama2_7b_f16qi4_a100_vulkan_vmfb):
     )
 
 
+###############################################################################
+# Correctness
+###############################################################################
+
+
+llama2_7b_f16qi4_first_input_cpu = fetch_source_fixture(
+    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/first_vicuna_forward_input.npy",
+    group="llama2_7b_f16qi4_first_input_cpu",
+)
+
+llama2_7b_f16qi4_first_output_cpu = fetch_source_fixture(
+    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/first_vicuna_forward_output.npy",
+    group="llama2_7b_f16qi4_first_output_cpu",
+)
+
+llama2_7b_f16qi4_second_input_cpu = fetch_source_fixture(
+    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/second_vicuna_forward_input.npy",
+    group="llama2_7b_f16qi4_second_input_cpu",
+)
+
+llama2_7b_f16qi4_second_output_cpu = fetch_source_fixture(
+    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/second_vicuna_forward_output.npy",
+    group="llama2_7b_f16qi4_second_output_cpu",
+)
+
+
+@pytest.mark.postsubmit
+@pytest.mark.unstable_linalg
+@pytest.mark.plat_host_cpu
+def test_correctness_host_cpu(
+    llama2_7b_f16qi4_host_cpu_vmfb,
+    llama2_7b_f16qi4_first_input_cpu,
+    llama2_7b_f16qi4_first_output_cpu,
+    llama2_7b_f16qi4_second_input_cpu,
+    llama2_7b_f16qi4_second_output_cpu,
+):
+    iree_run_module(
+        llama2_7b_f16qi4_host_cpu_vmfb,
+        device="local-task",
+        function="first_vicuna_forward",
+        args=[
+            f"--input=@{llama2_7b_f16qi4_first_input_cpu.path}",
+            f"--expected_output=@{llama2_7b_f16qi4_first_output_cpu.path}",
+        ],
+    )
+    iree_run_module(
+        llama2_7b_f16qi4_host_cpu_vmfb,
+        device="local-task",
+        function="second_vicuna_forward",
+        args=[
+            f"--input=@{llama2_7b_f16qi4_second_input_cpu.path}",
+            f"--expected_output=@{llama2_7b_f16qi4_second_output_cpu.path}",
+        ],
+    )
+
+
 llama2_7b_f16qi4_first_input_cuda = fetch_source_fixture(
     "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cuda/first_vicuna_forward_input.npy",
     group="llama2_7b_f16qi4_first_input_cuda",