[PkgCI] Add recipe for correctness on CPU (iree-org#15131)
This patch adds a recipe for checking the correctness of llama_7b_i4 on CPU.
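The recipe compiles the model for the host CPU and replays two recorded forward calls, handing golden outputs to iree-run-module via --expected_output so the tool itself performs the comparison. At the numpy level the check amounts to something like the sketch below (illustrative only; the real comparison and its tolerances live inside iree-run-module, and the atol here is an assumed placeholder):

import numpy as np

def matches_golden(actual: np.ndarray, golden_path: str, atol: float = 1e-3) -> bool:
    # Load the pregenerated golden tensor and compare shape and values.
    golden = np.load(golden_path)
    return actual.shape == golden.shape and bool(np.allclose(actual, golden, atol=atol))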
Groverkss authored Oct 12, 2023
1 parent 9c424c4 commit dd26475
Showing 1 changed file with 70 additions and 1 deletion.

experimental/regression_suite/tests/pregenerated/test_llama2.py (+70 −1)
@@ -55,6 +55,19 @@ def llama2_7b_f16qi4_stripped_host_cpu_vmfb(llama2_7b_f16qi4_stripped_source):
    )


@pytest.fixture
def llama2_7b_f16qi4_host_cpu_vmfb(llama2_7b_f16qi4_source):
    return iree_compile(
        llama2_7b_f16qi4_source,
        "host_cpu",
        flags=COMMON_FLAGS
        + [
            "--iree-hal-target-backends=llvm-cpu",
            "--iree-llvmcpu-target-cpu-features=host",
        ],
    )
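# Compiling with --iree-llvmcpu-target-cpu-features=host pins codegen to the
# ISA of the machine running the test, so the resulting .vmfb is host-specific
# rather than portable across CI workers.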


@pytest.fixture
def llama2_7b_f16qi4_a100_vulkan_vmfb(llama2_7b_f16qi4_stripped_source):
    return iree_compile(
@@ -95,7 +108,7 @@ def llama2_7b_f16qi4_sm80_cuda_vmfb(llama2_7b_f16qi4_source):


###############################################################################
-# Tests
+# Performance
###############################################################################


@@ -191,6 +204,62 @@ def test_step_a100_vulkan_stripped(llama2_7b_f16qi4_a100_vulkan_vmfb):
    )


###############################################################################
# Correctness
###############################################################################


llama2_7b_f16qi4_first_input_cpu = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/first_vicuna_forward_input.npy",
    group="llama2_7b_f16qi4_first_input_cpu",
)

llama2_7b_f16qi4_first_output_cpu = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/first_vicuna_forward_output.npy",
    group="llama2_7b_f16qi4_first_output_cpu",
)

llama2_7b_f16qi4_second_input_cpu = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/second_vicuna_forward_input.npy",
    group="llama2_7b_f16qi4_second_input_cpu",
)

llama2_7b_f16qi4_second_output_cpu = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cpu/second_vicuna_forward_output.npy",
    group="llama2_7b_f16qi4_second_output_cpu",
)
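# The four fixtures above pin golden inputs and outputs for the module's two
# entry points (first_vicuna_forward and second_vicuna_forward); the test
# below replays each against the compiled module and lets iree-run-module
# compare results.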


@pytest.mark.postsubmit
@pytest.mark.unstable_linalg
@pytest.mark.plat_host_cpu
def test_correctness_host_cpu(
    llama2_7b_f16qi4_host_cpu_vmfb,
    llama2_7b_f16qi4_first_input_cpu,
    llama2_7b_f16qi4_first_output_cpu,
    llama2_7b_f16qi4_second_input_cpu,
    llama2_7b_f16qi4_second_output_cpu,
):
    iree_run_module(
        llama2_7b_f16qi4_host_cpu_vmfb,
        device="local-task",
        function="first_vicuna_forward",
        args=[
            f"--input=@{llama2_7b_f16qi4_first_input_cpu.path}",
            f"--expected_output=@{llama2_7b_f16qi4_first_output_cpu.path}",
        ],
    )
    iree_run_module(
        llama2_7b_f16qi4_host_cpu_vmfb,
        device="local-task",
        function="second_vicuna_forward",
        args=[
            f"--input=@{llama2_7b_f16qi4_second_input_cpu.path}",
            f"--expected_output=@{llama2_7b_f16qi4_second_output_cpu.path}",
        ],
    )


llama2_7b_f16qi4_first_input_cuda = fetch_source_fixture(
    "https://storage.googleapis.com/shark_tank/llama_regression/llama2-7b-i4-golden-outputs/cuda/first_vicuna_forward_input.npy",
    group="llama2_7b_f16qi4_first_input_cuda",
…

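To reproduce the new check locally, one option is to drive pytest by the markers the test declares, along the lines of the sketch below (assuming the markers are registered in the suite's pytest configuration and the fetched artifacts are reachable):

import pytest

# Select only the new host-CPU correctness test from the regression suite.
pytest.main([
    "experimental/regression_suite/tests/pregenerated/test_llama2.py",
    "-m", "postsubmit and plat_host_cpu",
    "-k", "test_correctness_host_cpu",
])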