Skip to content

Commit

Permalink
Add reporting for llama testing and improve error handling (#322)
Browse files Browse the repository at this point in the history
This commit generates reports for the llama testing. Every time the tests run
in CI, the report is deployed to GitHub Pages
(https://nod-ai.github.io/SHARK-Platform). @aviator19941 is working on
getting the rest of the tests properly implemented. Only the llama8b
fp16 tests are working as intended at the moment.
  • Loading branch information
saienduri authored Oct 30, 2024
1 parent 66a4043 commit d3b0681
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 37 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/ci-llama.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,13 @@ jobs:
"numpy<2.0"
- name: Run llama test
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --longrun --iree-hip-target=gfx942
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --longrun --iree-hip-target=gfx942 --html=out/index.html

- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
publish_dir: ./out

- name: Upload llama executable files
uses: actions/upload-artifact@v4
Expand Down
21 changes: 21 additions & 0 deletions sharktank/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,3 +256,24 @@ def get_iree_flags(request: FixtureRequest):
model_path["iree_hal_target_backends"] = set_fixture_from_cli_option(
request, "--iree-hal-target-backends", "iree_hal_target_backends"
)


# The following three functions allow us to add an "XFail Reason" column to the HTML reports for each test
def pytest_html_results_table_header(cells):
    """Insert the "XFail Reason" header cell at index 2 of the header row.

    Args:
        cells: Mutable list of header cells; it is modified in place.
    """
    xfail_header = "<th>XFail Reason</th>"
    cells.insert(2, xfail_header)


def pytest_html_results_table_row(report, cells):
    """Insert the "XFail Reason" cell at index 2 of a results-table row.

    Reports carrying a ``wasxfail`` attribute get a cell with that reason;
    every other report gets an empty cell so the columns stay aligned with
    the header.

    Args:
        report: Test report object; only its optional ``wasxfail``
            attribute is read.
        cells: Mutable list of row cells; it is modified in place.
    """
    # Local import keeps this hook self-contained in conftest.py.
    import html

    if hasattr(report, "wasxfail"):
        # The reason is free-form text; escape it so characters such as
        # "<" or "&" cannot break the generated table markup.
        cell = f"<td>{html.escape(str(report.wasxfail))}</td>"
    else:
        cell = "<td></td>"
    cells.insert(2, cell)


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Copy ``item.wasxfail`` onto the call-phase report when the item has one.

    Runs as a hook wrapper: the report is taken from the wrapped hook's
    result after it has been produced.
    """
    hook_result = yield
    test_report = hook_result.get_result()

    is_call_phase = test_report.when == "call"
    if is_call_phase and hasattr(item, "wasxfail"):
        test_report.wasxfail = item.wasxfail
1 change: 1 addition & 0 deletions sharktank/requirements-tests.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
datasets==3.0.0
parameterized
pytest==8.0.0
pytest-html
104 changes: 80 additions & 24 deletions sharktank/sharktank/utils/export_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,63 @@
)


class ExportMlirException(Exception):
    """SHARK-Platform export MLIR exception that preserves the command line and error output."""

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        """Build the error message from a finished export subprocess.

        Args:
            process: Completed process; its ``returncode``, ``stderr`` and
                ``args`` are embedded in the message.
            cwd: Working directory the command was launched from.
        """
        stderr = process.stderr
        if isinstance(stderr, (bytes, bytearray)):
            # errors="replace" keeps the diagnostics readable even if the
            # output contains bytes that are not valid UTF-8.
            errs = stderr.decode("utf-8", errors="replace")
        else:
            # Not bytes: stderr was not captured (None) or is already text.
            errs = str(stderr)
        super().__init__(
            f"Error invoking export_paged_llm_v1.py\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n\n"
            f"Invoked with:\n"
            f" cd {cwd} && {process.args}\n\n"
        )


class IreeCompileException(Exception):
    """Compiler exception that preserves the command line and error output."""

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        """Build the error message from a finished iree-compile subprocess.

        Args:
            process: Completed process; its ``returncode``, ``stderr`` and
                ``args`` are embedded in the message.
            cwd: Working directory the command was launched from.
        """
        stderr = process.stderr
        if isinstance(stderr, (bytes, bytearray)):
            # errors="replace" keeps the diagnostics readable even if the
            # output contains bytes that are not valid UTF-8.
            errs = stderr.decode("utf-8", errors="replace")
        else:
            # Not bytes: stderr was not captured (None) or is already text.
            errs = str(stderr)
        super().__init__(
            f"Error invoking iree-compile\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n\n"
            f"Invoked with:\n"
            f" cd {cwd} && {process.args}\n\n"
        )


class IreeBenchmarkException(Exception):
    """Runtime exception that preserves the command line and error output."""

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        """Build the error message from a finished benchmark subprocess.

        Args:
            process: Completed process; its ``returncode``, ``stderr``,
                ``stdout`` and ``args`` are embedded in the message.
            cwd: Working directory the command was launched from.
        """

        def _as_text(stream):
            """Render a captured stream (bytes, str or None) as text."""
            if isinstance(stream, (bytes, bytearray)):
                # errors="replace" keeps the diagnostics readable even if the
                # output contains bytes that are not valid UTF-8.
                return stream.decode("utf-8", errors="replace")
            # Not bytes: the stream was not captured (None) or is already text.
            return str(stream)

        # Both streams are reported because the tool may write diagnostics
        # to stdout as well as stderr.
        errs = _as_text(process.stderr)
        outs = _as_text(process.stdout)
        super().__init__(
            f"Error invoking iree-benchmark-module\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n"
            f"Stdout diagnostics:\n{outs}\n"
            f"Run with:\n"
            f" cd {cwd} && {process.args}\n\n"
        )


class ExportArtifacts:
def __init__(
self,
Expand Down Expand Up @@ -127,37 +184,27 @@ def export_to_mlir(

proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
if proc.returncode != 0:
logger.error(
f"Error exporting mlir with export_paged_llm_v1.py\n"
f"{proc.stdout+proc.stderr}"
)
raise ExportMlirException(proc, cwd)
else:
logger.info(f"Exported to mlir successfully:\n" f"{proc.stdout}")

return proc.returncode

@timeit
def compile_to_vmfb(
self,
*,
mlir_path,
vmfb_path,
hal_dump_path,
):
# TODO: Control flag to enable multiple backends
def compile_to_vmfb(self, *, mlir_path, vmfb_path, hal_dump_path, cwd):
compile_flags = ["--iree-hip-target=" + self.iree_hip_target]
compile_flags += ["--iree-hal-target-backends=rocm"]
compile_flags += [f"--iree-hal-dump-executable-files-to={hal_dump_path}/files"]
try:
ireec.compile_file(
input_file=mlir_path,
target_backends=[self.iree_hal_target_backends],
extra_args=compile_flags,
output_file=vmfb_path,
)
except Exception as error:
logger.error(f"Error running iree-compile:\n" f"{error}")
else:
logger.info(f"Compiled to vmfb successfully:\n" f"{vmfb_path}")
cmd = self.get_compile_cmd(
output_mlir_path=mlir_path,
output_vmfb_path=vmfb_path,
args=compile_flags,
)
logging.getLogger().info(f"Launching compile command:\n" f"cd {cwd} && {cmd}")
proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
return_code = proc.returncode
if return_code != 0:
raise IreeCompileException(proc, cwd)

def iree_benchmark_vmfb(
self,
Expand Down Expand Up @@ -193,13 +240,22 @@ def iree_benchmark_vmfb(
proc = subprocess.run(cmd, shell=True, stdout=sys.stdout, cwd=cwd)
return_code = proc.returncode
if return_code != 0:
raise RuntimeError(f"Error running benchmark {cmd} in cwd {cwd}")
raise IreeBenchmarkException(proc, cwd)

def create_file(self, *, suffix, prefix):
    """Create (or truncate) an empty file and return its path.

    Args:
        suffix: File extension to apply, including the leading dot
            (e.g. ``".mlir"``); it replaces any suffix ``prefix`` already has.
        prefix: Path of the file without the desired suffix.

    Returns:
        The ``pathlib.Path`` of the created file.
    """
    file_path = Path(prefix).with_suffix(suffix)
    # Opening in "w" mode creates the file or empties an existing one; the
    # context manager closes the handle right away instead of leaking it.
    with open(file_path, "w"):
        pass
    return file_path

def get_compile_cmd(
    self, *, output_mlir_path: str, output_vmfb_path: str, args: [str]
):
    """Build the ``iree-compile`` command line as a single shell string.

    Args:
        output_mlir_path: Path of the MLIR file passed to iree-compile as input.
        output_vmfb_path: Path given to ``-o`` for the compiled module.
        args: Extra command-line flags placed between the input and ``-o``.

    Returns:
        The command as one string, quoted for the platform's shell so it can
        be run with ``shell=True``.
    """
    # Local imports keep this method self-contained.
    import os
    import shlex

    compile_args = ["iree-compile", output_mlir_path, *args, "-o", output_vmfb_path]
    # os.fsdecode keeps accepting path-like arguments, which
    # subprocess.list2cmdline also tolerated.
    argv = [os.fsdecode(arg) for arg in compile_args]
    if os.name == "nt":
        # list2cmdline implements the Windows quoting rules.
        return subprocess.list2cmdline(argv)
    # On POSIX the string is interpreted by /bin/sh. list2cmdline only quotes
    # whitespace, so flags containing "(", ")", ";", "$" or "*" would be
    # mangled by the shell; shlex.join quotes every argument that needs it.
    return shlex.join(argv)

def get_artifacts(self):

self.dir_path = self.sharktank_dir + "/" + "tmp_perplexity_ci_artifacts/"
Expand Down
51 changes: 39 additions & 12 deletions sharktank/tests/models/llama/benchmark_amdgpu_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
import subprocess
from pathlib import Path
from typing import List
from sharktank.utils.export_artifacts import ExportArtifacts
from sharktank.utils.export_artifacts import (
ExportArtifacts,
ExportMlirException,
IreeBenchmarkException,
IreeCompileException,
)

longrun = pytest.mark.skipif("not config.getoption('longrun')")
is_mi300x = pytest.mark.skipif("config.getoption('iree_hip_target') != 'gfx942'")
Expand Down Expand Up @@ -136,6 +141,7 @@ def testBenchmark8B_f16_Decomposed(self):
mlir_path=str(output_mlir),
vmfb_path=output_vmfb,
hal_dump_path=output_file_name,
cwd=self.repo_root,
)
# benchmark prefill
self.llama8b_f16_artifacts.iree_benchmark_vmfb(
Expand All @@ -156,7 +162,7 @@ def testBenchmark8B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
def testBenchmark8B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_8b / "f16_torch"
output_mlir = self.llama8b_f16_artifacts.create_file(
Expand Down Expand Up @@ -187,6 +193,7 @@ def testBenchmark8B_f16_Non_Decomposed(self):
mlir_path=str(output_mlir),
vmfb_path=output_vmfb,
hal_dump_path=output_file_name,
cwd=self.repo_root,
)
# benchmark prefill
self.llama8b_f16_artifacts.iree_benchmark_vmfb(
Expand All @@ -207,7 +214,9 @@ def testBenchmark8B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="8B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark8B_fp8_Decomposed(self):
output_file_name = self.dir_path_8b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -249,7 +258,9 @@ def testBenchmark8B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark8B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_8b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -346,7 +357,9 @@ def setUp(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="70b f16 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_f16_Decomposed(self):
output_file_name = self.dir_path_70b / "f16_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -388,7 +401,9 @@ def testBenchmark70B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_70b / "f16_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -430,7 +445,9 @@ def testBenchmark70B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="70B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_fp8_Decomposed(self):
output_file_name = self.dir_path_70b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -472,7 +489,9 @@ def testBenchmark70B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_70b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -569,7 +588,9 @@ def setUp(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="405B f16 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_f16_Decomposed(self):
output_file_name = self.dir_path_405b / "f16_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -611,7 +632,9 @@ def testBenchmark405B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_405b / "f16_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -653,7 +676,9 @@ def testBenchmark405B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="405B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_fp8_Decomposed(self):
output_file_name = self.dir_path_405b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -695,7 +720,9 @@ def testBenchmark405B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_405b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down

0 comments on commit d3b0681

Please sign in to comment.