Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add reporting for llama testing and improve error handling #322

Merged
merged 25 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/ci-llama.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,13 @@ jobs:
"numpy<2.0"

- name: Run llama test
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --longrun --iree-hip-target=gfx942
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --longrun --iree-hip-target=gfx942 --html=out/index.html

- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This broke the CI for PRs from forks, see #395. Sending a PR to deactivate this until fixed.

publish_dir: ./out

- name: Upload llama executable files
uses: actions/upload-artifact@v4
Expand Down
21 changes: 21 additions & 0 deletions sharktank/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,3 +256,24 @@ def get_iree_flags(request: FixtureRequest):
model_path["iree_hal_target_backends"] = set_fixture_from_cli_option(
request, "--iree-hal-target-backends", "iree_hal_target_backends"
)


# The following three functions allow us to add a "XFail Reason" column to the html reports for each test
def pytest_html_results_table_header(cells):
    """pytest-html hook: add an "XFail Reason" column header to the report table.

    Inserted at index 2 so it lines up with the cell inserted by
    pytest_html_results_table_row below.
    """
    cells.insert(2, "<th>XFail Reason</th>")


def pytest_html_results_table_row(report, cells):
    """pytest-html hook: fill the "XFail Reason" column for one result row.

    Shows the xfail reason when the report carries one (attached by
    pytest_runtest_makereport); otherwise inserts an empty cell so the
    columns stay aligned with the header.
    """
    if hasattr(report, "wasxfail"):
        cells.insert(2, f"<td>{report.wasxfail}</td>")
    else:
        # Plain string: no placeholders, so no f-string needed (ruff F541).
        cells.insert(2, "<td></td>")


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Hook wrapper: copy an item's xfail reason onto its test report.

    pytest stores the xfail marker reason on the item; the html table hooks
    only see the report, so propagate it for the "call" phase report.
    """
    result = yield
    report = result.get_result()
    if hasattr(item, "wasxfail") and report.when == "call":
        report.wasxfail = item.wasxfail
1 change: 1 addition & 0 deletions sharktank/requirements-tests.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
datasets==3.0.0
parameterized
pytest==8.0.0
pytest-html
104 changes: 80 additions & 24 deletions sharktank/sharktank/utils/export_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,63 @@
)


class ExportMlirException(Exception):
    """SHARK-Platform export MLIR exception that preserves the command line and error output.

    Args:
        process: the failed ``subprocess.CompletedProcess`` from running
            export_paged_llm_v1.py.
        cwd: working directory the command was launched from, echoed in the
            message so the failure can be reproduced by hand.
    """

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        try:
            errs = process.stderr.decode("utf-8")
        except (AttributeError, UnicodeDecodeError):
            # stderr may be None/str (no capture) or not valid UTF-8;
            # narrow except instead of a bare one so real bugs surface.
            errs = str(process.stderr)
        super().__init__(
            # Script name matches the logger message elsewhere in this file
            # (export_paged_llm_v1.py, not "llama").
            f"Error invoking export_paged_llm_v1.py\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n\n"
            f"Invoked with:\n"
            f"  cd {cwd} && {process.args}\n\n"
        )


class IreeCompileException(Exception):
    """Compiler exception that preserves the command line and error output.

    Args:
        process: the failed ``subprocess.CompletedProcess`` from iree-compile.
        cwd: working directory the command was launched from, echoed in the
            message so the failure can be reproduced by hand.
    """

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        try:
            errs = process.stderr.decode("utf-8")
        except (AttributeError, UnicodeDecodeError):
            # stderr may be None/str (no capture) or not valid UTF-8;
            # narrow except instead of a bare one so real bugs surface.
            errs = str(process.stderr)
        super().__init__(
            f"Error invoking iree-compile\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n\n"
            f"Invoked with:\n"
            f"  cd {cwd} && {process.args}\n\n"
        )


class IreeBenchmarkException(Exception):
    """Runtime exception that preserves the command line and error output.

    Args:
        process: the failed ``subprocess.CompletedProcess`` from
            iree-benchmark-module.
        cwd: working directory the command was launched from, echoed in the
            message so the failure can be reproduced by hand.
    """

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        # iree-run-module sends output to both stdout and stderr
        try:
            errs = process.stderr.decode("utf-8")
        except (AttributeError, UnicodeDecodeError):
            # stream may be None/str (no capture) or not valid UTF-8;
            # narrow except instead of a bare one so real bugs surface.
            errs = str(process.stderr)
        try:
            outs = process.stdout.decode("utf-8")
        except (AttributeError, UnicodeDecodeError):
            outs = str(process.stdout)
        super().__init__(
            f"Error invoking iree-benchmark-module\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n"
            f"Stdout diagnostics:\n{outs}\n"
            f"Run with:\n"
            f"  cd {cwd} && {process.args}\n\n"
        )


class ExportArtifacts:
def __init__(
self,
Expand Down Expand Up @@ -127,37 +184,27 @@ def export_to_mlir(

proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
if proc.returncode != 0:
logger.error(
f"Error exporting mlir with export_paged_llm_v1.py\n"
f"{proc.stdout+proc.stderr}"
)
raise ExportMlirException(proc, cwd)
else:
logger.info(f"Exported to mlir successfully:\n" f"{proc.stdout}")

return proc.returncode

@timeit
def compile_to_vmfb(
self,
*,
mlir_path,
vmfb_path,
hal_dump_path,
):
# TODO: Control flag to enable multiple backends
def compile_to_vmfb(self, *, mlir_path, vmfb_path, hal_dump_path, cwd):
compile_flags = ["--iree-hip-target=" + self.iree_hip_target]
compile_flags += ["--iree-hal-target-backends=rocm"]
compile_flags += [f"--iree-hal-dump-executable-files-to={hal_dump_path}/files"]
try:
ireec.compile_file(
input_file=mlir_path,
target_backends=[self.iree_hal_target_backends],
extra_args=compile_flags,
output_file=vmfb_path,
)
except Exception as error:
logger.error(f"Error running iree-compile:\n" f"{error}")
else:
logger.info(f"Compiled to vmfb successfully:\n" f"{vmfb_path}")
cmd = self.get_compile_cmd(
output_mlir_path=mlir_path,
output_vmfb_path=vmfb_path,
args=compile_flags,
)
logging.getLogger().info(f"Launching compile command:\n" f"cd {cwd} && {cmd}")
proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
return_code = proc.returncode
if return_code != 0:
raise IreeCompileException(proc, cwd)

def iree_benchmark_vmfb(
self,
Expand Down Expand Up @@ -193,13 +240,22 @@ def iree_benchmark_vmfb(
proc = subprocess.run(cmd, shell=True, stdout=sys.stdout, cwd=cwd)
return_code = proc.returncode
if return_code != 0:
raise RuntimeError(f"Error running benchmark {cmd} in cwd {cwd}")
raise IreeBenchmarkException(proc, cwd)

def create_file(self, *, suffix, prefix):
    """Create (or truncate) an empty artifact file and return its path.

    Args:
        suffix: file extension including the dot, e.g. ".mlir".
        prefix: path stem the suffix is applied to.

    Returns:
        pathlib.Path of the created file.
    """
    file_path = Path(prefix).with_suffix(suffix)
    # Context manager closes the handle; the original leaked an open file.
    with open(file_path, "w"):
        pass
    return file_path

def get_compile_cmd(
    self, *, output_mlir_path: str, output_vmfb_path: str, args: List[str]
) -> str:
    """Build the iree-compile shell command line.

    Args:
        output_mlir_path: input MLIR file to compile.
        output_vmfb_path: destination .vmfb file.
        args: extra iree-compile flags (e.g. --iree-hip-target=...).

    Returns:
        A single shell-escaped command string suitable for
        ``subprocess.run(cmd, shell=True)``.
    """
    # Original annotated args as `[str]`, which is not a valid type hint;
    # use List[str] instead.
    compile_args = ["iree-compile", output_mlir_path]
    compile_args += args
    compile_args += ["-o", output_vmfb_path]
    cmd = subprocess.list2cmdline(compile_args)
    return cmd

def get_artifacts(self):

self.dir_path = self.sharktank_dir + "/" + "tmp_perplexity_ci_artifacts/"
Expand Down
51 changes: 39 additions & 12 deletions sharktank/tests/models/llama/benchmark_amdgpu_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
import subprocess
from pathlib import Path
from typing import List
from sharktank.utils.export_artifacts import ExportArtifacts
from sharktank.utils.export_artifacts import (
ExportArtifacts,
ExportMlirException,
IreeBenchmarkException,
IreeCompileException,
)

longrun = pytest.mark.skipif("not config.getoption('longrun')")
is_mi300x = pytest.mark.skipif("config.getoption('iree_hip_target') != 'gfx942'")
Expand Down Expand Up @@ -136,6 +141,7 @@ def testBenchmark8B_f16_Decomposed(self):
mlir_path=str(output_mlir),
vmfb_path=output_vmfb,
hal_dump_path=output_file_name,
cwd=self.repo_root,
)
# benchmark prefill
self.llama8b_f16_artifacts.iree_benchmark_vmfb(
Expand All @@ -156,7 +162,7 @@ def testBenchmark8B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
def testBenchmark8B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_8b / "f16_torch"
output_mlir = self.llama8b_f16_artifacts.create_file(
Expand Down Expand Up @@ -187,6 +193,7 @@ def testBenchmark8B_f16_Non_Decomposed(self):
mlir_path=str(output_mlir),
vmfb_path=output_vmfb,
hal_dump_path=output_file_name,
cwd=self.repo_root,
)
# benchmark prefill
self.llama8b_f16_artifacts.iree_benchmark_vmfb(
Expand All @@ -207,7 +214,9 @@ def testBenchmark8B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="8B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark8B_fp8_Decomposed(self):
output_file_name = self.dir_path_8b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -249,7 +258,9 @@ def testBenchmark8B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark8B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_8b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -346,7 +357,9 @@ def setUp(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="70b f16 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_f16_Decomposed(self):
output_file_name = self.dir_path_70b / "f16_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -388,7 +401,9 @@ def testBenchmark70B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_70b / "f16_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -430,7 +445,9 @@ def testBenchmark70B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="70B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_fp8_Decomposed(self):
output_file_name = self.dir_path_70b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -472,7 +489,9 @@ def testBenchmark70B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_70b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -569,7 +588,9 @@ def setUp(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="405B f16 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_f16_Decomposed(self):
output_file_name = self.dir_path_405b / "f16_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -611,7 +632,9 @@ def testBenchmark405B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_405b / "f16_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -653,7 +676,9 @@ def testBenchmark405B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="405B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_fp8_Decomposed(self):
output_file_name = self.dir_path_405b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down Expand Up @@ -695,7 +720,9 @@ def testBenchmark405B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_405b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
Expand Down
Loading