Merge branch 'main' into llm-server-int-test

stbaione authored Oct 30, 2024
2 parents 436ab39 + d3b0681 commit 2965cc3
Showing 5 changed files with 148 additions and 37 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/ci-llama.yaml
@@ -76,7 +76,13 @@ jobs:
"numpy<2.0"
- name: Run llama test
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --longrun --iree-hip-target=gfx942
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --longrun --iree-hip-target=gfx942 --html=out/index.html

- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
publish_dir: ./out

      - name: Upload llama executable files
        uses: actions/upload-artifact@v4
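The new --html=out/index.html flag makes pytest-html write the benchmark report that the peaceiris/actions-gh-pages step then publishes. A minimal local sketch of the same flow, assuming pytest and pytest-html are installed and the command is run from the repository root:

# Hedged sketch: reproduce the CI's report-generation step locally.
# Flags and paths are taken from the workflow above; pytest and
# pytest-html are assumed to be installed.
import subprocess
from pathlib import Path

result = subprocess.run(
    [
        "pytest",
        "sharktank/tests/models/llama/benchmark_amdgpu_test.py",
        "-v", "-s", "--longrun",
        "--iree-hip-target=gfx942",
        "--html=out/index.html",
    ]
)
# out/index.html is the file the GitHub Pages step publishes
print("report written:", Path("out/index.html").exists(), "exit:", result.returncode)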
21 changes: 21 additions & 0 deletions sharktank/conftest.py
@@ -256,3 +256,24 @@ def get_iree_flags(request: FixtureRequest):
    model_path["iree_hal_target_backends"] = set_fixture_from_cli_option(
        request, "--iree-hal-target-backends", "iree_hal_target_backends"
    )


# The following three hooks add an "XFail Reason" column to the HTML report for each test
def pytest_html_results_table_header(cells):
    cells.insert(2, "<th>XFail Reason</th>")


def pytest_html_results_table_row(report, cells):
    if hasattr(report, "wasxfail"):
        cells.insert(2, f"<td>{report.wasxfail}</td>")
    else:
        cells.insert(2, "<td></td>")


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    outcome = yield
    report = outcome.get_result()

    if report.when == "call" and hasattr(item, "wasxfail"):
        report.wasxfail = item.wasxfail
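For illustration, a hypothetical test file (the name test_xfail_column.py is assumed) showing how these hooks populate the new column when a report is generated with pytest --html=report.html:

# Hypothetical test_xfail_column.py, purely to illustrate the hooks above.
# Run: pytest test_xfail_column.py --html=report.html --self-contained-html
import pytest


@pytest.mark.xfail(reason="known bad: compile error", strict=True)
def test_expected_to_fail():
    raise RuntimeError("simulated failure")  # this row's XFail Reason cell shows the reason


def test_passes():
    assert True  # ordinary row; its XFail Reason cell stays empty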
1 change: 1 addition & 0 deletions sharktank/requirements-tests.txt
@@ -1,3 +1,4 @@
datasets==3.0.0
parameterized
pytest==8.0.0
+pytest-html
104 changes: 80 additions & 24 deletions sharktank/sharktank/utils/export_artifacts.py
@@ -24,6 +24,63 @@
)


class ExportMlirException(Exception):
    """SHARK-Platform export MLIR exception that preserves the command line and error output."""

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        try:
            errs = process.stderr.decode("utf-8")
        except Exception:
            errs = str(process.stderr)
        super().__init__(
            f"Error invoking export_paged_llm_v1.py\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n\n"
            f"Invoked with:\n"
            f"  cd {cwd} && {process.args}\n\n"
        )


class IreeCompileException(Exception):
    """Compiler exception that preserves the command line and error output."""

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        try:
            errs = process.stderr.decode("utf-8")
        except Exception:
            errs = str(process.stderr)
        super().__init__(
            f"Error invoking iree-compile\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n\n"
            f"Invoked with:\n"
            f"  cd {cwd} && {process.args}\n\n"
        )


class IreeBenchmarkException(Exception):
    """Runtime exception that preserves the command line and error output."""

    def __init__(self, process: subprocess.CompletedProcess, cwd: str):
        # iree-benchmark-module sends output to both stdout and stderr
        try:
            errs = process.stderr.decode("utf-8")
        except Exception:
            errs = str(process.stderr)
        try:
            outs = process.stdout.decode("utf-8")
        except Exception:
            outs = str(process.stdout)
        super().__init__(
            f"Error invoking iree-benchmark-module\n"
            f"Error code: {process.returncode}\n"
            f"Stderr diagnostics:\n{errs}\n"
            f"Stdout diagnostics:\n{outs}\n"
            f"Run with:\n"
            f"  cd {cwd} && {process.args}\n\n"
        )
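A minimal sketch of how these exception classes read when raised; the CompletedProcess below is built by hand purely for illustration, standing in for a failed iree-compile run:

# Illustration only: a hand-built CompletedProcess standing in for a real
# iree-compile invocation that exited non-zero.
import subprocess

proc = subprocess.CompletedProcess(
    args="iree-compile model.mlir --iree-hal-target-backends=rocm -o model.vmfb",
    returncode=1,
    stdout=b"",
    stderr=b"error: 'model.mlir' failed to legalize",
)
if proc.returncode != 0:
    # The exception message carries the exit code, stderr, and a
    # copy-pasteable "cd <cwd> && <command>" reproduction line.
    raise IreeCompileException(proc, cwd="/tmp/artifacts")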


class ExportArtifacts:
    def __init__(
        self,
@@ -127,37 +184,27 @@ def export_to_mlir(

        proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
        if proc.returncode != 0:
-            logger.error(
-                f"Error exporting mlir with export_paged_llm_v1.py\n"
-                f"{proc.stdout+proc.stderr}"
-            )
+            raise ExportMlirException(proc, cwd)
        else:
            logger.info(f"Exported to mlir successfully:\n" f"{proc.stdout}")

        return proc.returncode

    @timeit
-    def compile_to_vmfb(
-        self,
-        *,
-        mlir_path,
-        vmfb_path,
-        hal_dump_path,
-    ):
-        # TODO: Control flag to enable multiple backends
+    def compile_to_vmfb(self, *, mlir_path, vmfb_path, hal_dump_path, cwd):
        compile_flags = ["--iree-hip-target=" + self.iree_hip_target]
        compile_flags += ["--iree-hal-target-backends=rocm"]
        compile_flags += [f"--iree-hal-dump-executable-files-to={hal_dump_path}/files"]
-        try:
-            ireec.compile_file(
-                input_file=mlir_path,
-                target_backends=[self.iree_hal_target_backends],
-                extra_args=compile_flags,
-                output_file=vmfb_path,
-            )
-        except Exception as error:
-            logger.error(f"Error running iree-compile:\n" f"{error}")
-        else:
-            logger.info(f"Compiled to vmfb successfully:\n" f"{vmfb_path}")
+        cmd = self.get_compile_cmd(
+            output_mlir_path=mlir_path,
+            output_vmfb_path=vmfb_path,
+            args=compile_flags,
+        )
+        logging.getLogger().info(f"Launching compile command:\n" f"cd {cwd} && {cmd}")
+        proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
+        return_code = proc.returncode
+        if return_code != 0:
+            raise IreeCompileException(proc, cwd)

    def iree_benchmark_vmfb(
        self,
@@ -193,13 +240,22 @@ def iree_benchmark_vmfb(
        proc = subprocess.run(cmd, shell=True, stdout=sys.stdout, cwd=cwd)
        return_code = proc.returncode
        if return_code != 0:
-            raise RuntimeError(f"Error running benchmark {cmd} in cwd {cwd}")
+            raise IreeBenchmarkException(proc, cwd)

    def create_file(self, *, suffix, prefix):
        file_path = Path(prefix).with_suffix(suffix)
        # Create (or truncate) the file without leaking an open handle
        with open(file_path, "w"):
            pass
        return file_path

    # Note: List[str] assumes `from typing import List` at module top; the
    # original annotation `[str]` was a list literal, not a valid type.
    def get_compile_cmd(
        self, *, output_mlir_path: str, output_vmfb_path: str, args: List[str]
    ):
        compile_args = ["iree-compile", output_mlir_path]
        compile_args += args
        compile_args += ["-o", output_vmfb_path]
        cmd = subprocess.list2cmdline(compile_args)
        return cmd
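For reference, a hedged usage sketch of get_compile_cmd; artifacts is an assumed ExportArtifacts instance and the paths are placeholders:

# Placeholder paths; artifacts is an assumed ExportArtifacts instance.
cmd = artifacts.get_compile_cmd(
    output_mlir_path="model.mlir",
    output_vmfb_path="model.vmfb",
    args=[
        "--iree-hip-target=gfx942",
        "--iree-hal-target-backends=rocm",
    ],
)
# cmd == "iree-compile model.mlir --iree-hip-target=gfx942
#         --iree-hal-target-backends=rocm -o model.vmfb"  (one line)
print(cmd)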

    def get_artifacts(self):

        self.dir_path = self.sharktank_dir + "/" + "tmp_perplexity_ci_artifacts/"
51 changes: 39 additions & 12 deletions sharktank/tests/models/llama/benchmark_amdgpu_test.py
@@ -13,7 +13,12 @@
import subprocess
from pathlib import Path
from typing import List
-from sharktank.utils.export_artifacts import ExportArtifacts
+from sharktank.utils.export_artifacts import (
+    ExportArtifacts,
+    ExportMlirException,
+    IreeBenchmarkException,
+    IreeCompileException,
+)

longrun = pytest.mark.skipif("not config.getoption('longrun')")
is_mi300x = pytest.mark.skipif("config.getoption('iree_hip_target') != 'gfx942'")
@@ -136,6 +141,7 @@ def testBenchmark8B_f16_Decomposed(self):
            mlir_path=str(output_mlir),
            vmfb_path=output_vmfb,
            hal_dump_path=output_file_name,
+            cwd=self.repo_root,
        )
        # benchmark prefill
        self.llama8b_f16_artifacts.iree_benchmark_vmfb(
@@ -156,7 +162,7 @@ def testBenchmark8B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
def testBenchmark8B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_8b / "f16_torch"
output_mlir = self.llama8b_f16_artifacts.create_file(
@@ -187,6 +193,7 @@ def testBenchmark8B_f16_Non_Decomposed(self):
            mlir_path=str(output_mlir),
            vmfb_path=output_vmfb,
            hal_dump_path=output_file_name,
+            cwd=self.repo_root,
        )
        # benchmark prefill
        self.llama8b_f16_artifacts.iree_benchmark_vmfb(
@@ -207,7 +214,7 @@ def testBenchmark8B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="8B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark8B_fp8_Decomposed(self):
output_file_name = self.dir_path_8b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -249,7 +258,9 @@ def testBenchmark8B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark8B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_8b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -346,7 +357,9 @@ def setUp(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="70b f16 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_f16_Decomposed(self):
output_file_name = self.dir_path_70b / "f16_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -388,7 +401,9 @@ def testBenchmark70B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_70b / "f16_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -430,7 +445,9 @@ def testBenchmark70B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="70B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_fp8_Decomposed(self):
output_file_name = self.dir_path_70b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -472,7 +489,9 @@ def testBenchmark70B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark70B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_70b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -569,7 +588,9 @@ def setUp(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="405B f16 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_f16_Decomposed(self):
output_file_name = self.dir_path_405b / "f16_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -611,7 +632,9 @@ def testBenchmark405B_f16_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_405b / "f16_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -653,7 +676,9 @@ def testBenchmark405B_f16_Non_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="405B fp8 irpa path not stored yet", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_fp8_Decomposed(self):
output_file_name = self.dir_path_405b / "fp8_decomposed"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
@@ -695,7 +720,9 @@ def testBenchmark405B_fp8_Decomposed(self):

@longrun
@is_mi300x
@pytest.mark.xfail(reason="torch_sdpa not yet plumbed through", strict=True)
@pytest.mark.xfail(
reason="Test not yet implemented", strict=True, raises=AttributeError
)
def testBenchmark405B_fp8_Non_Decomposed(self):
output_file_name = self.dir_path_405b / "fp8_torch_sdpa"
output_mlir = self.create_file(suffix=".mlir", prefix=output_file_name)
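A note on the xfail markers used throughout these tests: with strict=True an unexpected pass is reported as a failure, and with raises= only the named exception type counts as the expected failure. A standalone sketch of that contract:

# Standalone illustration of the strict xfail contract used in these tests.
import pytest


@pytest.mark.xfail(reason="illustration", strict=True, raises=ValueError)
def test_expected_failure():
    raise ValueError("reported as XFAIL: the declared exception type was raised")


@pytest.mark.xfail(reason="illustration", strict=True, raises=ValueError)
def test_wrong_exception():
    raise KeyError("reported as FAILED: not the declared exception type")


@pytest.mark.xfail(reason="illustration", strict=True, raises=ValueError)
def test_unexpected_pass():
    pass  # reported as FAILED: strict=True forbids unexpected passes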
