Skip to content

Commit

Permalink
Remove device "gpu number" specifications from model benchmarks. (iree-org#18315)
Browse files Browse the repository at this point in the history

We'll be configuring which GPU is visible at the runner level, not the
test source code level. This way, we'll be able to have multiple runners
on the same physical machine (with multiple GPUs).

(As discussed previously here:
iree-org#17842 (comment))

ci-exactly: build_packages, regression_test
  • Loading branch information
ScottTodd authored Aug 21, 2024
1 parent 8dd1db3 commit 4c8913b
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 27 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/pkgci_regression_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,6 @@ jobs:
--goldensize-rocm-unet-bytes 2280000 \
--goldensize-rocm-clip-bytes 860000 \
--goldensize-rocm-vae-bytes 840000 \
--gpu-number 6 \
--rocm-chip gfx90a \
--log-cli-level=info \
--retries 7
Expand All @@ -364,7 +363,6 @@ jobs:
--goldensize-rocm-unet-bytes 2270000 \
--goldensize-rocm-clip-bytes 860000 \
--goldensize-rocm-vae-bytes 840000 \
--gpu-number 0 \
--rocm-chip gfx942 \
--log-cli-level=info \
--retries 7
Expand Down
25 changes: 12 additions & 13 deletions experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def run_iree_command(args: Sequence[str] = ()):
return 1, proc.stdout


def run_sdxl_rocm_benchmark(rocm_chip, gpu_number):
def run_sdxl_rocm_benchmark(rocm_chip):
exec_args = [
"iree-compile",
f"{benchmark_dir}/sdxl_pipeline_bench_f16.mlir",
Expand All @@ -67,7 +67,7 @@ def run_sdxl_rocm_benchmark(rocm_chip, gpu_number):
return 1, stdout
exec_args = [
"iree-benchmark-module",
f"--device=hip://{gpu_number}",
f"--device=hip",
"--device_allocator=caching",
f"--module={prompt_encoder_dir}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={prompt_encoder_dir}/real_weights.irpa",
Expand All @@ -90,10 +90,10 @@ def run_sdxl_rocm_benchmark(rocm_chip, gpu_number):
return run_iree_command(exec_args)


def run_sdxl_unet_rocm_benchmark(gpu_number, rocm_chip):
def run_sdxl_unet_rocm_benchmark(rocm_chip):
exec_args = [
"iree-benchmark-module",
f"--device=hip://{gpu_number}",
f"--device=hip",
"--device_allocator=caching",
f"--module={scheduled_unet_dir}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={scheduled_unet_dir}/real_weights.irpa",
Expand All @@ -111,10 +111,10 @@ def run_sdxl_unet_rocm_benchmark(gpu_number, rocm_chip):
return run_iree_command(exec_args)


def run_sdxl_prompt_encoder_rocm_benchmark(gpu_number, rocm_chip):
def run_sdxl_prompt_encoder_rocm_benchmark(rocm_chip):
exec_args = [
"iree-benchmark-module",
f"--device=hip://{gpu_number}",
f"--device=hip",
"--device_allocator=caching",
f"--module={prompt_encoder_dir}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={prompt_encoder_dir}/real_weights.irpa",
Expand All @@ -130,10 +130,10 @@ def run_sdxl_prompt_encoder_rocm_benchmark(gpu_number, rocm_chip):
return run_iree_command(exec_args)


def run_sdxl_vae_decode_rocm_benchmark(gpu_number, rocm_chip):
def run_sdxl_vae_decode_rocm_benchmark(rocm_chip):
exec_args = [
"iree-benchmark-module",
f"--device=hip://{gpu_number}",
f"--device=hip",
"--device_allocator=caching",
f"--module={vae_decode_dir}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={vae_decode_dir}/real_weights.irpa",
Expand Down Expand Up @@ -194,7 +194,6 @@ def test_sdxl_rocm_benchmark(
goldentime_rocm_unet,
goldentime_rocm_clip,
goldentime_rocm_vae,
gpu_number,
rocm_chip,
goldendispatch_rocm_unet,
goldendispatch_rocm_clip,
Expand All @@ -204,7 +203,7 @@ def test_sdxl_rocm_benchmark(
goldensize_rocm_vae,
):
# e2e benchmark
ret_value, output = run_sdxl_rocm_benchmark(rocm_chip, gpu_number)
ret_value, output = run_sdxl_rocm_benchmark(rocm_chip)
benchmark_e2e_mean_time = job_summary_process(ret_value, output)
mean_line = (
f"E2E Benchmark Time: {str(benchmark_e2e_mean_time)} ms"
Expand All @@ -213,7 +212,7 @@ def test_sdxl_rocm_benchmark(
logging.getLogger().info(mean_line)

# unet benchmark
ret_value, output = run_sdxl_unet_rocm_benchmark(gpu_number, rocm_chip)
ret_value, output = run_sdxl_unet_rocm_benchmark(rocm_chip)
benchmark_unet_mean_time = job_summary_process(ret_value, output)
mean_line = (
f"Scheduled Unet Benchmark Time: {str(benchmark_unet_mean_time)} ms"
Expand Down Expand Up @@ -242,7 +241,7 @@ def test_sdxl_rocm_benchmark(
logging.getLogger().info(compilation_line)

# prompt encoder benchmark
ret_value, output = run_sdxl_prompt_encoder_rocm_benchmark(gpu_number, rocm_chip)
ret_value, output = run_sdxl_prompt_encoder_rocm_benchmark(rocm_chip)
benchmark_clip_mean_time = job_summary_process(ret_value, output)
mean_line = (
f"Prompt Encoder Benchmark Time: {str(benchmark_clip_mean_time)} ms"
Expand Down Expand Up @@ -271,7 +270,7 @@ def test_sdxl_rocm_benchmark(
logging.getLogger().info(compilation_line)

# vae decode benchmark
ret_value, output = run_sdxl_vae_decode_rocm_benchmark(gpu_number, rocm_chip)
ret_value, output = run_sdxl_vae_decode_rocm_benchmark(rocm_chip)
benchmark_vae_mean_time = job_summary_process(ret_value, output)
mean_line = (
f"VAE Decode Benchmark Time: {str(benchmark_vae_mean_time)} ms"
Expand Down
12 changes: 0 additions & 12 deletions experimental/benchmarks/sdxl/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,6 @@ def pytest_addoption(parser):
type=int,
help="Golden vmfb size to test benchmark",
)
parser.addoption(
"--gpu-number",
action="store",
default=0,
type=int,
help="IREE GPU device number to test on",
)
parser.addoption(
"--rocm-chip",
action="store",
Expand Down Expand Up @@ -137,8 +130,3 @@ def goldensize_rocm_vae(request):
@pytest.fixture
def rocm_chip(request):
return request.config.getoption("--rocm-chip")


@pytest.fixture
def gpu_number(request):
return request.config.getoption("--gpu-number")

0 comments on commit 4c8913b

Please sign in to comment.