Remove device "gpu number" specifications from model benchmarks. (iree-org#18315)

We'll be configuring which GPU is visible at the runner level, not at the test source code level. This way, we'll be able to have multiple runners on the same physical machine (with multiple GPUs). (As discussed previously here: iree-org#17842 (comment))

ci-exactly: build_packages, regression_test
nod-ai · Aug 21, 2024 · 4c8913b · 4c8913b
1 parent 8dd1db3
commit 4c8913b
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 27 deletions.
diff --git a/.github/workflows/pkgci_regression_test.yml b/.github/workflows/pkgci_regression_test.yml
@@ -342,7 +342,6 @@ jobs:
             --goldensize-rocm-unet-bytes 2280000  \
             --goldensize-rocm-clip-bytes 860000 \
             --goldensize-rocm-vae-bytes 840000 \
-            --gpu-number 6 \
             --rocm-chip gfx90a \
             --log-cli-level=info \
             --retries 7
@@ -364,7 +363,6 @@ jobs:
             --goldensize-rocm-unet-bytes 2270000 \
             --goldensize-rocm-clip-bytes 860000  \
             --goldensize-rocm-vae-bytes 840000 \
-            --gpu-number 0 \
             --rocm-chip gfx942 \
             --log-cli-level=info \
             --retries 7

diff --git a/experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py b/experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py
@@ -46,7 +46,7 @@ def run_iree_command(args: Sequence[str] = ()):
     return 1, proc.stdout
 
 
-def run_sdxl_rocm_benchmark(rocm_chip, gpu_number):
+def run_sdxl_rocm_benchmark(rocm_chip):
     exec_args = [
         "iree-compile",
         f"{benchmark_dir}/sdxl_pipeline_bench_f16.mlir",
@@ -67,7 +67,7 @@ def run_sdxl_rocm_benchmark(rocm_chip, gpu_number):
         return 1, stdout
     exec_args = [
         "iree-benchmark-module",
-        f"--device=hip://{gpu_number}",
+        f"--device=hip",
         "--device_allocator=caching",
         f"--module={prompt_encoder_dir}/model.rocm_{rocm_chip}.vmfb",
         f"--parameters=model={prompt_encoder_dir}/real_weights.irpa",
@@ -90,10 +90,10 @@ def run_sdxl_rocm_benchmark(rocm_chip, gpu_number):
     return run_iree_command(exec_args)
 
 
-def run_sdxl_unet_rocm_benchmark(gpu_number, rocm_chip):
+def run_sdxl_unet_rocm_benchmark(rocm_chip):
     exec_args = [
         "iree-benchmark-module",
-        f"--device=hip://{gpu_number}",
+        f"--device=hip",
         "--device_allocator=caching",
         f"--module={scheduled_unet_dir}/model.rocm_{rocm_chip}.vmfb",
         f"--parameters=model={scheduled_unet_dir}/real_weights.irpa",
@@ -111,10 +111,10 @@ def run_sdxl_unet_rocm_benchmark(gpu_number, rocm_chip):
     return run_iree_command(exec_args)
 
 
-def run_sdxl_prompt_encoder_rocm_benchmark(gpu_number, rocm_chip):
+def run_sdxl_prompt_encoder_rocm_benchmark(rocm_chip):
     exec_args = [
         "iree-benchmark-module",
-        f"--device=hip://{gpu_number}",
+        f"--device=hip",
         "--device_allocator=caching",
         f"--module={prompt_encoder_dir}/model.rocm_{rocm_chip}.vmfb",
         f"--parameters=model={prompt_encoder_dir}/real_weights.irpa",
@@ -130,10 +130,10 @@ def run_sdxl_prompt_encoder_rocm_benchmark(gpu_number, rocm_chip):
     return run_iree_command(exec_args)
 
 
-def run_sdxl_vae_decode_rocm_benchmark(gpu_number, rocm_chip):
+def run_sdxl_vae_decode_rocm_benchmark(rocm_chip):
     exec_args = [
         "iree-benchmark-module",
-        f"--device=hip://{gpu_number}",
+        f"--device=hip",
         "--device_allocator=caching",
         f"--module={vae_decode_dir}/model.rocm_{rocm_chip}.vmfb",
         f"--parameters=model={vae_decode_dir}/real_weights.irpa",
@@ -194,7 +194,6 @@ def test_sdxl_rocm_benchmark(
     goldentime_rocm_unet,
     goldentime_rocm_clip,
     goldentime_rocm_vae,
-    gpu_number,
     rocm_chip,
     goldendispatch_rocm_unet,
     goldendispatch_rocm_clip,
@@ -204,7 +203,7 @@ def test_sdxl_rocm_benchmark(
     goldensize_rocm_vae,
 ):
     # e2e benchmark
-    ret_value, output = run_sdxl_rocm_benchmark(rocm_chip, gpu_number)
+    ret_value, output = run_sdxl_rocm_benchmark(rocm_chip)
     benchmark_e2e_mean_time = job_summary_process(ret_value, output)
     mean_line = (
         f"E2E Benchmark Time: {str(benchmark_e2e_mean_time)} ms"
@@ -213,7 +212,7 @@ def test_sdxl_rocm_benchmark(
     logging.getLogger().info(mean_line)
 
     # unet benchmark
-    ret_value, output = run_sdxl_unet_rocm_benchmark(gpu_number, rocm_chip)
+    ret_value, output = run_sdxl_unet_rocm_benchmark(rocm_chip)
     benchmark_unet_mean_time = job_summary_process(ret_value, output)
     mean_line = (
         f"Scheduled Unet Benchmark Time: {str(benchmark_unet_mean_time)} ms"
@@ -242,7 +241,7 @@ def test_sdxl_rocm_benchmark(
     logging.getLogger().info(compilation_line)
 
     # prompt encoder benchmark
-    ret_value, output = run_sdxl_prompt_encoder_rocm_benchmark(gpu_number, rocm_chip)
+    ret_value, output = run_sdxl_prompt_encoder_rocm_benchmark(rocm_chip)
     benchmark_clip_mean_time = job_summary_process(ret_value, output)
     mean_line = (
         f"Prompt Encoder Benchmark Time: {str(benchmark_clip_mean_time)} ms"
@@ -271,7 +270,7 @@ def test_sdxl_rocm_benchmark(
     logging.getLogger().info(compilation_line)
 
     # vae decode benchmark
-    ret_value, output = run_sdxl_vae_decode_rocm_benchmark(gpu_number, rocm_chip)
+    ret_value, output = run_sdxl_vae_decode_rocm_benchmark(rocm_chip)
     benchmark_vae_mean_time = job_summary_process(ret_value, output)
     mean_line = (
         f"VAE Decode Benchmark Time: {str(benchmark_vae_mean_time)} ms"

diff --git a/experimental/benchmarks/sdxl/conftest.py b/experimental/benchmarks/sdxl/conftest.py
@@ -68,13 +68,6 @@ def pytest_addoption(parser):
         type=int,
         help="Golden vmfb size to test benchmark",
     )
-    parser.addoption(
-        "--gpu-number",
-        action="store",
-        default=0,
-        type=int,
-        help="IREE GPU device number to test on",
-    )
     parser.addoption(
         "--rocm-chip",
         action="store",
@@ -137,8 +130,3 @@ def goldensize_rocm_vae(request):
 @pytest.fixture
 def rocm_chip(request):
     return request.config.getoption("--rocm-chip")
-
-
-@pytest.fixture
-def gpu_number(request):
-    return request.config.getoption("--gpu-number")