From ce6827354f03d8f5254300705464011832ea5272 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 03:44:08 +0000 Subject: [PATCH 01/10] make hal_dump_path optional --- sharktank/sharktank/utils/export_artifacts.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sharktank/sharktank/utils/export_artifacts.py b/sharktank/sharktank/utils/export_artifacts.py index 84c206d7a..365709713 100644 --- a/sharktank/sharktank/utils/export_artifacts.py +++ b/sharktank/sharktank/utils/export_artifacts.py @@ -11,7 +11,7 @@ import time from pathlib import Path from datetime import timedelta -from typing import List +from typing import List, Optional import iree.compiler as ireec @@ -142,11 +142,14 @@ def compile_to_vmfb( *, mlir_path, vmfb_path, - hal_dump_path, + hal_dump_path: Optional[Path] = None, ): # TODO: Control flag to enable multiple backends compile_flags = ["--iree-hip-target=" + self.iree_hip_target] - compile_flags += [f"--iree-hal-dump-executable-files-to={hal_dump_path}/files"] + if hal_dump_path: + compile_flags += [ + f"--iree-hal-dump-executable-files-to={hal_dump_path}/files" + ] try: ireec.compile_file( input_file=mlir_path, From 56c8d3c99d6105951f4d22deca7dcaad57169dd6 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 03:45:58 +0000 Subject: [PATCH 02/10] make the logging comprehensible --- sharktank/sharktank/utils/export_artifacts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sharktank/sharktank/utils/export_artifacts.py b/sharktank/sharktank/utils/export_artifacts.py index 365709713..870df457a 100644 --- a/sharktank/sharktank/utils/export_artifacts.py +++ b/sharktank/sharktank/utils/export_artifacts.py @@ -85,7 +85,7 @@ def shard_irpa_file( logger.info(f"Sharding irpa file:\n" f"cd {cwd} && {cmd}") - proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd) + proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd, text=True) if proc.returncode != 0: logger.error( f"Error sharding irpa file with shard_llama.py\n" @@ -125,7 +125,7 @@ def export_to_mlir( logger.info(f"Exporting mlir:\n" f"cd {cwd} && {cmd}") - proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd) + proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd, text=True) if proc.returncode != 0: logger.error( f"Error exporting mlir with export_paged_llm_v1.py\n" From 4bb540e0f403cc0b4d8d9efc89bbcdd4e9947f7d Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 08:09:00 +0000 Subject: [PATCH 03/10] Test on PR --- .github/workflows/ci_eval.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 78aaa84e3..748946946 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,6 +7,7 @@ name: Evaluation Tests on: + pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. @@ -72,7 +73,7 @@ jobs: iree-runtime \ "numpy<2.0" - name: Run perplexity test with vmfb - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://7' --iree-hip-target='gfx942' --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://5' --iree-hip-target='gfx942' --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json test_perplexity_torch: timeout-minutes: 1000 From c3a5c7f6a179cc3d77c3097b58c23a0852ea2f61 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 17:34:11 +0000 Subject: [PATCH 04/10] Update baseline numbers for vmfb --- .../evaluate/baseline_perplexity_scores.json | 202 +++++++++--------- 1 file changed, 101 insertions(+), 101 deletions(-) diff --git a/sharktank/tests/evaluate/baseline_perplexity_scores.json b/sharktank/tests/evaluate/baseline_perplexity_scores.json index d9d0d454b..469b758ae 100644 --- a/sharktank/tests/evaluate/baseline_perplexity_scores.json +++ b/sharktank/tests/evaluate/baseline_perplexity_scores.json @@ -212,107 +212,107 @@ }, "llama3_8B_f16_decomposed_vmfb": { "perplexities": [ - 21419.466797, - 21546.818359, - 14827.014648, - 16375.65918, - 8945.300781, - 9944.508789, - 16438.810547, - 10728.957031, - 9669.796875, - 14450.475586, - 27094.927734, - 8578.132812, - 22942.267578, - 8198.905273, - 4902.405762, - 14073.242188, - 11952.408203, - 9045.265625, - 7347.615234, - 14579.709961, - 20511.626953, - 15005.15332, - 15205.226562, - 22462.205078, - 17937.900391, - 11057.017578, - 11663.111328, - 11390.241211, - 7898.138672, - 7637.557129, - 10265.848633, - 16729.228516, - 5744.851074, - 7046.032227, - 7316.122559, - 7153.626953, - 8192.285156, - 5918.197266, - 12119.681641, - 13367.679688, - 6873.890137, - 7742.501953, - 13619.378906, - 7469.197754, - 8517.003906, - 5852.495605, - 21839.90625, - 13266.838867, - 45137.652344, - 13815.619141, - 14725.118164, - 14006.322266, - 27869.220703, - 8008.710449, - 6843.859863, - 10156.393555, - 7417.569824, - 17133.203125, - 4873.34668, - 8810.631836, - 13012.022461, - 10515.050781, - 6490.756348, - 6884.498535, - 13199.611328, - 9676.604492, - 2992.313965, - 12557.617188, - 13808.018555, - 12141.337891, - 10426.229492, - 16427.511719, - 13736.017578, - 9114.052734, - 14844.96875, - 11502.46875, - 6369.100098, - 10188.533203, - 5520.150391, - 10693.388672, - 4136.566895, - 12878.518555, - 6268.281738, - 17126.113281, - 10425.692383, - 42463.15625, - 21795.568359, - 6170.659668, - 17573.275391, - 6537.691406, - 8774.048828, - 14328.767578, - 35863.398438, - 10549.089844, - 5560.846191, - 8987.045898, - 6189.242188, - 13732.914062, - 10735.333984, - 12495.99707 + 21193.072266, + 19056.046875, + 14219.483398, + 15756.895508, + 8944.106445, + 9869.661133, + 16658.287109, + 10607.500977, + 9713.913086, + 14292.532227, + 25132.544922, + 8547.485352, + 22008.113281, + 8151.666016, + 4657.027344, + 13439.427734, + 11977.96875, + 9102.040039, + 7168.54248, + 14284.506836, + 19425.982422, + 13816.765625, + 14938.758789, + 20920.292969, + 17318.623047, + 10631.939453, + 10967.728516, + 11320.954102, + 7898.131348, + 7533.309082, + 10347.459961, + 16628.794922, + 5660.436523, + 6997.796875, + 7166.778809, + 7254.343262, + 7830.847656, + 5824.183105, + 12025.296875, + 13098.652344, + 6687.068848, + 7917.422363, + 13454.124023, + 7467.844238, + 8359.083984, + 5764.806152, + 21530.652344, + 13371.147461, + 41864.191406, + 13620.183594, + 13884.408203, + 13103.100586, + 27156.755859, + 8063.845215, + 6860.425293, + 9858.18457, + 7352.942871, + 15842.359375, + 4743.538086, + 8537.008789, + 12972.78125, + 10095.286133, + 6439.164062, + 6490.558105, + 12648.167969, + 9572.857422, + 2898.407471, + 12640.499023, + 14136.019531, + 12054.679688, + 10645.260742, + 15704.34375, + 13092.246094, + 9125.333008, + 14404.946289, + 10729.243164, + 6442.880371, + 10171.029297, + 5473.422363, + 10730.542969, + 4240.854004, + 11855.84375, + 6185.365234, + 16672.496094, + 9839.399414, + 39691.976562, + 21539.197266, + 6073.532715, + 18334.935547, + 6634.76416, + 8460.183594, + 14246.141602, + 34158.425781, + 9613.376953, + 5572.355469, + 9140.828125, + 6082.545898, + 13940.730469, + 10588.328125, + 12113.68457 ], - "mean_perplexity": 12543.547432 + "mean_perplexity": 12192.796248 } } From 78656a78d6bdf7feff27fd8ae33065574cb5ea4c Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 17:37:35 +0000 Subject: [PATCH 05/10] Remove debugging --- .github/workflows/ci_eval.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 748946946..78aaa84e3 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,7 +7,6 @@ name: Evaluation Tests on: - pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. @@ -73,7 +72,7 @@ jobs: iree-runtime \ "numpy<2.0" - name: Run perplexity test with vmfb - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://5' --iree-hip-target='gfx942' --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://7' --iree-hip-target='gfx942' --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json test_perplexity_torch: timeout-minutes: 1000 From 5d9ae3313daafd38b4d03b649e8b3def975376ec Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 19:31:02 +0000 Subject: [PATCH 06/10] Fix recent patch changes --- sharktank/conftest.py | 1 - sharktank/sharktank/utils/export_artifacts.py | 30 +++++++------------ 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/sharktank/conftest.py b/sharktank/conftest.py index e46466959..ce1e131a4 100644 --- a/sharktank/conftest.py +++ b/sharktank/conftest.py @@ -141,7 +141,6 @@ def pytest_addoption(parser): parser.addoption( "--iree-hal-target-backends", action="store", - default="rocm", help="Specify the iree-hal target backend (e.g., rocm)", ) diff --git a/sharktank/sharktank/utils/export_artifacts.py b/sharktank/sharktank/utils/export_artifacts.py index 89bbdf8a1..e36b1fc8b 100644 --- a/sharktank/sharktank/utils/export_artifacts.py +++ b/sharktank/sharktank/utils/export_artifacts.py @@ -200,27 +200,25 @@ def compile_to_vmfb( hal_dump_path: Optional[Path] = None, ): # TODO: Control flag to enable multiple backends - compile_flags = [ - "--iree-hip-target=" + self.iree_hip_target, - "--iree-hal-target-backends=" + self.iree_hal_target_backends, + compile_args = [ + f"iree-compile={mlir_path}", + f"--iree-hip-target={self.iree_hip_target}", + f"--iree-hal-target-backends={self.iree_hal_target_backends}", + f"-o={vmfb_path}", ] if hal_dump_path: - compile_flags += [ + compile_args += [ f"--iree-hal-dump-executable-files-to={hal_dump_path}/files" ] - - cmd = self.get_compile_cmd( - output_mlir_path=mlir_path, - output_vmfb_path=vmfb_path, - args=compile_flags, - ) + + cmd = subprocess.list2cmdline(compile_args) + logging.getLogger().info(f"Launching compile command:\n" f"cd {cwd} && {cmd}") proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd) return_code = proc.returncode if return_code != 0: raise IreeCompileException(proc, cwd) - def iree_benchmark_vmfb( self, *, @@ -262,15 +260,6 @@ def create_file(self, *, suffix, prefix): f = open(file_path, "w") return file_path - def get_compile_cmd( - self, *, output_mlir_path: str, output_vmfb_path: str, args: [str] - ): - compile_args = ["iree-compile", output_mlir_path] - compile_args += args - compile_args += ["-o", output_vmfb_path] - cmd = subprocess.list2cmdline(compile_args) - return cmd - def get_artifacts(self): self.dir_path = self.sharktank_dir + "/" + "tmp_perplexity_ci_artifacts/" @@ -302,6 +291,7 @@ def get_artifacts(self): self.compile_to_vmfb( mlir_path=mlir_path, vmfb_path=vmfb_path, + cwd=self.sharktank_dir, ) return vmfb_path From 494ebb1915272f85a7e73ea114c2cf1a2794f2e6 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 19:55:57 +0000 Subject: [PATCH 07/10] Fix iree-compile and edit mlir args --- sharktank/sharktank/utils/export_artifacts.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/sharktank/sharktank/utils/export_artifacts.py b/sharktank/sharktank/utils/export_artifacts.py index e36b1fc8b..658a14439 100644 --- a/sharktank/sharktank/utils/export_artifacts.py +++ b/sharktank/sharktank/utils/export_artifacts.py @@ -164,14 +164,10 @@ def export_to_mlir( "python3", "-m", "sharktank.examples.export_paged_llm_v1", - "--irpa-file", - self.irpa_path, - "--output-mlir", - mlir_path, - "--output-config", - json_path, - "--bs", - str(self.batch_size), + f"--irpa-file={self.irpa_path}", + f"--output-mlir={mlir_path}", + f"--output-config={json_path}", + f"--bs={str(self.batch_size)}", ] if self.attention_kernel in ["decomposed", "torch"]: export_args.append("--attention-kernel") @@ -201,7 +197,8 @@ def compile_to_vmfb( ): # TODO: Control flag to enable multiple backends compile_args = [ - f"iree-compile={mlir_path}", + f"iree-compile", + f"{mlir_path}", f"--iree-hip-target={self.iree_hip_target}", f"--iree-hal-target-backends={self.iree_hal_target_backends}", f"-o={vmfb_path}", From d2c19be65b8755e79c6b56c5746a06362bf6dd9b Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 23:03:28 +0000 Subject: [PATCH 08/10] Update mi300x-3 artifact folder and add args to pytest cmd --- .github/workflows/ci_eval.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 78aaa84e3..94e4ad538 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -72,7 +72,7 @@ jobs: iree-runtime \ "numpy<2.0" - name: Run perplexity test with vmfb - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://7' --iree-hip-target='gfx942' --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json test_perplexity_torch: timeout-minutes: 1000 @@ -118,4 +118,4 @@ jobs: pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/ - name: Run perplexity test in eager mode - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json From d98617428a3379f63f215e1bf2fc14936cf857ce Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 30 Oct 2024 23:04:38 +0000 Subject: [PATCH 09/10] Test on PR --- .github/workflows/ci_eval.yaml | 1 + sharktank/sharktank/evaluate/perplexity_torch.py | 2 +- sharktank/sharktank/evaluate/perplexity_vmfb.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 94e4ad538..c1c2b051b 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,6 +7,7 @@ name: Evaluation Tests on: + pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. diff --git a/sharktank/sharktank/evaluate/perplexity_torch.py b/sharktank/sharktank/evaluate/perplexity_torch.py index fc3aa5fca..5a00aa41e 100644 --- a/sharktank/sharktank/evaluate/perplexity_torch.py +++ b/sharktank/sharktank/evaluate/perplexity_torch.py @@ -292,7 +292,7 @@ def run_perplexity_torch( perplexity.load_model(dataset, tokenizer, tensor_parallelism_size, attention_kernel) test_prompts = perplexity.get_prompts() - ppl = perplexity.get_perplexity(test_prompts=test_prompts) + ppl = perplexity.get_perplexity(test_prompts=test_prompts[0:4]) return ppl diff --git a/sharktank/sharktank/evaluate/perplexity_vmfb.py b/sharktank/sharktank/evaluate/perplexity_vmfb.py index fedf7c1c9..a052a0ada 100644 --- a/sharktank/sharktank/evaluate/perplexity_vmfb.py +++ b/sharktank/sharktank/evaluate/perplexity_vmfb.py @@ -181,7 +181,7 @@ def get_prompts(self): s.replace("\n", "").rstrip() for s in test_prompts if s != "" and len(s.split()) >= 20 and s.count("=") < 2 - ] + ][0:4] self.bs = len(test_prompts) From 708c8559fbe6adf1efa3b33cb935af1484519c39 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 31 Oct 2024 00:43:59 +0000 Subject: [PATCH 10/10] Revert "Test on PR" This reverts commit d98617428a3379f63f215e1bf2fc14936cf857ce. --- .github/workflows/ci_eval.yaml | 1 - sharktank/sharktank/evaluate/perplexity_torch.py | 2 +- sharktank/sharktank/evaluate/perplexity_vmfb.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index c1c2b051b..94e4ad538 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,7 +7,6 @@ name: Evaluation Tests on: - pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. diff --git a/sharktank/sharktank/evaluate/perplexity_torch.py b/sharktank/sharktank/evaluate/perplexity_torch.py index 5a00aa41e..fc3aa5fca 100644 --- a/sharktank/sharktank/evaluate/perplexity_torch.py +++ b/sharktank/sharktank/evaluate/perplexity_torch.py @@ -292,7 +292,7 @@ def run_perplexity_torch( perplexity.load_model(dataset, tokenizer, tensor_parallelism_size, attention_kernel) test_prompts = perplexity.get_prompts() - ppl = perplexity.get_perplexity(test_prompts=test_prompts[0:4]) + ppl = perplexity.get_perplexity(test_prompts=test_prompts) return ppl diff --git a/sharktank/sharktank/evaluate/perplexity_vmfb.py b/sharktank/sharktank/evaluate/perplexity_vmfb.py index a052a0ada..fedf7c1c9 100644 --- a/sharktank/sharktank/evaluate/perplexity_vmfb.py +++ b/sharktank/sharktank/evaluate/perplexity_vmfb.py @@ -181,7 +181,7 @@ def get_prompts(self): s.replace("\n", "").rstrip() for s in test_prompts if s != "" and len(s.split()) >= 20 and s.count("=") < 2 - ][0:4] + ] self.bs = len(test_prompts)