nod-ai · archana-ramalingam · Oct 31, 2024 · Oct 30, 2024 · Oct 30, 2024 · Oct 30, 2024
diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml
@@ -72,7 +72,7 @@ jobs:
             iree-runtime \
             "numpy<2.0"
       - name: Run perplexity test with vmfb
-        run:  pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://7' --iree-hip-target='gfx942' --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json
+        run:  pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json
 
   test_perplexity_torch:
     timeout-minutes: 1000
@@ -118,4 +118,4 @@ jobs:
           pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
 
       - name: Run perplexity test in eager mode
-        run:  pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json
+        run:  pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json
diff --git a/sharktank/conftest.py b/sharktank/conftest.py
@@ -141,7 +141,6 @@ def pytest_addoption(parser):
     parser.addoption(
         "--iree-hal-target-backends",
         action="store",
-        default="rocm",
         help="Specify the iree-hal target backend (e.g., rocm)",
     )
 

diff --git a/sharktank/sharktank/utils/export_artifacts.py b/sharktank/sharktank/utils/export_artifacts.py
@@ -11,7 +11,7 @@
 import time
 from pathlib import Path
 from datetime import timedelta
-from typing import List
+from typing import List, Optional
 
 import iree.compiler as ireec
 
@@ -142,7 +142,7 @@ def shard_irpa_file(
 
         logger.info(f"Sharding irpa file:\n" f"cd {cwd} && {cmd}")
 
-        proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
+        proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd, text=True)
         if proc.returncode != 0:
             logger.error(
                 f"Error sharding irpa file with shard_llama.py\n"
@@ -164,14 +164,10 @@ def export_to_mlir(
             "python3",
             "-m",
             "sharktank.examples.export_paged_llm_v1",
-            "--irpa-file",
-            self.irpa_path,
-            "--output-mlir",
-            mlir_path,
-            "--output-config",
-            json_path,
-            "--bs",
-            str(self.batch_size),
+            f"--irpa-file={self.irpa_path}",
+            f"--output-mlir={mlir_path}",
+            f"--output-config={json_path}",
+            f"--bs={str(self.batch_size)}",
         ]
         if self.attention_kernel in ["decomposed", "torch"]:
             export_args.append("--attention-kernel")
@@ -182,7 +178,7 @@ def export_to_mlir(
 
         logger.info(f"Exporting mlir:\n" f"cd {cwd} && {cmd}")
 
-        proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
+        proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd, text=True)
         if proc.returncode != 0:
             raise ExportMlirException(proc, cwd)
         else:
@@ -191,15 +187,29 @@ def export_to_mlir(
         return proc.returncode
 
     @timeit
-    def compile_to_vmfb(self, *, mlir_path, vmfb_path, hal_dump_path, cwd):
-        compile_flags = ["--iree-hip-target=" + self.iree_hip_target]
-        compile_flags += ["--iree-hal-target-backends=rocm"]
-        compile_flags += [f"--iree-hal-dump-executable-files-to={hal_dump_path}/files"]
-        cmd = self.get_compile_cmd(
-            output_mlir_path=mlir_path,
-            output_vmfb_path=vmfb_path,
-            args=compile_flags,
-        )
+    def compile_to_vmfb(
+        self,
+        *,
+        mlir_path,
+        vmfb_path,
+        cwd,
+        hal_dump_path: Optional[Path] = None,
+    ):
+        # TODO: Control flag to enable multiple backends
+        compile_args = [
+            f"iree-compile",
+            f"{mlir_path}",
+            f"--iree-hip-target={self.iree_hip_target}",
+            f"--iree-hal-target-backends={self.iree_hal_target_backends}",
+            f"-o={vmfb_path}",
+        ]
+        if hal_dump_path:
+            compile_args += [
+                f"--iree-hal-dump-executable-files-to={hal_dump_path}/files"
+            ]
+
+        cmd = subprocess.list2cmdline(compile_args)
+
         logging.getLogger().info(f"Launching compile command:\n" f"cd {cwd} && {cmd}")
         proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd)
         return_code = proc.returncode
@@ -247,15 +257,6 @@ def create_file(self, *, suffix, prefix):
         f = open(file_path, "w")
         return file_path
 
-    def get_compile_cmd(
-        self, *, output_mlir_path: str, output_vmfb_path: str, args: [str]
-    ):
-        compile_args = ["iree-compile", output_mlir_path]
-        compile_args += args
-        compile_args += ["-o", output_vmfb_path]
-        cmd = subprocess.list2cmdline(compile_args)
-        return cmd
-
     def get_artifacts(self):
 
         self.dir_path = self.sharktank_dir + "/" + "tmp_perplexity_ci_artifacts/"
@@ -287,6 +288,7 @@ def get_artifacts(self):
                 self.compile_to_vmfb(
                     mlir_path=mlir_path,
                     vmfb_path=vmfb_path,
+                    cwd=self.sharktank_dir,
                 )
 
         return vmfb_path
diff --git a/sharktank/tests/evaluate/baseline_perplexity_scores.json b/sharktank/tests/evaluate/baseline_perplexity_scores.json
@@ -212,107 +212,107 @@
   },
   "llama3_8B_f16_decomposed_vmfb": {
     "perplexities": [
-      21419.466797,
-      21546.818359,
-      14827.014648,
-      16375.65918,
-      8945.300781,
-      9944.508789,
-      16438.810547,
-      10728.957031,
-      9669.796875,
-      14450.475586,
-      27094.927734,
-      8578.132812,
-      22942.267578,
-      8198.905273,
-      4902.405762,
-      14073.242188,
-      11952.408203,
-      9045.265625,
-      7347.615234,
-      14579.709961,
-      20511.626953,
-      15005.15332,
-      15205.226562,
-      22462.205078,
-      17937.900391,
-      11057.017578,
-      11663.111328,
-      11390.241211,
-      7898.138672,
-      7637.557129,
-      10265.848633,
-      16729.228516,
-      5744.851074,
-      7046.032227,
-      7316.122559,
-      7153.626953,
-      8192.285156,
-      5918.197266,
-      12119.681641,
-      13367.679688,
-      6873.890137,
-      7742.501953,
-      13619.378906,
-      7469.197754,
-      8517.003906,
-      5852.495605,
-      21839.90625,
-      13266.838867,
-      45137.652344,
-      13815.619141,
-      14725.118164,
-      14006.322266,
-      27869.220703,
-      8008.710449,
-      6843.859863,
-      10156.393555,
-      7417.569824,
-      17133.203125,
-      4873.34668,
-      8810.631836,
-      13012.022461,
-      10515.050781,
-      6490.756348,
-      6884.498535,
-      13199.611328,
-      9676.604492,
-      2992.313965,
-      12557.617188,
-      13808.018555,
-      12141.337891,
-      10426.229492,
-      16427.511719,
-      13736.017578,
-      9114.052734,
-      14844.96875,
-      11502.46875,
-      6369.100098,
-      10188.533203,
-      5520.150391,
-      10693.388672,
-      4136.566895,
-      12878.518555,
-      6268.281738,
-      17126.113281,
-      10425.692383,
-      42463.15625,
-      21795.568359,
-      6170.659668,
-      17573.275391,
-      6537.691406,
-      8774.048828,
-      14328.767578,
-      35863.398438,
-      10549.089844,
-      5560.846191,
-      8987.045898,
-      6189.242188,
-      13732.914062,
-      10735.333984,
-      12495.99707
+      21193.072266,
+      19056.046875,
+      14219.483398,
+      15756.895508,
+      8944.106445,
+      9869.661133,
+      16658.287109,
+      10607.500977,
+      9713.913086,
+      14292.532227,
+      25132.544922,
+      8547.485352,
+      22008.113281,
+      8151.666016,
+      4657.027344,
+      13439.427734,
+      11977.96875,
+      9102.040039,
+      7168.54248,
+      14284.506836,
+      19425.982422,
+      13816.765625,
+      14938.758789,
+      20920.292969,
+      17318.623047,
+      10631.939453,
+      10967.728516,
+      11320.954102,
+      7898.131348,
+      7533.309082,
+      10347.459961,
+      16628.794922,
+      5660.436523,
+      6997.796875,
+      7166.778809,
+      7254.343262,
+      7830.847656,
+      5824.183105,
+      12025.296875,
+      13098.652344,
+      6687.068848,
+      7917.422363,
+      13454.124023,
+      7467.844238,
+      8359.083984,
+      5764.806152,
+      21530.652344,
+      13371.147461,
+      41864.191406,
+      13620.183594,
+      13884.408203,
+      13103.100586,
+      27156.755859,
+      8063.845215,
+      6860.425293,
+      9858.18457,
+      7352.942871,
+      15842.359375,
+      4743.538086,
+      8537.008789,
+      12972.78125,
+      10095.286133,
+      6439.164062,
+      6490.558105,
+      12648.167969,
+      9572.857422,
+      2898.407471,
+      12640.499023,
+      14136.019531,
+      12054.679688,
+      10645.260742,
+      15704.34375,
+      13092.246094,
+      9125.333008,
+      14404.946289,
+      10729.243164,
+      6442.880371,
+      10171.029297,
+      5473.422363,
+      10730.542969,
+      4240.854004,
+      11855.84375,
+      6185.365234,
+      16672.496094,
+      9839.399414,
+      39691.976562,
+      21539.197266,
+      6073.532715,
+      18334.935547,
+      6634.76416,
+      8460.183594,
+      14246.141602,
+      34158.425781,
+      9613.376953,
+      5572.355469,
+      9140.828125,
+      6082.545898,
+      13940.730469,
+      10588.328125,
+      12113.68457
     ],
-    "mean_perplexity": 12543.547432
+    "mean_perplexity": 12192.796248
   }
 }