FlagOpen · tianxiao-baai · Oct 5, 2024 · Sep 20, 2024 · Sep 28, 2024 · Oct 4, 2024
diff --git a/training/benchmarks/llama3_70B_continuetrain/flagscale/run_pretraining.py b/training/benchmarks/llama3_70B_continuetrain/flagscale/run_pretraining.py
@@ -176,7 +176,7 @@ def replace_yamls(scale_home, config_module, args):
             print("Maybe some errors")
         if len(info_line) == getattr(module, "steps"):
             break
-        time.sleep(300)
+        time.sleep(300) if args.vendor != 'metax' else time.sleep(10)
 
     infos = []
     for line in info_line:

diff --git a/training/metax/llama3_70B_continuetrain-flagscale/README.md b/training/metax/llama3_70B_continuetrain-flagscale/README.md
@@ -0,0 +1 @@
+此测例为FlagScale相关项目测例
diff --git a/training/metax/llama3_70B_continuetrain-flagscale/config/config_C500x4x8.py b/training/metax/llama3_70B_continuetrain-flagscale/config/config_C500x4x8.py
@@ -0,0 +1,25 @@
+# scale_parent must be under FlagPerf/ or data_dir/; otherwise it cannot be mounted into the bare-metal environment, and shared storage therefore cannot be used
+scale_parent = "/share/project/FlagPerf/FlagScale/build/metax_C500"
+scale_home = f"{scale_parent}/FlagScale"
+
+# This command should install FlagScale at <scale_home>. <scale_home> is defined above; see flagperf.training.benchmarks.llama3_70B_continuetrain.flagscale.run_pretraining.py
+scale_download_cmd = f"cd {scale_parent}"
+
+# NVIDIA needs nothing here because all requirements are already established in the base docker image. Other vendors can add any related install steps here
+scale_install_cmd = ""
+
+scale_conf_dir = f"{scale_home}/examples/llama/conf"
+configyaml = f"{scale_conf_dir}/config.yaml"
+trainyaml = f"{scale_conf_dir}/train/train_llama3_70b_finetune.yaml"
+dataset = f"SAMPLE50B/llama3/llama3_dataset"
+tokenizer = f"SAMPLE50B/llama3/llama3_tokenizer"
+ckpt = f"llama3_ckpt"
+
+cmds = {"before_start": ""}
+# flagscale's requirements
+flagscale_chip_type = "C500"
+flagscale_ssh_port = 1234
+flops = -1
+
+# for llama3's algorithm
+steps = 500
diff --git a/training/metax/llama3_70B_continuetrain-flagscale/config/requirements.txt b/training/metax/llama3_70B_continuetrain-flagscale/config/requirements.txt
diff --git a/training/run_benchmarks/config/test_conf.py b/training/run_benchmarks/config/test_conf.py
@@ -174,6 +174,7 @@
     # "llama2_7b:deepspeed:S4000:1:8:1": "/data/flagperf/llama/openwebtext",
 
     # metax cases
+    #"llama3_70B_continuetrain:flagscale:C500:4:8:1": "/metax/dataset"
     #"llava1.5_7b_continuetrain:flagscale:C500:4:8:1": "/data/dataset/llava"
     #"llama3_8B:megatron_core060:C500:1:8:1": "/data/llama3_8b"
     # "llama2_70B:megatron:C500:4:8:1": "/data/llama2-70B"