-
Notifications
You must be signed in to change notification settings - Fork 104
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[KUNLUNXIN] case config: llava1.5_7b.
- Loading branch information
1 parent
ea906ae
commit 70e8988
Showing
9 changed files
with
81 additions
and
0 deletions.
There are no files selected for viewing
12 changes: 12 additions & 0 deletions
12
training/kunlunxin/docker_image/flagscale_llava/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Kunlunxin image for the FlagScale LLaVA cases, layered on a pre-built base image.
# NOTE(review): "newest" is a floating tag; pin a versioned tag or digest if one is available.
FROM zhiyuan_flagscale_llava:newest

# Point pip at the Tsinghua PyPI mirror for all later installs.
RUN /bin/bash -c "pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple"

# Record the build host kernel/architecture in the build log.
RUN /bin/bash -c "uname -a"

# The former `RUN /bin/bash -c alias python3=python` step was removed: an alias defined
# in a build-time shell does not persist into later layers or the running container.
# NOTE(review): presumably the conda env below already ships python3; if it does not,
# add an explicit symlink instead of an alias.

# Put the conda environment's interpreter and tools first on PATH.
ENV PATH=/root/miniconda/envs/python39_torch201_cuda/bin:$PATH

# Install the SSH server. update + install share one layer so the package index is
# never stale, and the apt lists are removed in the same layer to keep it small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends openssh-server \
    && rm -rf /var/lib/apt/lists/*

# Generate the entrypoint wrapper: restart sshd, then exec the container command so it
# replaces the wrapper process. printf is used because it expands \n under any /bin/sh.
RUN printf '#!/bin/bash\nservice ssh restart\nexec "$@"\n' > /docker-start.sh \
    && chmod +x /docker-start.sh

# Run the wrapper directly so its bash shebang is honoured.
ENTRYPOINT ["/docker-start.sh"]
|
9 changes: 9 additions & 0 deletions
9
training/kunlunxin/docker_image/flagscale_llava/flagscale_llava_install.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/bash
# Install-time script for the flagscale_llava image: writes two XPyTorch check
# outputs under /tmp and then restarts the SSH service.

# Echo every command as it is executed.
set -o xtrace

# Disabled installer step, kept for reference:
# cd /opt/xpytorch && bash xpytorch-cp39-torch201-ubuntu2004-x64.run

# Save the torch_xmlir "doctor" report (stdout and stderr) to a file.
CUDART_DUMMY_REGISTER=1 python -m torch_xmlir --doctor > /tmp/xpytorch.version.out 2>&1

# Smoke test: create a random tensor, move it with .cuda(), and save the output.
CUDART_DUMMY_REGISTER=1 python -c "import torch; print(torch.rand(512, 128).cuda())" > /tmp/xpytorch.test.out 2>&1

# Restart sshd through its init script.
/etc/init.d/ssh restart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
此测例为FlagScale相关项目测例 |
27 changes: 27 additions & 0 deletions
27
training/kunlunxin/llava1.5_7b-flagscale/config/config_R300px4x8.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Case configuration for llava1.5_7b on FlagScale (Kunlunxin R300p).

# Directory that contains the FlagScale checkout. It must sit under FlagPerf/ or
# data_dir/; anything else cannot be mounted into baremetal, which rules out
# shared storage.
scale_parent = "/share/project/PUBLIC"
scale_home = scale_parent + "/FlagScale"

# Command that should leave FlagScale installed at <scale_home>. <scale_home> is set by
# flagperf.training.benchmarks.llava1.5_7b.flagscale.run_pretraining.py
scale_download_cmd = "cd " + scale_parent

# Vendor hook for extra installation steps. NV needs nothing because the base docker
# image already carries every requirement; it is left empty here as well.
scale_install_cmd = ""

# Shell pipeline that prints where megatron-energon is installed. Copying from that
# path into flagscale/megatron/ is done by flagperf...run_pretraining.py
energon_locate_cmd = (
    "pip show megatron-energon"
    " | grep Location"
    " | awk -F: '{print $2}'"
    " | xargs"
)

# FlagScale configuration files used by this case.
scale_conf_dir = scale_home + "/examples/llava/conf"
configyaml = scale_conf_dir + "/config.yaml"
trainyaml = scale_conf_dir + "/train/train_llava1.5_7b.yaml"
datasetyaml = "/".join(
    [scale_home, "megatron", "examples", "multimodal", "pretrain_dataset.yaml"])
prompt = "/".join(
    [scale_home, "megatron", "examples", "multimodal", "manual_prompts.json"])

# Shell command keyed by the "before_start" stage.
cmds = dict(before_start="source ~/.bashrc")

# Settings required by FlagScale.
flagscale_chip_type = "R300p"
flagscale_ssh_port = 4323
flops = 999

# Settings for the llava algorithm.
steps = 30
1 change: 1 addition & 0 deletions
1
training/kunlunxin/llava1.5_7b-flagscale/config/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
megatron-energon==2.2.0 |
1 change: 1 addition & 0 deletions
1
training/kunlunxin/llava1.5_7b_continuetrain-flagscale/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
此测例为FlagScale相关项目测例 |
27 changes: 27 additions & 0 deletions
27
training/kunlunxin/llava1.5_7b_continuetrain-flagscale/config/config_R300px4x8.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Case configuration for llava1.5_7b continue-training on FlagScale (Kunlunxin R300p).

# Directory that contains the FlagScale checkout. It must sit under FlagPerf/ or
# data_dir/; anything else cannot be mounted into baremetal, which rules out
# shared storage.
scale_parent = "/share/project/PUBLIC"
scale_home = scale_parent + "/FlagScale"

# Command that should leave FlagScale installed at <scale_home>. <scale_home> is set by
# flagperf.training.benchmarks.llava1.5_7b.flagscale.run_pretraining.py
scale_download_cmd = "cd " + scale_parent

# Vendor hook for extra installation steps. NV needs nothing because the base docker
# image already carries every requirement; it is left empty here as well.
scale_install_cmd = ""

# Shell pipeline that prints where megatron-energon is installed. Copying from that
# path into flagscale/megatron/ is done by flagperf...run_pretraining.py
energon_locate_cmd = (
    "pip show megatron-energon"
    " | grep Location"
    " | awk -F: '{print $2}'"
    " | xargs"
)

# FlagScale configuration files used by this case.
scale_conf_dir = scale_home + "/examples/llava/conf"
configyaml = scale_conf_dir + "/config.yaml"
trainyaml = scale_conf_dir + "/train/train_llava1.5_7b.yaml"
datasetyaml = "/".join(
    [scale_home, "megatron", "examples", "multimodal", "pretrain_dataset.yaml"])
prompt = "/".join(
    [scale_home, "megatron", "examples", "multimodal", "manual_prompts.json"])

# Shell command keyed by the "before_start" stage.
cmds = dict(before_start="source ~/.bashrc")

# Settings required by FlagScale.
flagscale_chip_type = "R300p"
flagscale_ssh_port = 4323
flops = 999

# Settings for the llava algorithm.
steps = 5000
1 change: 1 addition & 0 deletions
1
training/kunlunxin/llava1.5_7b_continuetrain-flagscale/config/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
megatron-energon==2.2.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters