-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into base_autotuner
- Loading branch information
Showing
21 changed files
with
193 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Development image layered on NVIDIA's PyTorch 24.05 container.
FROM nvcr.io/nvidia/pytorch:24.05-py3

# key=value form; the space-separated "ENV key value" syntax is deprecated.
# NOTE(review): DEBIAN_FRONTEND is kept in the runtime environment, as before,
# so apt-get inside a running container stays non-interactive.
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=Asia/Shanghai

##############################################################################
# Change apt source to Ksyun
##############################################################################
# The sed expression rewrites the second whitespace-delimited field of every
# line in sources.list (the repository URL of a "deb <url> <suite> ..." entry).
# The two bare ">" redirections truncate the named config files to zero length.
RUN sed -i "s#\S\+#http://apt.ksyun.cn/ubuntu/#2" /etc/apt/sources.list && \
    > /etc/apt/apt.conf.d/docker-clean && \
    > /etc/dpkg/dpkg.cfg.d/pkg-config-hook-config

##############################################################################
# Install basic utilities
##############################################################################
# Packages are listed one per line, sorted, so future diffs stay small.
# The package index is removed in the same layer that downloaded it; run
# "apt-get update" again before installing anything else on top of this image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        htop \
        iftop \
        iotop \
        iputils-ping \
        less \
        net-tools \
        openssh-client \
        openssh-server \
        psmisc \
        rsync \
        screen \
        tmux \
        tzdata \
        unzip \
        vim \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

##############################################################################
# SSH configuration (not secure, only for development purpose)
##############################################################################
# A passphrase-less key pair is generated at build time and authorised for
# login, so every container started from this image shares the same key.
# printf is used instead of echo because whether echo expands "\n" depends on
# the shell; printf always writes exactly one real newline.
RUN mkdir -p /run/sshd && \
    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    printf 'StrictHostKeyChecking no\n' >> /etc/ssh/ssh_config

##############################################################################
# Install Miniconda
##############################################################################
# MINICONDA_VERSION defaults to "latest" (the previous behaviour). For a
# reproducible build pass a concrete installer version together with its
# checksum:
#   --build-arg MINICONDA_VERSION=<version> --build-arg MINICONDA_SHA256=<sha256>
# The checksum is verified only when MINICONDA_SHA256 is non-empty.
ARG MINICONDA_VERSION=latest
ARG MINICONDA_SHA256=""
# Each "cmd || { echo ...; exit 1; }" is wrapped in its own { ...; } group so a
# failure message is printed only for the step that actually failed.
RUN mkdir -p ~/miniconda3 && \
    wget "https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh" -O ~/miniconda3/miniconda.sh && \
    if [ -n "${MINICONDA_SHA256}" ]; then \
        echo "${MINICONDA_SHA256}  ${HOME}/miniconda3/miniconda.sh" | sha256sum -c - ; \
    fi && \
    { bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 || { echo 'Miniconda installation failed' ; exit 1; }; } && \
    rm -rf ~/miniconda3/miniconda.sh && \
    { ~/miniconda3/bin/conda init bash || { echo 'conda init failed' ; exit 1; }; } && \
    { ~/miniconda3/bin/conda config --set auto_activate_base false || { echo 'conda config failed' ; exit 1; }; }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -304,4 +304,3 @@ def set_device_type(args): | |
# Apply the following patch during the import time | ||
import patches | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
def set_mock_args():
    """Populate a mocked argument object and hand it to ``set_args``.

    ``megatron.training.training.get_args`` is patched for the duration of the
    call. The patched mock's ``return_value`` is filled with a fixed set of
    attributes and then passed to ``megatron.training.global_vars.set_args``.
    """
    from unittest import mock

    def init_mock_args(args):
        """Assign the fixed attribute values onto ``args`` and return it."""
        fixed_values = {
            'data_parallel_random_init': False,
            'virtual_pipeline_model_parallel_size': None,
            'bf16': True,
            'accumulate_allreduce_grads_in_fp32': False,
            'overlap_grad_reduce': False,
            'use_distributed_optimizer': True,
            'load': None,
            'save_param_index_maps_only': False,
            'rampup_batch_size': None,
            'global_batch_size': 8,
            'micro_batch_size': 1,
            'data_parallel_size': 8,
            'adlr_autoresume': False,
            'timing_log_option': 'minmax',
            'timing_log_level': 0,
            'pretrained_checkpoint': None,
        }
        # Dict insertion order keeps the assignments in their original sequence.
        for attr_name, attr_value in fixed_values.items():
            setattr(args, attr_name, attr_value)
        return args

    get_args_patch = mock.patch(
        'megatron.training.training.get_args',
        data_parallel_random_init=False,
    )
    with get_args_patch as mock_args:
        # The mock's return_value is the same object on every access, so it is
        # bound once and reused.
        mocked_namespace = mock_args.return_value
        init_mock_args(mocked_namespace)
        from megatron.training.global_vars import set_args
        set_args(mocked_namespace)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# build tools | ||
packaging | ||
setuptools>=49.4.0 | ||
wheel | ||
|
||
# testing | ||
pytest | ||
pytest_mock | ||
pytest-cov | ||
pytest-forked | ||
pytest-asyncio | ||
pytest-rerunfailures | ||
pytest-shard | ||
|
||
# formatting | ||
black==24.4.2 | ||
codespell==2.2.6 | ||
isort==5.13.2 | ||
clang-format==18.1.5 | ||
|
||
# type checking | ||
mypy==1.10.0 | ||
|
||
# pre-commit | ||
pre-commit>=2.20.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# mirror from vllm repo with some modifications | ||
|
||
# testing utils | ||
awscli | ||
einops # required for MPT | ||
httpx | ||
peft | ||
requests | ||
ray | ||
sentence-transformers # required for embedding | ||
|
||
# Benchmarking | ||
aiohttp | ||
|
||
# Multimodal | ||
pillow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# mirror from vllm repo with some modifications | ||
torch | ||
fastapi | ||
aiohttp | ||
openai | ||
uvicorn[standard] | ||
pydantic >= 2.0 # Required for OpenAI server. | ||
prometheus_client >= 0.18.0 | ||
prometheus-fastapi-instrumentator >= 7.0.0 | ||
lm-format-enforcer == 0.10.1 | ||
outlines == 0.0.34 # Requires torch >= 2.1.0 | ||
typing_extensions | ||
filelock >= 3.10.4 # filelock starts to support `mode` argument from 3.10.4 | ||
|
||
# Uncomment the following lines if you don't compile them from source | ||
# xformers | ||
# vllm-flash-attn | ||
# vllm-nccl-cu12 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
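# pip install git+https://github.com/fanshiqing/grouped_gemm@<tag>  (NOTE: the "@<tag>" suffix was mangled by e-mail obfuscation in this capture; restore the real version tag) | ||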
nltk | ||
wrapt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
torch | ||
wandb | ||
deepspeed | ||
dlrover[torch] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
cmake >= 3.21 | ||
hydra-core | ||
ninja | ||
psutil | ||
numpy | ||
requests | ||
py-cpuinfo | ||
sentencepiece | ||
transformers >= 4.40.0 | ||
tokenizers >= 0.19.1 | ||
tiktoken >= 0.6.0 | ||
ray >= 2.9 | ||
matplotlib | ||
py3nvml | ||
graphviz | ||
tqdm | ||
mpi4py | ||
nvidia-ml-py |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
{"lm loss:": {"values": [11.61173, 11.61412, 11.47692, 11.37522, 11.28096, 11.21686, 11.16553, 11.13099, 11.10483, 11.08886]}} | ||
{"lm loss:": {"values": [11.61173, 11.61412, 11.47692, 11.37523, 11.28096, 11.21686, 11.16553, 11.13099, 11.10483, 11.08886]}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
export PYTHONPATH=./flagscale:$PYTHONPATH | ||
torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/launcher | ||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/launcher |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,12 @@ | ||
export PYTHONPATH=./megatron:$PYTHONPATH | ||
export PYTHONPATH=./../../FlagScale/:$PYTHONPATH | ||
cd megatron | ||
# torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/data | ||
torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/dist_checkpointing | ||
torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/fusions | ||
torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/models | ||
torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/pipeline_parallel | ||
torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/tensor_parallel | ||
torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/transformer | ||
torchrun --nproc_per_node=8 -m pytest -q -x tests/unit_tests/*.py | ||
|
||
export PYTHONPATH=..:$PYTHONPATH | ||
|
||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/data | ||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/dist_checkpointing | ||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/fusions | ||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/models | ||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/pipeline_parallel | ||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/tensor_parallel | ||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/transformer | ||
torchrun --nproc_per_node=8 -m pytest --import-mode=importlib -q -x tests/unit_tests/*.py |