add mixtral test
phoenixdong committed Jun 7, 2024
1 parent ac373cb commit f6540ad
Showing 4 changed files with 106 additions and 1 deletion.
7 changes: 6 additions & 1 deletion .github/workflows/test.yml
@@ -18,6 +18,8 @@ jobs:
        - 80
      volumes:
        - /home/flagscale_cicd/flask/static:/workspace/report
        - /home/flagscale_cicd/docker/docker_build/docker_data:/home/gitlab-runner/data
        - /home/flagscale_cicd/docker/docker_build/docker_tokenizers:/home/gitlab-runner/tokenizers
      options: --gpus all --hostname flagscale_cicd
    steps:
      - name: Checkout Code
@@ -51,4 +53,7 @@ jobs:
      - name: Flagscale Functional Test
        run: |
          python run.py --config-path tests/functional_tests/aquila/conf --config-name config action=test
          pytest -s tests/functional_tests/test_result.py --test_reaults_path=./tests/functional_tests/aquila/test_result
          python run.py --config-path tests/functional_tests/mixtral/conf --config-name config action=test
          pytest -s tests/functional_tests/test_result.py --test_reaults_path=./tests/functional_tests/mixtral/test_result
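
Note: the pytest calls above pass a custom --test_reaults_path option (spelling as in the repository). Custom options like this are normally registered in a conftest.py; the following is a minimal hypothetical sketch of that wiring, not FlagScale's actual test code.

# Hypothetical conftest.py sketch; FlagScale's real conftest may differ.
import pytest

def pytest_addoption(parser):
    # Register the CLI flag used in the workflow step above.
    parser.addoption("--test_reaults_path", action="store", default=None,
                     help="directory containing the gold test results")

@pytest.fixture
def test_reaults_path(request):
    # Expose the flag's value to tests such as test_result.py.
    return request.config.getoption("--test_reaults_path")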
24 changes: 24 additions & 0 deletions tests/functional_tests/mixtral/conf/config.yaml
@@ -0,0 +1,24 @@
defaults:
  - _self_
  - train: test_train_mixtral

experiment:
  exp_name: test_train_mixtral
  exp_dir: ./tests/functional_tests/mixtral/test_result
  task:
    type: train
    backend: megatron
    entrypoint: flagscale/train/train_mixtral.py
  runner:
    backend: torchrun
    shell_cmds: null
    ssh_port: null
  envs:
    CUDA_VISIBLE_DEVICES: "0,1,2,3,4,5,6,7"
    CUDA_DEVICE_MAX_CONNECTIONS: 1

action: run

hydra:
  run:
    dir: ${experiment.exp_dir}/hydra
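
This is a standard Hydra primary config: the defaults list composes train/test_train_mixtral.yaml under the train key, and ${experiment.exp_dir} is interpolated at run time. As an illustration only, an entrypoint consuming it could look like the sketch below (run.py in the repository has its own, more elaborate logic):

# Hypothetical Hydra entrypoint sketch; not FlagScale's actual run.py.
import hydra
from omegaconf import DictConfig, OmegaConf

@hydra.main(config_path="tests/functional_tests/mixtral/conf",
            config_name="config", version_base=None)
def main(cfg: DictConfig) -> None:
    # action=test on the command line overrides the default action: run.
    print(cfg.action, cfg.experiment.exp_name)
    # cfg.train holds the composed train/test_train_mixtral.yaml.
    print(OmegaConf.to_yaml(cfg.train))

if __name__ == "__main__":
    main()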
75 changes: 75 additions & 0 deletions tests/functional_tests/mixtral/conf/train/test_train_mixtral.yaml
@@ -0,0 +1,75 @@
system:
  tensor_model_parallel_size: 4
  pipeline_model_parallel_size: 1
  expert_model_parallel_size: 2
  sequence_parallel: true
  use_distributed_optimizer: true
  precision:
    bf16: true
  logging:
    log_interval: 1
    tensorboard_log_interval: 1
    wandb_project: train-mixtral
    wandb_exp_name: train-mixtral-test
  checkpoint:
    save_interval: 2000
    no_save_optim: false
    no_save_rng: false
    no_load_optim: false
    no_load_rng: false

model:
  use_mcore_models: true
  transformer_impl: transformer_engine
  num_layers: 2
  hidden_size: 4096
  ffn_hidden_size: 14336
  num_attention_heads: 32
  seq_length: 2048
  max_position_embeddings: 32768
  swiglu: true
  normalization: RMSNorm
  norm_epsilon: 1e-05
  group_query_attention: true
  num_query_groups: 8
  init_method_std: 0.02
  attention_dropout: 0.0
  hidden_dropout: 0.0
  disable_bias_linear: true
  position_embedding_type: rope
  rotary_base: 1000000.0
  no_position_embedding: true
  no_masked_softmax_fusion: true
  untie_embeddings_and_output_weights: true
  # moe args
  num_experts: 8
  moe_router_load_balancing_type: aux_loss
  moe_router_topk: 2
  moe_aux_loss_coeff: 0.02
  # moe_grouped_gemm: true

  # seed: 42
  train_iters: 10
  micro_batch_size: 1
  global_batch_size: 128
  clip_grad: 1.0
  eval_interval: 1000
  eval_iters: 10

  optimizer:
    lr: 1e-4
    weight_decay: 0.1
    lr_scheduler:
      min_lr: 1.0e-5
      lr_warmup_iters: 2
      lr_decay_iters: 4
      lr_decay_style: cosine

data:
  data_path: /home/gitlab-runner/data/llama_00_text_document/llama_00_text_document
  split: 1
  tokenizer:
    tokenizer_type: Llama2Tokenizer
    tokenizer_model: /home/gitlab-runner/tokenizers/llama2/tokenizer.model
    vocab_size: 32000
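
With the 8 GPUs exposed by CUDA_VISIBLE_DEVICES, tensor_model_parallel_size: 4 and pipeline_model_parallel_size: 1 leave a data-parallel size of 2, which expert_model_parallel_size: 2 splits so that each expert-parallel rank holds 8 / 2 = 4 of the experts. A quick arithmetic check, assuming Megatron-style parallelism rules (not code from the repository):

# Sanity-check of the parallel layout implied by the config above
# (assumes Megatron-LM conventions; illustrative only).
world_size = 8                # CUDA_VISIBLE_DEVICES lists 8 GPUs
tp, pp, ep = 4, 1, 2          # from the system section above
num_experts, topk = 8, 2      # from the moe args above

dp = world_size // (tp * pp)  # data-parallel size: 8 // 4 = 2
assert dp % ep == 0           # expert parallelism must divide DP
print(f"dp={dp}, experts per EP rank={num_experts // ep}, router top-{topk}")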
1 change: 1 addition & 0 deletions tests/functional_tests/mixtral/test_result/…
@@ -0,0 +1 @@
{"lm loss:": {"values": [11.19837, 11.19397, 10.28315, 13.51352, 13.2433, 12.05933, 11.05991, 9.919383, 9.475966, 9.289787]}}
