diff --git a/hardwares/iluvatar/README.md b/hardwares/iluvatar/README.md
new file mode 100644
index 000000000..62c75c001
--- /dev/null
+++ b/hardwares/iluvatar/README.md
@@ -0,0 +1 @@
+commit-id d7dc60ec3ef6341526fd187281dc289418c17899
diff --git a/hardwares/iluvatar/patches/__init__.py b/hardwares/iluvatar/patches/__init__.py
new file mode 100644
index 000000000..87dad1a2f
--- /dev/null
+++ b/hardwares/iluvatar/patches/__init__.py
@@ -0,0 +1,5 @@
+#! /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+
+from .core_models_gpt_gpt_model import print_device_type
diff --git a/hardwares/iluvatar/patches/core_models_gpt_gpt_model.py b/hardwares/iluvatar/patches/core_models_gpt_gpt_model.py
new file mode 100644
index 000000000..533475aca
--- /dev/null
+++ b/hardwares/iluvatar/patches/core_models_gpt_gpt_model.py
@@ -0,0 +1,18 @@
+import megatron
+from megatron import print_rank_0
+from flagscale.patches_utils import add_patches_module
+
+# [iluvatar] start of changes
+def print_device_type():
+    device_type = "iluvatar"
+    if device_type:
+        print_rank_0("=== Monkey-patching Device Type: {} ===".format(device_type))
+    else:
+        print_rank_0("=== Monkey-patching Device Type: None ===")
+
+# [iluvatar] end of changes
+
+# This is used for monkey-patching demonstration.
+module_path = "megatron.core.models.gpt.gpt_model"
+module_dict = {"print_device_type": print_device_type}
+add_patches_module(module_path, module_dict)
diff --git a/hardwares/iluvatar/scripts/config.yaml b/hardwares/iluvatar/scripts/config.yaml
new file mode 100644
index 000000000..efb6e3174
--- /dev/null
+++ b/hardwares/iluvatar/scripts/config.yaml
@@ -0,0 +1,33 @@
+defaults:
+  - train: train_aquila_7b
+  - _self_
+
+experiment:
+  exp_name: aquila2
+  exp_dir: ./outputs
+  task:
+    type: train
+    backend: megatron
+    entrypoint: /home/FlagScale/flagscale/train/train_aquila.py
+  runner:
+    backend: torchrun
+    nnodes: 1
+    nproc_per_node: 8
+  envs:
+    PYTORCH_SKIP_COMPILE_CHECK: 1
+    MACA_PATH: /opt/maca
+    MACA_SMALL_PAGESIZE_ENABLE: 1
+    PYTORCH_ENABLE_SAME_RANK_A100: 1
+    CUCC_PATH: /opt/maca/tools/cu-bridge
+    CUDA_PATH: /opt/maca/tools/cu-bridge
+    SET_DEVICE_NUMA_PREFERRED: 1
+    MHA_USE_BLAS: ON
+    MHA_BWD_NO_ATOMIC_F64: 1
+    CUDA_DEVICE_MAX_CONNECTIONS: 1
+    CUDA_VISIBLE_DEVICES: 0,1,2,3,4,5,6,7
+
+action: run
+
+hydra:
+  run:
+    dir: ${experiment.exp_dir}/hydra
diff --git a/hardwares/iluvatar/scripts/train/train_aquila_7b.yaml b/hardwares/iluvatar/scripts/train/train_aquila_7b.yaml
new file mode 100644
index 000000000..643632c0b
--- /dev/null
+++ b/hardwares/iluvatar/scripts/train/train_aquila_7b.yaml
@@ -0,0 +1,66 @@
+system:
+  tensor_model_parallel_size: 4
+  pipeline_model_parallel_size: 1
+  disable_bias_linear: True
+  use_flash_attn: True
+  use_distributed_optimizer: True
+  device_type: iluvatar
+  precision:
+    fp16: True
+    initial_loss_scale: 522893
+    min_loss_scale: 1.0
+    attention_softmax_in_fp32: True
+    accumulate_allreduce_grads_in_fp32: True
+  logging:
+    log_interval: 1
+    tensorboard_log_interval: 1
+    wandb_project: "train-aquila-7B"
+    wandb_exp_name: "train-test-7B"
+  checkpoint:
+    save_interval: 2000
+
+model:
+  use_mcore_models: true
+  num_layers: 32
+  hidden_size: 4096
+  num_attention_heads: 32
+  seq_length: 2048
+  max_position_embeddings: 2048
+  norm_epsilon: 1e-5
+  use_rotary_position_embeddings: true
+  no_position_embedding: true
+  swiglu: true
+  multiple_of: 256
+  normalization: RMSNorm
+  rotary_interleaved_patch: true
+  untie_embeddings_and_output_weights: true
+  init_method_std: 0.02
+  attention_dropout: 0.0
+  hidden_dropout: 0.0
+  weight_decay: 0.1
+  clip_grad: 1.0
+  train_samples: 100000
+  eval_iters: 0
+  micro_batch_size: 1
+  global_batch_size: 128
+  seed: 1234
+
+  optimizer:
+    weight_decay: 0.1
+    adam_beta1: 0.9
+    adam_beta2: 0.95
+    lr_scheduler:
+      lr: 2.0e-5
+      min_lr: 2.0e-6
+      lr_warmup_samples: 500
+      lr_decay_style: cosine
+
+data:
+  data_path: ${data_path:??}
+  split: 1
+  tokenizer:
+    tokenizer_type: AquilaTokenizer
+    vocab_file: ./examples/aquila/tokenizer/vocab.json
+    merge_file: ./examples/aquila/tokenizer/merges.txt
+    special_tokens_file: ./examples/aquila/tokenizer/special_tokens.txt
+    vocab_size: 100008
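
For context, the patch module above registers print_device_type onto megatron.core.models.gpt.gpt_model through flagscale.patches_utils.add_patches_module. A minimal sketch of what such a registration helper could do is shown below; the real helper may differ, and add_patches_module_sketch plus the json toy target are illustrative assumptions only.

#!/usr/bin/env python3
# Minimal sketch of a patch-registration helper in the spirit of
# flagscale.patches_utils.add_patches_module (assumed behavior, not the
# actual FlagScale implementation).
import importlib


def add_patches_module_sketch(module_path, module_dict):
    """Import the target module and attach each name/object pair to it."""
    module = importlib.import_module(module_path)
    for name, obj in module_dict.items():
        setattr(module, name, obj)


if __name__ == "__main__":
    # Toy usage against a standard-library module so the sketch runs without
    # Megatron installed.
    def print_device_type():
        print("=== Monkey-patching Device Type: iluvatar ===")

    add_patches_module_sketch("json", {"print_device_type": print_device_type})

    import json
    json.print_device_type()  # prints the monkey-patched message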
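
The training YAML can also be sanity-checked by loading it directly with OmegaConf, the library Hydra builds on. This is only a quick inspection sketch, assuming omegaconf is installed and the working directory is the repository root.

# Load the new train config and read back a few of the values set above.
from omegaconf import OmegaConf

cfg = OmegaConf.load("hardwares/iluvatar/scripts/train/train_aquila_7b.yaml")
print(cfg.system.tensor_model_parallel_size)  # 4
print(cfg.system.device_type)                 # iluvatar
print(cfg.model.num_layers)                   # 32
print(cfg.data.tokenizer.tokenizer_type)      # AquilaTokenizer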