Skip to content

Commit

Permalink
added llama models
Browse files Browse the repository at this point in the history
  • Loading branch information
zhenghh04 committed Feb 6, 2025
1 parent d25d730 commit 58c220e
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 4 deletions.
46 changes: 46 additions & 0 deletions dlio_benchmark/configs/workload/llama_1t.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# we mimic the checkpoint data for megatron-deepspeed
# Workload config for a ~1T-parameter LLaMA-style model (file: llama_1t.yaml).
model:
  # NOTE(review): original said llama_405b here — a copy-paste from
  # llama_405b.yaml. Renamed to llama_1t to match this file's identity and
  # to keep data/checkpoint paths distinct from the 405b workload.
  name: llama_1t
  type: transformer
  model_size: 30102
  num_layers: 128
  parallelism:
    tensor: 8
    pipeline: 64
    zero_stage: 1
  transformer:
    vocab_size: 128000
    hidden_size: 25872
    ffn_hidden_size: 98304

framework: pytorch

workflow:
  generate_data: True
  train: True
  checkpoint: True

dataset:
  data_folder: data/llama_1t/
  format: mmap_indexed_binary
  num_files_train: 1
  num_samples_per_file: 1048576
  record_length: 2048

reader:
  data_loader: pytorch
  batch_size: 16
  read_threads: 1
  file_shuffle: seed
  sample_shuffle: seed

train:
  epochs: 3
  # NOTE(review): comment said "2.44 sec per step" but the value is 5 —
  # confirm which is intended before relying on emulated compute time.
  computation_time: 5
  total_training_steps: 5

checkpoint:
  checkpoint_folder: checkpoints/llama_1t
  steps_between_checkpoints: 1
  type: all_ranks
3 changes: 1 addition & 2 deletions dlio_benchmark/configs/workload/llama_405b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@ model:
name: llama_405b
type: transformer
model_size: 30102
num_layers: 2
num_layers: 126
parallelism:
tensor: 8
pipeline: 16
zero_stage: 1
transformer:
vocab_size: 128000
hidden_size: 16384
num_layers: 126
ffn_hidden_size: 53248

framework: pytorch
Expand Down
1 change: 1 addition & 0 deletions dlio_benchmark/configs/workload/llama_70b.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# we mimic the checkpoint data for megatron-deepspeed
model:
name: llama_70b
type: transformer
Expand Down
6 changes: 4 additions & 2 deletions dlio_benchmark/configs/workload/megatron_deepspeed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ model:
optimization_groups: [1009254400, 865075200, 793600]
model_size: 30102
num_layers: 40
pipeline_parallelism: 8
tensor_parallelism: 4
parallelism:
pipeline: 8
tensor: 4
zero_stage: -1
layer_parameters: [52583936, 209715200]

framework: pytorch
Expand Down

0 comments on commit 58c220e

Please sign in to comment.