-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
30 lines (28 loc) · 1.17 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Run environment: random seed, directory layout, and input corpus settings.
env:
  rand_seed: 20231002  # plain integer seed
  # NOTE(review): absolute, machine-specific path; adjust before running
  # on another host.
  work_dir: '/home/shius/projects/plantbert/1_vanilla_bert'
  # presumably sub-directories of work_dir; confirm against the loader
  model_dir_name: 'models'
  data_dir_name: 'data'
  data_file: 'corpus_with_topics.tsv.gz'  # gzip-compressed TSV
  txt_column: 'Corpus'  # presumably the text column in data_file; confirm
  test_size: 0.1  # presumably the held-out fraction of the data; confirm
# Model/tokenizer size settings grouped under `bert_config`.
bert_config:
  # Written without a digit separator: `30_522` is an integer only under
  # YAML 1.1 resolvers (e.g. PyYAML); YAML 1.2 core-schema loaders read it
  # as the string "30_522".
  vocab_size: 30522
  max_length: 512  # presumably the max sequence length in tokens; confirm
# Training-loop settings. Key names are project-specific; how each one maps
# onto the trainer's own arguments is decided by the consuming script.
training_arguments:
  resume_from_ckpt: true  # resume training from checkpoint
  mlm: true  # train with masked language modeling
  mlm_prob: 0.2  # probability of masking each token
  eval_strategy: 'steps'  # evaluate each `logging_steps` steps
  overwrite_out: true  # continue to train if output_dir is a ckpt dir
  num_epochs: 40  # num of training epochs
  train_batch_size: 20  # per device training batch size
  grad_acc_steps: 8  # accumulate gradients before weights update
  eval_batch_size: 64  # per device evaluation batch size
  logging_strategy: 'steps'  # evaluate, log every X steps
  log_steps: 500
  save_strategy: 'steps'  # save checkpoint every X steps
  save_steps: 500
  safetensors: true  # save SafeTensors instead of Tensors
  load_best_at_end: true
  save_total_limit: 5