-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrain.sh
executable file
·59 lines (58 loc) · 1.76 KB
/
train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env bash
# Train a BART-base seq2seq model (Kurdish spell correction) via the
# Hugging Face `run_summarization.py` example script.
#
# Expects in the working directory:
#   run_summarization.py  - HF transformers seq2seq example script
#   ./tokenizer           - custom tokenizer directory
#   train.csv / test.csv  - training and validation data
#
# Fail fast: abort on any command error, unset variable, or pipeline failure,
# so a typo'd flag or missing file doesn't silently produce a broken run.
set -euo pipefail

# Train BART
# NOTE(review): --evaluation_strategy was renamed --eval_strategy in newer
# transformers releases — confirm against the pinned version before upgrading.
python run_summarization.py \
    --model_name_or_path "facebook/bart-base" \
    --config_name "facebook/bart-base" \
    --tokenizer_name ./tokenizer \
    --do_train \
    --do_eval \
    --evaluation_strategy="epoch" \
    --group_by_length \
    --num_train_epochs=10 \
    --train_file train.csv \
    --validation_file test.csv \
    --preprocessing_num_workers="20" \
    --output_dir ./bart-kurd-spell-base/ \
    --overwrite_output_dir \
    --per_device_train_batch_size=320 \
    --per_device_eval_batch_size=256 \
    --gradient_accumulation_steps=1 \
    --predict_with_generate \
    --logging_steps="100" \
    --save_total_limit="1" \
    --save_strategy="epoch" \
    --report_to="wandb" \
    --run_name="Bart Spell" \
    --max_target_length=1024 \
    --max_source_length=1024 \
    --fp16 \
    --save_safetensors \
    --push_to_hub
# --- Alternative run (disabled): T5 variant of the same experiment. ---
# Kept for reference; uncomment this command (and comment out the BART run
# above) to train flan-t5-small instead. Differences vs. the BART run:
# a "correct: " source prefix (T5-style task prefix), fewer epochs (5),
# smaller batch sizes (64), no wandb reporting, and fp16 left disabled
# (see the last line — presumably it was unstable for T5; confirm).
# Train T5
# python3 run_summarization.py \
#     --source_prefix "correct: " \
#     --model_name_or_path "google/flan-t5-small" \
#     --config_name "google/flan-t5-small" \
#     --tokenizer_name ./tokenizer \
#     --do_train \
#     --do_eval \
#     --evaluation_strategy="epoch" \
#     --group_by_length \
#     --num_train_epochs=5 \
#     --train_file train.csv \
#     --validation_file test.csv \
#     --preprocessing_num_workers="12" \
#     --output_dir ./t5-kurd-spell-base/ \
#     --overwrite_output_dir \
#     --per_device_train_batch_size=64 \
#     --per_device_eval_batch_size=64 \
#     --gradient_accumulation_steps=1 \
#     --predict_with_generate \
#     --logging_steps="100" \
#     --save_total_limit="1" \
#     --save_strategy="epoch" \
#     --report_to="none" \
#     --run_name="T5 Spell" \
#     --max_target_length=1024 \
#     --max_source_length=1024 \
#     --push_to_hub
# # --fp16 \