diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 85058e098..b3e28f686 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -290,8 +290,10 @@ def train( ) if tokenizer.eos_token != configs.DEFAULT_PAD_TOKEN: tokenizer.pad_token = configs.DEFAULT_PAD_TOKEN + special_tokens_dict["pad_token"] = configs.DEFAULT_PAD_TOKEN else: tokenizer.eos_token = configs.DEFAULT_EOS_TOKEN + special_tokens_dict["eos_token"] = configs.DEFAULT_EOS_TOKEN # TODO: lower priority but understand if resizing impacts inference quality and why its needed. # It makes sense if we manipulate tokenizer that we also save it and provide it to inference.