From 5b33ad300d48cf77e3f743541448842fd6d33f9a Mon Sep 17 00:00:00 2001
From: Tingkai Liu
Date: Sat, 25 Jan 2025 10:01:46 -0500
Subject: [PATCH] use tokenizer.chat_template by default for instruction type
 tasks

---
 bigcode_eval/utils.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/bigcode_eval/utils.py b/bigcode_eval/utils.py
index b3de558a4..b35c23725 100644
--- a/bigcode_eval/utils.py
+++ b/bigcode_eval/utils.py
@@ -71,9 +71,21 @@ def __iter__(self):
                 # Instruction-tuning mode
                 instruction.append(True)
                 infill.append(False)
-                prompt = self._make_instruction_prompt(
-                    **prompt_contents, prefix=self.prefix
-                )
+
+                if self.tokenizer.chat_template is not None:
+                    prompt = (
+                        self.prefix
+                        + self.tokenizer.apply_chat_template(
+                            [{"role": "user", "content": prompt_contents['instruction']}],
+                            add_generation_prompt=True,
+                            tokenize=False
+                        )
+                        + prompt_contents['context']
+                    )
+                else:
+                    prompt = self._make_instruction_prompt(
+                        **prompt_contents, prefix=self.prefix
+                    )
             else:
                 raise ValueError(f"Unsupported prompt format: {type(prompt_contents)}")
             prompts.append(prompt)