From 5b33ad300d48cf77e3f743541448842fd6d33f9a Mon Sep 17 00:00:00 2001
From: Tingkai Liu
Date: Sat, 25 Jan 2025 10:01:46 -0500
Subject: [PATCH] use tokenizer.chat_template by default for instruction type
 tasks

---
 bigcode_eval/utils.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/bigcode_eval/utils.py b/bigcode_eval/utils.py
index b3de558a4..b35c23725 100644
--- a/bigcode_eval/utils.py
+++ b/bigcode_eval/utils.py
@@ -71,9 +71,21 @@ def __iter__(self):
                 # Instruction-tuning mode
                 instruction.append(True)
                 infill.append(False)
-                prompt = self._make_instruction_prompt(
-                    **prompt_contents, prefix=self.prefix
-                )
+
+                if self.tokenizer.chat_template is not None:
+                    prompt = (
+                        self.prefix
+                        + self.tokenizer.apply_chat_template(
+                            [{"role": "user", "content": prompt_contents['instruction']}],
+                            add_generation_prompt=True,
+                            tokenize=False
+                        )
+                        + prompt_contents['context']
+                    )
+                else:
+                    prompt = self._make_instruction_prompt(
+                        **prompt_contents, prefix=self.prefix
+                    )
             else:
                 raise ValueError(f"Unsupported prompt format: {type(prompt_contents)}")
             prompts.append(prompt)