From 39a68da85f55de999d7d868da0cc79b6381a24b7 Mon Sep 17 00:00:00 2001
From: Egor Lebedev
Date: Fri, 8 Nov 2024 13:39:18 +0300
Subject: [PATCH 01/12] Add Udmurt (udm) translation literals (#381)

---
 .../templates/utils/translation_literals.py   | 23 +++++++++++++++++++
 src/lighteval/utils/language.py               |  2 ++
 2 files changed, 25 insertions(+)

diff --git a/src/lighteval/tasks/templates/utils/translation_literals.py b/src/lighteval/tasks/templates/utils/translation_literals.py
index 7537c38f..b00957ac 100644
--- a/src/lighteval/tasks/templates/utils/translation_literals.py
+++ b/src/lighteval/tasks/templates/utils/translation_literals.py
@@ -865,6 +865,29 @@ def __getattribute__(self, name: str) -> str:
         colon=":",
     ),
     Language.TURKMEN: TranslationLiterals(language=Language.TURKMEN),
+    Language.UDMURT: TranslationLiterals(
+        language=Language.UDMURT,
+        question_word="юан",
+        answer="валэктон",
+        confirmation_word="озьы-а",
+        yes="бен",
+        no="ӧвӧл",
+        also="озьы ик",
+        cause_word="малы ке шуоно",
+        effect_word="соин ик",
+        true="шонерлык",
+        false="пӧяськон",
+        neither="мукет",
+        or_word="яке",
+        full_stop=".",
+        comma=",",
+        question_mark="?",
+        exclamation_mark="!",
+        word_space=" ",
+        sentence_space=" ",
+        colon=":",
+        indices=["А", "Б", "В", "Г", "Д", "Е"],
+    ),
     Language.UKRAINIAN: TranslationLiterals(
         language=Language.UKRAINIAN,
         question_word="питання",
diff --git a/src/lighteval/utils/language.py b/src/lighteval/utils/language.py
index 1e9707a3..e6e53984 100644
--- a/src/lighteval/utils/language.py
+++ b/src/lighteval/utils/language.py
@@ -121,6 +121,7 @@ class Language(Enum):
     CEBUANO = "ceb"
     WAR = "war"
     SHAN = "shn"
+    UDMURT = "udm"


 # This mapping was created for beleble, it converts iso_639_3 individual codes to iso_639_3 macro codes
@@ -232,6 +233,7 @@ class Language(Enum):
     "ars": Language.ARABIC,
     "bul": Language.BULGARIAN,
     "est": Language.ESTONIAN,
+    "udm": Language.UDMURT,
     # 'hau': Language.HAUSA,
     "ind": Language.INDONESIAN,
     # 'kea': Language.KABUVERDIANU,

From 78d978547b98275348675661c7a59e46bb6f67c3 Mon Sep 17 00:00:00 2001
From: Kryvich <44714498+Kryuski@users.noreply.github.com>
Date: Fri, 8 Nov 2024 15:09:39 +0300
Subject: [PATCH 02/12] This PR adds translation literals for Belarusian
 language. (#382)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
---
 .../templates/utils/translation_literals.py   | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/lighteval/tasks/templates/utils/translation_literals.py b/src/lighteval/tasks/templates/utils/translation_literals.py
index b00957ac..cb77dc16 100644
--- a/src/lighteval/tasks/templates/utils/translation_literals.py
+++ b/src/lighteval/tasks/templates/utils/translation_literals.py
@@ -149,7 +149,29 @@ def __getattribute__(self, name: str) -> str:
         colon=":",
         semicolon=";",
     ),
-    Language.BELARUSIAN: TranslationLiterals(language=Language.BELARUSIAN),
+    Language.BELARUSIAN: TranslationLiterals(
+        language=Language.BELARUSIAN,
+        question_word="пытанне",
+        answer="адказ",
+        confirmation_word="ці не так",
+        yes="так",
+        no="не",
+        also="апроч таго",
+        cause_word="бо",
+        effect_word="таму",
+        true="праўда",
+        false="няпраўда",
+        neither="ні тое, ні тое",
+        or_word="ці",
+        full_stop=".",
+        comma=",",
+        question_mark="?",
+        exclamation_mark="!",
+        word_space=" ",
+        sentence_space=" ",
+        colon=":",
+        indices=["А", "Б", "В", "Г", "Д", "Е"],
+    ),
     Language.BENGALI: TranslationLiterals(language=Language.BENGALI, question_word="প্রশ্ন"),
     Language.BIHARI: TranslationLiterals(language=Language.BIHARI),  # Deprecated
     Language.BOSNIAN: TranslationLiterals(language=Language.BOSNIAN),

From e51be17ec29080c914c1f260e14694086720c923 Mon Sep 17 00:00:00 2001
From: Nazim Ali
Date: Fri, 8 Nov 2024 09:12:25 -0500
Subject: [PATCH 03/12] fix: cache directory variable (#378)

Fixes cache directory bug by using HF_HUB_CACHE instead of HF_HOME

See documentation
https://huggingface.co/docs/huggingface_hub/main/en/package_reference/environment_variables#hfhubcache
---
 src/lighteval/utils/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lighteval/utils/utils.py b/src/lighteval/utils/utils.py
index 1cc51955..2c0ea487 100644
--- a/src/lighteval/utils/utils.py
+++ b/src/lighteval/utils/utils.py
@@ -192,7 +192,7 @@ class EnvConfig:
         token (str): authentication token used for accessing the HuggingFace Hub.
""" - cache_dir: str = os.getenv("HF_HOME", "/scratch") + cache_dir: str = os.getenv("HF_HUB_CACHE", "/scratch") token: str = os.getenv("HF_TOKEN") From 2f03df010b95725867eb9cba25731e8f0e53dc07 Mon Sep 17 00:00:00 2001 From: vsabolcec <60775189+vsabolcec@users.noreply.github.com> Date: Fri, 8 Nov 2024 16:24:49 +0100 Subject: [PATCH 04/12] greedy_until() fix (#344) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com> --- src/lighteval/models/nanotron_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py index 40c8f237..0814acac 100644 --- a/src/lighteval/models/nanotron_model.py +++ b/src/lighteval/models/nanotron_model.py @@ -1133,7 +1133,7 @@ def greedy_until( else: # Longest context in the current split is the first item (since we sort reversed) context_enc = dataset[0][1].tokenized_context - max_gen = max(item[1].generation_size for item in dataset) + max_gen = max(item.generation_size for item in dataset) max_input_length = min(len(context_enc) + max_gen, self.max_length) batch_size = self._get_batch_size( From 853e10be40e93976362ef41e320665c0e20facf5 Mon Sep 17 00:00:00 2001 From: Dmitry Gaynullin <117692111+gaydmi@users.noreply.github.com> Date: Wed, 13 Nov 2024 09:07:50 +0100 Subject: [PATCH 05/12] added tatar literals (#383) --------- Co-authored-by: Dmitry Gaynullin --- .../templates/utils/translation_literals.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/lighteval/tasks/templates/utils/translation_literals.py b/src/lighteval/tasks/templates/utils/translation_literals.py index cb77dc16..ef5194f3 100644 --- a/src/lighteval/tasks/templates/utils/translation_literals.py +++ b/src/lighteval/tasks/templates/utils/translation_literals.py @@ -817,7 +817,29 @@ def __getattribute__(self, name: str) -> str: cause_word="காரணமாக", effect_word="எனவே", ), - Language.TATAR: TranslationLiterals(language=Language.TATAR), + Language.TATAR: TranslationLiterals( + language=Language.TATAR, + question_word="сорау", + answer="җавап", + confirmation_word="шулай түгелме", + yes="әйе", + no="юк", + also="шулай ук", + cause_word="чөнки", + effect_word="шуңа күрә", + or_word="яки", + true="дөрес", + false="ялган", + neither="бер генә дә", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + indices=["А", "Б", "В", "Г", "Д", "Е"], + ), Language.TELUGU: TranslationLiterals( language=Language.TELUGU, question_word="ప్రశ్న", From 9be2a38797b0aef46f265e477b2aa966166f4b87 Mon Sep 17 00:00:00 2001 From: Joel Niklaus Date: Wed, 13 Nov 2024 09:49:44 -0800 Subject: [PATCH 06/12] Fixes a TypeError for generative metrics. 
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
---
 src/lighteval/metrics/sample_preparator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/lighteval/metrics/sample_preparator.py b/src/lighteval/metrics/sample_preparator.py
index ad709033..dc32d95c 100644
--- a/src/lighteval/metrics/sample_preparator.py
+++ b/src/lighteval/metrics/sample_preparator.py
@@ -55,6 +55,7 @@ class PerplexityCorpusMetricInput(CorpusMetricInput):


 class GenerativePreparator:
+    @staticmethod
     def prepare(golds: list[str], predictions: list[str], **kwargs):
         """Prepares an individual generative example to the format expected by metrics computed at the corpus level (aggregated).

From 1faa3b2c63f0d2885f86479b644f2aa1e62ae282 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com>
Date: Fri, 15 Nov 2024 14:27:28 +0100
Subject: [PATCH 07/12] Update README.md

---
 README.md | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 22afd60d..4e03b14c 100644
--- a/README.md
+++ b/README.md
@@ -104,9 +104,17 @@ Harness and HELM teams for their pioneering work on LLM evaluations.
 Got ideas? Found a bug? Want to add a
 [task](https://github.com/huggingface/lighteval/wiki/Adding-a-Custom-Task) or
 [metric](https://github.com/huggingface/lighteval/wiki/Adding-a-New-Metric)?
-Contributions are warmly
-welcomed!
+Contributions are warmly welcomed!
+
+If you're adding a new feature, please open an issue first.
+
+If you open a PR, don't forget to run the styling!
+
+```bash
+pip install -e .[dev]
+pre-commit install
+pre-commit run --all-files
+```

 ## 📜 Citation

 ```bibtex

From 9b43560faa72ee90cf7ac150e1faaf129cdda677 Mon Sep 17 00:00:00 2001
From: Joel Niklaus
Date: Mon, 18 Nov 2024 01:13:08 -0800
Subject: [PATCH 08/12] Fixes a TypeError in Sacrebleu. (#387)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---------

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
---
 .github/ISSUE_TEMPLATE/evaluation-task-request.md |  4 ++--
 .github/ISSUE_TEMPLATE/feature-request.md         |  1 -
 README.md                                         |  2 +-
 examples/model_configs/peft_model.yaml            |  4 ++--
 examples/model_configs/quantized_model.yaml       |  4 ++--
 src/lighteval/metrics/metrics_corpus.py           | 10 +++++++++-
 6 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/evaluation-task-request.md b/.github/ISSUE_TEMPLATE/evaluation-task-request.md
index 38310bc1..4b890858 100644
--- a/.github/ISSUE_TEMPLATE/evaluation-task-request.md
+++ b/.github/ISSUE_TEMPLATE/evaluation-task-request.md
@@ -13,6 +13,6 @@ assignees: ''

 ## Evaluation metadata
 Provide all available
-- Paper url: 
-- Github url: 
+- Paper url:
+- Github url:
 - Dataset url:
diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md
index f8a597fe..801b1047 100644
--- a/.github/ISSUE_TEMPLATE/feature-request.md
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@@ -15,4 +15,3 @@ A clear and concise description of what you want to happen.

 ## Posssible alternatives
 A clear and concise description of any alternative solutions or features you've considered.
-
diff --git a/README.md b/README.md
index 4e03b14c..f554ed17 100644
--- a/README.md
+++ b/README.md
@@ -104,7 +104,7 @@ Harness and HELM teams for their pioneering work on LLM evaluations.
 Got ideas? Found a bug? Want to add a
 [task](https://github.com/huggingface/lighteval/wiki/Adding-a-Custom-Task) or
 [metric](https://github.com/huggingface/lighteval/wiki/Adding-a-New-Metric)?
-Contributions are warmly welcomed! 
+Contributions are warmly welcomed!

 If you're adding a new feature, please open an issue first.
diff --git a/examples/model_configs/peft_model.yaml b/examples/model_configs/peft_model.yaml
index 200542ae..d94ff610 100644
--- a/examples/model_configs/peft_model.yaml
+++ b/examples/model_configs/peft_model.yaml
@@ -1,8 +1,8 @@
 model:
-  type: "base" 
+  type: "base"
   base_params:
     model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied.
-    dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization. 
+    dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
     compile: true
   merged_weights: # Ignore this section if you are not using PEFT models
     delta_weights: false # set to True of your model should be merged with a base model, also need to provide the base model name
diff --git a/examples/model_configs/quantized_model.yaml b/examples/model_configs/quantized_model.yaml
index 5b69de95..dfac1c95 100644
--- a/examples/model_configs/quantized_model.yaml
+++ b/examples/model_configs/quantized_model.yaml
@@ -1,8 +1,8 @@
 model:
-  type: "base" 
+  type: "base"
   base_params:
     model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
-    dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization. 
+    dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
     compile: true
   merged_weights: # Ignore this section if you are not using PEFT models
     delta_weights: false # set to True of your model should be merged with a base model, also need to provide the base model name
diff --git a/src/lighteval/metrics/metrics_corpus.py b/src/lighteval/metrics/metrics_corpus.py
index 59b9ecea..1286ab08 100644
--- a/src/lighteval/metrics/metrics_corpus.py
+++ b/src/lighteval/metrics/metrics_corpus.py
@@ -30,6 +30,7 @@
 import sacrebleu
 import sklearn.metrics

+from lighteval.logging.hierarchical_logger import hlog_warn
 from lighteval.metrics.sample_preparator import (
     GenerativeCorpusMetricInput,
     LogprobCorpusMetricInput,
@@ -103,7 +104,14 @@ def __init__(self, metric_type: str):
     def compute(self, items: list[GenerativeCorpusMetricInput]) -> float:
         """Computes the metric score over all the corpus generated items, by using the sacrebleu implementation."""
         golds = [i.golds for i in items]
-        preds = [as_list(i.preds) for i in items]
+        preds = []
+        for i in items:
+            pred = as_list(i.preds)
+            if len(pred) > 1:
+                hlog_warn(
+                    f"Multiple predictions present, keeping only the first prediction (when computing sacrebleu.{self.metric.__name__})."
+ ) + preds.append(pred[0]) return float(self.metric(hypotheses=preds, references=golds).score) From fd8a68201b41070e78439eeee91ae2de74a4142c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hynek=20Kydl=C3=AD=C4=8Dek?= Date: Mon, 18 Nov 2024 09:42:30 -0400 Subject: [PATCH 09/12] Adds template for translation tasks (#391) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * implement tranlsation prompt * add small coment about tranlsation prompt * change formatting to reformat language dependant parts --------- Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com> --- src/lighteval/tasks/templates/continuation.py | 10 +- src/lighteval/tasks/templates/translation.py | 156 ++++++++++++++++++ tests/tasks/templates/test_translation.py | 120 ++++++++++++++ 3 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 src/lighteval/tasks/templates/translation.py create mode 100644 tests/tasks/templates/test_translation.py diff --git a/src/lighteval/tasks/templates/continuation.py b/src/lighteval/tasks/templates/continuation.py index 84c11230..6435fc8f 100644 --- a/src/lighteval/tasks/templates/continuation.py +++ b/src/lighteval/tasks/templates/continuation.py @@ -86,6 +86,7 @@ def get_continuation_prompt_function( language: Language, adapter: Callable[[dict], ContinuationInput | None] | ContinuationDictAdapter, formulation: Formulation = MCFFormulation(), + fix_formatting: bool = True, ): """ Create a templated prompt function for a Continuation task. @@ -118,6 +119,7 @@ def get_continuation_prompt_function( adapter (Callable[[dict], ContinuationInput] | ContinuationDictAdapter): Either a function that takes a dataset row and returns a ContinuationInput, or a dictionary with keys corresponding to the field names in the dataset row. Note: Both ContinuationDictAdapter and ContinuationInput are TypeDicts, this means that the caller provides dictionary and doesn't initialize any class! formulation (Formulation, optional): The formulation (MCF/Hybrid/CF) to use for the task. Defaults to MCFFormulation(). + fix_formatting (bool, optional): Whether to fix the formatting of the text by capitalizing and fixing punctuation based on language. If False, the text will be used as-is. Defaults to True. Returns: Callable: A function that generates Continuation prompt based on the given parameters. 
""" @@ -132,10 +134,16 @@ def prepare_prompt(line: dict): instruction_val = cont_input.get("instruction") instruction = f"{instruction_val}\n" if instruction_val else "" - context = f"{capitalize(fix_ending_punct(cont_input['context'], translation_literals))}" + context = ( + f"{capitalize(fix_ending_punct(cont_input['context'], translation_literals))}" + if fix_formatting + else cont_input["context"] + ) continuations = [ fix_capitalization(context, fix_ending_punct(continuation, translation_literals), translation_literals) + if fix_formatting + else continuation for continuation in cont_input["continuations"] ] diff --git a/src/lighteval/tasks/templates/translation.py b/src/lighteval/tasks/templates/translation.py new file mode 100644 index 00000000..c90b99e0 --- /dev/null +++ b/src/lighteval/tasks/templates/translation.py @@ -0,0 +1,156 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from typing import Callable + +from langcodes import standardize_tag +from typing_extensions import NotRequired, TypedDict + +from lighteval.tasks.templates.continuation import get_continuation_prompt_function +from lighteval.tasks.templates.multichoice import create_adapter_from_dict +from lighteval.tasks.templates.utils.formatting_utils import capitalize, fix_ending_punct +from lighteval.tasks.templates.utils.formulation import Formulation, MCFFormulation +from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS +from lighteval.utils.language import Language +from lighteval.utils.utils import as_list + + +# Template chosen so that it's not very language-dependent, as it's not clear whether one should use the target or source language. +# It's also the best template based on https://arxiv.org/pdf/2301.07069. + + +TRANSLATION_CONTEXT = "{source_label}{colon}{sentence_space}{source_text}{sentence_space}{target_label}{colon}" + + +# Defined for type hinting only +class TranslationInput(TypedDict): + """ + Input for the Translation task. + Args: + source_text: The source text to be translated + target_text: The target text to be translated + instruction (optional): The instruction of the Translation task (e.g. Translate the following text to Turkish) + """ + + source_text: str + target_text: str | list[str] + gold_idx: NotRequired[int | list[int]] + instruction: NotRequired[str] + + +class TranslationAdapter(TypedDict): + """ + Adapter for mapping from the dataset row into the TranslationInput format. 
+    Args:
+        source_text: Column name in the row that contains the source text to be translated
+        target_text: Column name in the row that contains the target text to be translated
+        instruction (optional): Column name in the row that contains the instruction of the task (e.g. Translate the following text to Turkish)
+    """

+    source_text: str
+    target_text: str
+    gold_idx: NotRequired[int | list[int]]
+    instruction: NotRequired[str]
+
+
+def get_translation_prompt_function(
+    source_language: Language,
+    target_language: Language,
+    adapter: Callable[[dict], TranslationInput | None] | TranslationAdapter,
+    formulation: Formulation = MCFFormulation(),
+):
+    """
+    Create a templated prompt function for a Translation task.
+    Example tasks:
+    - WMT2016
+    - WMT2017
+
+    Format:
+    *CF*
+    EN: How are you? TR: | Nasılsın?
+
+    *Hybrid*
+    EN: How are you? TR:
+     A. Nasılsın?
+     B. Jak se máš?
+    Answer: | Nasılsın?/Jak se máš?
+
+    *MCF*
+    EN: How are you? TR:
+     A. Nasılsın?
+     B. Jak se máš?
+    Answer: | A/B
+
+    Args:
+        adapter (Callable[[dict], TranslationInput] | TranslationAdapter): Either a function that takes a dataset row and returns a TranslationInput, or a dictionary with keys corresponding to the field names in the dataset row.
+            Note: Both TranslationAdapter and TranslationInput are TypeDicts, this means that the caller provides dictionary and doesn't initialize any class!
+        formulation (Formulation, optional): The formulation to use for the task. Defaults to MCFFormulation().
+    Returns:
+        Callable: A function that generates Translation prompts based on the given parameters.
+    """
+    adapter_fn = create_adapter_from_dict(adapter)
+    continuation_prompt_fn = get_continuation_prompt_function(
+        Language.ENGLISH,
+        {"context": "context", "continuations": "continuations", "gold_idx": "gold_idx"},
+        formulation,
+        fix_formatting=False,
+    )
+    source_translation_literals = TRANSLATION_LITERALS[source_language]
+    target_translation_literals = TRANSLATION_LITERALS[target_language]
+
+    source_label_string = standardize_tag(source_language.value).upper()
+    target_label_string = standardize_tag(target_language.value).upper()
+
+    def translation_prompt(
+        line: dict,
+        task_name: str,
+    ):
+        input_data = adapter_fn(line)
+        if input_data is None:
+            return None
+
+        source_text = capitalize(fix_ending_punct(input_data["source_text"], source_translation_literals))
+
+        context = TRANSLATION_CONTEXT.format(
+            source_label=source_label_string,
+            source_text=source_text,
+            target_label=target_label_string,
+            colon=":",
+            sentence_space=" ",
+        )
+
+        continuations = [
+            capitalize(fix_ending_punct(text, target_translation_literals))
+            for text in as_list(input_data["target_text"])
+        ]
+
+        return continuation_prompt_fn(
+            {
+                "instruction": input_data.get("instruction", ""),
+                "context": context,
+                "continuations": continuations,
+                "gold_idx": input_data.get("gold_idx", list(range(len(continuations)))),
+            },
+            task_name,
+        )
+
+    return translation_prompt
diff --git a/tests/tasks/templates/test_translation.py b/tests/tasks/templates/test_translation.py
new file mode 100644
index 00000000..eab59cf1
--- /dev/null
+++ b/tests/tasks/templates/test_translation.py
@@ -0,0 +1,120 @@
+# MIT License
+
+# Copyright (c) 2024 The HuggingFace Team
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+from lighteval.tasks.templates.translation import get_translation_prompt_function
+from lighteval.tasks.templates.utils.formulation import CFFormulation, MCFFormulation
+from lighteval.utils.language import Language
+
+
+def test_translation_prompt_cf():
+    """
+    Tests that translation prompt function works correctly for CF formulation.
+    """
+    test_input = {
+        "source_text": "Ahoj, jak se máš?",
+        "target_text": "Bonjour, comment allez-vous?",
+    }
+
+    prompt_fn = get_translation_prompt_function(
+        source_language=Language.CZECH,
+        target_language=Language.FRENCH,
+        adapter=lambda x: {
+            "source_text": x["source_text"],
+            "target_text": x["target_text"],
+        },
+        formulation=CFFormulation(),
+    )
+
+    doc = prompt_fn(test_input, "test_task")
+    assert doc is not None
+
+    assert doc.query == "CS: Ahoj, jak se máš? FR:"
+    assert doc.unconditioned_query == ""
+    assert doc.choices == [" Bonjour, comment allez-vous?"]
+    assert doc.gold_index == [0]
+
+
+def test_translation_prompt_mcf():
+    """
+    Tests that translation prompt function works correctly for MCF formulation.
+    """
+    test_input = {
+        "source_text": "Ahoj, jak se máš?",
+        "target_text": ["Bonjour, comment allez-vous?", "Ciao, come stai?"],
+    }
+
+    prompt_fn = get_translation_prompt_function(
+        source_language=Language.CZECH,
+        target_language=Language.FRENCH,
+        adapter=lambda x: {
+            "source_text": x["source_text"],
+            "target_text": x["target_text"],
+            "gold_idx": 0,
+        },
+        formulation=MCFFormulation(),
+    )
+
+    doc = prompt_fn(test_input, "test_task")
+    assert doc is not None
+
+    assert (
+        doc.query
+        == """\
+CS: Ahoj, jak se máš? FR:
+ A. Bonjour, comment allez-vous?
+ B. Ciao, come stai?
+Answer:\
+"""
+    )
+    assert doc.unconditioned_query == "Answer:"
+    assert doc.choices == [" A", " B"]
+    assert doc.gold_index == [0]
+
+
+def test_translation_prompt_cf_formatting():
+    """
+    Tests that translation prompt function works correctly for CF formulation with formatting.
+    """
+    test_input = {
+        "source_text": "How are you?",
+        "target_text": ["你好吗？"],
+    }
+
+    prompt_fn = get_translation_prompt_function(
+        source_language=Language.ENGLISH,
+        target_language=Language.CHINESE,
+        adapter=lambda x: {
+            "source_text": x["source_text"],
+            "target_text": x["target_text"],
+            "gold_idx": 0,
+        },
+        formulation=CFFormulation(),
+    )
+
+    doc = prompt_fn(test_input, "test_task")
+    assert doc is not None
+
+    assert doc.query == "EN: How are you? ZH:"
+    assert doc.unconditioned_query == ""
+    assert doc.choices == [" 你好吗？"]
+    assert doc.gold_index == [0]

From db609f791fc790e940d247534c7632bcc58771f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com>
Date: Tue, 19 Nov 2024 08:59:11 +0100
Subject: [PATCH 10/12] Use the programmatic interface using an already in
 memory loaded model (#390)

---------

Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
---
 src/lighteval/logging/hierarchical_logger.py |   3 +
 src/lighteval/models/adapter_model.py        |   9 +-
 src/lighteval/models/base_model.py           | 109 +++++++++++++++++--
 src/lighteval/pipeline.py                    |  12 +-
 4 files changed, 121 insertions(+), 12 deletions(-)

diff --git a/src/lighteval/logging/hierarchical_logger.py b/src/lighteval/logging/hierarchical_logger.py
index 99287f75..1c4c3a11 100644
--- a/src/lighteval/logging/hierarchical_logger.py
+++ b/src/lighteval/logging/hierarchical_logger.py
@@ -34,8 +34,11 @@
     logger = get_logger(__name__, log_level="INFO")
 elif is_accelerate_available():
+    from accelerate import Accelerator, InitProcessGroupKwargs
     from accelerate.logging import get_logger

+    # We must init the accelerator before using the logger
+    accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
     logger = get_logger(__name__, log_level="INFO")
 else:
     logger = Logger(__name__, level="INFO")
diff --git a/src/lighteval/models/adapter_model.py b/src/lighteval/models/adapter_model.py
index 18fd6890..dbf762d7 100644
--- a/src/lighteval/models/adapter_model.py
+++ b/src/lighteval/models/adapter_model.py
@@ -41,7 +41,14 @@ class AdapterModel(BaseModel):
     def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer:
         # By default, we look at the model config for the model stored in `base_model`
         # (= the parent model, not the model of interest)
-        return self._create_auto_tokenizer_with_name(config.base_model, config=config, env_config=env_config)
+        return self._create_auto_tokenizer_with_name(
+            model_name=config.base_model,
+            revision=config.revision,
+            env_config=env_config,
+            tokenizer_name=config.tokenizer,
+            subfolder=config.subfolder,
+            trust_remote_code=config.trust_remote_code,
+        )

     def _create_auto_model(self, config: AdapterModelConfig, env_config: EnvConfig) -> AutoModelForCausalLM:
         """Returns a PeftModel from a base model and a version fined tuned using PEFT."""
diff --git a/src/lighteval/models/base_model.py b/src/lighteval/models/base_model.py
index debec448..993978d5 100644
--- a/src/lighteval/models/base_model.py
+++ b/src/lighteval/models/base_model.py
@@ -30,6 +30,7 @@
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES

 from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset, LoglikelihoodSingleTokenDataset
 from lighteval.logging.hierarchical_logger import hlog, hlog_err, hlog_warn
@@ -57,6 +58,7 @@

 if is_accelerate_available():
+    from accelerate import Accelerator
     from accelerate.utils import calculate_maximum_sizes, convert_bytes, get_max_memory

 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -67,8 +69,8 @@ class BaseModel(LightevalModel):
     def __init__(
         self,
-        config: BaseModelConfig,
         env_config: EnvConfig,
+        config: BaseModelConfig,
     ):
         """Initializes a HuggingFace `AutoModel` and `AutoTokenizer` for evaluation."""
         self._config = config.init_configs(env_config)
@@ -114,6 +116,72 @@ def __init__(

         self.pairwise_tokenization = config.pairwise_tokenization

+    @classmethod
+    def from_model(
+        cls,
+        model: Union[AutoModelForCausalLM, LightevalModel],
+        env_config: EnvConfig,
+        accelerator: "Accelerator" = None,
+        tokenizer_name: str = None,  # custom tokenizer
+        trust_remote_code: bool = False,
+        use_chat_template: bool = False,
+        add_special_tokens: bool = True,
+        pairwise_tokenization: bool = False,
+        multichoice_continuations_start_space: bool = None,
+    ):
+        # Slightly hackish way to test if the model is a AutoModelForCausalLM, since the instances don't
+        # derive from this class explicitely
+        assert isinstance(model, LightevalModel) or type(model).__name__ in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.values()
+
+        if isinstance(model, LightevalModel):
+            return model
+
+        # Instanciate the object without using __init__
+        self = cls.__new__(cls)
+        self._config = model.config
+        self._max_length = self._init_max_length(max_length=model.config.max_length)
+        self._tokenizer = self._create_auto_tokenizer_with_name(
+            model_name=model.name_or_path,
+            revision=model.config._commit_hash,
+            env_config=env_config,
+            trust_remote_code=trust_remote_code,
+            tokenizer_name=tokenizer_name,
+        )
+        self.model_name = _simplify_name(model.name_or_path)
+        self.model_sha = model.config._commit_hash
+
+        # If model_parallel is not set we compare the number of processes with the number of GPUs
+        self.model = model
+        self.model.eval()
+        torch.set_grad_enabled(False)
+
+        self.accelerator = accelerator
+        if accelerator is not None:
+            self._device = accelerator.device
+            self.model = self.accelerator.prepare(self.model.to(accelerator.device))
+        else:
+            self._device = "cpu"
+
+        self.use_chat_template = use_chat_template
+        self._add_special_tokens = add_special_tokens if add_special_tokens is not None else False
+        self.pairwise_tokenization = pairwise_tokenization
+        self.multichoice_continuations_start_space = multichoice_continuations_start_space
+
+        self.precision = _get_dtype(model.dtype, config=self._config)
+
+        if is_accelerate_available():
+            model_size, _ = calculate_maximum_sizes(self.model)
+            model_size = convert_bytes(model_size)
+        else:
+            model_size = -1
+        self.model_info = ModelInfo(
+            model_name=self.model_name,
+            model_sha=self.model_sha,
+            model_dtype=self.precision,
+            model_size=model_size,
+        )
+        return self
+
     @property
     def tokenizer(self):
         return self._tokenizer
@@ -207,10 +275,23 @@ def _create_auto_model(self, config: BaseModelConfig, env_config: EnvConfig) ->
     def _create_auto_tokenizer(
         self, config: BaseModelConfig, env_config: EnvConfig
     ) -> transformers.PreTrainedTokenizer:
-        return self._create_auto_tokenizer_with_name(config.pretrained, config=config, env_config=env_config)
+        return self._create_auto_tokenizer_with_name(
+            model_name=config.pretrained,
+            revision=config.revision,
+            env_config=env_config,
+            tokenizer_name=config.tokenizer,
+            subfolder=config.subfolder,
+            trust_remote_code=config.trust_remote_code,
+        )

     def _create_auto_tokenizer_with_name(
-        self, model_name: str, config: BaseModelConfig, env_config: EnvConfig
+        self,
+        model_name: str,
+        revision: str,
+        env_config: EnvConfig,
+        tokenizer_name: str = None,
+        subfolder: str = None,
+        trust_remote_code: bool = False,
     ) -> transformers.PreTrainedTokenizer:
         """
         Create a Hugging Face AutoTokenizer for language model.
@@ -231,25 +312,35 @@
         """
         try:
             tokenizer = AutoTokenizer.from_pretrained(
-                model_name if config.tokenizer is None else config.tokenizer,
-                revision=config.revision + (f"/{config.subfolder}" if config.subfolder is not None else ""),
+                model_name if tokenizer_name is None else tokenizer_name,
+                revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
                 cache_dir=env_config.cache_dir,
                 token=env_config.token,
-                trust_remote_code=config.trust_remote_code,
+                trust_remote_code=trust_remote_code,
                 padding_side="left",
                 truncation_side="left",
             )
         except RecursionError:
             tokenizer = AutoTokenizer.from_pretrained(
-                model_name if config.tokenizer is None else config.tokenizer,
-                revision=config.revision + (f"/{config.subfolder}" if config.subfolder is not None else ""),
+                model_name if tokenizer_name is None else tokenizer_name,
+                revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
                 cache_dir=env_config.cache_dir,
                 token=env_config.token,
-                trust_remote_code=config.trust_remote_code,
+                trust_remote_code=trust_remote_code,
                 unk_token="",
                 padding_side="left",
                 truncation_side="left",
             )
+        except FileNotFoundError:
+            hlog_warn("Problem when loading the tokenizer in the cache - discarding the provided cache path value.")
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_name if tokenizer_name is None else tokenizer_name,
+                revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
+                token=env_config.token,
+                trust_remote_code=trust_remote_code,
+                padding_side="left",
+                truncation_side="left",
+            )
         tokenizer.pad_token = tokenizer.eos_token
         tokenizer.model_max_length = self.max_length
         hlog("Tokenizer truncation and padding size set to the left side.")
diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py
index db0ede47..da4fb045 100644
--- a/src/lighteval/pipeline.py
+++ b/src/lighteval/pipeline.py
@@ -34,7 +34,7 @@
 from lighteval.logging.evaluation_tracker import EvaluationTracker
 from lighteval.logging.hierarchical_logger import hlog, htrack_block
 from lighteval.metrics.utils.metric_utils import MetricCategory
-from lighteval.models.model_loader import load_model
+from lighteval.models.model_loader import BaseModel, load_model
 from lighteval.models.model_output import ModelResponse
 from lighteval.tasks.lighteval_task import LightevalTask, create_requests_from_tasks
 from lighteval.tasks.registry import Registry, taskinfo_selector
@@ -164,7 +164,15 @@ def _init_model(self, model_config, model):
             )
         else:
             return load_model(config=model_config, env_config=self.pipeline_parameters.env_config)
-        return model
+        if isinstance(model, BaseModel):
+            return model
+        else:
+            return BaseModel.from_model(
+                model=model,
+                use_chat_template=self.pipeline_parameters.use_chat_template,
+                env_config=self.pipeline_parameters.env_config,
+                accelerator=self.accelerator,
+            )

     def _init_tasks_and_requests(self, tasks: str):
         with htrack_block("Tasks loading"):

From c173871b4eefddc4bb02574305db1759ffaaeb55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hynek=20Kydl=C3=AD=C4=8Dek?=
Date: Tue, 19 Nov 2024 08:30:03 -0400
Subject: [PATCH 11/12] fix ukr/rus (#394)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
---
 src/lighteval/tasks/templates/utils/translation_literals.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lighteval/tasks/templates/utils/translation_literals.py b/src/lighteval/tasks/templates/utils/translation_literals.py
index ef5194f3..186d6448 100644
--- a/src/lighteval/tasks/templates/utils/translation_literals.py
+++ b/src/lighteval/tasks/templates/utils/translation_literals.py
@@ -631,7 +631,7 @@ def __getattribute__(self, name: str) -> str:
         language=Language.RUSSIAN,
         question_word="вопрос",
         answer="ответ",
-        confirmation_word="не так ли",
+        confirmation_word="верно",
         yes="да",
         no="нет",
         also="к тому же",
@@ -936,7 +936,7 @@ def __getattribute__(self, name: str) -> str:
         language=Language.UKRAINIAN,
         question_word="питання",
         answer="відповідь",
-        confirmation_word="правда",
+        confirmation_word="вірно",
         yes="так",
         no="ні",
         also="також",

From 85c0d9f0fb5106d834666fce6867e28d02a9b4e6 Mon Sep 17 00:00:00 2001
From: Anton Lozhkov
Date: Wed, 20 Nov 2024 11:08:18 +0100
Subject: [PATCH 12/12] fix repeated cleanup (#399)

Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
---
 src/lighteval/models/vllm_model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/lighteval/models/vllm_model.py b/src/lighteval/models/vllm_model.py
index 22051e8f..dc242c60 100644
--- a/src/lighteval/models/vllm_model.py
+++ b/src/lighteval/models/vllm_model.py
@@ -98,7 +98,8 @@ def tokenizer(self):

     def cleanup(self):
         destroy_model_parallel()
-        del self.model.llm_engine.model_executor.driver_worker
+        if self.model is not None:
+            del self.model.llm_engine.model_executor.driver_worker
         self.model = None
         gc.collect()
         ray.shutdown()