Fixes a TypeError in Sacrebleu. (#387)
---------

Co-authored-by: Clémentine Fourrier <[email protected]>
JoelNiklaus and clefourrier authored Nov 18, 2024
1 parent 1faa3b2 commit 9b43560
Showing 6 changed files with 16 additions and 9 deletions.
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE/evaluation-task-request.md
@@ -13,6 +13,6 @@ assignees: ''
 
 ## Evaluation metadata
 Provide all available
-- Paper url:
-- Github url:
+- Paper url:
+- Github url:
 - Dataset url:
1 change: 0 additions & 1 deletion .github/ISSUE_TEMPLATE/feature-request.md
@@ -15,4 +15,3 @@ A clear and concise description of what you want to happen.
 
 ## Posssible alternatives
 A clear and concise description of any alternative solutions or features you've considered.
-
2 changes: 1 addition & 1 deletion README.md
@@ -104,7 +104,7 @@ Harness and HELM teams for their pioneering work on LLM evaluations.
 Got ideas? Found a bug? Want to add a
 [task](https://github.com/huggingface/lighteval/wiki/Adding-a-Custom-Task) or
 [metric](https://github.com/huggingface/lighteval/wiki/Adding-a-New-Metric)?
-Contributions are warmly welcomed!
+Contributions are warmly welcomed!
 
 If you're adding a new feature, please open an issue first.
4 changes: 2 additions & 2 deletions examples/model_configs/peft_model.yaml
@@ -1,8 +1,8 @@
 model:
-  type: "base"
+  type: "base"
   base_params:
     model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied.
-    dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
+    dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
     compile: true
   merged_weights: # Ignore this section if you are not using PEFT models
     delta_weights: false # set to True of your model should be merged with a base model, also need to provide the base model name
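
For readers unfamiliar with the PEFT setup the model_args comment describes, a minimal sketch of the idea using the standard peft API; the base model name below is hypothetical, since the base-model section of this config is not shown in the truncated diff:

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Hypothetical base model; the actual base model is set further down in this
# yaml file (hidden by the truncated diff).
base = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", torch_dtype=torch.float16)
# Apply the adapters trained with PEFT (the `pretrained` model from model_args above).
model = PeftModel.from_pretrained(base, "predibase/customer_support")
# Merging folds the adapter weights into the base model's weights.
model = model.merge_and_unload()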
4 changes: 2 additions & 2 deletions examples/model_configs/quantized_model.yaml
@@ -1,8 +1,8 @@
 model:
-  type: "base"
+  type: "base"
   base_params:
     model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
-    dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
+    dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
     compile: true
   merged_weights: # Ignore this section if you are not using PEFT models
     delta_weights: false # set to True of your model should be merged with a base model, also need to provide the base model name
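
As the dtype comment above notes, the "4bit" option relies on BitsAndBytesConfig; a rough sketch of the equivalent transformers call, under the assumption that lighteval simply enables 4-bit loading (the exact arguments it passes are not shown in this diff):

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Assumption: "4bit" maps to load_in_4bit=True ("8bit" would map to load_in_8bit=True).
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta",
    quantization_config=quantization_config,
)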
10 changes: 9 additions & 1 deletion src/lighteval/metrics/metrics_corpus.py
@@ -30,6 +30,7 @@
 import sacrebleu
 import sklearn.metrics
 
+from lighteval.logging.hierarchical_logger import hlog_warn
 from lighteval.metrics.sample_preparator import (
     GenerativeCorpusMetricInput,
     LogprobCorpusMetricInput,
@@ -103,7 +104,14 @@ def __init__(self, metric_type: str):
     def compute(self, items: list[GenerativeCorpusMetricInput]) -> float:
         """Computes the metric score over all the corpus generated items, by using the sacrebleu implementation."""
         golds = [i.golds for i in items]
-        preds = [as_list(i.preds) for i in items]
+        preds = []
+        for i in items:
+            pred = as_list(i.preds)
+            if len(pred) > 1:
+                hlog_warn(
+                    f"Multiple predictions present, keeping only the first prediction (when computing sacrebleu.{self.metric.__name__})."
+                )
+            preds.append(pred[0])
         return float(self.metric(hypotheses=preds, references=golds).score)


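For context on the fix above, a minimal sketch of the failure mode, assuming sacrebleu's corpus-level API and using made-up sentences; each hypothesis passed to sacrebleu must be a single string, so nested per-item prediction lists trigger the error this commit reports:

import sacrebleu

golds = [["The cat sits on the mat."]]  # illustrative references
preds = [["The cat sits on the mat.", "A cat is on the mat."]]  # two predictions for one item

try:
    # Before the fix, per-item prediction lists were passed through unchanged.
    sacrebleu.corpus_bleu(hypotheses=preds, references=golds)
except TypeError as err:  # the commit title reports a TypeError for this case
    print(f"sacrebleu rejected the nested predictions: {err}")

# The fix keeps only the first prediction per item (warning when more than one is present).
first_preds = [p[0] for p in preds]
print(sacrebleu.corpus_bleu(hypotheses=first_preds, references=golds).score)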