From 3fe122ff54a6c6bb1ba6ba65c12c291fe0432b3a Mon Sep 17 00:00:00 2001
From: hellen <144710795+hellen9527@users.noreply.github.com>
Date: Sat, 15 Feb 2025 23:09:40 +0800
Subject: [PATCH] fix bug, solutions not found

---
 src/open_r1/rewards.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/open_r1/rewards.py b/src/open_r1/rewards.py
index 27962784..5411caac 100644
--- a/src/open_r1/rewards.py
+++ b/src/open_r1/rewards.py
@@ -75,14 +75,14 @@ def reasoning_steps_reward(completions, **kwargs):
     return [min(1.0, count / 3) for count in matches]
 
 
-def len_reward(completions: list[Dict[str, str]], solutions: list[str], **kwargs) -> float:
+def len_reward(completions: list[Dict[str, str]], solution: list[str], **kwargs) -> float:
     """Compute length-based rewards to discourage overthinking and promote token efficiency.
 
     Taken from the Kimi 1.5 tech report: https://arxiv.org/abs/2501.12599
 
     Args:
         completions: List of model completions
-        solutions: List of ground truth solutions
+        solution: List of ground truth solutions
 
     Returns:
         List of rewards where:
@@ -93,7 +93,7 @@ def len_reward(completions: list[Dict[str, str]], solution: list[str], **kwargs
 
     # First check correctness of answers
     correctness = []
-    for content, sol in zip(contents, solutions):
+    for content, sol in zip(contents, solution):
         gold_parsed = parse(
             sol,
             extraction_mode="first_match",
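
Why the rename matters, as a minimal sketch (the stand-in functions below are
hypothetical, not the project's actual trainer code): reward functions here
receive extra dataset columns forwarded as keyword arguments, and the
ground-truth column in the training data is named "solution". With the old
parameter name "solutions", that keyword falls into **kwargs and the required
argument is never bound, producing the TypeError the commit subject alludes to:

    # Old signature: expects a keyword the caller never sends.
    def len_reward_old(completions, solutions, **kwargs):
        return [0.0 for _ in completions]

    # Patched signature: matches the dataset column name.
    def len_reward_new(completions, solution, **kwargs):
        return [0.0 for _ in completions]

    # Columns as the trainer would forward them (shape is illustrative).
    batch = {
        "completions": [[{"content": "<think>...</think> 42"}]],
        "solution": ["42"],
    }

    len_reward_new(**batch)  # OK: "solution" binds to the parameter.
    try:
        len_reward_old(**batch)  # "solution" is swallowed by **kwargs.
    except TypeError as err:
        print(err)  # missing 1 required positional argument: 'solutions'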