From 3fe122ff54a6c6bb1ba6ba65c12c291fe0432b3a Mon Sep 17 00:00:00 2001
From: hellen <144710795+hellen9527@users.noreply.github.com>
Date: Sat, 15 Feb 2025 23:09:40 +0800
Subject: [PATCH] fix bug, solutions not found

---
 src/open_r1/rewards.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/open_r1/rewards.py b/src/open_r1/rewards.py
index 27962784..5411caac 100644
--- a/src/open_r1/rewards.py
+++ b/src/open_r1/rewards.py
@@ -75,14 +75,14 @@ def reasoning_steps_reward(completions, **kwargs):
     return [min(1.0, count / 3) for count in matches]
 
 
-def len_reward(completions: list[Dict[str, str]], solutions: list[str], **kwargs) -> float:
+def len_reward(completions: list[Dict[str, str]], solution: list[str], **kwargs) -> float:
     """Compute length-based rewards to discourage overthinking and promote token efficiency.
 
     Taken from the Kimi 1.5 tech report: https://arxiv.org/abs/2501.12599
 
     Args:
         completions: List of model completions
-        solutions: List of ground truth solutions
+        solution: List of ground truth solutions
 
     Returns:
         List of rewards where:
@@ -93,7 +93,7 @@ def len_reward(completions: list[Dict[str, str]], solution: list[str], **kwargs
 
     # First check correctness of answers
     correctness = []
-    for content, sol in zip(contents, solutions):
+    for content, sol in zip(contents, solution):
         gold_parsed = parse(
             sol,
             extraction_mode="first_match",
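
Why the rename matters, as a minimal sketch (the stand-in functions below are
hypothetical, not the project's actual trainer code): reward functions here
receive extra dataset columns forwarded as keyword arguments, and the
ground-truth column in the training data is named "solution". With the old
parameter name "solutions", that keyword falls into **kwargs and the required
argument is never bound, producing the TypeError the commit subject alludes to:

    # Old signature: expects a keyword the caller never sends.
    def len_reward_old(completions, solutions, **kwargs):
        return [0.0 for _ in completions]

    # Patched signature: matches the dataset column name.
    def len_reward_new(completions, solution, **kwargs):
        return [0.0 for _ in completions]

    # Columns as the trainer would forward them (shape is illustrative).
    batch = {
        "completions": [[{"content": "<think>...</think> 42"}]],
        "solution": ["42"],
    }

    len_reward_new(**batch)  # OK: "solution" binds to the parameter.
    try:
        len_reward_old(**batch)  # "solution" is swallowed by **kwargs.
    except TypeError as err:
        print(err)  # missing 1 required positional argument: 'solutions'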