From 551875a34900fa3b60f9b2e7bf934ab279f851c3 Mon Sep 17 00:00:00 2001 From: zeenolife Date: Fri, 7 Feb 2025 00:27:27 +0000 Subject: [PATCH 1/5] [Testing Github workflow] Updating workflows and makefile --- .github/workflows/quality.yml | 5 ++++- Makefile | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 90837727..13dd6960 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -24,8 +24,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install ".[quality]" + python -m pip install ".[quality,tests]" - name: Code quality run: | make quality + - name: Run tests + run: | + make test diff --git a/Makefile b/Makefile index 17e2ddc1..f57328ec 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ quality: isort --check-only $(check_dirs) setup.py flake8 --max-line-length 119 $(check_dirs) setup.py +test: + pytest tests/ # Evaluation From e8cac09703ac6798458d35a0180747eaf119cddd Mon Sep 17 00:00:00 2001 From: zeenolife Date: Fri, 7 Feb 2025 20:19:04 +0000 Subject: [PATCH 2/5] [Testing Github workflow] - Refactoring workflow, fixing test errors, easier debugging --- .github/workflows/{quality.yml => tests.yml} | 6 +++--- Makefile | 2 +- setup.py | 2 +- src/open_r1/rewards.py | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) rename .github/workflows/{quality.yml => tests.yml} (90%) diff --git a/.github/workflows/quality.yml b/.github/workflows/tests.yml similarity index 90% rename from .github/workflows/quality.yml rename to .github/workflows/tests.yml index 13dd6960..1d2885ab 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: Quality +name: Tests on: push: @@ -11,8 +11,8 @@ on: jobs: - check_code_quality: - name: Check code quality + tests: + name: Run tests and quality checks runs-on: ubuntu-latest steps: - name: Checkout code diff --git a/Makefile 
b/Makefile index f57328ec..74b2cd8c 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ quality: flake8 --max-line-length 119 $(check_dirs) setup.py test: - pytest tests/ + pytest -sv tests/ # Evaluation diff --git a/setup.py b/setup.py index a2dd93ad..969871a6 100644 --- a/setup.py +++ b/setup.py @@ -81,7 +81,7 @@ def deps_list(*pkgs): extras = {} -extras["tests"] = deps_list("pytest", "parameterized") +extras["tests"] = deps_list("pytest", "parameterized", "math-verify") extras["torch"] = deps_list("torch") extras["quality"] = deps_list("ruff", "isort", "flake8") extras["eval"] = deps_list("lighteval", "math-verify") diff --git a/src/open_r1/rewards.py b/src/open_r1/rewards.py index 9362d6eb..4af55f0e 100644 --- a/src/open_r1/rewards.py +++ b/src/open_r1/rewards.py @@ -59,10 +59,10 @@ def format_reward(completions, **kwargs): def reasoning_steps_reward(completions, **kwargs): """Reward function that checks for clear step-by-step reasoning. Regex pattern: - Step \d+: - matches "Step 1:", "Step 2:", etc. - ^\d+\. - matches numbered lists like "1.", "2.", etc. at start of line - \n- - matches bullet points with hyphens - \n\* - matches bullet points with asterisks + Step \\d+: - matches "Step 1:", "Step 2:", etc. + ^\\d+\\. - matches numbered lists like "1.", "2.", etc. 
at start of line + \\n- - matches bullet points with hyphens + \\n\\* - matches bullet points with asterisks First,|Second,|Next,|Finally, - matches transition words """ pattern = r"(Step \d+:|^\d+\.|\n-|\n\*|First,|Second,|Next,|Finally,)" From 2515a0a3f875b274a03722f4e9a7322c27e7f8c1 Mon Sep 17 00:00:00 2001 From: zeenolife Date: Mon, 10 Feb 2025 16:11:24 +0000 Subject: [PATCH 3/5] [Testing Github workflow] Converting docstring into raw string --- src/open_r1/rewards.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/open_r1/rewards.py b/src/open_r1/rewards.py index d8e60918..bec3d11c 100644 --- a/src/open_r1/rewards.py +++ b/src/open_r1/rewards.py @@ -58,12 +58,12 @@ def format_reward(completions, **kwargs): def reasoning_steps_reward(completions, **kwargs): - """Reward function that checks for clear step-by-step reasoning. + r"""Reward function that checks for clear step-by-step reasoning. Regex pattern: - Step \\d+: - matches "Step 1:", "Step 2:", etc. - ^\\d+\\. - matches numbered lists like "1.", "2.", etc. at start of line - \\n- - matches bullet points with hyphens - \\n\\* - matches bullet points with asterisks + Step \d+: - matches "Step 1:", "Step 2:", etc. + ^\d+\. - matches numbered lists like "1.", "2.", etc. 
at start of line + \n- - matches bullet points with hyphens + \n\* - matches bullet points with asterisks First,|Second,|Next,|Finally, - matches transition words """ pattern = r"(Step \d+:|^\d+\.|\n-|\n\*|First,|Second,|Next,|Finally,)" From d1a4c652c025aeae55fe76bea75012f09dd0ef6e Mon Sep 17 00:00:00 2001 From: zeenolife Date: Mon, 10 Feb 2025 16:41:31 +0000 Subject: [PATCH 4/5] [Testing Github workflow] - Fixing test_zero_max_penalty_returns_zero() test --- tests/test_rewards.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_rewards.py b/tests/test_rewards.py index 0ae015d1..2b85678d 100644 --- a/tests/test_rewards.py +++ b/tests/test_rewards.py @@ -120,7 +120,9 @@ def test_positive_max_penalty_raises_value_error(self): def test_zero_max_penalty_returns_zero(self): reward_fn = get_repetition_penalty_reward(ngram_size=2, max_penalty=0.0) - self.assertEqual(reward_fn, 0) + completions = [[{"content": "test test test"}]] + rewards = reward_fn(completions) + self.assertEqual(rewards, [0.0]) def test_no_repetition(self): reward_fn = get_repetition_penalty_reward(ngram_size=2, max_penalty=-1.0) From 0632d3b559e5d6f4b0cf40b7024545c4665937d8 Mon Sep 17 00:00:00 2001 From: zeenolife Date: Mon, 10 Feb 2025 17:25:20 +0000 Subject: [PATCH 5/5] [Testing Github workflow] Removing redundant test --- tests/test_rewards.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_rewards.py b/tests/test_rewards.py index 2b85678d..7f0cbfa9 100644 --- a/tests/test_rewards.py +++ b/tests/test_rewards.py @@ -118,12 +118,6 @@ def test_positive_max_penalty_raises_value_error(self): with self.assertRaisesRegex(ValueError, "max_penalty 1.5 should not be positive"): get_repetition_penalty_reward(ngram_size=2, max_penalty=1.5) - def test_zero_max_penalty_returns_zero(self): - reward_fn = get_repetition_penalty_reward(ngram_size=2, max_penalty=0.0) - completions = [[{"content": "test test test"}]] - rewards = reward_fn(completions) - 
self.assertEqual(rewards, [0.0]) - def test_no_repetition(self): reward_fn = get_repetition_penalty_reward(ngram_size=2, max_penalty=-1.0) completions = [[{"content": "this is a test sentence"}]]