From 551875a34900fa3b60f9b2e7bf934ab279f851c3 Mon Sep 17 00:00:00 2001
From: zeenolife <almaz.zinollayev@gmail.com>
Date: Fri, 7 Feb 2025 00:27:27 +0000
Subject: [PATCH 1/5] [Testing Github workflow] Updating workflows and makefile

---
 .github/workflows/quality.yml | 5 ++++-
 Makefile                      | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
index 90837727..13dd6960 100644
--- a/.github/workflows/quality.yml
+++ b/.github/workflows/quality.yml
@@ -24,8 +24,11 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install ".[quality]"
+          python -m pip install ".[quality,tests]"
       - name: Code quality
         run: |
           make quality
+      - name: Run tests
+        run: |
+          make test
 
diff --git a/Makefile b/Makefile
index 17e2ddc1..f57328ec 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,8 @@ quality:
 	isort --check-only $(check_dirs) setup.py
 	flake8 --max-line-length 119 $(check_dirs) setup.py
 
+test:
+	pytest tests/
 
 # Evaluation
 

From e8cac09703ac6798458d35a0180747eaf119cddd Mon Sep 17 00:00:00 2001
From: zeenolife <almaz.zinollayev@gmail.com>
Date: Fri, 7 Feb 2025 20:19:04 +0000
Subject: [PATCH 2/5] [Testing Github workflow] - Refactoring workflow, fixing
 tests error, easier debugging

---
 .github/workflows/{quality.yml => tests.yml} | 6 +++---
 Makefile                                     | 2 +-
 setup.py                                     | 2 +-
 src/open_r1/rewards.py                       | 8 ++++----
 4 files changed, 9 insertions(+), 9 deletions(-)
 rename .github/workflows/{quality.yml => tests.yml} (90%)

diff --git a/.github/workflows/quality.yml b/.github/workflows/tests.yml
similarity index 90%
rename from .github/workflows/quality.yml
rename to .github/workflows/tests.yml
index 13dd6960..1d2885ab 100644
--- a/.github/workflows/quality.yml
+++ b/.github/workflows/tests.yml
@@ -1,4 +1,4 @@
-name: Quality
+name: Tests
 
 on:
   push:
@@ -11,8 +11,8 @@ on:
 
 jobs:
 
-  check_code_quality:
-    name: Check code quality
+  tests:
+    name: Run tests and quality checks
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
diff --git a/Makefile b/Makefile
index f57328ec..74b2cd8c 100644
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,7 @@ quality:
 	flake8 --max-line-length 119 $(check_dirs) setup.py
 
 test:
-	pytest tests/
+	pytest -sv tests/
 
 # Evaluation
 
diff --git a/setup.py b/setup.py
index a2dd93ad..969871a6 100644
--- a/setup.py
+++ b/setup.py
@@ -81,7 +81,7 @@ def deps_list(*pkgs):
 
 
 extras = {}
-extras["tests"] = deps_list("pytest", "parameterized")
+extras["tests"] = deps_list("pytest", "parameterized", "math-verify")
 extras["torch"] = deps_list("torch")
 extras["quality"] = deps_list("ruff", "isort", "flake8")
 extras["eval"] = deps_list("lighteval", "math-verify")
diff --git a/src/open_r1/rewards.py b/src/open_r1/rewards.py
index 9362d6eb..4af55f0e 100644
--- a/src/open_r1/rewards.py
+++ b/src/open_r1/rewards.py
@@ -59,10 +59,10 @@ def format_reward(completions, **kwargs):
 def reasoning_steps_reward(completions, **kwargs):
     """Reward function that checks for clear step-by-step reasoning.
     Regex pattern:
-        Step \d+: - matches "Step 1:", "Step 2:", etc.
-        ^\d+\. - matches numbered lists like "1.", "2.", etc. at start of line
-        \n- - matches bullet points with hyphens
-        \n\* - matches bullet points with asterisks
+        Step \\d+: - matches "Step 1:", "Step 2:", etc.
+        ^\\d+\\. - matches numbered lists like "1.", "2.", etc. at start of line
+        \\n- - matches bullet points with hyphens
+        \\n\\* - matches bullet points with asterisks
         First,|Second,|Next,|Finally, - matches transition words
     """
     pattern = r"(Step \d+:|^\d+\.|\n-|\n\*|First,|Second,|Next,|Finally,)"

From 2515a0a3f875b274a03722f4e9a7322c27e7f8c1 Mon Sep 17 00:00:00 2001
From: zeenolife <almaz.zinollayev@gmail.com>
Date: Mon, 10 Feb 2025 16:11:24 +0000
Subject: [PATCH 3/5] [Testing Github workflow] Converting docstring into raw
 string

---
 src/open_r1/rewards.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/open_r1/rewards.py b/src/open_r1/rewards.py
index d8e60918..bec3d11c 100644
--- a/src/open_r1/rewards.py
+++ b/src/open_r1/rewards.py
@@ -58,12 +58,12 @@ def format_reward(completions, **kwargs):
 
 
 def reasoning_steps_reward(completions, **kwargs):
-    """Reward function that checks for clear step-by-step reasoning.
+    r"""Reward function that checks for clear step-by-step reasoning.
     Regex pattern:
-        Step \\d+: - matches "Step 1:", "Step 2:", etc.
-        ^\\d+\\. - matches numbered lists like "1.", "2.", etc. at start of line
-        \\n- - matches bullet points with hyphens
-        \\n\\* - matches bullet points with asterisks
+        Step \d+: - matches "Step 1:", "Step 2:", etc.
+        ^\d+\. - matches numbered lists like "1.", "2.", etc. at start of line
+        \n- - matches bullet points with hyphens
+        \n\* - matches bullet points with asterisks
         First,|Second,|Next,|Finally, - matches transition words
     """
     pattern = r"(Step \d+:|^\d+\.|\n-|\n\*|First,|Second,|Next,|Finally,)"

From d1a4c652c025aeae55fe76bea75012f09dd0ef6e Mon Sep 17 00:00:00 2001
From: zeenolife <almaz.zinollayev@gmail.com>
Date: Mon, 10 Feb 2025 16:41:31 +0000
Subject: [PATCH 4/5] [Testing Github workflow] - Fixing
 test_zero_max_penalty_returns_zero() test

---
 tests/test_rewards.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_rewards.py b/tests/test_rewards.py
index 0ae015d1..2b85678d 100644
--- a/tests/test_rewards.py
+++ b/tests/test_rewards.py
@@ -120,7 +120,9 @@ def test_positive_max_penalty_raises_value_error(self):
 
     def test_zero_max_penalty_returns_zero(self):
         reward_fn = get_repetition_penalty_reward(ngram_size=2, max_penalty=0.0)
-        self.assertEqual(reward_fn, 0)
+        completions = [[{"content": "test test test"}]]
+        rewards = reward_fn(completions)
+        self.assertEqual(rewards, [0.0])
 
     def test_no_repetition(self):
         reward_fn = get_repetition_penalty_reward(ngram_size=2, max_penalty=-1.0)

From 0632d3b559e5d6f4b0cf40b7024545c4665937d8 Mon Sep 17 00:00:00 2001
From: zeenolife <almaz.zinollayev@gmail.com>
Date: Mon, 10 Feb 2025 17:25:20 +0000
Subject: [PATCH 5/5] [Testing Github workflow] Removing redundant test

---
 tests/test_rewards.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/test_rewards.py b/tests/test_rewards.py
index 2b85678d..7f0cbfa9 100644
--- a/tests/test_rewards.py
+++ b/tests/test_rewards.py
@@ -118,12 +118,6 @@ def test_positive_max_penalty_raises_value_error(self):
         with self.assertRaisesRegex(ValueError, "max_penalty 1.5 should not be positive"):
             get_repetition_penalty_reward(ngram_size=2, max_penalty=1.5)
 
-    def test_zero_max_penalty_returns_zero(self):
-        reward_fn = get_repetition_penalty_reward(ngram_size=2, max_penalty=0.0)
-        completions = [[{"content": "test test test"}]]
-        rewards = reward_fn(completions)
-        self.assertEqual(rewards, [0.0])
-
     def test_no_repetition(self):
         reward_fn = get_repetition_penalty_reward(ngram_size=2, max_penalty=-1.0)
         completions = [[{"content": "this is a test sentence"}]]