Minor question template & score_answer improvements (#261)

* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
2026-04-22 16:49:06 +00:00 · 2025-03-04 21:55:09 +01:00 · 2025-03-04 21:55:09 +01:00 · 5d7fbac0ad
commit 5d7fbac0ad
parent 061282e373
106 changed files with 403 additions and 507 deletions
--- a/reasoning_gym/algorithmic/spell_backward.py
+++ b/reasoning_gym/algorithmic/spell_backward.py
@@ -52,14 +52,14 @@ class SpellBackwardDataset(ProceduralDataset):
    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        reward = 0.0
        expected_answer = entry["answer"]
-        if answer is not None:
+        if isinstance(answer, str):
            try:
                if expected_answer.lower() == answer.lower():
                    reward = 1.0
                else:
                    reward = 0.05
            except:
-                reward = 0.01
+                reward = 0.0
        return reward