Minor question template & score_answer improvements (#261)

* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* stricter tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
2026-04-22 16:49:06 +00:00 · 2025-03-04 21:55:09 +01:00 · 2025-03-04 21:55:09 +01:00 · 5d7fbac0ad
commit 5d7fbac0ad
parent 061282e373
106 changed files with 403 additions and 507 deletions
--- a/reasoning_gym/logic/self_reference.py
+++ b/reasoning_gym/logic/self_reference.py
@@ -339,9 +339,7 @@ class SelfReferenceDataset(ProceduralDataset):

        # Solve puzzle
        solutions = solve_puzzle_dynamic(puzzle)
-        for idx, sol in enumerate(solutions, start=1):
-            sol_str = ["True" if s else "False" for s in sol]
-        answer = len(solutions)
+        answer = str(len(solutions))

        return {
            "question": puzz_s,
@@ -362,12 +360,10 @@ class SelfReferenceDataset(ProceduralDataset):
            float: The computed score between 0.0 and 1.0.
        """

-        if answer == None:
-            return 0.0
-        if str(answer) != str(entry["answer"]):
-            return 0.1
-        else:
-            return 1.0  # Yay
+        if isinstance(answer, str):
+            if answer == str(entry["answer"]):
+                return 1.0  # Yay
+        return 0.0


 register_dataset("self_reference", SelfReferenceDataset, SelfReferenceConfig)