Minor question template & score_answer improvements (#261)

* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* stricter tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
2026-04-27 17:23:19 +00:00 · 2025-03-04 21:55:09 +01:00 · 2025-03-04 21:55:09 +01:00 · b2904ccab9
commit b2904ccab9
parent bf24999bb0
106 changed files with 403 additions and 507 deletions
--- a/reasoning_gym/algorithmic/letter_jumble.py
+++ b/reasoning_gym/algorithmic/letter_jumble.py
@@ -116,7 +116,7 @@ class LetterJumbleDataset(ProceduralDataset):

        # Each word in the expected answer is worth an equal fraction of 1.0
        total_words = len(expected_words)
-        score_per_word = 1.0 / total_words if total_words else 0
+        score_per_word = 1.0 / total_words if total_words > 0 else 0

        # Calculate scores word by word
        scores = []
@@ -142,18 +142,16 @@ class LetterJumbleDataset(ProceduralDataset):
            float: The computed score between 0.0 and 1.0.
        """

-        if not answer:
+        if not isinstance(answer, str):
            return 0.0

        oracle_answer = entry["answer"].strip().lower()
-        if answer:
-            answer = answer.strip().lower()
-            if answer == oracle_answer:
-                return 1.0  # Perfect score!
-            else:
-                partial_score = self.partial(oracle_answer, answer)
-                return partial_score
-        return 0.01
+        answer = answer.strip().lower()
+        if answer == oracle_answer:
+            return 1.0  # Perfect score!
+        else:
+            partial_score = self.partial(oracle_answer, answer)
+            return partial_score


 register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)