Minor question template & score_answer improvements (#261)

* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
2026-04-24 17:05:03 +00:00 · 2025-03-04 21:55:09 +01:00 · 2025-03-04 21:55:09 +01:00 · b2904ccab9
commit b2904ccab9
parent bf24999bb0
106 changed files with 403 additions and 507 deletions
--- a/reasoning_gym/cognition/needle_haystack.py
+++ b/reasoning_gym/cognition/needle_haystack.py
@@ -110,19 +110,17 @@ class NeedleHaystackDataset(ProceduralDataset):
        Returns:
            float: The computed score between 0.0 and 1.0.
        """
+        if isinstance(answer, str):
+            correct_word = entry["answer"]

-        correct_word = entry["answer"]
-        if not answer:
-            return 0.0  # No answer given
+            # Normalize case
+            answer = answer.replace(" ", "").strip().lower()
+            correct_word = correct_word.strip().lower()

-        # Normalize case
-        answer = answer.replace(" ", "").strip().lower()
-        correct_word = correct_word.strip().lower()
+            if answer == correct_word:
+                return 1.0  # Correct!

-        if answer == correct_word:
-            return 1.0  # Correct!
-
-        return 0.01
+        return 0.0


 # Register the dataset