Minor question template & score_answer improvements (#261)

* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
2026-04-19 12:58:07 +00:00 · 2025-03-04 21:55:09 +01:00 · 2025-03-04 21:55:09 +01:00 · 5d7fbac0ad
commit 5d7fbac0ad
parent 061282e373
106 changed files with 403 additions and 507 deletions
--- a/reasoning_gym/arithmetic/decimal_arithmetic.py
+++ b/reasoning_gym/arithmetic/decimal_arithmetic.py
@ -178,7 +178,7 @@ class DecimalArithmeticDataset(ProceduralDataset):
            + problem_str
        )

-        return {"question": problem_str, "answer": answer, "metadata": {}}
+        return {"question": problem_str, "answer": str(answer), "metadata": {}}

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        """
@ -189,12 +189,12 @@ class DecimalArithmeticDataset(ProceduralDataset):
        Returns:
            float: 1.0 if the user's answer is within tolerance; otherwise, 0.01.
        """
-        if answer is None:
+        if not isinstance(answer, str):
            return 0.0

        try:
            user_ans: Decimal = Decimal(answer)
-            correct_ans: Decimal = entry["answer"]
+            correct_ans: Decimal = Decimal(entry["answer"])

            # Determine tolerance based on the desired precision.
            precision: int = self.config.max_num_decimal_places
@ -202,9 +202,9 @@ class DecimalArithmeticDataset(ProceduralDataset):
            if abs(user_ans - correct_ans) <= tol:
                return 1.0
        except Exception:
-            return 0.01
+            pass

-        return 0.01
+        return 0.0


 # Register the dataset with the factory.