Remove strip from ProceduralDataset::core score_answer() (#250)

* remove strip from ProceduralDataset::core score_answer(), strip in extract_answer() (optional, default=True)
* test: Move test_extract_answer() from test_dataset.py to test_utils.py
* refactor: Improve decimal reward computation with more flexible comparison
* fix: Implement rounding for format_number when round_if_needed is True
* test: Add test case for compute_decimal_reward with sign and zeros
2026-04-26 17:13:17 +00:00 · 2025-03-02 08:46:36 +01:00 · 2025-03-02 08:46:36 +01:00 · ece6990709
commit ece6990709
parent 16a4ea1193
6 changed files with 80 additions and 26 deletions
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -2,7 +2,6 @@ import pytest

 from reasoning_gym.arithmetic.basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig
 from reasoning_gym.dataset import ReseedingDataset
-from reasoning_gym.utils import extract_answer


 def test_reseeding_dataset_iteration():
@@ -41,12 +40,7 @@ def test_reseeding_dataset_iteration():
    assert infinite_dataset.score_answer(test_item["answer"], test_item) == 1.0


-def test_extract_answer():
-    assert extract_answer("This is a text. <final_answer>1234</final_answer>", tag_name="final_answer") == "1234"
-
-    # ignore single whitespae
-    assert extract_answer("This is a text. <answer>\n1234 </answer>", tag_name="answer") == "1234"
-
+def test_basic_arithmetic_score_answer():
    config = BasicArithmeticDatasetConfig(
        min_terms=2, max_terms=3, min_digits=1, max_digits=2, operators=["+"], allow_parentheses=False, seed=42, size=10
    )