feat: add scoring cascade for reducing false negatives (#526)

* feat: add scoring cascade for reducing false negatives in answer verification * style: fix black and isort formatting Run black and isort to satisfy pre-commit checks. Made-with: Cursor * docs: add scoring cascade example to Quickstart section Mention the experimental scoring cascade feature at the end of the Quickstart section with a disclaimer and complete usage examples showing both the dataset method and standalone function. Made-with: Cursor * docs: shorten scoring cascade section in README Trim to a concise standalone example per review feedback. Made-with: Cursor * docs: simplify scoring cascade description in README Made-with: Cursor * update readme --------- Co-authored-by: Zafir Stojanovski <zaf.stojano@gmail.com>
2026-04-30 17:40:45 +00:00 · 2026-04-18 01:09:15 +05:30 · 2026-04-18 01:09:15 +05:30 · 49b07130b3
commit 49b07130b3
parent 437e0b49c4
6 changed files with 477 additions and 0 deletions
--- a/reasoning_gym/dataset.py
+++ b/reasoning_gym/dataset.py
@@ -71,6 +71,21 @@ class ProceduralDataset(ABC, Sized, Iterable[dict[str, Any]]):
                reward = len(oracle_answer) / len(answer)
        return reward

+    def score_answer_cascade(self, answer: Optional[str], entry: dict[str, Any]) -> float:
+        """Score with fallback cascade (LaTeX stripping, string, float, math matching).
+
+        Runs this dataset's ``score_answer`` first, then progressively more
+        lenient matchers.  The cascade can only upgrade, never downgrade.
+
+        Requires ``pip install reasoning-gym[scoring]`` for the ``math_match``
+        step (other steps work without extra dependencies).
+
+        Args:
+            answer: Candidate answer to score. ``None`` yields ``0.0`` without
+                calling ``cascade_score``.
+            entry: Dataset entry. Its ``"answer"`` value (``""`` when the key
+                is absent) is handed to ``cascade_score`` together with the
+                entry itself and this dataset.
+
+        Returns:
+            ``0.0`` when ``answer`` is ``None``; otherwise whatever
+            ``cascade_score`` returns.
+        """
+        # Function-scope import.
+        # NOTE(review): presumably avoids a circular import between this module
+        # and ``.scoring`` and/or keeps the scoring code lazily loaded - confirm.
+        from .scoring import cascade_score
+
+        if answer is None:
+            return 0.0
+        return cascade_score(answer, entry.get("answer", ""), dataset=self, entry=entry)
+

 T = TypeVar("T", bound="ProceduralDataset")

@@ -127,3 +142,7 @@ class ReseedingDataset(Iterable[dict[str, Any]]):
    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        """Forward scoring to the wrapped dataset's implementation.

        Args:
            answer: Candidate answer, passed through unchanged.
            entry: Dataset entry, passed through unchanged.

        Returns:
            The value returned by ``self.dataset.score_answer(answer, entry)``.
        """
        return self.dataset.score_answer(answer, entry)
+
+    def score_answer_cascade(self, answer: Optional[str], entry: dict[str, Any]) -> float:
+        """Forward cascade scoring to the wrapped dataset's implementation.
+
+        Args:
+            answer: Candidate answer, passed through unchanged.
+            entry: Dataset entry, passed through unchanged.
+
+        Returns:
+            The value returned by
+            ``self.dataset.score_answer_cascade(answer, entry)``.
+        """
+        return self.dataset.score_answer_cascade(answer, entry)