mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-30 17:40:45 +00:00
feat: add scoring cascade for reducing false negatives (#526)
* feat: add scoring cascade for reducing false negatives in answer verification
* style: fix black and isort formatting
  Run black and isort to satisfy pre-commit checks.
  Made-with: Cursor
* docs: add scoring cascade example to Quickstart section
  Mention the experimental scoring cascade feature at the end of the Quickstart section with a disclaimer and complete usage examples showing both the dataset method and the standalone function.
  Made-with: Cursor
* docs: shorten scoring cascade section in README
  Trim to a concise standalone example per review feedback.
  Made-with: Cursor
* docs: simplify scoring cascade description in README
  Made-with: Cursor
* update readme
---------
Co-authored-by: Zafir Stojanovski <zaf.stojano@gmail.com>
This commit is contained in:
parent
437e0b49c4
commit
49b07130b3
6 changed files with 477 additions and 0 deletions
|
|
@ -71,6 +71,21 @@ class ProceduralDataset(ABC, Sized, Iterable[dict[str, Any]]):
|
|||
reward = len(oracle_answer) / len(answer)
|
||||
return reward
|
||||
|
||||
def score_answer_cascade(self, answer: Optional[str], entry: dict[str, Any]) -> float:
    """Score with fallback cascade (LaTeX stripping, string, float, math matching).

    Runs this dataset's ``score_answer`` first, then progressively more
    lenient matchers. The cascade can only upgrade, never downgrade.

    Requires ``pip install reasoning-gym[scoring]`` for the ``math_match``
    step (other steps work without extra dependencies).

    Args:
        answer: Candidate answer to score. ``None`` short-circuits to ``0.0``.
        entry: Dataset entry. Its ``"answer"`` value (``""`` when the key is
            absent) is handed to ``cascade_score`` as the expected answer,
            and the entry itself is forwarded as well.

    Returns:
        ``0.0`` when ``answer`` is ``None``; otherwise whatever
        ``cascade_score`` returns.
    """
    # Guard first: a missing answer scores zero without touching the
    # scoring module at all.
    if answer is None:
        return 0.0

    # Imported at call time rather than at module load, as in the original.
    from .scoring import cascade_score

    return cascade_score(answer, entry.get("answer", ""), dataset=self, entry=entry)
|
||||
|
||||
|
||||
# Type variable whose upper bound is ``ProceduralDataset`` (given as a string forward reference).
T = TypeVar("T", bound="ProceduralDataset")
|
||||
|
||||
|
|
@ -127,3 +142,7 @@ class ReseedingDataset(Iterable[dict[str, Any]]):
|
|||
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
    """Delegate scoring of ``answer`` to the wrapped dataset.

    Args:
        answer: Candidate answer, passed through unchanged (may be ``None``).
        entry: Dataset entry, passed through unchanged.

    Returns:
        The value returned by the wrapped dataset's ``score_answer``.
    """
    wrapped = self.dataset
    return wrapped.score_answer(answer, entry)
|
||||
|
||||
def score_answer_cascade(self, answer: Optional[str], entry: dict[str, Any]) -> float:
    """Delegate cascade scoring of ``answer`` to the wrapped dataset.

    Args:
        answer: Candidate answer, passed through unchanged (may be ``None``).
        entry: Dataset entry, passed through unchanged.

    Returns:
        The value returned by the wrapped dataset's ``score_answer_cascade``.
    """
    wrapped = self.dataset
    return wrapped.score_answer_cascade(answer, entry)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue