reasoning-gym/tests/test_get_score_answer_fn.py
rasdani 72e45e9401
Feat: expose score_answer function without needing to instantiate a dataset (#422)
* feat: get `score_answer` for a given dataset

* fix: `self` error

* add test
2025-04-18 10:36:44 +02:00

71 lines
2.1 KiB
Python

"""
Tests for the get_score_answer_fn helper with hard-coded sample cases.
"""
import pytest
from reasoning_gym import get_score_answer_fn
def _case(case_id, dataset, entry, model_answer, expected):
    """Build one hard-coded scoring case as a plain dict.

    The keys are always emitted in the order
    ``dataset``, ``entry``, ``model_answer``, ``expected``, ``id``.
    """
    return {
        "dataset": dataset,
        "entry": entry,
        "model_answer": model_answer,
        "expected": expected,
        "id": case_id,
    }


# Hard-coded sample cases. Each one supplies a dataset name, an entry dict
# holding the reference answer (plus metadata where given), the model answer
# to score, the expected score, and an id that labels the parametrized test.
TEST_CASES = [
    # --- model answer identical to the reference answer: expected 1.0 ---
    _case(
        "rg_4806-correct",
        "letter_jumble",
        {"answer": "second opportunity to receive"},
        "second opportunity to receive",
        1.0,
    ),
    _case(
        "rg_16004-word_sorting-correct",
        "word_sorting",
        {
            "answer": "arrive, burdens, computers, federal, louder, paragraphs, side, specified, virus",
            "metadata": {
                "sorted_words": [
                    "arrive",
                    "burdens",
                    "computers",
                    "federal",
                    "louder",
                    "paragraphs",
                    "side",
                    "specified",
                    "virus",
                ]
            },
        },
        "arrive, burdens, computers, federal, louder, paragraphs, side, specified, virus",
        1.0,
    ),
    _case(
        "rg_14211-correct",
        "spell_backward",
        {"answer": "ssiknu"},
        "ssiknu",
        1.0,
    ),
    # --- model answer differs from the reference answer: expected 0.0 ---
    _case(
        "rg_4806-incorrect",
        "letter_jumble",
        {"answer": "second opportunity to receive"},
        "completely wrong answer here",
        0.0,
    ),
    _case(
        "rg_14211-incorrect",
        "spell_backward",
        {"answer": "ssiknu"},
        "unkiss",
        0.0,
    ),
]
@pytest.mark.parametrize("case", TEST_CASES, ids=[c["id"] for c in TEST_CASES])
def test_get_score_answer_fn_hardcoded(case):
    """
    Check the scorer obtained from ``get_score_answer_fn`` against one case.

    The scoring callable is looked up by the case's dataset name, applied to
    the model answer and the entry, and its result is compared with the
    expected score using an absolute tolerance of 1e-8.
    """
    score_answer = get_score_answer_fn(case["dataset"])
    actual = score_answer(case["model_answer"], case["entry"])
    # Compare approximately so tiny float differences in a score do not fail the test.
    assert actual == pytest.approx(case["expected"], abs=1e-8)