mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-26 17:13:17 +00:00
Fixed countdown score_answer (#265)
* Fixed countdown score_answer
* Checked that the solution uses all numbers
This commit is contained in:
parent
3c544aba20
commit
7458dbc95d
2 changed files with 41 additions and 19 deletions
|
|
@ -72,7 +72,7 @@ def test_countdown_game_items():
|
|||
dataset.score_answer(answer="a wrong solution", entry=item) == 0.01
|
||||
) # wrong answer but incorrectly formatted
|
||||
assert dataset.score_answer(answer="", entry=item) == 0.01 # wrong answer but empty string
|
||||
assert dataset.score_answer(answer=None, entry=item) == 0.0 # no answer
|
||||
assert dataset.score_answer(answer=None, entry=item) == 0.01 # no answer
|
||||
|
||||
try:
|
||||
result = eval(expr) # Safe here since we control expression generation
|
||||
|
|
@ -81,6 +81,25 @@ def test_countdown_game_items():
|
|||
pytest.fail(f"Invalid expression generated: {expr}")
|
||||
|
||||
|
||||
def test_answer_with_incorrect_numbers():
    """An answer built from numbers outside the given set is expected to score 0.05."""
    config = CountdownConfig(size=10, seed=42)
    countdown = CountdownDataset(config)
    metadata = {"numbers": [44, 3], "target": 47}
    entry = {"metadata": metadata}
    # "45+2" evaluates to the target 47, but neither 45 nor 2 is among the given numbers.
    score = countdown.score_answer(answer="45+2", entry=entry)
    assert score == 0.05
|
||||
|
||||
|
||||
def test_answer_without_all_numbers():
    """An answer that leaves one of the given numbers unused is expected to score 0.05."""
    countdown = CountdownDataset(CountdownConfig(size=10, seed=42))
    entry = {
        "metadata": {
            "numbers": [1, 45, 2, 3],
            "target": 50,
        }
    }
    # "45+2+3" equals the target 50 but never uses the 1 from the given numbers.
    score = countdown.score_answer(answer="45+2+3", entry=entry)
    assert score == 0.05
|
||||
|
||||
|
||||
def test_countdown_game_randomization():
|
||||
"""Test number randomization configuration"""
|
||||
config = CountdownConfig(min_numbers=4, max_numbers=4, shuffle=False, size=10, seed=42) # Fixed size for testing
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue