mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-27 17:23:19 +00:00
added linting checks
This commit is contained in:
parent
d7ebe409d8
commit
ca3a841bf6
2 changed files with 13 additions and 11 deletions
|
|
@@ -64,14 +64,16 @@ def test_countdown_game_items():
|
|||
|
||||
# Verify expression evaluates correctly
|
||||
expr = item["metadata"]["expression"]
|
||||
|
||||
#check score
|
||||
assert dataset.score_answer(answer=expr, metadata=item["metadata"]) == 1.0 #correct answer
|
||||
assert dataset.score_answer(answer="45+2", metadata=item["metadata"]) == 0.05 #wrong answer but an attempt
|
||||
assert dataset.score_answer(answer="a wrong solution", metadata=item["metadata"]) == 0.01 #wrong answer but incorrectly formatted
|
||||
assert dataset.score_answer(answer="", metadata=item["metadata"]) == 0.01 #wrong answer but empty string
|
||||
assert dataset.score_answer(answer=None, metadata=item["metadata"]) == 0.0 #no answer
|
||||
|
||||
|
||||
# check score
|
||||
assert dataset.score_answer(answer=expr, metadata=item["metadata"]) == 1.0 # correct answer
|
||||
assert dataset.score_answer(answer="45+2", metadata=item["metadata"]) == 0.05 # wrong answer but an attempt
|
||||
assert (
|
||||
dataset.score_answer(answer="a wrong solution", metadata=item["metadata"]) == 0.01
|
||||
) # wrong answer but incorrectly formatted
|
||||
assert dataset.score_answer(answer="", metadata=item["metadata"]) == 0.01 # wrong answer but empty string
|
||||
assert dataset.score_answer(answer=None, metadata=item["metadata"]) == 0.0 # no answer
|
||||
|
||||
try:
|
||||
result = eval(expr) # Safe here since we control expression generation
|
||||
assert result == item["metadata"]["target"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue