mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
add ArcAgiDataset class, fix score_entry() metadata params
This commit is contained in:
parent
2ad0965fdc
commit
4e49806d22
20 changed files with 194 additions and 93 deletions
|
|
@@ -66,13 +66,13 @@ def test_countdown_game_items():
|
|||
expr = item["metadata"]["expression"]
|
||||
|
||||
# check score
|
||||
assert dataset.score_answer(answer=expr, metadata=item["metadata"]) == 1.0 # correct answer
|
||||
assert dataset.score_answer(answer="45+2", metadata=item["metadata"]) == 0.05 # wrong answer but an attempt
|
||||
assert dataset.score_answer(answer=expr, entry=item) == 1.0 # correct answer
|
||||
assert dataset.score_answer(answer="45+2", entry=item) == 0.05 # wrong answer but an attempt
|
||||
assert (
|
||||
dataset.score_answer(answer="a wrong solution", metadata=item["metadata"]) == 0.01
|
||||
dataset.score_answer(answer="a wrong solution", entry=item) == 0.01
|
||||
) # wrong answer but incorrectly formatted
|
||||
assert dataset.score_answer(answer="", metadata=item["metadata"]) == 0.01 # wrong answer but empty string
|
||||
assert dataset.score_answer(answer=None, metadata=item["metadata"]) == 0.0 # no answer
|
||||
assert dataset.score_answer(answer="", entry=item) == 0.01 # wrong answer but empty string
|
||||
assert dataset.score_answer(answer=None, entry=item) == 0.0 # no answer
|
||||
|
||||
try:
|
||||
result = eval(expr) # Safe here since we control expression generation
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue