mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
add ArcAgiDataset class, fix score_entry() metadata params
This commit is contained in:
parent
2ad0965fdc
commit
4e49806d22
20 changed files with 194 additions and 93 deletions
|
|
@ -72,21 +72,20 @@ def test_score_answer():
|
|||
|
||||
for item in dataset:
|
||||
correct_answer = item["answer"]
|
||||
metadata = item["metadata"]
|
||||
|
||||
# Correct answer should score 1.0
|
||||
assert dataset.score_answer(correct_answer, metadata) == 1.0
|
||||
assert dataset.score_answer(correct_answer, entry=item) == 1.0
|
||||
|
||||
# Incorrect answer (a palindrome, but not the correct one) should score 0.05
|
||||
pal_letters = "racecar" if "racecar" != correct_answer else "aba"
|
||||
assert dataset.score_answer(pal_letters, metadata) == 0.05
|
||||
assert dataset.score_answer(pal_letters, entry=item) == 0.05
|
||||
|
||||
# Incorrect answer (not palindrome) should score 0.02
|
||||
wrong_letters = "abcd" if "abcd" != correct_answer else "efgh"
|
||||
assert dataset.score_answer(wrong_letters, metadata) == 0.02
|
||||
assert dataset.score_answer(wrong_letters, entry=item) == 0.02
|
||||
|
||||
# Empty string input should score 0.01
|
||||
assert dataset.score_answer("", metadata) == 0.01
|
||||
assert dataset.score_answer("", entry=item) == 0.01
|
||||
|
||||
# None input (no answer provided) should score 0.0
|
||||
assert dataset.score_answer(None, metadata) == 0.0
|
||||
assert dataset.score_answer(None, entry=item) == 0.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue