mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
Merge branch 'main' of https://github.com/open-thought/reasoning-gym into env/string-splitting
This commit is contained in:
commit
3275538fa1
2 changed files with 4 additions and 2 deletions
|
|
@@ -98,7 +98,7 @@ def test_arc_1d_scoring():
|
|||
assert dataset.score_answer(entry["answer"], entry) == 1.0
|
||||
|
||||
# Test partial match (answer contained within response)
|
||||
- assert dataset.score_answer(f"The answer is: {entry['answer']}", entry) == 0.5
|
||||
+ assert dataset.score_answer(f"The answer is: {entry['answer']}", entry) > 0.5
|
||||
|
||||
# Test incorrect answer
|
||||
assert dataset.score_answer("wrong answer", entry) == 0.01
|
||||
|
|
|
|||
|
|
@@ -111,7 +111,9 @@ def test_products_scoring():
|
|||
assert dataset.score_answer("wrong", item) == 0.01, "Wrong answer should score 0.01"
|
||||
|
||||
# Test scoring with partial match (answer contained in response)
|
||||
- assert dataset.score_answer(f"The answer is {item['answer']}", item) == 0.5, "Partial match should score 0.5"
|
||||
+ assert (
|
||||
+ dataset.score_answer(f"The answer is {item['answer']}", item) > 0.1
|
||||
+ ), "Partial match should scored len(oracle_answer)/len(answer)"
|
||||
|
||||
# Test scoring with None
|
||||
assert dataset.score_answer(None, item) == 0.0, "None should score 0.0"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue