mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
fix template
This commit is contained in:
parent
95f179f34e
commit
b47b6f94c9
2 changed files with 60 additions and 20 deletions
|
|
@ -116,3 +116,35 @@ def test_word_sorting_dataset_iteration():
|
|||
|
||||
# Test multiple iterations yield same items
|
||||
assert items == list(dataset)
|
||||
|
||||
|
||||
def test_word_sorting_scoring():
    """Check score_answer against a fixed three-word reference item."""
    dataset = WordSortingDataset(WordSortingConfig(size=1, seed=42))
    entry = {"metadata": {"sorted_words": ["apple", "banana", "cherry"]}}

    # (candidate answer, expected score) pairs, evaluated in this order.
    cases = [
        ("apple, banana, cherry", 1.0),  # correct answer
        ("apple,banana, cherry", 1.0),  # correct answer with irregular spacing
        ("banana, cherry, apple", 0.2),  # all words present, but not sorted
        ("gibberish", 0.01),  # garbage
        (None, 0.0),  # no answer supplied
    ]
    for candidate, expected in cases:
        assert dataset.score_answer(candidate, entry) == expected
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue