mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-28 17:29:39 +00:00
Review: Add unit tests, clean-ups, fixes, and add datasets dep in a new requirements-optional.txt
This commit is contained in:
parent
09e30a2308
commit
fdb93a3d7d
3 changed files with 147 additions and 31 deletions
100
tests/test_wikirace.py
Normal file
100
tests/test_wikirace.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
import pytest
|
||||
|
||||
from reasoning_gym.games.wikirace import WikiraceConfig, WikiraceCurriculum, WikiraceDataset
|
||||
|
||||
|
||||
def test_wikirace_game_config_validation():
    """Each invalid configuration must be rejected with an AssertionError."""
    invalid_settings = (
        {"min_distance": 0},  # zero minimum distance
        {"min_distance": 3, "max_distance": 2},  # minimum above maximum
        {"max_tries": -2},  # negative number of tries
    )

    for settings in invalid_settings:
        # Construction sits inside the context manager so the check passes
        # whether the assertion fires in the constructor or in validate().
        with pytest.raises(AssertionError):
            bad_config = WikiraceConfig(**settings)
            bad_config.validate()
|
||||
|
||||
|
||||
def test_wikirace_game_deterministic():
    """Two datasets built from identical configs must yield equal items."""

    def build_dataset():
        # A fresh config per dataset, so nothing is shared between the two.
        return WikiraceDataset(WikiraceConfig(seed=42, size=2))

    first = build_dataset()
    second = build_dataset()

    # Compare position by position through item lookup.
    for index in range(len(first)):
        assert first[index] == second[index]
|
||||
|
||||
|
||||
def test_wikirace_game_items():
    """Check the structure and the scoring of every generated item."""
    config = WikiraceConfig(seed=42, size=2)
    dataset = WikiraceDataset(config)

    top_level_keys = ("question", "answer", "metadata")
    metadata_keys = ("source", "links", "target", "current", "distance")

    for item in dataset:
        assert isinstance(item, dict)
        for key in top_level_keys:
            assert key in item

        # Every item must carry the metadata fields listed above.
        metadata = item["metadata"]
        for key in metadata_keys:
            assert key in metadata

        # The reported distance must lie within the configured bounds.
        assert config.min_distance <= metadata["distance"] <= config.max_distance

        # An answer that is not an integer gets the failing score.
        assert dataset.score_answer(answer="nope", entry=item) == 0.01

        # A negative index gets the failing score.
        assert dataset.score_answer(answer="-1", entry=item) == 0.01

        # An index equal to the number of links is out of bounds and fails too.
        past_the_end = str(len(metadata["links"]))
        assert dataset.score_answer(answer=past_the_end, entry=item) == 0.01

        # Any parsable, in-range answer scores at least 0.1.
        assert dataset.score_answer(answer="0", entry=item) >= 0.1

        # The expected answer scores 1.0.
        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
|
||||
|
||||
|
||||
def test_wikirace_game_single():
    """Pin the item generated for seed 42 and the scores of selected answers."""
    dataset = WikiraceDataset(WikiraceConfig(seed=42, size=1))
    item = dataset[0]
    metadata = item["metadata"]

    # If these assertions fail, the generation algorithm has probably just
    # changed, which is fine: update the expected values below.
    assert metadata["source"] == "Vadim Bakatin"
    assert metadata["target"] == "Azerbaijan Technological University"
    assert metadata["distance"] == 3
    assert len(metadata["path"]) == 0

    # If these assertions fail, it is most likely an actual error.
    # NOTE(review): the second and third rows both submit "2", although they
    # are described as link 8 (Russians) and link 0 (Communist Party of the
    # Soviet Union) - confirm the intended indices and their scores.
    expected_scores = (
        # The only valid answer is 4 - Moscow.
        ("4", 1.0),
        # Described as: selecting 8 - Russians moves further from the target.
        ("2", 0.1),
        # Described as: selecting 0 - Communist Party of the Soviet Union
        # moves neither further from nor closer to the target.
        ("2", 0.1),
    )
    for answer, score in expected_scores:
        assert dataset.score_answer(answer=answer, entry=item) == score

    # To refresh the expected values when updating this test, print every
    # candidate's score (run with pytest -s):
    # for (i, _) in item['metadata']['links']:
    #     print(i, dataset.score_answer(answer=str(i), entry=item))
|
||||
Loading…
Add table
Add a link
Reference in a new issue