diff --git a/reasoning_gym/algorithmic/jugs.py b/reasoning_gym/algorithmic/jugs.py
index 43e97620..b1458279 100644
--- a/reasoning_gym/algorithmic/jugs.py
+++ b/reasoning_gym/algorithmic/jugs.py
@@ -232,9 +232,9 @@ class JugsConfig:
 
     def validate(self):
         """Validate configuration parameters"""
-        assert self.num_jugs > 2, "edge_probability must be gt 2"
-        assert self.difficulty > 0, "edge_probability must be gt 0"
-        assert self.difficulty < 200, "edge_probability must be gt 200"
+        assert self.num_jugs > 2, "num_jugs must be gt 2"
+        assert self.difficulty > 0, "difficulty must be gt 0"
+        assert self.difficulty < 200, "difficulty must be lt 200"
 
 
 class JugsDataset(ProceduralDataset):
diff --git a/tests/test_jugs.py b/tests/test_jugs.py
index dc5f0476..a8bb4615 100644
--- a/tests/test_jugs.py
+++ b/tests/test_jugs.py
@@ -7,7 +7,7 @@ from reasoning_gym.algorithmic.jugs import JugsConfig, JugsDataset
 
 def test_jugs():
     """Test basic properties and solution of generated items"""
-    config = JugsConfig(seed=42, size=10, num_jugs=3, difficulty=20)
+    config = JugsConfig(seed=42, size=1000, num_jugs=3, difficulty=5)
     dataset = JugsDataset(config)
 
     # easy
@@ -20,3 +20,31 @@ def test_jugs():
         # Test the scoring
         assert dataset.score_answer(answer=json.dumps(item["metadata"]["possible_answer"]), entry=item) == 1.0
         assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    config = JugsConfig(seed=42, size=1, num_jugs=3, difficulty=50)
+    dataset = JugsDataset(config)
+
+    # med
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=json.dumps(item["metadata"]["possible_answer"]), entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    config = JugsConfig(seed=42, size=1, num_jugs=3, difficulty=99)
+    dataset = JugsDataset(config)
+
+    # hard
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=json.dumps(item["metadata"]["possible_answer"]), entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0