diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py
index 9752e9a1..c2d39fef 100644
--- a/reasoning_gym/games/__init__.py
+++ b/reasoning_gym/games/__init__.py
@@ -16,7 +16,7 @@ from .mini_sudoku import MiniSudokuConfig, MiniSudokuCurriculum, MiniSudokuDatas
 from .n_queens import NQueensConfig, NQueensCurriculum, NQueensDataset
 from .puzzle24 import Puzzle24Config, Puzzle24Dataset
 from .rush_hour import RushHourConfig, RushHourDataset
-from .sokoban import SokobanConfig, SokobanDataset
+from .sokoban import SokobanConfig, SokobanCurriculum, SokobanDataset
 from .sudoku import SudokuConfig, SudokuDataset
 from .tower_of_hanoi import HanoiConfig, HanoiDataset
 from .tsumego import TsumegoConfig, TsumegoCurriculum, TsumegoDataset
@@ -38,6 +38,7 @@ __all__ = [
     "SudokuConfig",
     "SudokuDataset",
     "SokobanConfig",
+    "SokobanCurriculum",
     "SokobanDataset",
     "RushHourConfig",
     "RushHourDataset",
diff --git a/reasoning_gym/games/contrib/sokoban/src/generator.py b/reasoning_gym/games/contrib/sokoban/src/generator.py
index 372a4168..3fdfd2b0 100644
--- a/reasoning_gym/games/contrib/sokoban/src/generator.py
+++ b/reasoning_gym/games/contrib/sokoban/src/generator.py
@@ -104,7 +104,7 @@ def generate(
                     game.player.update(key=move)
                 game.print_puzzle()
 
-            difficulty = {"size": puzzle_size, "num_steps": len(solution)}
+            difficulty = {"width": width, "height": height}
             return puzzle_str, solution, difficulty
         else:
             if debug:
diff --git a/reasoning_gym/games/sokoban.py b/reasoning_gym/games/sokoban.py
index 0f81930d..1eff7154 100644
--- a/reasoning_gym/games/sokoban.py
+++ b/reasoning_gym/games/sokoban.py
@@ -4,6 +4,7 @@ from typing import Any, Optional
 
 import numpy as np
 
+from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition
 from ..factory import ProceduralDataset, register_dataset
 
 
@@ -130,4 +131,32 @@ Here is your puzzle:
         return 0.0
 
 
-register_dataset("sokoban", SokobanDataset, SokobanConfig)
+class SokobanCurriculum(BaseCurriculum):
+    """Curriculum that grows the Sokoban board's width and height ranges from 6 up to 10."""
+    def __init__(self):
+        super().__init__(SokobanCurriculum.__name__, SokobanConfig)
+        self._define_attributes(
+            RangeAttributeDefinition(
+                name="width",
+                levels=list(range(6, 11)),
+                default_level=0,
+                description="The width of the Sokoban board",
+                attr_type=AttributeType.APPEND,
+                lower_field_name="min_w",
+                upper_field_name="max_w",
+                min_value=6,
+            ),
+            RangeAttributeDefinition(
+                name="height",
+                levels=list(range(6, 11)),
+                default_level=0,
+                description="The height of the Sokoban board",
+                attr_type=AttributeType.APPEND,
+                lower_field_name="min_h",
+                upper_field_name="max_h",
+                min_value=6,
+            ),
+        )
+
+
+register_dataset("sokoban", SokobanDataset, SokobanConfig, SokobanCurriculum)
diff --git a/tests/test_sokoban.py b/tests/test_sokoban.py
index 1e0e6502..07b406b3 100644
--- a/tests/test_sokoban.py
+++ b/tests/test_sokoban.py
@@ -1,6 +1,6 @@
 import pytest
 
-from reasoning_gym.games.sokoban import SokobanConfig, SokobanDataset
+from reasoning_gym.games.sokoban import SokobanConfig, SokobanCurriculum, SokobanDataset
 
 
 def test_sokoban():
@@ -56,3 +56,88 @@ def test_sokoban():
         # Test the scoring
         assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
         assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+
+def test_sokoban_curriculum():
+    """Test the SokobanCurriculum functionality"""
+    curriculum = SokobanCurriculum()
+
+    base_value = {"size": 150, "seed": 1}
+
+    # Test initial configuration
+    base_cfg = curriculum.generate_configuration(base_value)
+    assert base_cfg.seed == 1
+    assert base_cfg.size == 150
+    assert base_cfg.min_w == 6 and base_cfg.max_w == 6
+    assert base_cfg.min_h == 6 and base_cfg.max_h == 6
+    assert base_cfg.min_boxes == 4  # Default value from SokobanConfig
+    assert base_cfg.max_boxes == 10  # Default value from SokobanConfig
+
+    # Test incrementing width attribute
+    curriculum.increment_attr_level("width")
+    width_cfg = curriculum.generate_configuration(base_value)
+    assert width_cfg.min_w == 6 and width_cfg.max_w == 7
+    assert width_cfg.min_h == 6 and width_cfg.max_h == 6  # Height unchanged
+
+    # Test incrementing height attribute
+    curriculum.increment_attr_level("height")
+    both_cfg = curriculum.generate_configuration(base_value)
+    assert both_cfg.min_w == 6 and both_cfg.max_w == 7  # Width preserved
+    assert both_cfg.min_h == 6 and both_cfg.max_h == 7  # Height increased
+
+    # Test decrementing width attribute
+    curriculum.decrement_attr_level("width")
+    height_only_cfg = curriculum.generate_configuration(base_value)
+    assert height_only_cfg.min_w == 6 and height_only_cfg.max_w == 6  # Width reset
+    assert height_only_cfg.min_h == 6 and height_only_cfg.max_h == 7  # Height preserved
+
+    # Test global level adjustments
+    curriculum = SokobanCurriculum()  # Reset curriculum
+    assert curriculum.get_attr_level("width") == 0
+    assert curriculum.get_attr_level("height") == 0
+
+    # Increase global level
+    curriculum.increment_global_level()
+    assert curriculum.get_attr_level("width") == 1
+    assert curriculum.get_attr_level("height") == 1
+
+    global_level_cfg = curriculum.generate_configuration(base_value)
+    assert global_level_cfg.min_w == 6 and global_level_cfg.max_w == 7
+    assert global_level_cfg.min_h == 6 and global_level_cfg.max_h == 7
+
+    # Increase global level again
+    curriculum.increment_global_level()
+    assert curriculum.get_attr_level("width") == 2
+    assert curriculum.get_attr_level("height") == 2
+
+    global_level_cfg_2 = curriculum.generate_configuration(base_value)
+    assert global_level_cfg_2.min_w == 6 and global_level_cfg_2.max_w == 8
+    assert global_level_cfg_2.min_h == 6 and global_level_cfg_2.max_h == 8
+
+    # Decrease global level
+    curriculum.decrement_global_level()
+    assert curriculum.get_attr_level("width") == 1
+    assert curriculum.get_attr_level("height") == 1
+
+    global_level_cfg_3 = curriculum.generate_configuration(base_value)
+    assert global_level_cfg_3.min_w == 6 and global_level_cfg_3.max_w == 7
+    assert global_level_cfg_3.min_h == 6 and global_level_cfg_3.max_h == 7
+
+    # Test upper bound
+    curriculum = SokobanCurriculum()  # Reset curriculum
+    for _ in range(10):  # Try going beyond max level
+        curriculum.increment_attr_level("width")
+        curriculum.increment_attr_level("height")
+
+    max_cfg = curriculum.generate_configuration(base_value)
+    assert max_cfg.min_w == 6 and max_cfg.max_w == 10  # Width capped at 10
+    assert max_cfg.min_h == 6 and max_cfg.max_h == 10  # Height capped at 10
+
+    # Test lower bound
+    curriculum = SokobanCurriculum()  # Reset curriculum
+    curriculum.decrement_attr_level("width")  # Try going below min level
+    curriculum.decrement_attr_level("height")  # Try going below min level
+
+    min_cfg = curriculum.generate_configuration(base_value)
+    assert min_cfg.min_w == 6 and min_cfg.max_w == 6  # Width stays at min
+    assert min_cfg.min_h == 6 and min_cfg.max_h == 6  # Height stays at min