diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py
index 2b01898e..9d15ec3c 100644
--- a/reasoning_gym/games/__init__.py
+++ b/reasoning_gym/games/__init__.py
@@ -28,6 +28,7 @@ __all__ = [
     "EmojiMysteryCurriculum",
     "EmojiMysteryDataset",
     "FutoshikiConfig",
+    "FutoshikiCurriculum",
     "FutoshikiDataset",
     "MiniSudokuConfig",
     "MiniSudokuDataset",
diff --git a/reasoning_gym/games/futoshiki.py b/reasoning_gym/games/futoshiki.py
index 092087f1..b5eab54c 100644
--- a/reasoning_gym/games/futoshiki.py
+++ b/reasoning_gym/games/futoshiki.py
@@ -6,6 +6,7 @@ from dataclasses import dataclass
 from random import Random
 from typing import Any, Optional
 
+from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition
 from ..factory import ProceduralDataset, register_dataset
 
 
@@ -13,15 +14,17 @@ from ..factory import ProceduralDataset, register_dataset
 class FutoshikiConfig:
     """Configuration for Futoshiki puzzle generation"""
 
-    board_size: int = 4  # Board will be NxN where N is this value
-    difficulty: int = 1  # Possible values: 0, 1, 2, 3
+    min_board_size: int = 4  # Boards are NxN; N is drawn per item from [min_board_size, max_board_size]
+    max_board_size: int = 9  # Inclusive upper bound for N
+    min_difficulty: int = 0  # Difficulty is drawn per item from [min_difficulty, max_difficulty]
+    max_difficulty: int = 3  # Inclusive upper bound for difficulty
     seed: Optional[int] = None
     size: int = 500  # Virtual dataset size
 
     def validate(self):
         """Validate configuration parameters"""
-        assert 4 <= self.board_size <= 9, "board_size must be between 4 and 9"
-        assert 0 <= self.difficulty <= 3, "difficulty must be between 0 and 3"
+        assert 4 <= self.min_board_size <= self.max_board_size <= 9, "board_size must be between 4 and 9"
+        assert 0 <= self.min_difficulty <= self.max_difficulty <= 3, "difficulty must be between 0 and 3"
 
 
 class FutoshikiDataset(ProceduralDataset):
@@ -52,11 +55,13 @@ class FutoshikiDataset(ProceduralDataset):
         Difficulty in [0..3] affects number of clues and constraints.
         """
         rng = Random(self.seed + idx)
+        board_size = rng.randint(self.config.min_board_size, self.config.max_board_size)
+        difficulty = rng.randint(self.config.min_difficulty, self.config.max_difficulty)
 
         # Generate random "solved" Futoshiki grid
-        solution = self._generate_random_solution(self.config.board_size, rng)
+        solution = self._generate_random_solution(board_size, rng)
         # Add random adjacency constraints consistent with generated solved grid
-        constraints = self._generate_random_constraints(solution, self.config.difficulty, rng)
+        constraints = self._generate_random_constraints(solution, difficulty, rng)
         # Starting with full solution, remove clues to desired difficulty
         puzzle = self._remove_clues(copy.deepcopy(solution), constraints, rng)
 
@@ -65,11 +70,11 @@ class FutoshikiDataset(ProceduralDataset):
         solution_str = self._puzzle_to_string(solution, constraints)
 
         question = (
-            f"Solve the following {self.config.board_size}x{self.config.board_size} Futoshiki puzzle:\n\n"
+            f"Solve the following {board_size}x{board_size} Futoshiki puzzle:\n\n"
             f"{puzzle_str}\n\n"
             "Ensure your answer follows the same format as the puzzle above, just replace blanks (_) with the correct value for the cell.\n"
             "Use < and > for horizontal constraints. Use \u2227 and \u2228 for vertical constraints.\n"
-            f"Remember, in Futoshiki each row and column must contain each number from 1 to {self.config.board_size} exactly once."
+            f"Remember, in Futoshiki each row and column must contain each number from 1 to {board_size} exactly once."
         )
 
         return {
@@ -79,8 +84,7 @@ class FutoshikiDataset(ProceduralDataset):
                 "puzzle": puzzle,
                 "constraints": constraints,
                 "solution": solution,
-                "board_size": self.config.board_size,
-                "difficulty": self.config.difficulty,
+                "difficulty": {"board_size": board_size, "difficulty": difficulty},
             },
         }
 
@@ -655,4 +659,33 @@ class FutoshikiDataset(ProceduralDataset):
         return reward
 
 
+class FutoshikiCurriculum(BaseCurriculum):
+    """Curriculum over board size and difficulty, mapped onto FutoshikiConfig's min_*/max_* fields"""
+    def __init__(self):
+        super().__init__(FutoshikiCurriculum.__name__, FutoshikiConfig)
+
+        self._define_attributes(
+            RangeAttributeDefinition(
+                name="board_size",
+                levels=[4, 6, 7, 9],
+                default_level=0,
+                description="Board size",
+                attr_type=AttributeType.STATIC,
+                min_value=4,
+                lower_field_name="min_board_size",
+                upper_field_name="max_board_size",
+            ),
+            RangeAttributeDefinition(
+                name="difficulty",
+                levels=[0, 1, 2, 3],
+                default_level=0,
+                description="Difficulty",
+                attr_type=AttributeType.STATIC,
+                min_value=0,
+                lower_field_name="min_difficulty",
+                upper_field_name="max_difficulty",
+            ),
+        )
+
+
-register_dataset("futoshiki", FutoshikiDataset, FutoshikiConfig)
+register_dataset("futoshiki", FutoshikiDataset, FutoshikiConfig, FutoshikiCurriculum)
diff --git a/reasoning_gym/games/tower_of_hanoi.py b/reasoning_gym/games/tower_of_hanoi.py
index 9ab50cb8..ccd1e2fb 100644
--- a/reasoning_gym/games/tower_of_hanoi.py
+++ b/reasoning_gym/games/tower_of_hanoi.py
@@ -6,6 +6,7 @@ import re
 from dataclasses import dataclass
 from typing import Any, Optional
 
+from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition
 from ..factory import ProceduralDataset, register_dataset
 
 QUESTION_TEMPLATE = """Solve the Tower of Hanoi problem with {num_disks} disks and {num_pegs} pegs.
@@ -432,5 +433,23 @@ class HanoiDataset(ProceduralDataset):
         return optimal_moves / user_moves
 
 
+class HanoiCurriculum(BaseCurriculum):
+    """Curriculum over the number of disks, mapped onto HanoiConfig's min_disks/max_disks fields"""
+    def __init__(self):
+        super().__init__(HanoiCurriculum.__name__, HanoiConfig)
+        self._define_attributes(
+            RangeAttributeDefinition(
+                name="num_disks",
+                levels=[3, 4, 5, 7],
+                default_level=0,
+                min_value=3,
+                attr_type=AttributeType.APPEND,
+                lower_field_name="min_disks",
+                upper_field_name="max_disks",
+                description="Number of disks in the puzzle",
+            ),
+        )
+
+
 # Register the dataset
-register_dataset("tower_of_hanoi", HanoiDataset, HanoiConfig)
+register_dataset("tower_of_hanoi", HanoiDataset, HanoiConfig, HanoiCurriculum)
diff --git a/tests/test_futoshiki.py b/tests/test_futoshiki.py
index 61f3276b..aebf9dc7 100644
--- a/tests/test_futoshiki.py
+++ b/tests/test_futoshiki.py
@@ -6,25 +6,17 @@ from reasoning_gym.games import FutoshikiConfig, FutoshikiDataset
 def test_futoshiki_config_validation():
     """Test that invalid configs raise appropriate errors"""
     with pytest.raises(AssertionError):
-        config = FutoshikiConfig(board_size=3)  # Too small
+        config = FutoshikiConfig(min_board_size=5, max_board_size=4)  # min_board_size > max_board_size
         config.validate()
 
     with pytest.raises(AssertionError):
-        config = FutoshikiConfig(board_size=10)  # Too large
-        config.validate()
-
-    with pytest.raises(AssertionError):
-        config = FutoshikiConfig(difficulty=-1)  # Invalid difficulty
-        config.validate()
-
-    with pytest.raises(AssertionError):
-        config = FutoshikiConfig(difficulty=4)  # Invalid difficulty
+        config = FutoshikiConfig(min_difficulty=2, max_difficulty=1)  # min_difficulty > max_difficulty
         config.validate()
 
 
 def test_futoshiki_deterministic():
     """Test that dataset generates same puzzles with same seed"""
-    config = FutoshikiConfig(seed=42, size=10)
+    config = FutoshikiConfig(seed=42, size=10, min_board_size=4, max_board_size=9, min_difficulty=0, max_difficulty=3)
     dataset1 = FutoshikiDataset(config)
     dataset2 = FutoshikiDataset(config)
 
@@ -34,7 +26,7 @@ def test_futoshiki_deterministic():
 
 def test_futoshiki_items():
     """Test basic properties of generated items"""
-    config = FutoshikiConfig(board_size=4, difficulty=1, size=10, seed=42)
+    config = FutoshikiConfig(min_difficulty=1, max_difficulty=1, min_board_size=4, max_board_size=9, size=10, seed=42)
     dataset = FutoshikiDataset(config)
 
     for i in range(len(dataset)):
@@ -49,32 +41,33 @@ def test_futoshiki_items():
         assert "puzzle" in metadata
         assert "solution" in metadata
         assert "constraints" in metadata
-        assert "board_size" in metadata
-        assert "difficulty" in metadata
 
         # Verify board dimensions
         puzzle = metadata["puzzle"]
         solution = metadata["solution"]
-        assert len(puzzle) == config.board_size
-        assert len(solution) == config.board_size
+        assert len(puzzle) >= config.min_board_size
+        assert len(solution) >= config.min_board_size
+        assert len(puzzle) <= config.max_board_size
+        assert len(solution) <= config.max_board_size
         for row in puzzle:
-            assert len(row) == config.board_size
+            assert len(row) >= config.min_board_size
+            assert len(row) <= config.max_board_size
         for row in solution:
-            assert len(row) == config.board_size
-
+            assert len(row) >= config.min_board_size
+            assert len(row) <= config.max_board_size
         # Verify constraints format
         constraints = metadata["constraints"]
         for ((r1, c1), (r2, c2)), rel in constraints.items():
-            assert 0 <= r1 < config.board_size
-            assert 0 <= c1 < config.board_size
-            assert 0 <= r2 < config.board_size
-            assert 0 <= c2 < config.board_size
+            assert 0 <= r1 < len(puzzle)  # bound by this item's actual board size, not the config maximum
+            assert 0 <= c1 < len(puzzle)
+            assert 0 <= r2 < len(puzzle)
+            assert 0 <= c2 < len(puzzle)
             assert rel in ("<", ">")
 
 
 def test_futoshiki_solution_validity():
     """Test that solutions are valid according to Futoshiki rules"""
-    config = FutoshikiConfig(board_size=4, difficulty=1, size=10, seed=42)
+    config = FutoshikiConfig(min_board_size=4, max_board_size=4, min_difficulty=1, max_difficulty=1, size=10, seed=42)
     dataset = FutoshikiDataset(config)
 
     def is_valid_solution(solution, board_size, constraints):
@@ -105,12 +98,12 @@ def test_futoshiki_solution_validity():
         solution = metadata["solution"]
         constraints = metadata["constraints"]
 
-        assert is_valid_solution(solution, config.board_size, constraints)
+        assert is_valid_solution(solution, config.min_board_size, constraints)
 
 
 def test_futoshiki_puzzle_solvability():
     """Test that generated puzzles are solvable and have unique solutions"""
-    config = FutoshikiConfig(board_size=4, difficulty=1, size=5, seed=42)
+    config = FutoshikiConfig(min_board_size=4, max_board_size=4, min_difficulty=1, max_difficulty=1, size=5, seed=42)
     dataset = FutoshikiDataset(config)
 
     for i in range(len(dataset)):
@@ -140,7 +133,14 @@ def test_futoshiki_difficulty_levels():
     constraints_by_difficulty = []
 
     for difficulty in range(4):  # 0 to 3
-        config = FutoshikiConfig(board_size=board_size, difficulty=difficulty, size=size, seed=seed)
+        config = FutoshikiConfig(
+            min_board_size=board_size,
+            max_board_size=board_size,
+            min_difficulty=difficulty,
+            max_difficulty=difficulty,
+            size=size,
+            seed=seed,
+        )
         dataset = FutoshikiDataset(config)
 
         avg_clues = sum(count_clues(item["metadata"]["puzzle"]) for item in dataset) / size
@@ -159,7 +159,7 @@ def test_futoshiki_difficulty_levels():
 
 def test_futoshiki_answer_scoring():
     """Test the answer scoring mechanism"""
-    config = FutoshikiConfig(board_size=4, difficulty=0, size=5, seed=42)
+    config = FutoshikiConfig(min_board_size=4, max_board_size=4, min_difficulty=0, max_difficulty=0, size=5, seed=42)
     dataset = FutoshikiDataset(config)
 
     for item in dataset:
@@ -186,3 +186,65 @@ def test_futoshiki_answer_scoring():
         bad_answer = "\n".join(anwser_with_additional_text.split("\n")[::-1])
 
         assert dataset.score_answer(bad_answer, item) < 0.1
+
+
+def test_futoshiki_curriculum():
+    """Test the FutoshikiCurriculum works as expected"""
+    from reasoning_gym.games.futoshiki import FutoshikiCurriculum
+
+    curriculum = FutoshikiCurriculum()
+
+    base_value = {"size": 150, "seed": 1}
+
+    base_cfg: FutoshikiConfig = curriculum.generate_configuration(base_value)
+    assert base_cfg.seed == 1
+    assert base_cfg.size == 150
+    assert base_cfg.min_board_size == 4 and base_cfg.max_board_size == 4
+    assert base_cfg.min_difficulty == 0 and base_cfg.max_difficulty == 0
+
+    # Test incrementing attribute levels
+    curriculum.increment_attr_level("board_size")
+    curriculum.increment_attr_level("difficulty")
+    increased_cfg = curriculum.generate_configuration(base_value)
+    assert increased_cfg.min_board_size == 6 and increased_cfg.max_board_size == 6
+    assert increased_cfg.min_difficulty == 1 and increased_cfg.max_difficulty == 1
+
+    # Test incrementing again
+    curriculum.increment_attr_level("board_size")
+    curriculum.increment_attr_level("difficulty")
+    increased_cfg2 = curriculum.generate_configuration(base_value)
+    assert increased_cfg2.min_board_size == 7 and increased_cfg2.max_board_size == 7
+    assert increased_cfg2.min_difficulty == 2 and increased_cfg2.max_difficulty == 2
+
+    # Test incrementing to max levels
+    curriculum.increment_attr_level("board_size")
+    curriculum.increment_attr_level("difficulty")
+    max_cfg = curriculum.generate_configuration(base_value)
+    assert max_cfg.min_board_size == 9 and max_cfg.max_board_size == 9
+    assert max_cfg.min_difficulty == 3 and max_cfg.max_difficulty == 3
+
+    # Test that we can't go beyond max levels
+    assert not curriculum.increment_attr_level("board_size")
+    assert not curriculum.increment_attr_level("difficulty")
+    still_max_cfg = curriculum.generate_configuration(base_value)
+    assert still_max_cfg.min_board_size == 9 and still_max_cfg.max_board_size == 9
+    assert still_max_cfg.min_difficulty == 3 and still_max_cfg.max_difficulty == 3
+
+    # Test decrementing attribute levels
+    curriculum.decrement_attr_level("board_size")
+    curriculum.decrement_attr_level("difficulty")
+    decreased_cfg = curriculum.generate_configuration(base_value)
+    assert decreased_cfg.min_board_size == 7 and decreased_cfg.max_board_size == 7
+    assert decreased_cfg.min_difficulty == 2 and decreased_cfg.max_difficulty == 2
+
+    # Test global level setting
+    curriculum.set_global_level(0)
+    global_lvl0_cfg = curriculum.generate_configuration(base_value)
+    assert global_lvl0_cfg.min_board_size == 4 and global_lvl0_cfg.max_board_size == 4
+    assert global_lvl0_cfg.min_difficulty == 0 and global_lvl0_cfg.max_difficulty == 0
+
+    # Test global level increment
+    curriculum.increment_global_level()
+    global_lvl1_cfg = curriculum.generate_configuration(base_value)
+    assert global_lvl1_cfg.min_board_size == 6 and global_lvl1_cfg.max_board_size == 6
+    assert global_lvl1_cfg.min_difficulty == 1 and global_lvl1_cfg.max_difficulty == 1