diff --git a/GALLERY.md b/GALLERY.md index 1d09a54f..ad0248ce 100644 --- a/GALLERY.md +++ b/GALLERY.md @@ -19,6 +19,7 @@ This gallery shows examples from all available datasets using their default conf - [fraction_simplification](#fraction_simplification) - [game_of_life](#game_of_life) - [gcd](#gcd) +- [group_anagrams](#group_anagrams) - [gsm_symbolic](#gsm_symbolic) - [intermediate_integration](#intermediate_integration) - [largest_island](#largest_island) @@ -42,13 +43,13 @@ This gallery shows examples from all available datasets using their default conf - [simple_equations](#simple_equations) - [simple_geometry](#simple_geometry) - [simple_integration](#simple_integration) +- [sokoban](#sokoban) - [spell_backward](#spell_backward) - [sudoku](#sudoku) - [syllogism](#syllogism) - [time_intervals](#time_intervals) - [tower_of_hanoi](#tower_of_hanoi) - [word_ladder](#word_ladder) -- [group_anagrams](#group_anagrams) - [word_sequence_reversal](#word_sequence_reversal) - [word_sorting](#word_sorting) - [zebra_puzzles](#zebra_puzzles) @@ -406,17 +407,17 @@ Example tasks: Example 1: Question: 4 + 3 = Answer: 7 -Metadata: {'num_terms': 2, 'num_digits': 1, 'expression': '4 + 3'} +Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 1}, 'expression': '4 + 3'} Example 2: Question: 812 + 880 = Answer: 1692 -Metadata: {'num_terms': 2, 'num_digits': 3, 'expression': '812 + 880'} +Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 3}, 'expression': '812 + 880'} Example 3: Question: 2 + 6 + 3 + 4 + 0 = Answer: 15 -Metadata: {'num_terms': 5, 'num_digits': 1, 'expression': '2 + 6 + 3 + 4 + 0'} +Metadata: {'difficulty': {'num_terms': 5, 'num_digits': 1}, 'expression': '2 + 6 + 3 + 4 + 0'} ```` @@ -897,6 +898,75 @@ Metadata: {'numbers': [297, 30], 'result': 3} ```` +### group_anagrams +Generates Group Anagrams exercises with configurable difficulty + +Default configuration: +```python +anagram_groups = 10 +max_words_per_group = 5 +size = 500 +seed = 42 +``` + +Example 
tasks: +```` +Example 1: +Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. + +Your job is to group the anagrams together. You can return the answer in any order. + +Example: +Input: ["eat", "tea", "tan", "ate", "nat", "bat"] +Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] +Explanation: + - There is no string in the input that can be rearranged to form "bat". + - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. + +Group the following list of words into anagrams: +["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"] + +Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]] +Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]} + +Example 2: +Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. + +Your job is to group the anagrams together. You can return the answer in any order. + +Example: +Input: ["eat", "tea", "tan", "ate", "nat", "bat"] +Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] +Explanation: + - There is no string in the input that can be rearranged to form "bat". + - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. 
+ +Group the following list of words into anagrams: +["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"] + +Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]] +Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]} + +Example 3: +Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. + +Your job is to group the anagrams together. You can return the answer in any order. + +Example: +Input: ["eat", "tea", "tan", "ate", "nat", "bat"] +Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] +Explanation: + - There is no string in the input that can be rearranged to form "bat". + - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. 
+ +Group the following list of words into anagrams: +["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"] + +Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]] +Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]} + +```` + ### gsm_symbolic Default configuration: ```python @@ -1101,17 +1171,17 @@ Example tasks: Example 1: Question: How many legs are there in total if you have 1 sea slug, 1 deer? Answer: 4 -Metadata: {'animals': {'sea slug': 1, 'deer': 1}, 'total_legs': 4} +Metadata: {'difficulty': {'num_animals': 2}, 'animals': {'sea slug': 1, 'deer': 1}, 'total_legs': 4} Example 2: Question: How many legs are there in total if you have 2 sheeps, 2 dogs? Answer: 16 -Metadata: {'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16} +Metadata: {'difficulty': {'num_animals': 2}, 'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16} Example 3: Question: How many legs are there in total if you have 1 crab, 2 lobsters, 1 human, 1 cow, 1 bee? 
Answer: 42 -Metadata: {'animals': {'crab': 1, 'lobster': 2, 'human': 1, 'cow': 1, 'bee': 1}, 'total_legs': 42} +Metadata: {'difficulty': {'num_animals': 5}, 'animals': {'crab': 1, 'lobster': 2, 'human': 1, 'cow': 1, 'bee': 1}, 'total_legs': 42} ```` @@ -1923,6 +1993,107 @@ Metadata: {'integrand': '-28*X**3 + 8*X', 'variable': 'X', 'expected_answer_expr ```` +### sokoban +Generates Sokoban games with configurable parameters + +Default configuration: +```python +seed = 42 +size = 500 +min_w = 6 +min_h = 6 +max_w = 10 +max_h = 10 +min_boxes = 6 +max_boxes = 10 +``` + +Example tasks: +```` +Example 1: +Question: You are going to solve a 'sokoban' puzzle. + +* - The player +% - The player on a goal +@ - A box +X - A goal +$ - A box on a goal ++ - A wall +- - An empty position + +Your solution must be a string of characters, ex: LDURRUDL. + +Here is your puzzle: ++ + + + + + + + + ++ + X - @ * @ X + ++ + + - - @ - + + ++ + + - - - X $ + ++ + + + - + + + + ++ + $ + + + + + + ++ + + + + + + + + + + +Answer: RLDULLRRDLDR +Metadata: {'gamestr': '+ + + + + + + + + \n+ + X - @ * @ X + \n+ + + - - @ - + + \n+ + + - - - X $ + \n+ + + + - + + + + \n+ + $ + + + + + + \n+ + + + + + + + + \n\n', 'difficulty': {'size': (7, 9), 'num_steps': 12}} + +Example 2: +Question: You are going to solve a 'sokoban' puzzle. + +* - The player +% - The player on a goal +@ - A box +X - A goal +$ - A box on a goal ++ - A wall +- - An empty position + +Your solution must be a string of characters, ex: LDURRUDL. + +Here is your puzzle: ++ + + + + + ++ - * - - + ++ @ - - @ + ++ X - @ - + ++ - - - X + ++ X - @ X + ++ - - - - + ++ + + + + + + + +Answer: LDRRDRDDLLURURDULUURDD +Metadata: {'gamestr': '+ + + + + + \n+ - * - - + \n+ @ - - @ + \n+ X - @ - + \n+ - - - X + \n+ X - @ X + \n+ - - - - + \n+ + + + + + \n\n', 'difficulty': {'size': (8, 6), 'num_steps': 22}} + +Example 3: +Question: You are going to solve a 'sokoban' puzzle. 
+ +* - The player +% - The player on a goal +@ - A box +X - A goal +$ - A box on a goal ++ - A wall +- - An empty position + +Your solution must be a string of characters, ex: LDURRUDL. + +Here is your puzzle: ++ + + + + + + + + + + + ++ - $ - X + - - - - - + ++ - @ - - - - - @ - X + ++ - * - @ - - X - $ - + ++ - - - - X + - - - - + ++ + - - - - + $ - @ - + ++ + + - - - - - - - - + ++ + + - - - $ - - - - + ++ + + + - - - - - - - + ++ + + + + + + + + + + + + + +Answer: RRRRURRRLDDRRDLULDRDLLLLULLDRDRUULUUULDLLURRDRU +Metadata: {'gamestr': '+ + + + + + + + + + + + \n+ - $ - X + - - - - - + \n+ - @ - - - - - @ - X + \n+ - * - @ - - X - $ - + \n+ - - - - X + - - - - + \n+ + - - - - + $ - @ - + \n+ + + - - - - - - - - + \n+ + + - - - $ - - - - + \n+ + + + - - - - - - - + \n+ + + + + + + + + + + + \n\n', 'difficulty': {'size': (10, 12), 'num_steps': 47}} + +```` + ### spell_backward Generates tasks to spell words backward @@ -2112,7 +2283,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6, Example 2: Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM. Answer: 02:38 -Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 5, 9, 44), 'end_time': datetime.datetime(2025, 2, 5, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'} +Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 7, 9, 44), 'end_time': datetime.datetime(2025, 2, 7, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'} Example 3: Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days. @@ -2217,85 +2388,6 @@ Metadata: {'start_word': 'SNOG', 'end_word': 'SUQS', 'word_length': 4, 'chain_le ```` - -### group_anagrams - -Group anagrams together in a list of words. 
- -Default configuration -```python -anagram_groups: int = 10 # Groups of anagrams present in the input -max_words_per_group: int = 5 # Maximum number of words in a single anagram group -``` - -Example tasks: -``` -Example 1: -Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. - -Your job is to group the anagrams together. You can return the answer in any order. - -Example: -Input: ["eat", "tea", "tan", "ate", "nat", "bat"] -Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] -Explanation: - - There is no string in the input that can be rearranged to form "bat". - - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. - -Group the following list of words into anagrams: -["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"] - -Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]] - -Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]} - --------------------------------------------------- - -Example 2: -Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. - -Your job is to group the anagrams together. You can return the answer in any order. 
- -Example: -Input: ["eat", "tea", "tan", "ate", "nat", "bat"] -Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] -Explanation: - - There is no string in the input that can be rearranged to form "bat". - - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. - -Group the following list of words into anagrams: -["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"] - -Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]] - -Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]} - --------------------------------------------------- - -Example 3: -Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. - -Your job is to group the anagrams together. You can return the answer in any order. - -Example: -Input: ["eat", "tea", "tan", "ate", "nat", "bat"] -Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] -Explanation: - - There is no string in the input that can be rearranged to form "bat". - - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. 
- -Group the following list of words into anagrams: -["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"] - -Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]] - -Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]} - --------------------------------------------------- -``` - - - ### word_sequence_reversal Generates word sequence reversal tasks from text spans diff --git a/README.md b/README.md index 9335a1d2..b623eebc 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets ### Game Tasks - `SudokuDataset`: Generate 9x9 Sudoku puzzles with configurable number of empty cells +- `SokobanDataset`: Generate [Sokoban](https://en.wikipedia.org/wiki/Sokoban) puzzles with configurable board size and number of boxes. 
- `MiniSudokuDataset`: Generate 4x4 Mini Sudoku puzzles with configurable difficulty - `MazeDataset`: Generate a maze with a start and a goal - `CountdownDataset`: Generate number game tasks where numbers and operators must be combined to reach a target value diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py index 8e4e32d6..958dcd01 100644 --- a/reasoning_gym/games/__init__.py +++ b/reasoning_gym/games/__init__.py @@ -11,6 +11,7 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset from .maze import MazeConfig, MazeDataset from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset from .n_queens import NQueensDataset +from .sokoban import SokobanConfig, SokobanDataset from .sudoku import SudokuConfig, SudokuDataset from .tower_of_hanoi import HanoiConfig, HanoiDataset @@ -21,6 +22,8 @@ __all__ = [ "MiniSudokuDataset", "SudokuConfig", "SudokuDataset", + "SokobanConfig", + "SokobanDataset", "MazeConfig", "MazeDataset", "GameOfLifeConfig", diff --git a/reasoning_gym/games/contrib/__init__.py b/reasoning_gym/games/contrib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/reasoning_gym/games/contrib/sokoban/LICENSE b/reasoning_gym/games/contrib/sokoban/LICENSE new file mode 100644 index 00000000..84d0d484 --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Bruno Andrade + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/reasoning_gym/games/contrib/sokoban/README.md b/reasoning_gym/games/contrib/sokoban/README.md new file mode 100644 index 00000000..44d565ea --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/README.md @@ -0,0 +1,52 @@ +# 📦 Sokoban Solver and Generator + +This folder contains a minified version of Bruno Andrade's Sokoban game, all pygame dependencies were stripped. + +The original version can be found here: [xbandrade/sokoban-solver-generator](https://github.com/xbandrade/sokoban-solver-generator) + + +This is a Sokoban puzzle generator and solver that uses BFS, A* and Dijkstra search algorithms. + +`Sokoban` is a puzzle game in which the player pushes boxes around in a warehouse, trying to get every box to a goal. + + +### ❕Sokoban Puzzle +The puzzle states are stored in a matrix, and each element of the puzzle is represented by a single character in the matrix. +``` ++ + + + + + + ++ * - @ - X + ++ + - @ - + + ++ X - - - $ + ++ + + + + + + +``` +`*` - The player
+`%` - The player on a goal
+`@` - A box
+`X` - A goal
+`$` - A box on a goal
+`+` - A wall
+`-` - An empty position
+ +In the original pygame version, a box on a goal has its color changed to green in the game window. + + +### ❕ Sokoban Generator + +The generator will initially create a puzzle with a random board size, then the player and the boxes on goals will be randomly placed on the board. +The player will only be able to pull boxes from their positions during the generation of a puzzle, breaking every wall in its way, so it is guaranteed that the puzzle will have a valid solution. + + +### ❕ Sokoban Solver + +The algorithms used to implement the Sokoban puzzle solvers were `Breadth-First Search (BFS)` and `A*`. + +The `BFS` solver uses a queue to store the next states of the puzzle it needs to visit. A visited state is stored in a hashset, and BFS won't try to visit the same state twice. + +The `A*` algorithm is similar to the BFS algorithm, but it uses a priority queue instead of a queue, and it prioritizes moves that are more likely to solve the problem. +It does so by assigning costs to the puzzle state and the player's movements, punishing the player with high costs for a bad move and rewarding the player with lower costs for a good move. +The state costs are defined by heuristic functions, and this solver was implemented with two different heuristics: the `Manhattan Distance` function and the `Dijkstra` distance function. + +All three implementations (BFS, A* with the Manhattan heuristic, and A* with the Dijkstra heuristic) check for possible deadlocks (states that are impossible to solve) before adding the new state to the queue. 
+ + +More about Sokoban: [Wikipedia Article](https://en.wikipedia.org/wiki/Sokoban) diff --git a/reasoning_gym/games/contrib/sokoban/__init__.py b/reasoning_gym/games/contrib/sokoban/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat new file mode 100644 index 00000000..867d112a --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat @@ -0,0 +1,10 @@ ++ + + + + + + ++ - * - - - + ++ - - - $ - + ++ X - - @ - + ++ - - - - - + ++ $ - + - - + ++ + - - - - + ++ X @ - $ - + ++ + - - - - + ++ + + + + + + diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat new file mode 100644 index 00000000..9ba48c31 --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat @@ -0,0 +1,5 @@ ++ + + + + + + ++ * - @ - X + ++ + - @ - + + ++ X - - - - + ++ + + + + + + diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat new file mode 100644 index 00000000..46755810 --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat @@ -0,0 +1,6 @@ +- - + + + + + + +- + + - - - * + ++ + - - - + X + ++ X - @ - @ @ + ++ X X @ - - - + ++ + + + + + + + diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat new file mode 100644 index 00000000..9d0bc599 --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat @@ -0,0 +1,7 @@ +- + + + + + + - - - +- + X - - X + - - - ++ + - @ @ + + - - - ++ - - - - + + - - - ++ - @ - - * + + + + ++ + - - - - - - X + +- + + + + + + + + + diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat new file mode 100644 index 00000000..42fbc6eb --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat @@ -0,0 +1,7 @@ +- + + + + + + - - 
++ + X - @ - + + + ++ - - - - - - - + ++ - @ + + X - @ + ++ - - - @ - + - + ++ + + * - X - X + +- - + + + + + + + diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat new file mode 100644 index 00000000..3a096d58 --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat @@ -0,0 +1,7 @@ +- + + + + + + + - ++ + - - + - - + + ++ - @ - - - @ - + ++ - - X * X - - + ++ + @ + + - - + + ++ - - X - - - + - ++ + + + + + + + - diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat new file mode 100644 index 00000000..32ee5bbc --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat @@ -0,0 +1,9 @@ +- - - + + + + + + + + +- - - + - - - - - - + +- - + + - - - - @ - + +- + + - - + + - + + + ++ + - - + - - X - - + ++ - - + X @ @ - - + + ++ * + X - - - - + + - ++ + - - - - - + + - - ++ + + + + + + + - - - diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat new file mode 100644 index 00000000..9c2fe302 --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat @@ -0,0 +1,6 @@ ++ + + + + + + + ++ - - @ - X * + ++ - @ - - + X + ++ X X @ - @ @ + ++ X X @ - - - + ++ + + + + + + + diff --git a/reasoning_gym/games/contrib/sokoban/src/__init__.py b/reasoning_gym/games/contrib/sokoban/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/reasoning_gym/games/contrib/sokoban/src/astar.py b/reasoning_gym/games/contrib/sokoban/src/astar.py new file mode 100644 index 00000000..25d1e63d --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/src/astar.py @@ -0,0 +1,81 @@ +from collections import defaultdict +from heapq import heappop, heappush + +import numpy as np + +from reasoning_gym.games.contrib.sokoban.src.utils import ( + can_move, + dijkstra_sum, + get_state, + is_deadlock, + is_solved, + manhattan_sum, +) + + +def astar(matrix, 
player_pos, debug=False, heuristic="manhattan"): + # print(f'A* - {heuristic.title()} Heuristic') + heur = "[A*]" if heuristic == "manhattan" else "[Dijkstra]" + shape = matrix.shape + initial_state = get_state(matrix) + initial_cost = curr_depth = 0 + if heuristic == "manhattan": + curr_cost = manhattan_sum(initial_state, player_pos, shape) + else: + distances = defaultdict(lambda: []) + curr_cost = dijkstra_sum(initial_state, player_pos, shape, distances) + seen = {None} + heap = [] + heappush(heap, (initial_cost, curr_cost, initial_state, player_pos, curr_depth, "")) + moves = [(1, 0), (-1, 0), (0, -1), (0, 1)] + direction = { + (1, 0): "D", + (-1, 0): "U", + (0, -1): "L", + (0, 1): "R", + } + while heap: + _, curr_cost, state, pos, depth, path = heappop(heap) + seen.add(state) + for move in moves: + new_state, move_cost = can_move(state, shape, pos, move) + deadlock = is_deadlock(new_state, shape) + if new_state in seen or deadlock: + continue + new_pos = pos[0] + move[0], pos[1] + move[1] + if heuristic == "manhattan": + new_cost = manhattan_sum(new_state, new_pos, shape) + else: + new_cost = dijkstra_sum(new_state, new_pos, shape, distances) + if new_cost == float("inf"): + continue + heappush( + heap, + ( + move_cost + curr_cost, + new_cost, + new_state, + new_pos, + depth + 1, + path + direction[move], + ), + ) + if is_solved(new_state): + # print(f'{heur} Solution found!\n\n{path + direction[move]}\nDepth {depth + 1}\n') + if debug: + print(f"{heur} Solution Found!\n{path + direction[move]}", 20) + return (path + direction[move], depth + 1) + if debug: + print(f"{heur} Solution Depth: {depth + 1}\n{path + direction[move]}", 20) + print(f"{heur} Solution not found!\n") + if debug: + print(f"{heur} Solution Not Found!\nDepth {depth + 1}", 20) + + return (None, -1 if not heap else depth + 1) + + +def solve_astar(puzzle, visualizer=False, heuristic="manhattan"): + matrix = puzzle + where = np.where((matrix == "*") | (matrix == "%")) + player_pos = where[0][0], 
where[1][0] + return astar(matrix, player_pos, debug=visualizer, heuristic=heuristic) diff --git a/reasoning_gym/games/contrib/sokoban/src/bfs.py b/reasoning_gym/games/contrib/sokoban/src/bfs.py new file mode 100644 index 00000000..d6a376c9 --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/src/bfs.py @@ -0,0 +1,66 @@ +import time +from collections import deque + +import numpy as np + +from reasoning_gym.games.contrib.sokoban.src.utils import can_move, get_state, is_deadlock, is_solved, print_state + + +def bfs(matrix, player_pos, debug=False): + print("Breadth-First Search") + initial_state = get_state(matrix) + shape = matrix.shape + print_state(initial_state, shape) + seen = {None} + q = deque([(initial_state, player_pos, 0, "")]) + moves = [(1, 0), (-1, 0), (0, -1), (0, 1)] + curr_depth = 0 + direction = { + (1, 0): "D", + (-1, 0): "U", + (0, -1): "L", + (0, 1): "R", + } + while q: + state, pos, depth, path = q.popleft() + # if depth != curr_depth: + # print(f'Depth: {depth}') + # curr_depth = depth + seen.add(state) + for move in moves: + new_state, _ = can_move(state, shape, pos, move) + deadlock = is_deadlock(new_state, shape) + if new_state in seen or deadlock: + continue + q.append( + ( + new_state, + (pos[0] + move[0], pos[1] + move[1]), + depth + 1, + path + direction[move], + ) + ) + if is_solved(new_state): + print(f"[BFS] Solution found!\n\n{path + direction[move]}\nDepth {depth + 1}\n") + if debug: + print(f"[BFS] Solution Found!\n{path + direction[move]}", 20) + return (path + direction[move], depth + 1) + if debug: + print(f"[BFS] Solution Depth: {depth + 1}\n{path + direction[move]}", 20) + print(f"[BFS] Solution not found!\n") + if debug: + print(f"[BFS] Solution Not Found!\nDepth {depth + 1}", 20) + return (None, -1 if not q else depth + 1) + + +def solve_bfs(puzzle, visualizer=False): + matrix = puzzle + where = np.where((matrix == "*") | (matrix == "%")) + player_pos = where[0][0], where[1][0] + return bfs(matrix, player_pos, 
debug=visualizer) + + +if __name__ == "__main__": + start = time.time() + root = solve_bfs(np.loadtxt("levels/lvl7.dat", dtype=" str: + return self.char + + +class Game: + def __init__(self, width=19, height=10, level=None, path=None): + self.level = level + self.width = width + self.height = height + self.puzzle = np.empty((height, width), dtype=PuzzleElement) + + self.player = None + self.puzzle_size = None + self.pad_x = 0 + self.pad_y = 0 + self.path = path or f"levels/lvl{level}.dat" + + if path: + if type(self) == Game: + self.load_puzzle() + + def get_matrix(self): + slice_x = slice(self.pad_x, self.pad_x + self.puzzle_size[1]) + slice_y = slice(self.pad_y, self.pad_y + self.puzzle_size[0]) + sliced = self.puzzle[slice_y, slice_x] + matrix = np.empty((self.puzzle_size), dtype=" 0 else 0) + pad_x = (self.width - self.puzzle_size[1] - 2) // 2 # -2 matches original file-based logic + pad_y = (self.height - self.puzzle_size[0]) // 2 + self.pad_x, self.pad_y = pad_x, pad_y + + # Populate puzzle elements + for i, row in enumerate(data): + for j, c in enumerate(row): + new_elem = PuzzleElement(c) + self.puzzle[i + pad_y, j + pad_x] = new_elem + + # Create game objects based on characters + if c == "+": # Wall + new_elem.obj = Obstacle(x=j + pad_x, y=i + pad_y) + elif c == "@": # Box + new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self) + elif c == "*": # Player + new_elem.obj = Player(x=j + pad_x, y=i + pad_y, game=self) + self.player = new_elem.obj + elif c == "X": # Goal + new_elem.ground = Goal(x=j + pad_x, y=i + pad_y) + elif c == "$": # Box on goal + new_elem.ground = Goal(x=j + pad_x, y=i + pad_y) + new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self) + elif c == "%": # Player on goal + new_elem.obj = Player(x=j + pad_x, y=i + pad_y, game=self) + new_elem.ground = Goal(x=j + pad_x, y=i + pad_y) + self.player = new_elem.obj + elif c not in " -": # Validation + raise ValueError(f"Invalid character in puzzle: {c}") + + +class ReverseGame(Game): + def 
__init__(self, rng: Random, width=19, height=10, level=None): + super().__init__(width, height, level) + self.rng = rng + self.pad_x = 0 + self.pad_y = 0 + + def load_puzzle(self, puzzle): + self.puzzle_size = (len(puzzle), len(puzzle[0]) if len(puzzle) > 0 else 0) + pad_x = (self.width - len(puzzle[0]) - 2) // 2 + pad_y = (self.height - len(puzzle)) // 2 + self.pad_x, self.pad_y = pad_x, pad_y + for i, row in enumerate(puzzle): + for j, c in enumerate(row): + new_elem = PuzzleElement(c) + self.puzzle[i + pad_y, j + pad_x] = new_elem + if c == "+": # wall + new_elem.obj = Obstacle(x=j + pad_x, y=i + pad_y) + elif c == "@": # box + new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self) + elif c == "*": # player + new_elem.obj = ReversePlayer(rng=self.rng, x=j + pad_x, y=i + pad_y, game=self) + self.player = new_elem.obj + elif c == "X": # goal + new_elem.ground = Goal(x=j + pad_x, y=i + pad_y) + elif c == "$": # box on goal + new_elem.ground = Goal(x=j + pad_x, y=i + pad_y) + new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self) + elif c == "%": # player on goal + new_elem.obj = ReversePlayer(rng=self.rng, x=j + pad_x, y=i + pad_y, game=self) + new_elem.ground = Goal(x=j + pad_x, y=i + pad_y) + self.player = new_elem.obj diff --git a/reasoning_gym/games/contrib/sokoban/src/generator.py b/reasoning_gym/games/contrib/sokoban/src/generator.py new file mode 100644 index 00000000..da4c954f --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/src/generator.py @@ -0,0 +1,107 @@ +from random import Random + +import numpy as np + +from reasoning_gym.games.contrib.sokoban.src.astar import solve_astar +from reasoning_gym.games.contrib.sokoban.src.game import Game, ReverseGame + + +def num_boxes(puzzle_area, min_boxes, max_boxes, min_w, min_h, max_w, max_h): + m = (max_boxes - min_boxes) / (max_w * max_h - min_w * min_h) + b = min_boxes - m * min_w * min_h + return int(m * puzzle_area + b) + + +def random_valid(rng: Random, width: int = 10, height: int = 10): + return 
rng.randrange(1, width - 1), rng.randrange(1, height - 1) + + +def generate( + rng: Random, + debug: bool = False, + path: str = None, + min_w: int = 6, + min_h: int = 6, + max_w: int = 15, + max_h: int = 10, + min_boxes: int = 4, + max_boxes: int = 10, +) -> tuple[str, str, dict]: + """ + Generates a level with the given configuration parameters. + + Parameters: + rng: Random number generator for reproducibility. + visualizer: Whether to visualize the generation process. + path: Path to save the level file (default 'levels/lvl0.dat'). + min_w: Minimum width of the puzzle. + min_h: Minimum height of the puzzle. + max_w: Maximum width of the puzzle. + max_h: Maximum height of the puzzle. + min_boxes: Minimum number of boxes. + max_boxes: Maximum number of boxes. + Returns: + puzzle_string, solution + """ + path = path or "levels/lvl0.dat" + while True: + width = rng.randint(min_w, max_w) + height = rng.randint(min_h, max_h) + puzzle = np.full((height, width), "+", dtype=" 0: + reverse_game.player.update(puzzle_size) + if player.states[player.curr_state] >= 20: + break + counter -= 1 + slice_x = slice(reverse_game.pad_x, reverse_game.pad_x + width) + slice_y = slice(reverse_game.pad_y, reverse_game.pad_y + height) + matrix = reverse_game.puzzle[slice_y, slice_x] + # Optionally print the puzzle: + if debug: + player.print_puzzle(matrix) + + out_of_place_boxes = np.sum([str(x) == "@" for x in matrix.flatten()]) + if out_of_place_boxes >= boxes // 2: + # Optionally save the puzzle to a file: + # np.savetxt(path, matrix, fmt='%s') + puzzle_str = player.puzzle_to_string(matrix) + + grid_list = [list(line) for line in puzzle_str.replace(" ", "").strip().split("\n")] + grid_array = np.array(grid_list) + solution, _ = solve_astar(grid_array) + + if debug: + print(f"solution={solution}") + game = Game() + game.load_puzzle_matrix(grid_array) + + for step, move in enumerate(solution): + print(f"move #{step}: {move}") + game.player.update(key=move) + game.print_puzzle() + + 
difficulty = {"size": puzzle_size, "num_steps": len(solution)} + return puzzle_str, solution, difficulty + else: + if debug: + print(f"Not enough boxes out of place, retrying generation... [{out_of_place_boxes}/{boxes}]") + + +if __name__ == "__main__": + generate(rng=Random(), debug=True) diff --git a/reasoning_gym/games/contrib/sokoban/src/player.py b/reasoning_gym/games/contrib/sokoban/src/player.py new file mode 100644 index 00000000..1299ea7c --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/src/player.py @@ -0,0 +1,118 @@ +from collections import defaultdict +from random import Random + +from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle + + +class Player: + """A player that can only push boxes""" + + def __init__(self, x, y, game): + self.game = game + self.x = x + self.y = y + + def update(self, key: str = None) -> int: + move = None + if key: + if key == "R": + move = (1, 0) + elif key == "L": + move = (-1, 0) + elif key == "U": + move = (0, -1) + elif key == "D": + move = (0, 1) + if move: + curr = self.y, self.x + target = self.y + move[1], self.x + move[0] + target_elem = self.game.puzzle[target] + if not (target_elem and target_elem.obj and isinstance(target_elem.obj, Obstacle)): + is_box = isinstance(target_elem.obj, Box) + if not is_box or (is_box and target_elem.obj.can_move(move)): + curr_elem = self.game.puzzle[curr] + self.y, self.x = target + curr_elem.char = "-" if not curr_elem.ground else "X" + curr_elem.obj = None + target_elem.char = "*" if not target_elem.ground else "%" + target_elem.obj = self + return 1 + return 0 + + +class ReversePlayer(Player): + """A player that can only pull boxes""" + + def __init__(self, rng: Random, x, y, game=None, puzzle=None): + super().__init__(x=x, y=y, game=game) + self.rng = rng + self.game = game + self.puzzle = puzzle + self.curr_state = "" + self.states = defaultdict(int) + self.prev_move = (0, 0) + + def print_puzzle(self, matrix=None): + 
print(self.puzzle_to_string(matrix=matrix)) + + def puzzle_to_string(self, matrix=None): + matrix = matrix if matrix is not None else self.game.puzzle + height, width = len(matrix), len(matrix[0]) + ss = "" + for h in range(height): + for w in range(width): + if matrix[h, w]: + ss = ss + str(matrix[h, w]) + " " + else: + ss = ss + "F" + " " + ss = ss + " " + "\n" + ss = ss + "\n" + return ss + + def get_state(self): + state = "" + height, width = len(self.game.puzzle), len(self.game.puzzle[0]) + for row in range(height): + for col in range(width): + if self.game.puzzle[row, col]: + state += str(self.game.puzzle[row, col]) + return state + + def update(self, puzzle_size): + height, width = puzzle_size + quick_chars = { + "*": "-", + "%": "X", + "+": "*", + "-": "*", + "X": "%", + "@": "-", + "$": "X", + } + moves_tuples = [(1, 0), (-1, 0), (0, -1), (0, 1)] + moves = self.rng.choices(moves_tuples, weights=[0.1 if m == self.prev_move else 1 for m in moves_tuples], k=1) + self.curr_state = self.get_state() + for move in moves: + self.states[self.curr_state] += 1 + curr_pos = self.y, self.x + target = self.y + move[0], self.x + move[1] + reverse_target = self.y - move[0], self.x - move[1] + if ( + target[1] == self.game.pad_x + or target[0] == self.game.pad_y + or target[1] >= self.game.pad_x + width - 1 + or target[0] >= self.game.pad_y + height - 1 + or (self.game.puzzle[target] and self.game.puzzle[target].char in "@$") + ): + self.prev_move = move + return + self.prev_move = -move[0], -move[1] + self.game.puzzle[curr_pos].char = quick_chars[self.game.puzzle[curr_pos].char] + self.game.puzzle[curr_pos].obj = None + self.game.puzzle[target].char = quick_chars[self.game.puzzle[target].char] + self.game.puzzle[target].obj = self + if (c := self.game.puzzle[reverse_target].char) in "@$": + self.game.puzzle[reverse_target].char = quick_chars[c] + self.game.puzzle[reverse_target].obj.reverse_move(move) + + self.y, self.x = target diff --git 
a/reasoning_gym/games/contrib/sokoban/src/utils.py b/reasoning_gym/games/contrib/sokoban/src/utils.py new file mode 100644 index 00000000..106fb8d1 --- /dev/null +++ b/reasoning_gym/games/contrib/sokoban/src/utils.py @@ -0,0 +1,170 @@ +from heapq import heappop, heappush + +import numpy as np + + +def print_state(state, shape): + if not state: + return + m, n = shape + matrix = np.array(list(state)).reshape(m, n) + print(matrix) + + +def find_boxes_and_goals(state, shape): + _, width = shape + boxes, goals, boxes_on_goal = [], [], [] + for pos, char in enumerate(state): + if char == "@": + boxes.append((pos // width, pos % width)) + elif char in "X%": + goals.append((pos // width, pos % width)) + elif char == "$": + boxes_on_goal.append((pos // width, pos % width)) + return boxes, goals, boxes_on_goal + + +def get_state(matrix): + return matrix.tobytes().decode("utf-8").replace("\x00", "") + + +def is_solved(state): + return "@" not in state + + +def manhattan_sum(state, player_pos, shape): + height, width = shape + player_x, player_y = player_pos + boxes, goals, _ = find_boxes_and_goals(state, shape) + boxes_cost = len(boxes) * height * width + player_cost = 0 + for box_x, box_y in boxes: + boxes_cost += min(abs(box_x - goal_x) + abs(box_y - goal_y) for goal_x, goal_y in goals) + player_cost = min(abs(box_x - player_x) + abs(box_y - player_y) for box_x, box_y in boxes) if boxes else 0 + return boxes_cost + player_cost + + +def dijkstra(state, shape, box_pos=None, player_pos=None): + height, width = shape + dijk = np.array([[float("inf") for _ in range(width)] for _ in range(height)]) + dijk[box_pos or player_pos] = 0 + moves = [(1, 0), (-1, 0), (0, 1), (0, -1)] + heap = [(0, box_pos or player_pos)] + obstacles = "+" if player_pos else "+@$" + while heap: + distance, curr_pos = heappop(heap) + if distance > dijk[curr_pos]: + continue + for move in moves: + new_x, new_y = curr_pos[0] + move[0], curr_pos[1] + move[1] + new_pos = new_x, new_y + if 1 <= new_x < height 
- 1 and 1 <= new_y < width - 1 and state[new_x * width + new_y] not in obstacles: + new_distance = distance + 1 + if new_distance < dijk[new_pos]: + dijk[new_pos] = new_distance + heappush(heap, (new_distance, new_pos)) + return dijk + + +def dijkstra_sum(state, player_pos, shape, distances): + height, width = shape + boxes, goals, boxes_on_goal = find_boxes_and_goals(state, shape) + boxes_cost = len(boxes) * height * width + player_cost = 0 + for box in boxes + boxes_on_goal: + distances[box] = dijkstra(state, shape, box) + distances[player_pos] = dijkstra(state, shape, player_pos=player_pos) + for box in boxes: + boxes_cost += min(distances[box][goal] for goal in goals) + player_cost = min(distances[player_pos][box] for box in boxes) if boxes else 0 + return boxes_cost + player_cost + + +def is_deadlock(state, shape): + height, width = shape + if not state or len(state) != height * width: + return False + boxes, _, _ = find_boxes_and_goals(state, shape) + for bx, by in boxes: # corner deadlock + box = bx * width + by + if ( + (state[box - 1] == "+" and state[box - width] == "+") + or (state[box + 1] == "+" and state[box + width] == "+") + or (state[box + 1] == "+" and state[box - width] == "+") + or (state[box - 1] == "+" and state[box + width] == "+") + ): + return True + double_box_positions = [ + (0, -1, -width, -width - 1), + (0, 1, -width, -width + 1), + (0, -1, width - 1, width), + (0, 1, width + 1, width), + ] + for bx, by in boxes: # double box deadlock + box = bx * width + by + for pos in double_box_positions: + pos_set = set() + for dir in pos: + pos_set.add(state[box + dir]) + if pos_set in ({"@", "+"}, {"@"}, {"@", "$"}, {"@", "$", "+"}): + return True + box = goal = 0 + for i in range(width + 1, 2 * width - 1): # too many boxes deadlock + if state[i] == "@": + box += 1 + elif state[i] in "X%": + goal += 1 + if box > goal: + return True + box = goal = 0 + for i in range(width * (height - 2) + 1, width * (height - 2) + width - 1): + if state[i] == "@": 
+ box += 1 + elif state[i] in "X%": + goal += 1 + if box > goal: + return True + box = goal = 0 + for i in range(width + 1, width * (height - 1) + 1, width): + if state[i] == "@": + box += 1 + elif state[i] in "X%": + goal += 1 + if box > goal: + return True + box = goal = 0 + for i in range(2 * width - 2, width * height - 2, width): + if state[i] == "@": + box += 1 + elif state[i] in "X%": + goal += 1 + if box > goal: + return True + return False + + +def can_move(state, shape, player_pos, move): + new_state = list(state) + x, y = player_pos + _, width = shape + move_cost = 0 + target = x + move[0], y + move[1] + boxtarget = x + move[0] * 2, y + move[1] * 2 + curr1d = x * width + y + target1d = target[0] * width + target[1] + boxtarget1d = boxtarget[0] * width + boxtarget[1] + if state[target1d] == "+": + return None, move_cost + elif state[target1d] in "-X": + new_state[curr1d] = "-" if new_state[curr1d] == "*" else "X" + new_state[target1d] = "*" if new_state[target1d] == "-" else "%" + move_cost = 3 + elif state[target1d] in "@$": + if state[boxtarget1d] in "+@$": + return None, move_cost + elif state[boxtarget1d] in "-X": + new_state[boxtarget1d] = "@" if new_state[boxtarget1d] == "-" else "$" + new_state[target1d] = "*" if new_state[target1d] == "@" else "%" + new_state[curr1d] = "-" if new_state[curr1d] == "*" else "X" + move_cost = 0 if new_state[boxtarget1d] == "$" else 2 + return "".join(new_state), move_cost diff --git a/reasoning_gym/games/sokoban.py b/reasoning_gym/games/sokoban.py new file mode 100644 index 00000000..f96d87ea --- /dev/null +++ b/reasoning_gym/games/sokoban.py @@ -0,0 +1,117 @@ +from dataclasses import dataclass +from random import Random +from typing import Dict, Optional + +import numpy as np + +from ..factory import ProceduralDataset, register_dataset + + +@dataclass +class SokobanConfig: + """Configuration for sokoban puzzle generation""" + + seed: Optional[int] = None + size: int = 500 + min_w: int = 6 # Minimum width of the 
puzzle. + min_h: int = 6 # Minimum height of the puzzle. + max_w: int = 10 # Maximum width of the puzzle. + max_h: int = 10 # Maximum height of the puzzle. + min_boxes: int = 6 # Minimum number of boxes. + max_boxes: int = 10 # Maximum number of boxes. + + def validate(self): + """Validate configuration parameters""" + assert self.min_w <= self.max_w, "min_w must be lte max_w" + assert self.min_h <= self.max_h, "min_h must be lte max_h" + assert self.min_boxes <= self.max_boxes, "min_boxes must be lte max_boxes" + + +class SokobanDataset(ProceduralDataset): + """Generates Sokoban games with configurable parameters""" + + def __init__(self, config: SokobanConfig): + self._prompt_templates = [ + "What will this Sokoban board look like after {simulation_steps} steps of simulation?\n\n{board}" + ] + + super().__init__(config=config, seed=config.seed, size=config.size) + + # lazy loading of sokoban imports + from .contrib.sokoban.src.game import Game + from .contrib.sokoban.src.generator import generate + from .contrib.sokoban.src.utils import is_solved + + self._Game = Game + self._generate = generate + self._is_solved = is_solved + + def __getitem__(self, idx: int) -> dict: + """Generate a single Sokoban task + + Returns: + dict with keys: + - question: str, the task description + - answer: str, a solution string + - metadata: dict with generation parameters + """ + + # Make the Sokoban! + rng = Random(self.seed + idx) + gamestr, solution, difficulty = self._generate(rng=rng) + + return { + "question": """You are going to solve a 'sokoban' puzzle. + +* - The player +% - The player on a goal +@ - A box +X - A goal +$ - A box on a goal ++ - A wall +- - An empty position + +Your solution must be a string of characters, ex: LDURRUDL. 
+ +Here is your puzzle: +""" + + gamestr, + "answer": solution, + "metadata": {"gamestr": gamestr, "difficulty": difficulty}, + } + + def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: + """Determine if the solution provided solves the Sokoban task. + + The function awards 1.0 for a correct answer. + + Args: + answer (Optional[str]): The user's answer. + entry (Dict[str, any]): The original dataset entry containing the correct answer. + + Returns: + float: The computed score between 0.0 and 1.0. + """ + + if answer == None: + return 0.0 + + try: + grid_list = [list(line) for line in entry["metadata"]["gamestr"].replace(" ", "").strip().split("\n")] + matrix = np.array(grid_list) + + game = self._Game() + game.load_puzzle_matrix(matrix) + + for move in answer: + game.player.update(key=move) + + if self._is_solved(game.get_curr_state()): + return 1.0 + except Exception as e: + return 0.01 + + return 0.1 + + +register_dataset("sokoban", SokobanDataset, SokobanConfig) diff --git a/tests/test_sokoban.py b/tests/test_sokoban.py new file mode 100644 index 00000000..c4d1e2b8 --- /dev/null +++ b/tests/test_sokoban.py @@ -0,0 +1,50 @@ +import pytest + +from reasoning_gym.games.sokoban import SokobanConfig, SokobanDataset + + +def test_sokoban(): + """Test basic properties and solution of generated items""" + + # Easy + config = SokobanConfig(seed=42, size=20) + dataset = SokobanDataset(config) + + for item in dataset: + assert isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + # Test the scoring + assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0 + assert dataset.score_answer(answer="RU", entry=item) == 0.1 + assert dataset.score_answer(answer=None, entry=item) == 0.0 + + # Medium + config = SokobanConfig(seed=42, min_h=40, max_h=50, min_w=40, max_w=50, min_boxes=20, max_boxes=30, size=3) + dataset = SokobanDataset(config) + + for item in dataset: + assert 
isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + # Test the scoring + assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0 + assert dataset.score_answer(answer=None, entry=item) == 0.0 + + # Hard + config = SokobanConfig(seed=42, min_h=400, max_h=500, min_w=400, max_w=500, min_boxes=50, max_boxes=50, size=1) + dataset = SokobanDataset(config) + + for item in dataset: + assert isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + # Test the scoring + assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0 + assert dataset.score_answer(answer=None, entry=item) == 0.0