diff --git a/GALLERY.md b/GALLERY.md
index 1d09a54f..ad0248ce 100644
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -19,6 +19,7 @@ This gallery shows examples from all available datasets using their default conf
- [fraction_simplification](#fraction_simplification)
- [game_of_life](#game_of_life)
- [gcd](#gcd)
+- [group_anagrams](#group_anagrams)
- [gsm_symbolic](#gsm_symbolic)
- [intermediate_integration](#intermediate_integration)
- [largest_island](#largest_island)
@@ -42,13 +43,13 @@ This gallery shows examples from all available datasets using their default conf
- [simple_equations](#simple_equations)
- [simple_geometry](#simple_geometry)
- [simple_integration](#simple_integration)
+- [sokoban](#sokoban)
- [spell_backward](#spell_backward)
- [sudoku](#sudoku)
- [syllogism](#syllogism)
- [time_intervals](#time_intervals)
- [tower_of_hanoi](#tower_of_hanoi)
- [word_ladder](#word_ladder)
-- [group_anagrams](#group_anagrams)
- [word_sequence_reversal](#word_sequence_reversal)
- [word_sorting](#word_sorting)
- [zebra_puzzles](#zebra_puzzles)
@@ -406,17 +407,17 @@ Example tasks:
Example 1:
Question: 4 + 3 =
Answer: 7
-Metadata: {'num_terms': 2, 'num_digits': 1, 'expression': '4 + 3'}
+Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 1}, 'expression': '4 + 3'}
Example 2:
Question: 812 + 880 =
Answer: 1692
-Metadata: {'num_terms': 2, 'num_digits': 3, 'expression': '812 + 880'}
+Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 3}, 'expression': '812 + 880'}
Example 3:
Question: 2 + 6 + 3 + 4 + 0 =
Answer: 15
-Metadata: {'num_terms': 5, 'num_digits': 1, 'expression': '2 + 6 + 3 + 4 + 0'}
+Metadata: {'difficulty': {'num_terms': 5, 'num_digits': 1}, 'expression': '2 + 6 + 3 + 4 + 0'}
````
@@ -897,6 +898,75 @@ Metadata: {'numbers': [297, 30], 'result': 3}
````
+### group_anagrams
+Generates Group Anagrams exercises with configurable difficulty
+
+Default configuration:
+```python
+anagram_groups = 10
+max_words_per_group = 5
+size = 500
+seed = 42
+```
+
+Example tasks:
+````
+Example 1:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+ - There is no string in the input that can be rearranged to form "bat".
+ - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"]
+
+Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]]
+Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]}
+
+Example 2:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+ - There is no string in the input that can be rearranged to form "bat".
+ - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"]
+
+Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]]
+Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]}
+
+Example 3:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+ - There is no string in the input that can be rearranged to form "bat".
+ - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"]
+
+Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]]
+Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]}
+
+````
+
### gsm_symbolic
Default configuration:
```python
@@ -1101,17 +1171,17 @@ Example tasks:
Example 1:
Question: How many legs are there in total if you have 1 sea slug, 1 deer?
Answer: 4
-Metadata: {'animals': {'sea slug': 1, 'deer': 1}, 'total_legs': 4}
+Metadata: {'difficulty': {'num_animals': 2}, 'animals': {'sea slug': 1, 'deer': 1}, 'total_legs': 4}
Example 2:
Question: How many legs are there in total if you have 2 sheeps, 2 dogs?
Answer: 16
-Metadata: {'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16}
+Metadata: {'difficulty': {'num_animals': 2}, 'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16}
Example 3:
Question: How many legs are there in total if you have 1 crab, 2 lobsters, 1 human, 1 cow, 1 bee?
Answer: 42
-Metadata: {'animals': {'crab': 1, 'lobster': 2, 'human': 1, 'cow': 1, 'bee': 1}, 'total_legs': 42}
+Metadata: {'difficulty': {'num_animals': 5}, 'animals': {'crab': 1, 'lobster': 2, 'human': 1, 'cow': 1, 'bee': 1}, 'total_legs': 42}
````
@@ -1923,6 +1993,107 @@ Metadata: {'integrand': '-28*X**3 + 8*X', 'variable': 'X', 'expected_answer_expr
````
+### sokoban
+Generates Sokoban games with configurable parameters
+
+Default configuration:
+```python
+seed = 42
+size = 500
+min_w = 6
+min_h = 6
+max_w = 10
+max_h = 10
+min_boxes = 6
+max_boxes = 10
+```
+
+Example tasks:
+````
+Example 1:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + + + + +
++ + X - @ * @ X +
++ + + - - @ - + +
++ + + - - - X $ +
++ + + + - + + + +
++ + $ + + + + + +
++ + + + + + + + +
+
+
+Answer: RLDULLRRDLDR
+Metadata: {'gamestr': '+ + + + + + + + + \n+ + X - @ * @ X + \n+ + + - - @ - + + \n+ + + - - - X $ + \n+ + + + - + + + + \n+ + $ + + + + + + \n+ + + + + + + + + \n\n', 'difficulty': {'size': (7, 9), 'num_steps': 12}}
+
+Example 2:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + +
++ - * - - +
++ @ - - @ +
++ X - @ - +
++ - - - X +
++ X - @ X +
++ - - - - +
++ + + + + +
+
+
+Answer: LDRRDRDDLLURURDULUURDD
+Metadata: {'gamestr': '+ + + + + + \n+ - * - - + \n+ @ - - @ + \n+ X - @ - + \n+ - - - X + \n+ X - @ X + \n+ - - - - + \n+ + + + + + \n\n', 'difficulty': {'size': (8, 6), 'num_steps': 22}}
+
+Example 3:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + + + + + + + +
++ - $ - X + - - - - - +
++ - @ - - - - - @ - X +
++ - * - @ - - X - $ - +
++ - - - - X + - - - - +
++ + - - - - + $ - @ - +
++ + + - - - - - - - - +
++ + + - - - $ - - - - +
++ + + + - - - - - - - +
++ + + + + + + + + + + +
+
+
+Answer: RRRRURRRLDDRRDLULDRDLLLLULLDRDRUULUUULDLLURRDRU
+Metadata: {'gamestr': '+ + + + + + + + + + + + \n+ - $ - X + - - - - - + \n+ - @ - - - - - @ - X + \n+ - * - @ - - X - $ - + \n+ - - - - X + - - - - + \n+ + - - - - + $ - @ - + \n+ + + - - - - - - - - + \n+ + + - - - $ - - - - + \n+ + + + - - - - - - - + \n+ + + + + + + + + + + + \n\n', 'difficulty': {'size': (10, 12), 'num_steps': 47}}
+
+````
+
### spell_backward
Generates tasks to spell words backward
@@ -2112,7 +2283,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6,
Example 2:
Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM.
Answer: 02:38
-Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 5, 9, 44), 'end_time': datetime.datetime(2025, 2, 5, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
+Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 7, 9, 44), 'end_time': datetime.datetime(2025, 2, 7, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
Example 3:
Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days.
@@ -2217,85 +2388,6 @@ Metadata: {'start_word': 'SNOG', 'end_word': 'SUQS', 'word_length': 4, 'chain_le
````
-
-### group_anagrams
-
-Group anagrams together in a list of words.
-
-Default configuration
-```python
-anagram_groups: int = 10 # Groups of anagrams present in the input
-max_words_per_group: int = 5 # Maximum number of words in a single anagram group
-```
-
-Example tasks:
-```
-Example 1:
-Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
-
-Your job is to group the anagrams together. You can return the answer in any order.
-
-Example:
-Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
-Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
-Explanation:
- - There is no string in the input that can be rearranged to form "bat".
- - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
-
-Group the following list of words into anagrams:
-["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"]
-
-Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]]
-
-Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]}
-
---------------------------------------------------
-
-Example 2:
-Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
-
-Your job is to group the anagrams together. You can return the answer in any order.
-
-Example:
-Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
-Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
-Explanation:
- - There is no string in the input that can be rearranged to form "bat".
- - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
-
-Group the following list of words into anagrams:
-["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"]
-
-Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]]
-
-Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]}
-
---------------------------------------------------
-
-Example 3:
-Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
-
-Your job is to group the anagrams together. You can return the answer in any order.
-
-Example:
-Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
-Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
-Explanation:
- - There is no string in the input that can be rearranged to form "bat".
- - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
-
-Group the following list of words into anagrams:
-["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"]
-
-Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]]
-
-Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]}
-
---------------------------------------------------
-```
-
-
-
### word_sequence_reversal
Generates word sequence reversal tasks from text spans
diff --git a/README.md b/README.md
index 9335a1d2..b623eebc 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
### Game Tasks
- `SudokuDataset`: Generate 9x9 Sudoku puzzles with configurable number of empty cells
+- `SokobanDataset`: Generate [Sokoban](https://en.wikipedia.org/wiki/Sokoban) puzzles with configurable board size and number of boxes
- `MiniSudokuDataset`: Generate 4x4 Mini Sudoku puzzles with configurable difficulty
- `MazeDataset`: Generate a maze with a start and a goal
- `CountdownDataset`: Generate number game tasks where numbers and operators must be combined to reach a target value
diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py
index 8e4e32d6..958dcd01 100644
--- a/reasoning_gym/games/__init__.py
+++ b/reasoning_gym/games/__init__.py
@@ -11,6 +11,7 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
from .maze import MazeConfig, MazeDataset
from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
from .n_queens import NQueensDataset
+from .sokoban import SokobanConfig, SokobanDataset
from .sudoku import SudokuConfig, SudokuDataset
from .tower_of_hanoi import HanoiConfig, HanoiDataset
@@ -21,6 +22,8 @@ __all__ = [
"MiniSudokuDataset",
"SudokuConfig",
"SudokuDataset",
+ "SokobanConfig",
+ "SokobanDataset",
"MazeConfig",
"MazeDataset",
"GameOfLifeConfig",
diff --git a/reasoning_gym/games/contrib/__init__.py b/reasoning_gym/games/contrib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/LICENSE b/reasoning_gym/games/contrib/sokoban/LICENSE
new file mode 100644
index 00000000..84d0d484
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Bruno Andrade
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/reasoning_gym/games/contrib/sokoban/README.md b/reasoning_gym/games/contrib/sokoban/README.md
new file mode 100644
index 00000000..44d565ea
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/README.md
@@ -0,0 +1,52 @@
+# 📦 Sokoban Solver and Generator
+
+This folder contains a minified version of Bruno Andrade's Sokoban game with all pygame dependencies stripped.
+
+The original version can be found here: [xbandrade/sokoban-solver-generator](https://github.com/xbandrade/sokoban-solver-generator)
+
+
+This is a Sokoban puzzle generator and solver that uses BFS, A* and Dijkstra search algorithms.
+
+`Sokoban` is a puzzle game in which the player pushes boxes around in a warehouse, trying to get every box to a goal.
+
+
+### ❕Sokoban Puzzle
+The puzzle states are stored in a matrix, and each element of the puzzle is represented by a single character in the matrix.
+```
++ + + + + + +
++ * - @ - X +
++ + - @ - + +
++ X - - - $ +
++ + + + + + +
+```
+`*` - The player
+`%` - The player on a goal
+`@` - A box
+`X` - A goal
+`$` - A box on a goal
+`+` - A wall
+`-` - An empty position
+
+In the original pygame version, a box on a goal is drawn in green in the game window.
+
+
+### ❕Sokoban Generator
+
+The generator will initially create a puzzle with a random board size, then the player and the boxes on goals will be randomly placed on the board.
+During generation, the player can only pull boxes away from their positions, breaking every wall in its way, so the puzzle is guaranteed to have a valid solution.
+
+
+### ❕Sokoban Solver
+
+The algorithms used to implement the Sokoban puzzle solvers were `Breadth-First Search(BFS)` and `A*`.
+
+The `BFS` solver uses a queue to store the next states of the puzzle it needs to visit. A visited state is stored in a hashset, and BFS won't try to visit the same state twice.
+
+The `A*` algorithm is similar to the BFS algorithm, but it uses a priority queue instead of a queue, and it prioritizes moves that are more likely to solve the problem.
+It does so by setting costs to the puzzle state and the player's movements, punishing the player with high costs for a bad move and rewarding the player with lower costs for a good move.
+The state costs are defined by heuristic functions, and this solver was implemented with two different heuristics: the `Manhattan Distance` function and `Dijkstra` distance function.
+
+All three implementations check for possible deadlocks (states that are impossible to solve) before adding the new state to the queue.
+
+
+More about Sokoban: [Wikipedia Article](https://en.wikipedia.org/wiki/Sokoban)
diff --git a/reasoning_gym/games/contrib/sokoban/__init__.py b/reasoning_gym/games/contrib/sokoban/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
new file mode 100644
index 00000000..867d112a
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
@@ -0,0 +1,10 @@
++ + + + + + +
++ - * - - - +
++ - - - $ - +
++ X - - @ - +
++ - - - - - +
++ $ - + - - +
++ + - - - - +
++ X @ - $ - +
++ + - - - - +
++ + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
new file mode 100644
index 00000000..9ba48c31
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
@@ -0,0 +1,5 @@
++ + + + + + +
++ * - @ - X +
++ + - @ - + +
++ X - - - - +
++ + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
new file mode 100644
index 00000000..46755810
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
@@ -0,0 +1,6 @@
+- - + + + + + +
+- + + - - - * +
++ + - - - + X +
++ X - @ - @ @ +
++ X X @ - - - +
++ + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
new file mode 100644
index 00000000..9d0bc599
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
@@ -0,0 +1,7 @@
+- + + + + + + - - -
+- + X - - X + - - -
++ + - @ @ + + - - -
++ - - - - + + - - -
++ - @ - - * + + + +
++ + - - - - - - X +
+- + + + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
new file mode 100644
index 00000000..42fbc6eb
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
@@ -0,0 +1,7 @@
+- + + + + + + - -
++ + X - @ - + + +
++ - - - - - - - +
++ - @ + + X - @ +
++ - - - @ - + - +
++ + + * - X - X +
+- - + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
new file mode 100644
index 00000000..3a096d58
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
@@ -0,0 +1,7 @@
+- + + + + + + + -
++ + - - + - - + +
++ - @ - - - @ - +
++ - - X * X - - +
++ + @ + + - - + +
++ - - X - - - + -
++ + + + + + + + -
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
new file mode 100644
index 00000000..32ee5bbc
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
@@ -0,0 +1,9 @@
+- - - + + + + + + + +
+- - - + - - - - - - +
+- - + + - - - - @ - +
+- + + - - + + - + + +
++ + - - + - - X - - +
++ - - + X @ @ - - + +
++ * + X - - - - + + -
++ + - - - - - + + - -
++ + + + + + + + - - -
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
new file mode 100644
index 00000000..9c2fe302
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
@@ -0,0 +1,6 @@
++ + + + + + + +
++ - - @ - X * +
++ - @ - - + X +
++ X X @ - @ @ +
++ X X @ - - - +
++ + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/src/__init__.py b/reasoning_gym/games/contrib/sokoban/src/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/src/astar.py b/reasoning_gym/games/contrib/sokoban/src/astar.py
new file mode 100644
index 00000000..25d1e63d
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/astar.py
@@ -0,0 +1,81 @@
+from collections import defaultdict
+from heapq import heappop, heappush
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.utils import (
+ can_move,
+ dijkstra_sum,
+ get_state,
+ is_deadlock,
+ is_solved,
+ manhattan_sum,
+)
+
+
+def astar(matrix, player_pos, debug=False, heuristic="manhattan"):
+    """Search for a solution to a Sokoban puzzle with a heuristic-ordered heap.
+
+    Parameters:
+        matrix: 2-D array of puzzle characters; only ``matrix.shape`` and
+            ``get_state(matrix)`` are read here.
+        player_pos: ``(row, col)`` position of the player inside ``matrix``.
+        debug: When true, progress and result messages are printed to stdout.
+            Nothing is printed otherwise.
+        heuristic: ``"manhattan"`` scores states with ``manhattan_sum``; any
+            other value scores them with ``dijkstra_sum``.
+
+    Returns:
+        ``(path, depth)`` where ``path`` is a string over ``U``/``D``/``L``/``R``
+        and ``depth`` is the number of moves, or ``(None, -1)`` when the heap
+        is exhausted without reaching a solved state.
+    """
+    heur = "[A*]" if heuristic == "manhattan" else "[Dijkstra]"
+    shape = matrix.shape
+    initial_state = get_state(matrix)
+    initial_cost = curr_depth = 0
+    if heuristic == "manhattan":
+        curr_cost = manhattan_sum(initial_state, player_pos, shape)
+    else:
+        # Handed to every dijkstra_sum call of this search; presumably a
+        # distance cache filled in by the helper (confirm in utils).
+        distances = defaultdict(list)
+        curr_cost = dijkstra_sum(initial_state, player_pos, shape, distances)
+    # None is pre-seeded so a None successor is skipped like a visited state.
+    # NOTE(review): presumably can_move returns None for an illegal move.
+    seen = {None}
+    heap = []
+    heappush(heap, (initial_cost, curr_cost, initial_state, player_pos, curr_depth, ""))
+    moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
+    direction = {
+        (1, 0): "D",
+        (-1, 0): "U",
+        (0, -1): "L",
+        (0, 1): "R",
+    }
+    while heap:
+        _, curr_cost, state, pos, depth, path = heappop(heap)
+        seen.add(state)
+        for move in moves:
+            new_state, move_cost = can_move(state, shape, pos, move)
+            # Cheap set lookup first: the deadlock test only runs for states
+            # that have not been expanded yet.
+            if new_state in seen or is_deadlock(new_state, shape):
+                continue
+            new_pos = pos[0] + move[0], pos[1] + move[1]
+            if heuristic == "manhattan":
+                new_cost = manhattan_sum(new_state, new_pos, shape)
+            else:
+                new_cost = dijkstra_sum(new_state, new_pos, shape, distances)
+            if new_cost == float("inf"):
+                continue
+            new_path = path + direction[move]
+            # Priority is the move cost plus the heuristic value of the state
+            # being expanded; the successor's own heuristic is the tie-breaker.
+            heappush(
+                heap,
+                (
+                    move_cost + curr_cost,
+                    new_cost,
+                    new_state,
+                    new_pos,
+                    depth + 1,
+                    new_path,
+                ),
+            )
+            if is_solved(new_state):
+                if debug:
+                    print(f"{heur} Solution Found!\n{new_path}")
+                return (new_path, depth + 1)
+            if debug:
+                print(f"{heur} Solution Depth: {depth + 1}\n{new_path}")
+    if debug:
+        print(f"{heur} Solution Not Found!\nDepth {depth + 1}")
+
+    # The loop only ends once the heap is empty, so the depth is always -1.
+    return (None, -1)
+
+
+def solve_astar(puzzle, visualizer=False, heuristic="manhattan"):
+    """Locate the player in ``puzzle`` and run ``astar`` from that cell.
+
+    The player is the first cell holding ``"*"`` or ``"%"``. ``visualizer`` is
+    forwarded to ``astar`` as its ``debug`` flag and ``heuristic`` is passed
+    through unchanged. Returns whatever ``astar`` returns.
+    """
+    player_cells = np.where((puzzle == "*") | (puzzle == "%"))
+    start_row = player_cells[0][0]
+    start_col = player_cells[1][0]
+    return astar(puzzle, (start_row, start_col), debug=visualizer, heuristic=heuristic)
diff --git a/reasoning_gym/games/contrib/sokoban/src/bfs.py b/reasoning_gym/games/contrib/sokoban/src/bfs.py
new file mode 100644
index 00000000..d6a376c9
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/bfs.py
@@ -0,0 +1,66 @@
+import time
+from collections import deque
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.utils import can_move, get_state, is_deadlock, is_solved, print_state
+
+
+def bfs(matrix, player_pos, debug=False):
+    """Search for a solution to a Sokoban puzzle with breadth-first search.
+
+    Parameters:
+        matrix: 2-D array of puzzle characters; only ``matrix.shape`` and
+            ``get_state(matrix)`` are read here.
+        player_pos: ``(row, col)`` position of the player inside ``matrix``.
+        debug: When true, the initial board plus progress and result messages
+            are printed to stdout. Nothing is printed otherwise.
+
+    Returns:
+        ``(path, depth)`` where ``path`` is a string over ``U``/``D``/``L``/``R``
+        and ``depth`` is the number of moves, or ``(None, -1)`` when the queue
+        is exhausted without reaching a solved state.
+    """
+    initial_state = get_state(matrix)
+    shape = matrix.shape
+    if debug:
+        print("Breadth-First Search")
+        print_state(initial_state, shape)
+    # None is pre-seeded so a None successor is skipped like a visited state.
+    # NOTE(review): presumably can_move returns None for an illegal move.
+    seen = {None}
+    q = deque([(initial_state, player_pos, 0, "")])
+    moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
+    direction = {
+        (1, 0): "D",
+        (-1, 0): "U",
+        (0, -1): "L",
+        (0, 1): "R",
+    }
+    while q:
+        state, pos, depth, path = q.popleft()
+        seen.add(state)
+        for move in moves:
+            new_state, _ = can_move(state, shape, pos, move)
+            # Cheap set lookup first: the deadlock test only runs for states
+            # that have not been expanded yet.
+            if new_state in seen or is_deadlock(new_state, shape):
+                continue
+            new_path = path + direction[move]
+            q.append(
+                (
+                    new_state,
+                    (pos[0] + move[0], pos[1] + move[1]),
+                    depth + 1,
+                    new_path,
+                )
+            )
+            if is_solved(new_state):
+                if debug:
+                    print(f"[BFS] Solution found!\n\n{new_path}\nDepth {depth + 1}\n")
+                return (new_path, depth + 1)
+            if debug:
+                print(f"[BFS] Solution Depth: {depth + 1}\n{new_path}")
+    if debug:
+        print(f"[BFS] Solution Not Found!\nDepth {depth + 1}")
+    # The loop only ends once the queue is empty, so the depth is always -1.
+    return (None, -1)
+
+
+def solve_bfs(puzzle, visualizer=False):
+    """Locate the player in ``puzzle`` and run ``bfs`` from that cell.
+
+    The player is the first cell holding ``"*"`` or ``"%"``. ``visualizer`` is
+    forwarded to ``bfs`` as its ``debug`` flag. Returns whatever ``bfs`` returns.
+    """
+    player_cells = np.where((puzzle == "*") | (puzzle == "%"))
+    start_row = player_cells[0][0]
+    start_col = player_cells[1][0]
+    return bfs(puzzle, (start_row, start_col), debug=visualizer)
+
+
+if __name__ == "__main__":
+ start = time.time()
+ root = solve_bfs(np.loadtxt("levels/lvl7.dat", dtype=" str:
+ return self.char
+
+
+class Game:
+ def __init__(self, width=19, height=10, level=None, path=None):
+ self.level = level
+ self.width = width
+ self.height = height
+ self.puzzle = np.empty((height, width), dtype=PuzzleElement)
+
+ self.player = None
+ self.puzzle_size = None
+ self.pad_x = 0
+ self.pad_y = 0
+ self.path = path or f"levels/lvl{level}.dat"
+
+ if path:
+ if type(self) == Game:
+ self.load_puzzle()
+
+ def get_matrix(self):
+ slice_x = slice(self.pad_x, self.pad_x + self.puzzle_size[1])
+ slice_y = slice(self.pad_y, self.pad_y + self.puzzle_size[0])
+ sliced = self.puzzle[slice_y, slice_x]
+ matrix = np.empty((self.puzzle_size), dtype=" 0 else 0)
+ pad_x = (self.width - self.puzzle_size[1] - 2) // 2 # -2 matches original file-based logic
+ pad_y = (self.height - self.puzzle_size[0]) // 2
+ self.pad_x, self.pad_y = pad_x, pad_y
+
+ # Populate puzzle elements
+ for i, row in enumerate(data):
+ for j, c in enumerate(row):
+ new_elem = PuzzleElement(c)
+ self.puzzle[i + pad_y, j + pad_x] = new_elem
+
+ # Create game objects based on characters
+ if c == "+": # Wall
+ new_elem.obj = Obstacle(x=j + pad_x, y=i + pad_y)
+ elif c == "@": # Box
+ new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self)
+ elif c == "*": # Player
+ new_elem.obj = Player(x=j + pad_x, y=i + pad_y, game=self)
+ self.player = new_elem.obj
+ elif c == "X": # Goal
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ elif c == "$": # Box on goal
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self)
+ elif c == "%": # Player on goal
+ new_elem.obj = Player(x=j + pad_x, y=i + pad_y, game=self)
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ self.player = new_elem.obj
+ elif c not in " -": # Validation
+ raise ValueError(f"Invalid character in puzzle: {c}")
+
+
+class ReverseGame(Game):
+    """Game variant whose player cells are populated with ``ReversePlayer``.
+
+    The random generator given to the constructor is handed to every
+    ``ReversePlayer`` created while loading a puzzle.
+    """
+
+    def __init__(self, rng: Random, width=19, height=10, level=None):
+        # No path is forwarded, so Game.__init__ never loads a puzzle here;
+        # it also initialises pad_x / pad_y to 0.
+        super().__init__(width, height, level)
+        self.rng = rng
+
+    def load_puzzle(self, puzzle):
+        """Place ``puzzle`` on the board and create its game objects.
+
+        Parameters:
+            puzzle: Sequence of rows of single-character cells. The width is
+                taken from the first row; an empty puzzle is accepted and
+                leaves the board untouched.
+
+        Cell characters handled: ``+`` wall, ``@`` box, ``*`` player,
+        ``X`` goal, ``$`` box on a goal, ``%`` player on a goal. Any other
+        character is stored as a bare ``PuzzleElement``.
+        """
+        rows = len(puzzle)
+        cols = len(puzzle[0]) if rows > 0 else 0
+        self.puzzle_size = (rows, cols)
+        # Use the guarded width so an empty puzzle no longer raises IndexError.
+        pad_x = (self.width - cols - 2) // 2
+        pad_y = (self.height - rows) // 2
+        self.pad_x, self.pad_y = pad_x, pad_y
+        for i, row in enumerate(puzzle):
+            y = i + pad_y
+            for j, c in enumerate(row):
+                x = j + pad_x
+                new_elem = PuzzleElement(c)
+                self.puzzle[y, x] = new_elem
+                if c == "+":  # wall
+                    new_elem.obj = Obstacle(x=x, y=y)
+                elif c == "@":  # box
+                    new_elem.obj = Box(x=x, y=y, game=self)
+                elif c == "*":  # player
+                    new_elem.obj = ReversePlayer(rng=self.rng, x=x, y=y, game=self)
+                    self.player = new_elem.obj
+                elif c == "X":  # goal
+                    new_elem.ground = Goal(x=x, y=y)
+                elif c == "$":  # box on goal
+                    new_elem.ground = Goal(x=x, y=y)
+                    new_elem.obj = Box(x=x, y=y, game=self)
+                elif c == "%":  # player on goal
+                    new_elem.obj = ReversePlayer(rng=self.rng, x=x, y=y, game=self)
+                    new_elem.ground = Goal(x=x, y=y)
+                    self.player = new_elem.obj
diff --git a/reasoning_gym/games/contrib/sokoban/src/generator.py b/reasoning_gym/games/contrib/sokoban/src/generator.py
new file mode 100644
index 00000000..da4c954f
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/generator.py
@@ -0,0 +1,107 @@
+from random import Random
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.astar import solve_astar
+from reasoning_gym.games.contrib.sokoban.src.game import Game, ReverseGame
+
+
+def num_boxes(puzzle_area, min_boxes, max_boxes, min_w, min_h, max_w, max_h):  # box count scaled linearly with area
+    area_span = max_w * max_h - min_w * min_h  # 0 when the minimum and maximum sizes coincide
+    m = (max_boxes - min_boxes) / area_span if area_span else 0.0  # slope; flat instead of ZeroDivisionError
+    return int(m * puzzle_area + (min_boxes - m * min_w * min_h))  # line through (min area, min_boxes)
+
+
+def random_valid(rng: Random, width: int = 10, height: int = 10):  # one draw in [1, width-2], then one in [1, height-2]
+    return tuple(rng.randrange(1, limit - 1) for limit in (width, height))
+
+
+def generate(
+    rng: Random,
+    debug: bool = False,
+    path: str = None,
+    min_w: int = 6,
+    min_h: int = 6,
+    max_w: int = 15,
+    max_h: int = 10,
+    min_boxes: int = 4,
+    max_boxes: int = 10,
+) -> tuple[str, str, dict]:
+    """
+    Generates a level with the given configuration parameters.
+
+    Parameters:
+        rng: Random number generator for reproducibility.
+        debug: Whether to print the generated puzzle and replay its solution move by move.
+        path: Level file path (default 'levels/lvl0.dat'); the visible save call is commented out.
+        min_w: Minimum width of the puzzle.
+        min_h: Minimum height of the puzzle.
+        max_w: Maximum width of the puzzle.
+        max_h: Maximum height of the puzzle.
+        min_boxes: Minimum number of boxes.
+        max_boxes: Maximum number of boxes.
+    Returns:
+        puzzle_string, solution, difficulty (dict with keys "size" and "num_steps")
+    """
+    path = path or "levels/lvl0.dat"
+    while True:
+        width = rng.randint(min_w, max_w)
+        height = rng.randint(min_h, max_h)
+        puzzle = np.full((height, width), "+", dtype=" 0:
+                reverse_game.player.update(puzzle_size)
+                if player.states[player.curr_state] >= 20:  # same board state counted 20 times: stop walking
+                    break
+                counter -= 1
+        slice_x = slice(reverse_game.pad_x, reverse_game.pad_x + width)
+        slice_y = slice(reverse_game.pad_y, reverse_game.pad_y + height)
+        matrix = reverse_game.puzzle[slice_y, slice_x]  # cut the puzzle back out of the padded board
+        # Optionally print the puzzle:
+        if debug:
+            player.print_puzzle(matrix)
+
+        out_of_place_boxes = np.sum([str(x) == "@" for x in matrix.flatten()])  # cells rendered "@": boxes off-goal
+        if out_of_place_boxes >= boxes // 2:  # accept only if at least boxes // 2 boxes still need moving
+            # Optionally save the puzzle to a file:
+            # np.savetxt(path, matrix, fmt='%s')
+            puzzle_str = player.puzzle_to_string(matrix)
+
+            grid_list = [list(line) for line in puzzle_str.replace(" ", "").strip().split("\n")]  # one char per cell
+            grid_array = np.array(grid_list)
+            solution, _ = solve_astar(grid_array)  # NOTE(review): a None solution would break len(solution) below
+
+            if debug:
+                print(f"solution={solution}")
+                game = Game()
+                game.load_puzzle_matrix(grid_array)
+
+                for step, move in enumerate(solution):
+                    print(f"move #{step}: {move}")
+                    game.player.update(key=move)
+                    game.print_puzzle()
+
+            difficulty = {"size": puzzle_size, "num_steps": len(solution)}
+            return puzzle_str, solution, difficulty
+        else:
+            if debug:
+                print(f"Not enough boxes out of place, retrying generation... [{out_of_place_boxes}/{boxes}]")
+
+
+if __name__ == "__main__":  # manual smoke run: one level from an unseeded RNG, with debug output
+    generate(rng=Random(), debug=True)
diff --git a/reasoning_gym/games/contrib/sokoban/src/player.py b/reasoning_gym/games/contrib/sokoban/src/player.py
new file mode 100644
index 00000000..1299ea7c
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/player.py
@@ -0,0 +1,118 @@
+from collections import defaultdict
+from random import Random
+
+from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
+
+
+class Player:
+    """A player that can only push boxes"""
+
+    def __init__(self, x, y, game):
+        self.game = game
+        self.x = x
+        self.y = y
+
+    def update(self, key: str = None) -> int:
+        """Attempt one step for key "R", "L", "U" or "D".
+
+        Returns 1 if the player moved and 0 otherwise (unknown or missing key, an
+        Obstacle ahead, or a Box whose can_move() refuses the push).
+        """
+        directions = {"R": (1, 0), "L": (-1, 0), "U": (0, -1), "D": (0, 1)}  # key -> (dx, dy)
+        move = directions.get(key) if key else None
+        if not move:
+            return 0
+        origin = self.y, self.x
+        target = self.y + move[1], self.x + move[0]
+        target_elem = self.game.puzzle[target]
+        if target_elem and target_elem.obj and isinstance(target_elem.obj, Obstacle):
+            return 0
+        pushing = isinstance(target_elem.obj, Box)
+        if pushing and not target_elem.obj.can_move(move):
+            return 0
+        # Swap the board characters: the vacated cell shows its ground, the entered cell shows the player.
+        origin_elem = self.game.puzzle[origin]
+        self.y, self.x = target
+        origin_elem.char = "X" if origin_elem.ground else "-"
+        origin_elem.obj = None
+        target_elem.char = "%" if target_elem.ground else "*"
+        target_elem.obj = self
+        return 1
+
+
+class ReversePlayer(Player):
+    """A player that can only pull boxes; it walks randomly and counts how often each board state recurs"""
+
+    def __init__(self, rng: Random, x, y, game=None, puzzle=None):
+        super().__init__(x=x, y=y, game=game)
+        self.rng = rng  # source of the random direction picked in update()
+        self.game = game  # already assigned by Player.__init__; repeated here
+        self.puzzle = puzzle
+        self.curr_state = ""  # board snapshot taken at the start of the latest update()
+        self.states = defaultdict(int)  # snapshot -> number of update() calls that started from it
+        self.prev_move = (0, 0)  # the direction that gets the reduced sampling weight in update()
+
+    def print_puzzle(self, matrix=None):  # print puzzle_to_string(matrix) to stdout
+        print(self.puzzle_to_string(matrix=matrix))
+
+    def puzzle_to_string(self, matrix=None):  # render matrix (default: the whole game board) as text
+        matrix = matrix if matrix is not None else self.game.puzzle
+        height, width = len(matrix), len(matrix[0])
+        ss = ""
+        for h in range(height):
+            for w in range(width):
+                if matrix[h, w]:
+                    ss = ss + str(matrix[h, w]) + " "  # every cell is followed by one space
+                else:
+                    ss = ss + "F" + " "  # falsy cells are shown as "F"
+            ss = ss + " " + "\n"  # each row ends with an extra space before the newline
+        ss = ss + "\n"  # the text ends with one extra blank line
+        return ss
+
+    def get_state(self):  # concatenate str() of every truthy cell of the game board, row by row
+        state = ""
+        height, width = len(self.game.puzzle), len(self.game.puzzle[0])
+        for row in range(height):
+            for col in range(width):
+                if self.game.puzzle[row, col]:
+                    state += str(self.game.puzzle[row, col])
+        return state
+
+    def update(self, puzzle_size):  # try one random step inside a puzzle of size (height, width)
+        height, width = puzzle_size
+        quick_chars = {  # character a cell switches to when the player or a box enters/leaves it
+            "*": "-",
+            "%": "X",
+            "+": "*",  # walls are not rejected below, so stepping into one replaces it with the player
+            "-": "*",
+            "X": "%",
+            "@": "-",
+            "$": "X",
+        }
+        moves_tuples = [(1, 0), (-1, 0), (0, -1), (0, 1)]  # (dy, dx): move[0] is added to y; prev_move gets weight 0.1
+        moves = self.rng.choices(moves_tuples, weights=[0.1 if m == self.prev_move else 1 for m in moves_tuples], k=1)
+        self.curr_state = self.get_state()
+        for move in moves:  # k=1, so this runs once
+            self.states[self.curr_state] += 1  # counted even when the step is rejected below
+            curr_pos = self.y, self.x
+            target = self.y + move[0], self.x + move[1]
+            reverse_target = self.y - move[0], self.x - move[1]  # the cell directly behind the player
+            if (
+                target[1] == self.game.pad_x  # target is on the puzzle's left border column ...
+                or target[0] == self.game.pad_y  # ... or its top border row ...
+                or target[1] >= self.game.pad_x + width - 1  # ... or on/past its right border ...
+                or target[0] >= self.game.pad_y + height - 1  # ... or on/past its bottom border ...
+                or (self.game.puzzle[target] and self.game.puzzle[target].char in "@$")  # ... or holds a box
+            ):
+                self.prev_move = move  # rejected: make an immediate retry of this direction unlikely
+                return
+            self.prev_move = -move[0], -move[1]  # accepted: make stepping straight back unlikely
+            self.game.puzzle[curr_pos].char = quick_chars[self.game.puzzle[curr_pos].char]
+            self.game.puzzle[curr_pos].obj = None
+            self.game.puzzle[target].char = quick_chars[self.game.puzzle[target].char]
+            self.game.puzzle[target].obj = self
+            if (c := self.game.puzzle[reverse_target].char) in "@$":  # a box behind the player is pulled along
+                self.game.puzzle[reverse_target].char = quick_chars[c]
+                self.game.puzzle[reverse_target].obj.reverse_move(move)  # presumably relocates the box — confirm in Box
+
+            self.y, self.x = target
diff --git a/reasoning_gym/games/contrib/sokoban/src/utils.py b/reasoning_gym/games/contrib/sokoban/src/utils.py
new file mode 100644
index 00000000..106fb8d1
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/utils.py
@@ -0,0 +1,170 @@
+from heapq import heappop, heappush
+
+import numpy as np
+
+
+def print_state(state, shape):
+    """Print a flat state string as a rows x cols character grid; an empty state prints nothing."""
+    if state:
+        rows, cols = shape
+        grid = np.array(list(state)).reshape(rows, cols)
+        print(grid)
+
+
+def find_boxes_and_goals(state, shape):
+    """Return (boxes, goals, boxes_on_goal) as lists of (row, col) positions found in the flat state string."""
+    _, width = shape
+    boxes, goals, boxes_on_goal = [], [], []
+    # '@' is a loose box, 'X' and '%' are goals without a box, '$' is a box already on a goal.
+    buckets = {"@": boxes, "X": goals, "%": goals, "$": boxes_on_goal}
+    for index, cell in enumerate(state):
+        bucket = buckets.get(cell)
+        if bucket is not None:
+            bucket.append(divmod(index, width))
+    return boxes, goals, boxes_on_goal
+
+
+def get_state(matrix):  # decode the matrix's raw buffer as UTF-8 text and drop every NUL character
+    return "".join(matrix.tobytes().decode("utf-8").split("\x00"))
+
+
+def is_solved(state):  # solved once the state holds no loose box character '@'
+    return not any(cell == "@" for cell in state)
+
+
+def manhattan_sum(state, player_pos, shape):
+    """Heuristic: board area per loose box + each loose box's Manhattan distance to its nearest open goal + player-to-nearest-box distance."""
+    height, width = shape
+    player_x, player_y = player_pos
+    boxes, goals, _ = find_boxes_and_goals(state, shape)
+    boxes_cost = len(boxes) * height * width
+    for box_x, box_y in boxes:
+        boxes_cost += min(abs(box_x - goal_x) + abs(box_y - goal_y) for goal_x, goal_y in goals)
+    player_cost = min(abs(bx - player_x) + abs(by - player_y) for bx, by in boxes) if boxes else 0  # once, not per box
+    return boxes_cost + player_cost
+
+
+def dijkstra(state, shape, box_pos=None, player_pos=None):
+    """Return a (height, width) array of shortest 4-neighbour step counts from the start cell; unreached cells stay inf."""
+    height, width = shape
+    start = box_pos or player_pos
+    # With player_pos given only walls block the search; otherwise boxes ('@', '$') block it as well.
+    blocked = "+" if player_pos else "+@$"
+    dist = np.full((height, width), float("inf"))
+    dist[start] = 0
+    frontier = [(0, start)]
+    while frontier:
+        steps, pos = heappop(frontier)
+        if steps > dist[pos]:
+            continue
+        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
+            nxt = n_row, n_col = pos[0] + d_row, pos[1] + d_col
+            inside = 1 <= n_row < height - 1 and 1 <= n_col < width - 1  # border cells are never entered
+            if inside and state[n_row * width + n_col] not in blocked and steps + 1 < dist[nxt]:
+                dist[nxt] = steps + 1
+                heappush(frontier, (steps + 1, nxt))
+    return dist
+
+
+def dijkstra_sum(state, player_pos, shape, distances):
+    """Path-length heuristic; also stores a fresh dijkstra() map in `distances` for every box and for the player."""
+    height, width = shape
+    boxes, goals, boxes_on_goal = find_boxes_and_goals(state, shape)
+    for box in boxes + boxes_on_goal:
+        distances[box] = dijkstra(state, shape, box)
+    distances[player_pos] = dijkstra(state, shape, player_pos=player_pos)
+    # One board area per loose box, plus each loose box's distance to its closest goal that holds no box.
+    penalty = len(boxes) * height * width
+    boxes_cost = sum((min(distances[box][goal] for goal in goals) for box in boxes), penalty)
+    player_cost = min(distances[player_pos][box] for box in boxes) if boxes else 0
+    return boxes_cost + player_cost
+
+
+def is_deadlock(state, shape):  # True when one of three dead-end patterns is found for a loose box '@'
+    height, width = shape
+    if not state or len(state) != height * width:  # empty or wrongly sized states are never reported
+        return False
+    boxes, _, _ = find_boxes_and_goals(state, shape)  # loose boxes only; boxes already on goals are skipped
+    for bx, by in boxes:  # corner deadlock
+        box = bx * width + by  # flat index of the box
+        if (
+            (state[box - 1] == "+" and state[box - width] == "+")  # walls left and above
+            or (state[box + 1] == "+" and state[box + width] == "+")  # walls right and below
+            or (state[box + 1] == "+" and state[box - width] == "+")  # walls right and above
+            or (state[box - 1] == "+" and state[box + width] == "+")  # walls left and below
+        ):
+            return True
+    double_box_positions = [  # flat offsets of the four 2x2 squares that contain the box
+        (0, -1, -width, -width - 1),
+        (0, 1, -width, -width + 1),
+        (0, -1, width - 1, width),
+        (0, 1, width + 1, width),
+    ]
+    for bx, by in boxes:  # double box deadlock
+        box = bx * width + by
+        for pos in double_box_positions:
+            pos_set = set()
+            for dir in pos:  # NOTE: `dir` shadows the builtin of the same name
+                pos_set.add(state[box + dir])
+            if pos_set in ({"@", "+"}, {"@"}, {"@", "$"}, {"@", "$", "+"}):  # square holds only boxes and walls
+                return True
+    box = goal = 0  # `box` is reused as a counter from here on
+    for i in range(width + 1, 2 * width - 1):  # too many boxes deadlock: row 1, columns 1..width-2
+        if state[i] == "@":
+            box += 1
+        elif state[i] in "X%":
+            goal += 1
+    if box > goal:
+        return True
+    box = goal = 0
+    for i in range(width * (height - 2) + 1, width * (height - 2) + width - 1):  # same check on row height-2
+        if state[i] == "@":
+            box += 1
+        elif state[i] in "X%":
+            goal += 1
+    if box > goal:
+        return True
+    box = goal = 0
+    for i in range(width + 1, width * (height - 1) + 1, width):  # same check on column 1, rows 1..height-2
+        if state[i] == "@":
+            box += 1
+        elif state[i] in "X%":
+            goal += 1
+    if box > goal:
+        return True
+    box = goal = 0
+    for i in range(2 * width - 2, width * height - 2, width):  # same check on column width-2, rows 1..height-2
+        if state[i] == "@":
+            box += 1
+        elif state[i] in "X%":
+            goal += 1
+    if box > goal:
+        return True
+    return False
+
+
+def can_move(state, shape, player_pos, move):
+    """Try `move` on the flat state: return (new_state, cost), or (None, 0) if a wall or a stuck box is in the way."""
+    row, col = player_pos
+    _, width = shape
+    here = row * width + col
+    ahead = (row + move[0]) * width + (col + move[1])
+    beyond = (row + move[0] * 2) * width + (col + move[1] * 2)
+    cells = list(state)
+    if state[ahead] == "+":
+        return None, 0
+    if state[ahead] in "-X":  # plain step onto floor or an empty goal
+        cells[here] = "-" if cells[here] == "*" else "X"
+        cells[ahead] = "*" if cells[ahead] == "-" else "%"
+        return "".join(cells), 3
+    if state[ahead] not in "@$":  # nothing recognised ahead: the state comes back unchanged
+        return "".join(cells), 0
+    if state[beyond] in "+@$":  # a box cannot be pushed into a wall or another box
+        return None, 0
+    cost = 0
+    if state[beyond] in "-X":
+        cells[beyond] = "@" if cells[beyond] == "-" else "$"
+        cells[ahead] = "*" if cells[ahead] == "@" else "%"
+        cells[here] = "-" if cells[here] == "*" else "X"
+        cost = 0 if cells[beyond] == "$" else 2  # pushing a box onto a goal costs nothing
+    return "".join(cells), cost
diff --git a/reasoning_gym/games/sokoban.py b/reasoning_gym/games/sokoban.py
new file mode 100644
index 00000000..f96d87ea
--- /dev/null
+++ b/reasoning_gym/games/sokoban.py
@@ -0,0 +1,117 @@
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Dict, Optional
+
+import numpy as np
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class SokobanConfig:
+    """Configuration for sokoban puzzle generation"""  # NOTE(review): SokobanDataset.__getitem__ calls generate(rng=rng) only, so the six bounds below look unused — confirm
+
+    seed: Optional[int] = None  # Passed to ProceduralDataset as the seed.
+    size: int = 500  # Passed to ProceduralDataset as the dataset size.
+    min_w: int = 6  # Minimum width of the puzzle.
+    min_h: int = 6  # Minimum height of the puzzle.
+    max_w: int = 10  # Maximum width of the puzzle.
+    max_h: int = 10  # Maximum height of the puzzle.
+    min_boxes: int = 6  # Minimum number of boxes.
+    max_boxes: int = 10  # Maximum number of boxes.
+
+    def validate(self):
+        """Validate configuration parameters: each minimum must not exceed its maximum (AssertionError otherwise)"""
+        assert self.min_w <= self.max_w, "min_w must be lte max_w"
+        assert self.min_h <= self.max_h, "min_h must be lte max_h"
+        assert self.min_boxes <= self.max_boxes, "min_boxes must be lte max_boxes"
+
+
+class SokobanDataset(ProceduralDataset):
+    """Generates Sokoban games with configurable parameters"""
+
+    def __init__(self, config: SokobanConfig):
+        self._prompt_templates = [  # NOTE(review): not referenced elsewhere in this class — confirm it is needed
+            "What will this Sokoban board look like after {simulation_steps} steps of simulation?\n\n{board}"
+        ]
+
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+        # lazy loading of sokoban imports
+        from .contrib.sokoban.src.game import Game
+        from .contrib.sokoban.src.generator import generate
+        from .contrib.sokoban.src.utils import is_solved
+
+        self._Game = Game
+        self._generate = generate
+        self._is_solved = is_solved
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Sokoban task
+
+        Returns:
+            dict with keys:
+                - question: str, the task description
+                - answer: str, a solution string
+                - metadata: dict with the puzzle string ("gamestr") and its "difficulty"
+        """
+
+        # A per-item RNG keeps each index reproducible. NOTE(review): the config's size/box bounds are not forwarded.
+        rng = Random(self.seed + idx)
+        gamestr, solution, difficulty = self._generate(rng=rng)
+
+        return {
+            "question": """You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
+"""
+            + gamestr,
+            "answer": solution,
+            "metadata": {"gamestr": gamestr, "difficulty": difficulty},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+        """Determine if the solution provided solves the Sokoban task.
+
+        Awards 1.0 if the moves solve the puzzle, 0.1 if they do not, 0.01 if replaying them raises, 0.0 for None.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, Any]): The original dataset entry containing the correct answer.
+
+        Returns:
+            float: The computed score between 0.0 and 1.0.
+        """
+
+        if answer is None:
+            return 0.0
+
+        try:
+            grid_list = [list(line) for line in entry["metadata"]["gamestr"].replace(" ", "").strip().split("\n")]
+            matrix = np.array(grid_list)
+
+            game = self._Game()
+            game.load_puzzle_matrix(matrix)
+
+            for move in answer:
+                game.player.update(key=move)
+
+            if self._is_solved(game.get_curr_state()):
+                return 1.0
+        except Exception:  # any failure while replaying the answer earns only a token score
+            return 0.01
+
+        return 0.1
+
+
+register_dataset("sokoban", SokobanDataset, SokobanConfig)  # register the dataset/config pair under the key "sokoban"
diff --git a/tests/test_sokoban.py b/tests/test_sokoban.py
new file mode 100644
index 00000000..c4d1e2b8
--- /dev/null
+++ b/tests/test_sokoban.py
@@ -0,0 +1,50 @@
+import pytest
+
+from reasoning_gym.games.sokoban import SokobanConfig, SokobanDataset
+
+
+def test_sokoban():
+    """Test basic properties and solution of generated items"""
+
+    # Easy: default bounds, 20 items
+    config = SokobanConfig(seed=42, size=20)
+    dataset = SokobanDataset(config)
+
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0  # the stored solution must solve it
+        assert dataset.score_answer(answer="RU", entry=item) == 0.1  # replayable moves that leave it unsolved
+        assert dataset.score_answer(answer=None, entry=item) == 0.0  # a missing answer scores zero
+
+    # Medium: larger bounds requested. NOTE(review): __getitem__ calls generate(rng=rng) only, so they look unused — confirm
+    config = SokobanConfig(seed=42, min_h=40, max_h=50, min_w=40, max_w=50, min_boxes=20, max_boxes=30, size=3)
+    dataset = SokobanDataset(config)
+
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    # Hard: even larger bounds requested (same caveat about unused bounds as the Medium case)
+    config = SokobanConfig(seed=42, min_h=400, max_h=500, min_w=400, max_w=500, min_boxes=50, max_boxes=50, size=1)
+    dataset = SokobanDataset(config)
+
+    for item in dataset:
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Test the scoring
+        assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+        assert dataset.score_answer(answer=None, entry=item) == 0.0