diff --git a/GALLERY.md b/GALLERY.md
index 1d09a54f..ad0248ce 100644
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -19,6 +19,7 @@ This gallery shows examples from all available datasets using their default conf
 - [fraction_simplification](#fraction_simplification)
 - [game_of_life](#game_of_life)
 - [gcd](#gcd)
+- [group_anagrams](#group_anagrams)
 - [gsm_symbolic](#gsm_symbolic)
 - [intermediate_integration](#intermediate_integration)
 - [largest_island](#largest_island)
@@ -42,13 +43,13 @@ This gallery shows examples from all available datasets using their default conf
 - [simple_equations](#simple_equations)
 - [simple_geometry](#simple_geometry)
 - [simple_integration](#simple_integration)
+- [sokoban](#sokoban)
 - [spell_backward](#spell_backward)
 - [sudoku](#sudoku)
 - [syllogism](#syllogism)
 - [time_intervals](#time_intervals)
 - [tower_of_hanoi](#tower_of_hanoi)
 - [word_ladder](#word_ladder)
-- [group_anagrams](#group_anagrams)
 - [word_sequence_reversal](#word_sequence_reversal)
 - [word_sorting](#word_sorting)
 - [zebra_puzzles](#zebra_puzzles)
@@ -406,17 +407,17 @@ Example tasks:
 Example 1:
 Question: 4 + 3 =
 Answer: 7
-Metadata: {'num_terms': 2, 'num_digits': 1, 'expression': '4 + 3'}
+Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 1}, 'expression': '4 + 3'}
 
 Example 2:
 Question: 812 + 880 =
 Answer: 1692
-Metadata: {'num_terms': 2, 'num_digits': 3, 'expression': '812 + 880'}
+Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 3}, 'expression': '812 + 880'}
 
 Example 3:
 Question: 2 + 6 + 3 + 4 + 0 =
 Answer: 15
-Metadata: {'num_terms': 5, 'num_digits': 1, 'expression': '2 + 6 + 3 + 4 + 0'}
+Metadata: {'difficulty': {'num_terms': 5, 'num_digits': 1}, 'expression': '2 + 6 + 3 + 4 + 0'}
 
 ````
 
@@ -897,6 +898,75 @@ Metadata: {'numbers': [297, 30], 'result': 3}
 
 ````
 
+### group_anagrams
+Generates Group Anagrams exercises with configurable difficulty
+
+Default configuration:
+```python
+anagram_groups = 10
+max_words_per_group = 5
+size = 500
+seed = 42
+```
+
+Example tasks:
+````
+Example 1:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+    - There is no string in the input that can be rearranged to form "bat".
+    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"]
+
+Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]]
+Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]}
+
+Example 2:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+    - There is no string in the input that can be rearranged to form "bat".
+    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"]
+
+Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]]
+Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]}
+
+Example 3:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+    - There is no string in the input that can be rearranged to form "bat".
+    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"]
+
+Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]]
+Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]}
+
+````
+
 ### gsm_symbolic
 Default configuration:
 ```python
@@ -1101,17 +1171,17 @@ Example tasks:
 Example 1:
 Question: How many legs are there in total if you have 1 sea slug, 1 deer?
 Answer: 4
-Metadata: {'animals': {'sea slug': 1, 'deer': 1}, 'total_legs': 4}
+Metadata: {'difficulty': {'num_animals': 2}, 'animals': {'sea slug': 1, 'deer': 1}, 'total_legs': 4}
 
 Example 2:
 Question: How many legs are there in total if you have 2 sheeps, 2 dogs?
 Answer: 16
-Metadata: {'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16}
+Metadata: {'difficulty': {'num_animals': 2}, 'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16}
 
 Example 3:
 Question: How many legs are there in total if you have 1 crab, 2 lobsters, 1 human, 1 cow, 1 bee?
 Answer: 42
-Metadata: {'animals': {'crab': 1, 'lobster': 2, 'human': 1, 'cow': 1, 'bee': 1}, 'total_legs': 42}
+Metadata: {'difficulty': {'num_animals': 5}, 'animals': {'crab': 1, 'lobster': 2, 'human': 1, 'cow': 1, 'bee': 1}, 'total_legs': 42}
 
 ````
 
@@ -1923,6 +1993,107 @@ Metadata: {'integrand': '-28*X**3 + 8*X', 'variable': 'X', 'expected_answer_expr
 
 ````
 
+### sokoban
+Generates Sokoban games with configurable parameters
+
+Default configuration:
+```python
+seed = 42
+size = 500
+min_w = 6
+min_h = 6
+max_w = 10
+max_h = 10
+min_boxes = 6
+max_boxes = 10
+```
+
+Example tasks:
+````
+Example 1:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + + + + +  
++ + X - @ * @ X +  
++ + + - - @ - + +  
++ + + - - - X $ +  
++ + + + - + + + +  
++ + $ + + + + + +  
++ + + + + + + + +  
+
+
+Answer: RLDULLRRDLDR
+Metadata: {'gamestr': '+ + + + + + + + +  \n+ + X - @ * @ X +  \n+ + + - - @ - + +  \n+ + + - - - X $ +  \n+ + + + - + + + +  \n+ + $ + + + + + +  \n+ + + + + + + + +  \n\n', 'difficulty': {'size': (7, 9), 'num_steps': 12}}
+
+Example 2:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + +  
++ - * - - +  
++ @ - - @ +  
++ X - @ - +  
++ - - - X +  
++ X - @ X +  
++ - - - - +  
++ + + + + +  
+
+
+Answer: LDRRDRDDLLURURDULUURDD
+Metadata: {'gamestr': '+ + + + + +  \n+ - * - - +  \n+ @ - - @ +  \n+ X - @ - +  \n+ - - - X +  \n+ X - @ X +  \n+ - - - - +  \n+ + + + + +  \n\n', 'difficulty': {'size': (8, 6), 'num_steps': 22}}
+
+Example 3:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + + + + + + + +  
++ - $ - X + - - - - - +  
++ - @ - - - - - @ - X +  
++ - * - @ - - X - $ - +  
++ - - - - X + - - - - +  
++ + - - - - + $ - @ - +  
++ + + - - - - - - - - +  
++ + + - - - $ - - - - +  
++ + + + - - - - - - - +  
++ + + + + + + + + + + +  
+
+
+Answer: RRRRURRRLDDRRDLULDRDLLLLULLDRDRUULUUULDLLURRDRU
+Metadata: {'gamestr': '+ + + + + + + + + + + +  \n+ - $ - X + - - - - - +  \n+ - @ - - - - - @ - X +  \n+ - * - @ - - X - $ - +  \n+ - - - - X + - - - - +  \n+ + - - - - + $ - @ - +  \n+ + + - - - - - - - - +  \n+ + + - - - $ - - - - +  \n+ + + + - - - - - - - +  \n+ + + + + + + + + + + +  \n\n', 'difficulty': {'size': (10, 12), 'num_steps': 47}}
+
+````
+
 ### spell_backward
 Generates tasks to spell words backward
 
@@ -2112,7 +2283,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6,
 Example 2:
 Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM.
 Answer: 02:38
-Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 5, 9, 44), 'end_time': datetime.datetime(2025, 2, 5, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
+Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 7, 9, 44), 'end_time': datetime.datetime(2025, 2, 7, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
 
 Example 3:
 Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days.
@@ -2217,85 +2388,6 @@ Metadata: {'start_word': 'SNOG', 'end_word': 'SUQS', 'word_length': 4, 'chain_le
 
 ````
 
-
-### group_anagrams
-
-Group anagrams together in a list of words. 
-
-Default configuration
-```python
-anagram_groups: int = 10  # Groups of anagrams present in the input
-max_words_per_group: int = 5  # Maximum number of words in a single anagram group
-```
-
-Example tasks:
-```
-Example 1:
-Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
-
-Your job is to group the anagrams together. You can return the answer in any order.
-
-Example:
-Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
-Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
-Explanation:
-    - There is no string in the input that can be rearranged to form "bat".
-    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
-
-Group the following list of words into anagrams:
-["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"]
-
-Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]]
-
-Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]}
-
---------------------------------------------------
-
-Example 2:
-Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
-
-Your job is to group the anagrams together. You can return the answer in any order.
-
-Example:
-Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
-Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
-Explanation:
-    - There is no string in the input that can be rearranged to form "bat".
-    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
-
-Group the following list of words into anagrams:
-["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"]
-
-Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]]
-
-Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]}
-
---------------------------------------------------
-
-Example 3:
-Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
-
-Your job is to group the anagrams together. You can return the answer in any order.
-
-Example:
-Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
-Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
-Explanation:
-    - There is no string in the input that can be rearranged to form "bat".
-    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
-
-Group the following list of words into anagrams:
-["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"]
-
-Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]]
-
-Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]}
-
---------------------------------------------------
-```
-
-
-
 ### word_sequence_reversal
 Generates word sequence reversal tasks from text spans
 
diff --git a/README.md b/README.md
index 9335a1d2..b623eebc 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
 ### <small>Game Tasks</small>
 
 - `SudokuDataset`: Generate 9x9 Sudoku puzzles with configurable number of empty cells
+- `SokobanDataset`: Generate [Sokoban](https://en.wikipedia.org/wiki/Sokoban) puzzles with configurable board size and number of boxes
 - `MiniSudokuDataset`: Generate 4x4 Mini Sudoku puzzles with configurable difficulty
 - `MazeDataset`: Generate a maze with a start and a goal
 - `CountdownDataset`: Generate number game tasks where numbers and operators must be combined to reach a target value
diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py
index 8e4e32d6..958dcd01 100644
--- a/reasoning_gym/games/__init__.py
+++ b/reasoning_gym/games/__init__.py
@@ -11,6 +11,7 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
 from .maze import MazeConfig, MazeDataset
 from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
 from .n_queens import NQueensDataset
+from .sokoban import SokobanConfig, SokobanDataset
 from .sudoku import SudokuConfig, SudokuDataset
 from .tower_of_hanoi import HanoiConfig, HanoiDataset
 
@@ -21,6 +22,8 @@ __all__ = [
     "MiniSudokuDataset",
     "SudokuConfig",
     "SudokuDataset",
+    "SokobanConfig",
+    "SokobanDataset",
     "MazeConfig",
     "MazeDataset",
     "GameOfLifeConfig",
diff --git a/reasoning_gym/games/contrib/__init__.py b/reasoning_gym/games/contrib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/LICENSE b/reasoning_gym/games/contrib/sokoban/LICENSE
new file mode 100644
index 00000000..84d0d484
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Bruno Andrade
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/reasoning_gym/games/contrib/sokoban/README.md b/reasoning_gym/games/contrib/sokoban/README.md
new file mode 100644
index 00000000..44d565ea
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/README.md
@@ -0,0 +1,52 @@
+# 📦 Sokoban Solver and Generator
+
+This folder contains a minified version of Bruno Andrade's Sokoban game with all pygame dependencies stripped.
+
+The original version can be found here: [xbandrade/sokoban-solver-generator](https://github.com/xbandrade/sokoban-solver-generator)
+
+
+This is a Sokoban puzzle generator and solver that uses BFS, A* and Dijkstra search algorithms.
+
+`Sokoban` is a puzzle game in which the player pushes boxes around in a warehouse, trying to get every box to a goal.
+
+
+### ❕Sokoban Puzzle
+The puzzle states are stored in a matrix, and each element of the puzzle is represented by a single character in the matrix.
+```
++ + + + + + +
++ * - @ - X +
++ + - @ - + +
++ X - - - $ +
++ + + + + + +
+```
+`*` - The player <br>
+`%` - The player on a goal <br>
+`@` - A box <br>
+`X` - A goal <br>
+`$` - A box on a goal <br>
+`+` - A wall <br>
+`-` - An empty position <br>
+
+In the original pygame version, a box on a goal has its color changed to green in the game window.
+
+
+### ❕Sokoban Generator
+
+The generator will initially create a puzzle with a random board size, then the player and the boxes on goals will be randomly placed on the board.
+During generation the player can only pull boxes away from their starting positions, breaking every wall in the way, so the resulting puzzle is guaranteed to have a valid solution.
+
+
+### ❕ Sokoban Solver
+
+The algorithms used to implement the Sokoban puzzle solvers were `Breadth-First Search(BFS)` and `A*`.
+
+The `BFS` solver uses a queue to store the next states of the puzzle it needs to visit. A visited state is stored in a hashset, and BFS won't try to visit the same state twice.
+
+The `A*` algorithm is similar to the BFS algorithm, but it uses a priority queue instead of a queue, and it prioritizes moves that are more likely to solve the problem.
+It does so by setting costs to the puzzle state and the player's movements, punishing the player with high costs for a bad move and rewarding the player with lower costs for a good move.
+The state costs are defined by heuristic functions, and this solver was implemented with two different heuristics: the `Manhattan Distance` function and `Dijkstra` distance function.
+
+All three implementations check for possible deadlocks (states that are impossible to solve) before adding the new state to the queue.
+
+
+More about Sokoban: [Wikipedia Article](https://en.wikipedia.org/wiki/Sokoban)
diff --git a/reasoning_gym/games/contrib/sokoban/__init__.py b/reasoning_gym/games/contrib/sokoban/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
new file mode 100644
index 00000000..867d112a
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
@@ -0,0 +1,10 @@
++ + + + + + +
++ - * - - - +
++ - - - $ - +
++ X - - @ - +
++ - - - - - +
++ $ - + - - +
++ + - - - - +
++ X @ - $ - +
++ + - - - - +
++ + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
new file mode 100644
index 00000000..9ba48c31
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
@@ -0,0 +1,5 @@
++ + + + + + +
++ * - @ - X +
++ + - @ - + +
++ X - - - - +
++ + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
new file mode 100644
index 00000000..46755810
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
@@ -0,0 +1,6 @@
+- - + + + + + +
+- + + - - - * +
++ + - - - + X +
++ X - @ - @ @ +
++ X X @ - - - +
++ + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
new file mode 100644
index 00000000..9d0bc599
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
@@ -0,0 +1,7 @@
+- + + + + + + - - -
+- + X - - X + - - -
++ + - @ @ + + - - -
++ - - - - + + - - -
++ - @ - - * + + + +
++ + - - - - - - X +
+- + + + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
new file mode 100644
index 00000000..42fbc6eb
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
@@ -0,0 +1,7 @@
+- + + + + + + - -
++ + X - @ - + + +
++ - - - - - - - +
++ - @ + + X - @ +
++ - - - @ - + - +
++ + + * - X - X +
+- - + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
new file mode 100644
index 00000000..3a096d58
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
@@ -0,0 +1,7 @@
+- + + + + + + + -
++ + - - + - - + +
++ - @ - - - @ - +
++ - - X * X - - +
++ + @ + + - - + +
++ - - X - - - + -
++ + + + + + + + -
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
new file mode 100644
index 00000000..32ee5bbc
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
@@ -0,0 +1,9 @@
+- - - + + + + + + + +
+- - - + - - - - - - +
+- - + + - - - - @ - +
+- + + - - + + - + + +
++ + - - + - - X - - +
++ - - + X @ @ - - + +
++ * + X - - - - + + -
++ + - - - - - + + - -
++ + + + + + + + - - -
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
new file mode 100644
index 00000000..9c2fe302
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
@@ -0,0 +1,6 @@
++ + + + + + + +
++ - - @ - X * +
++ - @ - - + X +
++ X X @ - @ @ +
++ X X @ - - - +
++ + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/src/__init__.py b/reasoning_gym/games/contrib/sokoban/src/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/src/astar.py b/reasoning_gym/games/contrib/sokoban/src/astar.py
new file mode 100644
index 00000000..25d1e63d
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/astar.py
@@ -0,0 +1,81 @@
+from collections import defaultdict
+from heapq import heappop, heappush
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.utils import (
+    can_move,
+    dijkstra_sum,
+    get_state,
+    is_deadlock,
+    is_solved,
+    manhattan_sum,
+)
+
+
+def astar(matrix, player_pos, debug=False, heuristic="manhattan"):
+    """Search for a move string that solves the puzzle held in ``matrix``.
+
+    States are expanded from a heap, starting with the player at ``player_pos``
+    (``(row, col)``). ``heuristic="manhattan"`` scores states with
+    ``manhattan_sum``; any other value uses ``dijkstra_sum``.
+
+    Returns ``(path, depth)`` where ``path`` is built from "U"/"D"/"L"/"R", or
+    ``(None, -1)`` once the heap is empty. Messages are printed only if ``debug``.
+    """
+    heur = "[A*]" if heuristic == "manhattan" else "[Dijkstra]"
+    shape = matrix.shape
+    initial_state = get_state(matrix)
+    if heuristic == "manhattan":
+        curr_cost = manhattan_sum(initial_state, player_pos, shape)
+    else:
+        distances = defaultdict(list)
+        curr_cost = dijkstra_sum(initial_state, player_pos, shape, distances)
+    # None is pre-seeded so a move whose resulting state is None is always skipped.
+    seen = {None}
+    heap = [(0, curr_cost, initial_state, player_pos, 0, "")]
+    # (row delta, col delta) -> move letter; dict order is the expansion order.
+    direction = {
+        (1, 0): "D",
+        (-1, 0): "U",
+        (0, -1): "L",
+        (0, 1): "R",
+    }
+    while heap:
+        _, curr_cost, state, pos, depth, path = heappop(heap)
+        seen.add(state)
+        for move, letter in direction.items():
+            new_state, move_cost = can_move(state, shape, pos, move)
+            # Membership test first, so the deadlock check only runs on unseen states.
+            if new_state in seen or is_deadlock(new_state, shape):
+                continue
+            new_pos = pos[0] + move[0], pos[1] + move[1]
+            if heuristic == "manhattan":
+                new_cost = manhattan_sum(new_state, new_pos, shape)
+            else:
+                new_cost = dijkstra_sum(new_state, new_pos, shape, distances)
+            if new_cost == float("inf"):
+                continue
+            new_path = path + letter
+            heappush(
+                heap,
+                (move_cost + curr_cost, new_cost, new_state, new_pos, depth + 1, new_path),
+            )
+            if is_solved(new_state):
+                if debug:
+                    print(f"{heur} Solution Found!\n{new_path}")
+                return (new_path, depth + 1)
+            if debug:
+                print(f"{heur} Solution Depth: {depth + 1}\n{new_path}")
+    # The loop ends only when the heap is empty, so failure is always (None, -1);
+    # it is reported through the return value, and on stdout only when debugging.
+    if debug:
+        print(f"{heur} Solution Not Found!\nDepth {depth + 1}")
+    return (None, -1)
+
+
+def solve_astar(puzzle, visualizer=False, heuristic="manhattan"):
+    """Find the player cell ("*" or "%") in ``puzzle`` and run ``astar`` from it."""
+    # np.where yields (row_indices, col_indices); the first match is taken.
+    hits = np.where((puzzle == "*") | (puzzle == "%"))
+    return astar(puzzle, (hits[0][0], hits[1][0]), debug=visualizer, heuristic=heuristic)
diff --git a/reasoning_gym/games/contrib/sokoban/src/bfs.py b/reasoning_gym/games/contrib/sokoban/src/bfs.py
new file mode 100644
index 00000000..d6a376c9
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/bfs.py
@@ -0,0 +1,66 @@
+import time
+from collections import deque
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.utils import can_move, get_state, is_deadlock, is_solved, print_state
+
+
+def bfs(matrix, player_pos, debug=False):
+    """Breadth-first search for a move string that solves the puzzle in ``matrix``.
+
+    ``player_pos`` is the player's ``(row, col)`` start.
+
+    Returns ``(path, depth)`` with ``path`` built from "U"/"D"/"L"/"R", or
+    ``(None, -1)`` once the queue is empty. Nothing is printed unless ``debug``
+    is true.
+    """
+    initial_state = get_state(matrix)
+    shape = matrix.shape
+    if debug:
+        print("Breadth-First Search")
+        print_state(initial_state, shape)
+    # None is pre-seeded so a move whose resulting state is None is always skipped.
+    seen = {None}
+    q = deque([(initial_state, player_pos, 0, "")])
+    # (row delta, col delta) -> move letter; dict order is the expansion order.
+    direction = {
+        (1, 0): "D",
+        (-1, 0): "U",
+        (0, -1): "L",
+        (0, 1): "R",
+    }
+    while q:
+        state, pos, depth, path = q.popleft()
+        seen.add(state)
+        for move, letter in direction.items():
+            new_state, _ = can_move(state, shape, pos, move)
+            # Membership test first, so the deadlock check only runs on unseen states.
+            if new_state in seen or is_deadlock(new_state, shape):
+                continue
+            new_pos = pos[0] + move[0], pos[1] + move[1]
+            new_path = path + letter
+            q.append((new_state, new_pos, depth + 1, new_path))
+            if is_solved(new_state):
+                if debug:
+                    print(f"[BFS] Solution found!\n\n{new_path}\nDepth {depth + 1}\n")
+                return (new_path, depth + 1)
+            if debug:
+                print(f"[BFS] Solution Depth: {depth + 1}\n{new_path}")
+    # The loop ends only when the queue is empty, so failure is always (None, -1).
+    if debug:
+        print(f"[BFS] Solution Not Found!\nDepth {depth + 1}")
+    return (None, -1)
+
+
+def solve_bfs(puzzle, visualizer=False):
+    """Find the player cell ("*" or "%") in ``puzzle`` and run ``bfs`` from it."""
+    # np.where yields (row_indices, col_indices); the first match is taken.
+    hits = np.where((puzzle == "*") | (puzzle == "%"))
+    return bfs(puzzle, (hits[0][0], hits[1][0]), debug=visualizer)
+
+
+if __name__ == "__main__":
+    start = time.time()  # ad-hoc timing run; the level path is relative to the working directory
+    print(solve_bfs(np.loadtxt("levels/lvl7.dat", dtype="<U1")))  # show the (path, depth) result
+    print(f"Runtime: {time.time() - start} seconds")
diff --git a/reasoning_gym/games/contrib/sokoban/src/box.py b/reasoning_gym/games/contrib/sokoban/src/box.py
new file mode 100644
index 00000000..e816db9a
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/box.py
@@ -0,0 +1,34 @@
+class Box:
+    """A box at column ``x``, row ``y`` of ``game.puzzle`` (which is indexed ``[y, x]``)."""
+
+    def __init__(self, x, y, game=None):
+        self.game, self.x, self.y = game, x, y
+
+    def can_move(self, move):
+        """Shift the box by ``move`` = (dx, dy); return False untouched if a Box is on the target."""
+        dest = self.y + move[1], self.x + move[0]
+        dest_cell = self.game.puzzle[dest]
+        if isinstance(dest_cell.obj, Box):
+            return False
+        src_cell = self.game.puzzle[self.y, self.x]
+        self.y, self.x = dest
+        src_cell.char = "X" if src_cell.ground else "-"
+        src_cell.obj = None
+        dest_cell.char = "$" if dest_cell.ground else "@"
+        dest_cell.obj = self
+        return True
+
+    def reverse_move(self, move):
+        """Relocate the box by ``move`` = (dy, dx), the reverse of ``can_move``'s order; no occupancy check."""
+        board, src = self.game.puzzle, (self.y, self.x)
+        dest = src[0] + move[0], src[1] + move[1]
+        board[src].obj = None
+        board[dest].obj = self
+        self.y, self.x = dest
+        board[src].char = "X" if board[src].ground else "-"
+        board[dest].char = "$" if board[dest].ground else "@"
+
+
+class Obstacle(Box):
+    def __init__(self, x, y):  # a Box built without a game, so ``game`` stays None
+        super().__init__(x, y)
diff --git a/reasoning_gym/games/contrib/sokoban/src/game.py b/reasoning_gym/games/contrib/sokoban/src/game.py
new file mode 100644
index 00000000..f01f3720
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/game.py
@@ -0,0 +1,173 @@
+from random import Random
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
+from reasoning_gym.games.contrib.sokoban.src.player import Player, ReversePlayer
+from reasoning_gym.games.contrib.sokoban.src.utils import get_state
+
+
+class Floor:
+    """Ground tile located at column x, row y."""
+    def __init__(self, x, y):
+        self.x, self.y = x, y
+
+
+class Goal(Floor):  # floor tile that is a goal; stored as a PuzzleElement's ground
+    def __init__(self, x, y):
+        super().__init__(x, y)
+
+
+class PuzzleElement:
+    """One grid cell: the character shown (char), the object standing on it (obj) and its ground tile (ground)."""
+
+    def __init__(self, char: str, obj=None, ground=None):
+        self.char, self.ground, self.obj = char, ground, obj
+
+    def __str__(self) -> str:
+        return self.char
+
+
+class Game:
+    """Sokoban board: a (height, width) object grid of PuzzleElement cells with the puzzle centred on it.
+
+    Cell characters: "+" wall, "@" box, "$" box on goal, "X" goal, "*" player, "%" player on goal,
+    "-" or " " empty floor. Grid cells outside the loaded puzzle stay None.
+    """
+
+    def __init__(self, width=19, height=10, level=None, path=None):
+        self.level = level
+        self.width = width
+        self.height = height
+        self.puzzle = np.empty((height, width), dtype=PuzzleElement)
+
+        self.player = None
+        self.puzzle_size = None
+        self.pad_x = 0
+        self.pad_y = 0
+        self.path = path or f"levels/lvl{level}.dat"
+
+        # Subclasses may override load_puzzle with another signature, so only Game itself auto-loads.
+        if path and type(self) is Game:
+            self.load_puzzle()
+
+    def get_matrix(self):
+        """Return the loaded puzzle region as a puzzle_size array of single characters."""
+        rows, cols = self.puzzle_size
+        region = self.puzzle[self.pad_y : self.pad_y + rows, self.pad_x : self.pad_x + cols]
+        matrix = np.empty(self.puzzle_size, dtype="<U1")
+        for h, row in enumerate(region):
+            for w, elem in enumerate(row):
+                matrix[h, w] = elem.char
+        return matrix
+
+    def get_curr_state(self):
+        """Return the loaded puzzle region flattened into a single state string."""
+        return get_state(self.get_matrix())
+
+    def print_puzzle(self):
+        """Print the whole grid, one row per line; empty (None) cells print as blanks."""
+        for row in self.puzzle:
+            for elem in row:
+                print(elem.char if elem else " ", end=" ")
+            print(" ")
+
+    def is_level_complete(self):
+        """Return True when no cell shows "@", i.e. no box is left off a goal."""
+        return not any(elem and elem.char == "@" for elem in self.puzzle.flat)
+
+    def load_puzzle(self):
+        """Load the puzzle from self.path: one row per line, cells separated by whitespace.
+
+        I/O and validation errors are printed and swallowed rather than raised.
+        """
+        try:
+            with open(self.path) as f:
+                data = [line.strip().split() for line in f]
+            self._process_puzzle_data(data)
+        except (OSError, ValueError) as e:
+            print(f"{e}")
+
+    def load_puzzle_matrix(self, matrix):
+        """Load the puzzle from a matrix (list of rows or numpy array); validation errors are printed."""
+        data = matrix.tolist() if isinstance(matrix, np.ndarray) else matrix
+        try:
+            self._process_puzzle_data(data)
+        except ValueError as e:
+            print(f"{e}")
+
+    def _process_puzzle_data(self, data):
+        """Centre data (rows of cell characters) on the grid and create the matching game objects.
+
+        Raises:
+            ValueError: if the puzzle does not fit on the grid or contains an unknown character.
+        """
+        rows = len(data)
+        cols = len(data[0]) if rows > 0 else 0
+        pad_x = (self.width - cols - 2) // 2  # -2 matches original file-based logic
+        pad_y = (self.height - rows) // 2
+        if pad_x < 0 or pad_y < 0:
+            # Negative padding would wrap around through negative indexing and scramble the grid.
+            raise ValueError(f"Puzzle of size {rows}x{cols} does not fit on a {self.height}x{self.width} board")
+        self.puzzle_size = (rows, cols)
+        self.pad_x, self.pad_y = pad_x, pad_y
+
+        # Populate puzzle elements
+        for i, row in enumerate(data):
+            y = i + pad_y
+            for j, c in enumerate(row):
+                x = j + pad_x
+                new_elem = PuzzleElement(c)
+                self.puzzle[y, x] = new_elem
+
+                # Create game objects based on characters
+                if c == "+":  # Wall
+                    new_elem.obj = Obstacle(x=x, y=y)
+                elif c == "@":  # Box
+                    new_elem.obj = Box(x=x, y=y, game=self)
+                elif c == "*":  # Player
+                    new_elem.obj = self.player = Player(x=x, y=y, game=self)
+                elif c == "X":  # Goal
+                    new_elem.ground = Goal(x=x, y=y)
+                elif c == "$":  # Box on goal
+                    new_elem.ground = Goal(x=x, y=y)
+                    new_elem.obj = Box(x=x, y=y, game=self)
+                elif c == "%":  # Player on goal
+                    new_elem.ground = Goal(x=x, y=y)
+                    new_elem.obj = self.player = Player(x=x, y=y, game=self)
+                elif c not in " -":  # Validation
+                    raise ValueError(f"Invalid character in puzzle: {c}")
+
+
+class ReverseGame(Game):
+    """Game whose player is a ReversePlayer driven by rng; used to build levels by playing backwards."""
+
+    def __init__(self, rng: Random, width=19, height=10, level=None):
+        super().__init__(width, height, level)
+        self.rng = rng
+        self.pad_x = self.pad_y = 0
+
+    def load_puzzle(self, puzzle):
+        """Centre puzzle (rows of cell characters) on the grid and create its game objects.
+
+        Unlike Game's loaders, this performs no character validation.
+        """
+        rows = len(puzzle)
+        self.puzzle_size = (rows, len(puzzle[0]) if rows > 0 else 0)
+        pad_x = (self.width - len(puzzle[0]) - 2) // 2
+        pad_y = (self.height - rows) // 2
+        self.pad_x, self.pad_y = pad_x, pad_y
+        for i, row in enumerate(puzzle):
+            y = i + pad_y
+            for j, c in enumerate(row):
+                x = j + pad_x
+                cell = PuzzleElement(c)
+                self.puzzle[y, x] = cell
+                if c == "+":  # wall
+                    cell.obj = Obstacle(x=x, y=y)
+                elif c in ("@", "$"):  # box, possibly on a goal
+                    cell.obj = Box(x=x, y=y, game=self)
+                elif c in ("*", "%"):  # player, possibly on a goal
+                    cell.obj = self.player = ReversePlayer(rng=self.rng, x=x, y=y, game=self)
+                if c in ("X", "$", "%"):  # goal underneath
+                    cell.ground = Goal(x=x, y=y)
diff --git a/reasoning_gym/games/contrib/sokoban/src/generator.py b/reasoning_gym/games/contrib/sokoban/src/generator.py
new file mode 100644
index 00000000..da4c954f
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/generator.py
@@ -0,0 +1,107 @@
+from random import Random
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.astar import solve_astar
+from reasoning_gym.games.contrib.sokoban.src.game import Game, ReverseGame
+
+
+def num_boxes(puzzle_area, min_boxes, max_boxes, min_w, min_h, max_w, max_h):
+    """Linearly interpolate the box count over the puzzle-area range; returns min_boxes when that range is empty."""
+    m = (max_boxes - min_boxes) / span if (span := max_w * max_h - min_w * min_h) else 0.0  # avoid ZeroDivisionError
+    return int(m * puzzle_area + (min_boxes - m * min_w * min_h))
+
+
+def random_valid(rng: Random, width: int = 10, height: int = 10):  # random interior (non-border) coordinate pair
+    return tuple(rng.randrange(1, limit - 1) for limit in (width, height))
+
+
+def generate(
+    rng: Random,
+    debug: bool = False,
+    path: str = None,
+    min_w: int = 6,
+    min_h: int = 6,
+    max_w: int = 15,
+    max_h: int = 10,
+    min_boxes: int = 4,
+    max_boxes: int = 10,
+) -> tuple[str, str, dict]:
+    """
+    Generates a level by placing boxes on goals and letting a ReversePlayer pull them away at random.
+
+    Parameters:
+        rng: Random number generator for reproducibility.
+        debug: Whether to print the generated puzzle and replay its solution.
+        path: Unused; kept for backward compatibility (saving the level to a file is disabled).
+        min_w: Minimum width of the puzzle.
+        min_h: Minimum height of the puzzle.
+        max_w: Maximum width of the puzzle.
+        max_h: Maximum height of the puzzle.
+        min_boxes: Minimum number of boxes.
+        max_boxes: Maximum number of boxes.
+    Returns:
+        (puzzle_string, solution, difficulty); difficulty holds the puzzle "size" and "num_steps".
+    """
+    while True:
+        width = rng.randint(min_w, max_w)
+        height = rng.randint(min_h, max_h)
+        # Start from solid wall: the reverse player turns the walls it walks through into floor.
+        puzzle = np.full((height, width), "+", dtype="<U1")
+        boxes = num_boxes(width * height, min_boxes, max_boxes, min_w, min_h, max_w, max_h)
+        player_pos = random_valid(rng, width, height)  # (x, y)
+        puzzle_size = (height, width)
+        puzzle[player_pos[1], player_pos[0]] = "*"
+        boxes_created = 0
+        while boxes_created < boxes:
+            box_pos = random_valid(rng, height, width)  # (row, col), hence height first
+            if puzzle[box_pos] == "+":
+                puzzle[box_pos] = "$"  # every box starts on a goal
+                boxes_created += 1
+
+        reverse_game = ReverseGame(rng=rng, level=0)
+        reverse_game.load_puzzle(puzzle)
+        player = reverse_game.player
+        # Random reverse walk; stop early once a single board state has been visited 20 times.
+        counter = round(height * width * rng.uniform(1.8, 3.6))
+        while counter > 0:
+            player.update(puzzle_size)
+            if player.states[player.curr_state] >= 20:
+                break
+            counter -= 1
+        slice_x = slice(reverse_game.pad_x, reverse_game.pad_x + width)
+        slice_y = slice(reverse_game.pad_y, reverse_game.pad_y + height)
+        matrix = reverse_game.puzzle[slice_y, slice_x]
+        if debug:
+            player.print_puzzle(matrix)
+
+        # Keep the level only if at least boxes // 2 boxes ended up off their goals.
+        out_of_place_boxes = np.sum([str(x) == "@" for x in matrix.flatten()])
+        if out_of_place_boxes < boxes // 2:
+            if debug:
+                print(f"Not enough boxes out of place, retrying generation... [{out_of_place_boxes}/{boxes}]")
+            continue
+
+        puzzle_str = player.puzzle_to_string(matrix)
+        grid_list = [list(line) for line in puzzle_str.replace(" ", "").strip().split("\n")]
+        grid_array = np.array(grid_list)
+        solution, _ = solve_astar(grid_array)
+        if solution is None:
+            # NOTE(review): assumes solve_astar reports failure as None -- confirm. Retry instead of crashing.
+            continue
+
+        if debug:
+            print(f"solution={solution}")
+            game = Game()
+            game.load_puzzle_matrix(grid_array)
+            for step, move in enumerate(solution):
+                print(f"move #{step}: {move}")
+                game.player.update(key=move)
+                game.print_puzzle()
+
+        difficulty = {"size": puzzle_size, "num_steps": len(solution)}
+        return puzzle_str, solution, difficulty
+
+
+if __name__ == "__main__":  # manual smoke run: generate one level from an unseeded rng with debug output
+    generate(rng=Random(), debug=True)
diff --git a/reasoning_gym/games/contrib/sokoban/src/player.py b/reasoning_gym/games/contrib/sokoban/src/player.py
new file mode 100644
index 00000000..1299ea7c
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/player.py
@@ -0,0 +1,118 @@
+from collections import defaultdict
+from random import Random
+
+from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
+
+
+class Player:
+    """A player that can only push boxes"""
+
+    # Move key -> (dx, dy) step on the grid.
+    _MOVES = {"R": (1, 0), "L": (-1, 0), "U": (0, -1), "D": (0, 1)}
+
+    def __init__(self, x, y, game):
+        self.game = game
+        self.x = x
+        self.y = y
+
+    def update(self, key: str = None) -> int:
+        """Apply one move key ("R", "L", "U" or "D") and return 1 if the player moved, else 0.
+
+        Unknown keys, walls, unpopulated cells and boxes that cannot be pushed leave the state unchanged.
+        """
+        move = self._MOVES.get(key)
+        if move is None:
+            return 0
+        curr = self.y, self.x
+        target = self.y + move[1], self.x + move[0]
+        target_elem = self.game.puzzle[target]
+        # A None cell lies outside the loaded puzzle; treat it like a wall instead of crashing on .obj.
+        if target_elem is None or isinstance(target_elem.obj, Obstacle):
+            return 0
+        if isinstance(target_elem.obj, Box) and not target_elem.obj.can_move(move):
+            return 0
+        curr_elem = self.game.puzzle[curr]
+        self.y, self.x = target
+        curr_elem.char = "X" if curr_elem.ground else "-"
+        curr_elem.obj = None
+        target_elem.char = "%" if target_elem.ground else "*"
+        target_elem.obj = self
+        return 1
+
+
+class ReversePlayer(Player):
+    """A player that can only pull boxes"""
+
+    # Character a cell shows after the player leaves it ("*", "%"), after the player enters it
+    # ("+", "-", "X") or after a box is pulled off it ("@", "$").
+    _NEXT_CHAR = {"*": "-", "%": "X", "+": "*", "-": "*", "X": "%", "@": "-", "$": "X"}
+    _STEPS = [(1, 0), (-1, 0), (0, -1), (0, 1)]  # candidate (dy, dx) steps
+
+    def __init__(self, rng: Random, x, y, game=None, puzzle=None):
+        super().__init__(x=x, y=y, game=game)
+        self.rng = rng
+        self.puzzle = puzzle
+        self.curr_state = ""  # grid state captured at the start of the latest update()
+        self.states = defaultdict(int)  # grid state -> number of update() calls started from it
+        self.prev_move = (0, 0)  # step that is sampled with a reduced weight in the next update()
+
+    def print_puzzle(self, matrix=None):
+        """Print the text produced by puzzle_to_string(matrix)."""
+        print(self.puzzle_to_string(matrix=matrix))
+
+    def puzzle_to_string(self, matrix=None):
+        """Render matrix (default: the whole game grid) as text.
+
+        Every cell is followed by a space, every row ends with an extra space and a newline, and the
+        text ends with an empty line. Falsy cells are rendered as "F".
+        """
+        matrix = matrix if matrix is not None else self.game.puzzle
+        height, width = len(matrix), len(matrix[0])
+        lines = []
+        for h in range(height):
+            cells = [str(matrix[h, w]) if matrix[h, w] else "F" for w in range(width)]
+            lines.append("".join(cell + " " for cell in cells) + " \n")
+        return "".join(lines) + "\n"
+
+    def get_state(self):
+        """Return the characters of all populated grid cells, concatenated row by row."""
+        return "".join(str(cell) for cell in self.game.puzzle.flat if cell)
+
+    def update(self, puzzle_size):
+        """Attempt one random step, pulling along a box that sits directly behind the player.
+
+        The step is refused (and only recorded in prev_move) when it would leave the puzzle interior
+        or run into a box. Walls are walked through and become floor.
+        """
+        height, width = puzzle_size
+        pad_x, pad_y = self.game.pad_x, self.game.pad_y
+        grid = self.game.puzzle
+        # One rng.choices draw per call; the step equal to prev_move gets a tenth of the normal weight.
+        weights = [0.1 if step == self.prev_move else 1 for step in self._STEPS]
+        (move,) = self.rng.choices(self._STEPS, weights=weights, k=1)
+        self.curr_state = self.get_state()
+        self.states[self.curr_state] += 1
+
+        here = self.y, self.x
+        ahead = self.y + move[0], self.x + move[1]
+        behind = self.y - move[0], self.x - move[1]
+        leaves_interior = (
+            ahead[1] == pad_x
+            or ahead[0] == pad_y
+            or ahead[1] >= pad_x + width - 1
+            or ahead[0] >= pad_y + height - 1
+        )
+        if leaves_interior or (grid[ahead] and grid[ahead].char in "@$"):
+            self.prev_move = move
+            return
+        self.prev_move = -move[0], -move[1]  # makes stepping straight back less likely next time
+        grid[here].char = self._NEXT_CHAR[grid[here].char]
+        grid[here].obj = None
+        grid[ahead].char = self._NEXT_CHAR[grid[ahead].char]
+        grid[ahead].obj = self
+        pulled = grid[behind]
+        if pulled.char in "@$":
+            # The box directly behind the player follows it onto the cell just vacated.
+            pulled.char = self._NEXT_CHAR[pulled.char]
+            pulled.obj.reverse_move(move)
+        self.y, self.x = ahead
diff --git a/reasoning_gym/games/contrib/sokoban/src/utils.py b/reasoning_gym/games/contrib/sokoban/src/utils.py
new file mode 100644
index 00000000..106fb8d1
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/utils.py
@@ -0,0 +1,170 @@
+from heapq import heappop, heappush
+
+import numpy as np
+
+
+def print_state(state, shape):
+    """Print state (a flat string of cell characters) as a 2-D array of the given (rows, cols) shape."""
+    if state:
+        rows, cols = shape
+        grid = np.array(list(state)).reshape(rows, cols)
+        print(grid)
+
+
+def find_boxes_and_goals(state, shape):
+    """Return (boxes, goals, boxes_on_goal) as lists of (row, col) cells of the flat state string.
+
+    "@" counts as a box, "X"/"%" as a goal without a box and "$" as a box sitting on a goal.
+    """
+    _, width = shape
+    found = {"@": [], "X": [], "$": []}
+    for pos, char in enumerate(state):
+        if (kind := "X" if char == "%" else char) in found:
+            found[kind].append(divmod(pos, width))
+    return found["@"], found["X"], found["$"]
+
+
+def get_state(matrix):  # flatten a character matrix into one string by decoding its raw buffer (C order)
+    return matrix.tobytes().decode("utf-8").replace("\x00", "")  # assumes ASCII "<U1" cells; strips their NUL padding
+
+
+def is_solved(state):  # solved once no box off a goal ("@") remains in the state
+    return not ("@" in state)
+
+
+def manhattan_sum(state, player_pos, shape):
+    """Heuristic cost: height * width per box off a goal, plus each such box's Manhattan distance to the
+    nearest goal without a box, plus the player's Manhattan distance to the nearest box off a goal."""
+    height, width = shape
+    p_row, p_col = player_pos
+    boxes, goals, _ = find_boxes_and_goals(state, shape)
+    total = len(boxes) * height * width
+    for b_row, b_col in boxes:
+        total += min(abs(b_row - g_row) + abs(b_col - g_col) for g_row, g_col in goals)
+    return total + (min(abs(b_row - p_row) + abs(b_col - p_col) for b_row, b_col in boxes) if boxes else 0)
+
+
+def dijkstra(state, shape, box_pos=None, player_pos=None):
+    """Return a (height, width) array of shortest step counts from box_pos (or, failing that, player_pos).
+
+    Only interior cells are entered; walls always block, boxes also block unless player_pos is given."""
+    height, width = shape
+    source = box_pos or player_pos
+    blocked = "+" if player_pos else "+@$"
+    dist = np.full((height, width), float("inf"))
+    dist[source] = 0
+    frontier = [(0, source)]
+    while frontier:
+        steps, (row, col) = heappop(frontier)
+        if steps > dist[row, col]:
+            continue
+        for n_row, n_col in ((row + 1, col), (row - 1, col), (row, col + 1), (row, col - 1)):
+            inside = 1 <= n_row < height - 1 and 1 <= n_col < width - 1
+            if inside and state[n_row * width + n_col] not in blocked and steps + 1 < dist[n_row, n_col]:
+                dist[n_row, n_col] = steps + 1
+                heappush(frontier, (steps + 1, (n_row, n_col)))
+    return dist
+
+
+def dijkstra_sum(state, player_pos, shape, distances):
+    """Heuristic cost from walking distances; every distance map computed is stored in distances by cell.
+
+    Cost = height * width per box off a goal + its distance to the nearest box-free goal + player-to-box distance.
+    """
+    height, width = shape
+    boxes, goals, boxes_on_goal = find_boxes_and_goals(state, shape)
+    for cell in boxes + boxes_on_goal:
+        distances[cell] = dijkstra(state, shape, cell)
+    distances[player_pos] = player_map = dijkstra(state, shape, player_pos=player_pos)
+    cost = len(boxes) * height * width + sum(min(distances[box][goal] for goal in goals) for box in boxes)
+    return cost + (min(player_map[box] for box in boxes) if boxes else 0)
+
+
+def is_deadlock(state, shape):  # heuristic test: True when the flat state string matches a known dead-end pattern
+    height, width = shape
+    if not state or len(state) != height * width:  # missing or wrongly sized state: report no deadlock
+        return False
+    boxes, _, _ = find_boxes_and_goals(state, shape)  # boxes that are not on a goal ("@"), as (row, col)
+    for bx, by in boxes:  # corner deadlock
+        box = bx * width + by  # flat index of the box; bx is its row, by its column
+        if (
+            (state[box - 1] == "+" and state[box - width] == "+")  # walls to the left and above
+            or (state[box + 1] == "+" and state[box + width] == "+")  # walls to the right and below
+            or (state[box + 1] == "+" and state[box - width] == "+")  # walls to the right and above
+            or (state[box - 1] == "+" and state[box + width] == "+")  # walls to the left and below
+        ):
+            return True
+    double_box_positions = [  # flat offsets of the four 2x2 squares that contain the box (offset 0)
+        (0, -1, -width, -width - 1),
+        (0, 1, -width, -width + 1),
+        (0, -1, width - 1, width),
+        (0, 1, width + 1, width),
+    ]
+    for bx, by in boxes:  # double box deadlock
+        box = bx * width + by
+        for pos in double_box_positions:
+            pos_set = set()
+            for dir in pos:
+                pos_set.add(state[box + dir])
+            if pos_set in ({"@", "+"}, {"@"}, {"@", "$"}, {"@", "$", "+"}):  # square holds only boxes and walls
+                return True
+    box = goal = 0
+    for i in range(width + 1, 2 * width - 1):  # too many boxes deadlock: first interior row (row 1)
+        if state[i] == "@":
+            box += 1
+        elif state[i] in "X%":
+            goal += 1
+    if box > goal:  # more boxes off a goal than box-free goals along that line
+        return True
+    box = goal = 0
+    for i in range(width * (height - 2) + 1, width * (height - 2) + width - 1):  # last interior row (height - 2)
+        if state[i] == "@":
+            box += 1
+        elif state[i] in "X%":
+            goal += 1
+    if box > goal:
+        return True
+    box = goal = 0
+    for i in range(width + 1, width * (height - 1) + 1, width):  # first interior column (column 1)
+        if state[i] == "@":
+            box += 1
+        elif state[i] in "X%":
+            goal += 1
+    if box > goal:
+        return True
+    box = goal = 0
+    for i in range(2 * width - 2, width * height - 2, width):  # last interior column (width - 2)
+        if state[i] == "@":
+            box += 1
+        elif state[i] in "X%":
+            goal += 1
+    if box > goal:
+        return True
+    return False  # none of the patterns matched
+
+
+def can_move(state, shape, player_pos, move):
+    """Apply move=(d_row, d_col) to the flat state string and return (new_state, move_cost).
+
+    (None, 0) means a wall or an unpushable box is in the way; a step costs 3, a push 2 (0 onto a goal)."""
+    _, width = shape
+    row, col = player_pos
+    here = row * width + col
+    ahead = (row + move[0]) * width + (col + move[1])
+    beyond = (row + move[0] * 2) * width + (col + move[1] * 2)
+    cells, cost = list(state), 0
+    if state[ahead] == "+":
+        return None, cost
+    if state[ahead] in "@$":
+        if state[beyond] in "+@$":
+            return None, cost
+        if state[beyond] in "-X":
+            cells[beyond] = "@" if cells[beyond] == "-" else "$"
+            cells[ahead] = "*" if cells[ahead] == "@" else "%"
+            cells[here] = "-" if cells[here] == "*" else "X"
+            cost = 0 if cells[beyond] == "$" else 2
+    elif state[ahead] in "-X":
+        cells[here] = "-" if cells[here] == "*" else "X"
+        cells[ahead] = "*" if cells[ahead] == "-" else "%"
+        cost = 3
+    return "".join(cells), cost
diff --git a/reasoning_gym/games/sokoban.py b/reasoning_gym/games/sokoban.py
new file mode 100644
index 00000000..f96d87ea
--- /dev/null
+++ b/reasoning_gym/games/sokoban.py
@@ -0,0 +1,117 @@
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Dict, Optional
+
+import numpy as np
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class SokobanConfig:
+    """Configuration for sokoban puzzle generation"""
+
+    seed: Optional[int] = None
+    size: int = 500
+    min_w: int = 6  # Minimum width of the puzzle.
+    min_h: int = 6  # Minimum height of the puzzle.
+    max_w: int = 10  # Maximum width of the puzzle.
+    max_h: int = 10  # Maximum height of the puzzle.
+    min_boxes: int = 6  # Minimum number of boxes.
+    max_boxes: int = 10  # Maximum number of boxes.
+
+    def validate(self):
+        """Assert that every min_* setting is less than or equal to its max_* counterpart."""
+        for suffix in ("w", "h", "boxes"):
+            lower, upper = getattr(self, f"min_{suffix}"), getattr(self, f"max_{suffix}")
+            assert lower <= upper, f"min_{suffix} must be lte max_{suffix}"
+
+
+class SokobanDataset(ProceduralDataset):
+    """Generates Sokoban games with configurable parameters"""
+
+    def __init__(self, config: SokobanConfig):
+        # NOTE(review): this template is not formatted anywhere in this class -- confirm whether it is needed.
+        self._prompt_templates = [
+            "What will this Sokoban board look like after {simulation_steps} steps of simulation?\n\n{board}"
+        ]
+
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+        # lazy loading of sokoban imports
+        from .contrib.sokoban.src.game import Game
+        from .contrib.sokoban.src.generator import generate
+        from .contrib.sokoban.src.utils import is_solved
+
+        self._Game = Game
+        self._generate = generate
+        self._is_solved = is_solved
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Sokoban task
+
+        Returns:
+            dict with keys:
+                - question: str, the task description
+                - answer: str, a solution string
+                - metadata: dict with the puzzle string and its difficulty
+        """
+        # NOTE(review): only the rng is passed on, so the size/box settings of the config are not applied.
+        rng = Random(self.seed + idx)
+        gamestr, solution, difficulty = self._generate(rng=rng)
+
+        return {
+            "question": """You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
+"""
+            + gamestr,
+            "answer": solution,
+            "metadata": {"gamestr": gamestr, "difficulty": difficulty},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+        """Determine if the solution provided solves the Sokoban task.
+
+        The moves in answer are replayed on the puzzle stored in entry["metadata"]["gamestr"].
+
+        Args:
+            answer (Optional[str]): The user's answer, a string of moves such as "LDURRUDL".
+            entry (Dict[str, Any]): The original dataset entry containing the puzzle.
+
+        Returns:
+            float: 1.0 if the puzzle ends up solved, 0.1 if it does not, 0.01 if replaying the
+            answer raised an error and 0.0 if there is no answer.
+        """
+        if answer is None:
+            return 0.0
+
+        try:
+            grid_list = [list(line) for line in entry["metadata"]["gamestr"].replace(" ", "").strip().split("\n")]
+            matrix = np.array(grid_list)
+
+            game = self._Game()
+            game.load_puzzle_matrix(matrix)
+
+            for move in answer:
+                game.player.update(key=move)
+
+            if self._is_solved(game.get_curr_state()):
+                return 1.0
+        except Exception:  # best-effort scoring: any error while replaying earns the minimal score
+            return 0.01
+
+        return 0.1
+
+
+register_dataset("sokoban", SokobanDataset, SokobanConfig)  # presumably exposes the dataset as "sokoban" -- see factory
diff --git a/tests/test_sokoban.py b/tests/test_sokoban.py
new file mode 100644
index 00000000..c4d1e2b8
--- /dev/null
+++ b/tests/test_sokoban.py
@@ -0,0 +1,50 @@
+import pytest
+
+from reasoning_gym.games.sokoban import SokobanConfig, SokobanDataset
+
+
+def test_sokoban():
+    """Test basic properties and solution of generated items"""
+
+    def check_items(config, check_wrong_answer=False):
+        """Build the dataset for config and verify the structure and scoring of every item."""
+        dataset = SokobanDataset(config)
+        for item in dataset:
+            assert isinstance(item, dict)
+            assert all(key in item for key in ("question", "answer", "metadata"))
+            # Test the scoring
+            assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+            if check_wrong_answer:
+                assert dataset.score_answer(answer="RU", entry=item) == 0.1
+            assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+    # Easy
+    check_items(SokobanConfig(seed=42, size=20), check_wrong_answer=True)
+
+    # Medium
+    check_items(
+        SokobanConfig(
+            seed=42,
+            min_h=40,
+            max_h=50,
+            min_w=40,
+            max_w=50,
+            min_boxes=20,
+            max_boxes=30,
+            size=3,
+        )
+    )
+
+    # Hard
+    check_items(
+        SokobanConfig(
+            seed=42,
+            min_h=400,
+            max_h=500,
+            min_w=400,
+            max_w=500,
+            min_boxes=50,
+            max_boxes=50,
+            size=1,
+        )
+    )