Sokoban without pygame (#77)

* add minified version of https://github.com/xbandrade/sokoban-solver-generator --------- Co-authored-by: Rich Jones <miserlou@gmail.com>
2026-04-28 17:29:39 +00:00 · 2025-02-07 11:57:53 +01:00 · 2025-02-07 11:57:53 +01:00 · 1b49713116
commit 1b49713116
parent a8f9eafd43
25 changed files with 1229 additions and 87 deletions
--- a/reasoning_gym/games/init.py
+++ b/reasoning_gym/games/init.py
@ -11,6 +11,7 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
 from .maze import MazeConfig, MazeDataset
 from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
 from .n_queens import NQueensDataset
+from .sokoban import SokobanConfig, SokobanDataset
 from .sudoku import SudokuConfig, SudokuDataset
 from .tower_of_hanoi import HanoiConfig, HanoiDataset

@ -21,6 +22,8 @@ __all__ = [
    "MiniSudokuDataset",
    "SudokuConfig",
    "SudokuDataset",
+    "SokobanConfig",
+    "SokobanDataset",
    "MazeConfig",
    "MazeDataset",
    "GameOfLifeConfig",
--- a/reasoning_gym/games/contrib/init.py
+++ b/reasoning_gym/games/contrib/init.py
--- a/reasoning_gym/games/contrib/sokoban/LICENSE
+++ b/reasoning_gym/games/contrib/sokoban/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Bruno Andrade
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/reasoning_gym/games/contrib/sokoban/README.md
+++ b/reasoning_gym/games/contrib/sokoban/README.md
@ -0,0 +1,52 @@
+# 📦 Sokoban Solver and Generator
+
+This folder contains a minified version of Bruno Andrade's Sokoban game; all pygame dependencies have been stripped.
+
+The original version can be found here: [xbandrade/sokoban-solver-generator](https://github.com/xbandrade/sokoban-solver-generator)
+
+
+This is a Sokoban puzzle generator and solver that uses BFS, A* and Dijkstra search algorithms.
+
+`Sokoban` is a puzzle game in which the player pushes boxes around in a warehouse, trying to get every box to a goal.
+
+
+### ❕Sokoban Puzzle
+The puzzle states are stored in a matrix, and each element of the puzzle is represented by a single character in the matrix.
+```
+ + + + + + +
+ * - @ - X +
+ + - @ - + +
+ X - - - $ +
+ + + + + + +
+```
+`*` - The player <br>
+`%` - The player on a goal <br>
+`@` - A box <br>
+`X` - A goal <br>
+`$` - A box on a goal <br>
+`+` - A wall <br>
+`-` - An empty position <br>
+
+In the original pygame version, a box on a goal has its color changed to green in the game window.
+
+
+### ❕Sokoban Generator
+
+The generator first creates a puzzle with a random board size, then randomly places the player and the boxes (each on a goal) on the board.
+During generation the player can only pull boxes away from their positions, breaking through every wall in the way, so the puzzle is guaranteed to have a valid solution.
+
+
+### ❕ Sokoban Solver
+
+The algorithms used to implement the Sokoban puzzle solvers were `Breadth-First Search(BFS)` and `A*`.
+
+The `BFS` solver uses a queue to store the next states of the puzzle it needs to visit. A visited state is stored in a hashset, and BFS won't try to visit the same state twice.
+
+The `A*` algorithm is similar to the BFS algorithm, but it uses a priority queue instead of a queue, and it prioritizes moves that are more likely to solve the problem.
+It does so by setting costs to the puzzle state and the player's movements, punishing the player with high costs for a bad move and rewarding the player with lower costs for a good move.
+The state costs are defined by heuristic functions, and this solver was implemented with two different heuristics: the `Manhattan Distance` function and `Dijkstra` distance function.
+
+All three implementations check for possible deadlocks (states that are impossible to solve) before adding the new state to the queue.
+
+
+More about Sokoban: [Wikipedia Article](https://en.wikipedia.org/wiki/Sokoban)
--- a/reasoning_gym/games/contrib/sokoban/init.py
+++ b/reasoning_gym/games/contrib/sokoban/init.py
--- a/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
@ -0,0 +1,10 @@
+ + + + + + +
+ - * - - - +
+ - - - $ - +
+ X - - @ - +
+ - - - - - +
+ $ - + - - +
+ + - - - - +
+ X @ - $ - +
+ + - - - - +
+ + + + + + +
--- a/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
@ -0,0 +1,5 @@
+ + + + + + +
+ * - @ - X +
+ + - @ - + +
+ X - - - - +
+ + + + + + +
--- a/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
@ -0,0 +1,6 @@
+- - + + + + + +
+- + + - - - * +
+ + - - - + X +
+ X - @ - @ @ +
+ X X @ - - - +
+ + + + + + + +
--- a/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
@ -0,0 +1,7 @@
+- + + + + + + - - -
+- + X - - X + - - -
+ + - @ @ + + - - -
+ - - - - + + - - -
+ - @ - - * + + + +
+ + - - - - - - X +
+- + + + + + + + + +
--- a/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
@ -0,0 +1,7 @@
+- + + + + + + - -
+ + X - @ - + + +
+ - - - - - - - +
+ - @ + + X - @ +
+ - - - @ - + - +
+ + + * - X - X +
+- - + + + + + + +
--- a/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
@ -0,0 +1,7 @@
+- + + + + + + + -
+ + - - + - - + +
+ - @ - - - @ - +
+ - - X * X - - +
+ + @ + + - - + +
+ - - X - - - + -
+ + + + + + + + -
--- a/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
@ -0,0 +1,9 @@
+- - - + + + + + + + +
+- - - + - - - - - - +
+- - + + - - - - @ - +
+- + + - - + + - + + +
+ + - - + - - X - - +
+ - - + X @ @ - - + +
+ * + X - - - - + + -
+ + - - - - - + + - -
+ + + + + + + + - - -
--- a/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
@ -0,0 +1,6 @@
+ + + + + + + +
+ - - @ - X * +
+ - @ - - + X +
+ X X @ - @ @ +
+ X X @ - - - +
+ + + + + + + +
--- a/reasoning_gym/games/contrib/sokoban/src/init.py
+++ b/reasoning_gym/games/contrib/sokoban/src/init.py
--- a/reasoning_gym/games/contrib/sokoban/src/astar.py
+++ b/reasoning_gym/games/contrib/sokoban/src/astar.py
@ -0,0 +1,81 @@
+from collections import defaultdict
+from heapq import heappop, heappush
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.utils import (
+    can_move,
+    dijkstra_sum,
+    get_state,
+    is_deadlock,
+    is_solved,
+    manhattan_sum,
+)
+
+
+def astar(matrix, player_pos, debug=False, heuristic="manhattan"):
+    """Search for a move sequence that leaves no unplaced box on the board.
+
+    States are expanded from a priority queue whose entries are ordered by
+    the tuple ``(move cost + parent heuristic, own heuristic, ...)``. The
+    search returns as soon as a solved state is generated, not when it is
+    popped from the queue.
+
+    Args:
+        matrix: 2-D numpy array of single-character cells.
+        player_pos: ``(row, col)`` of the player inside ``matrix``.
+        debug: When true, print every generated path.
+        heuristic: ``"manhattan"`` selects ``manhattan_sum``; any other value
+            selects ``dijkstra_sum``.
+
+    Returns:
+        ``(path, depth)`` where ``path`` is a string over ``U``/``D``/``L``/``R``
+        and ``depth`` is its length, or ``(None, -1)`` once the queue is empty.
+    """
+    heur = "[A*]" if heuristic == "manhattan" else "[Dijkstra]"
+    shape = matrix.shape
+    initial_state = get_state(matrix)
+    if heuristic == "manhattan":
+        curr_cost = manhattan_sum(initial_state, player_pos, shape)
+    else:
+        # Cache handed to dijkstra_sum on every call.
+        distances = defaultdict(list)
+        curr_cost = dijkstra_sum(initial_state, player_pos, shape, distances)
+    # None is pre-seeded so a None state coming back from can_move is skipped
+    # by the same membership test as an already visited state.
+    seen = {None}
+    heap = []
+    heappush(heap, (0, curr_cost, initial_state, player_pos, 0, ""))
+    moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
+    direction = {
+        (1, 0): "D",
+        (-1, 0): "U",
+        (0, -1): "L",
+        (0, 1): "R",
+    }
+    while heap:
+        _, curr_cost, state, pos, depth, path = heappop(heap)
+        seen.add(state)
+        for move in moves:
+            new_state, move_cost = can_move(state, shape, pos, move)
+            # Membership is tested first so the deadlock check is not run for
+            # states that would be discarded anyway.
+            if new_state in seen or is_deadlock(new_state, shape):
+                continue
+            new_pos = pos[0] + move[0], pos[1] + move[1]
+            if heuristic == "manhattan":
+                new_cost = manhattan_sum(new_state, new_pos, shape)
+            else:
+                new_cost = dijkstra_sum(new_state, new_pos, shape, distances)
+            if new_cost == float("inf"):
+                continue
+            new_path = path + direction[move]
+            heappush(
+                heap,
+                (
+                    move_cost + curr_cost,
+                    new_cost,
+                    new_state,
+                    new_pos,
+                    depth + 1,
+                    new_path,
+                ),
+            )
+            if is_solved(new_state):
+                if debug:
+                    print(f"{heur} Solution Found!\n{new_path}")
+                return (new_path, depth + 1)
+            if debug:
+                print(f"{heur} Solution Depth: {depth + 1}\n{new_path}")
+    print(f"{heur} Solution not found!\n")
+    if debug:
+        print(f"{heur} Solution Not Found!\nDepth {depth + 1}")
+
+    # The loop only exits once the heap is empty, so the depth is always -1.
+    return (None, -1)
+
+
+def solve_astar(puzzle, visualizer=False, heuristic="manhattan"):
+    """Find the player cell (``*`` or ``%``) in ``puzzle`` and run ``astar`` from it.
+
+    ``visualizer`` is forwarded to ``astar`` as its ``debug`` flag.
+    """
+    player_mask = (puzzle == "*") | (puzzle == "%")
+    hits = np.where(player_mask)
+    # First match in row-major order.
+    start = (hits[0][0], hits[1][0])
+    return astar(puzzle, start, debug=visualizer, heuristic=heuristic)
--- a/reasoning_gym/games/contrib/sokoban/src/bfs.py
+++ b/reasoning_gym/games/contrib/sokoban/src/bfs.py
@ -0,0 +1,66 @@
+import time
+from collections import deque
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.utils import can_move, get_state, is_deadlock, is_solved, print_state
+
+
+def bfs(matrix, player_pos, debug=False):
+    """Breadth-first search for a move sequence that leaves no unplaced box.
+
+    The search returns as soon as a solved state is generated.
+
+    Args:
+        matrix: 2-D numpy array of single-character cells.
+        player_pos: ``(row, col)`` of the player inside ``matrix``.
+        debug: When true, print every generated path.
+
+    Returns:
+        ``(path, depth)`` where ``path`` is a string over ``U``/``D``/``L``/``R``
+        and ``depth`` is its length, or ``(None, -1)`` once the queue is empty.
+    """
+    print("Breadth-First Search")
+    initial_state = get_state(matrix)
+    shape = matrix.shape
+    print_state(initial_state, shape)
+    # None is pre-seeded so a None state coming back from can_move is skipped
+    # by the same membership test as an already visited state.
+    seen = {None}
+    q = deque([(initial_state, player_pos, 0, "")])
+    moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
+    direction = {
+        (1, 0): "D",
+        (-1, 0): "U",
+        (0, -1): "L",
+        (0, 1): "R",
+    }
+    while q:
+        state, pos, depth, path = q.popleft()
+        seen.add(state)
+        for move in moves:
+            new_state, _ = can_move(state, shape, pos, move)
+            # Membership is tested first so the deadlock check is not run for
+            # states that would be discarded anyway.
+            if new_state in seen or is_deadlock(new_state, shape):
+                continue
+            new_path = path + direction[move]
+            q.append(
+                (
+                    new_state,
+                    (pos[0] + move[0], pos[1] + move[1]),
+                    depth + 1,
+                    new_path,
+                )
+            )
+            if is_solved(new_state):
+                print(f"[BFS] Solution found!\n\n{new_path}\nDepth {depth + 1}\n")
+                if debug:
+                    print(f"[BFS] Solution Found!\n{new_path}")
+                return (new_path, depth + 1)
+            if debug:
+                print(f"[BFS] Solution Depth: {depth + 1}\n{new_path}")
+    print("[BFS] Solution not found!\n")
+    if debug:
+        print(f"[BFS] Solution Not Found!\nDepth {depth + 1}")
+    # The loop only exits once the queue is empty, so the depth is always -1.
+    return (None, -1)
+
+
+def solve_bfs(puzzle, visualizer=False):
+    """Find the player cell (``*`` or ``%``) in ``puzzle`` and run ``bfs`` from it.
+
+    ``visualizer`` is forwarded to ``bfs`` as its ``debug`` flag.
+    """
+    player_mask = (puzzle == "*") | (puzzle == "%")
+    hits = np.where(player_mask)
+    # First match in row-major order.
+    start = (hits[0][0], hits[1][0])
+    return bfs(puzzle, start, debug=visualizer)
+
+
+# Manual timing run: solve level 7 with BFS and report the wall-clock time.
+# The level path is relative, so it resolves against the current working directory.
+if __name__ == "__main__":
+    start = time.time()
+    root = solve_bfs(np.loadtxt("levels/lvl7.dat", dtype="<U1"))
+    print(f"Runtime: {time.time() - start} seconds")
--- a/reasoning_gym/games/contrib/sokoban/src/box.py
+++ b/reasoning_gym/games/contrib/sokoban/src/box.py
@ -0,0 +1,34 @@
+class Box:
+    """A pushable box that tracks its own grid position and owning game."""
+
+    def __init__(self, x, y, game=None):
+        self.game = game
+        self.x = x
+        self.y = y
+
+    def can_move(self, move):
+        """Try to push the box by ``move`` given as ``(dx, dy)``.
+
+        The push is refused when the destination cell already holds a ``Box``
+        (or a subclass). On success the box coordinates and both cells are
+        updated and ``True`` is returned; otherwise ``False``.
+        """
+        dx, dy = move[0], move[1]
+        origin = (self.y, self.x)
+        destination = (self.y + dy, self.x + dx)
+        dest_cell = self.game.puzzle[destination]
+        if isinstance(dest_cell.obj, Box):
+            return False
+        origin_cell = self.game.puzzle[origin]
+        self.y, self.x = destination
+        # A cell with a ground object is a goal, so it shows the goal glyphs.
+        origin_cell.char = "X" if origin_cell.ground else "-"
+        origin_cell.obj = None
+        dest_cell.char = "$" if dest_cell.ground else "@"
+        dest_cell.obj = self
+        return True
+
+    def reverse_move(self, move):
+        """Relocate the box by ``move`` given as ``(dy, dx)`` without any blocking check."""
+        grid = self.game.puzzle
+        old_pos = (self.y, self.x)
+        new_pos = (self.y + move[0], self.x + move[1])
+        grid[old_pos].obj = None
+        grid[new_pos].obj = self
+        self.y, self.x = new_pos
+        grid[old_pos].char = "X" if grid[old_pos].ground else "-"
+        grid[new_pos].char = "$" if grid[new_pos].ground else "@"
+
+
+
+
+class Obstacle(Box):
+    """A wall cell; it is a ``Box`` subclass created without an owning game."""
+
+    def __init__(self, x, y):
+        super().__init__(x, y)
--- a/reasoning_gym/games/contrib/sokoban/src/game.py
+++ b/reasoning_gym/games/contrib/sokoban/src/game.py
@ -0,0 +1,173 @@
+from random import Random
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
+from reasoning_gym.games.contrib.sokoban.src.player import Player, ReversePlayer
+from reasoning_gym.games.contrib.sokoban.src.utils import get_state
+
+
+class Floor:
+    """Ground tile identified by its grid coordinates."""
+
+    def __init__(self, x, y):
+        self.x, self.y = x, y
+
+
+class Goal(Floor):
+    """Ground tile that marks a goal position."""
+
+    def __init__(self, x, y):
+        super().__init__(x, y)
+
+
+class PuzzleElement:
+    """One grid cell: its display character, the object on it and its ground tile."""
+
+    def __init__(self, char: str, obj=None, ground=None):
+        self.char, self.ground, self.obj = char, ground, obj
+
+    def __str__(self) -> str:
+        """Return the cell's display character."""
+        return self.char
+
+
+class Game:
+    """Forward-play Sokoban board stored inside a fixed-size padded grid."""
+
+    def __init__(self, width=19, height=10, level=None, path=None):
+        self.level = level
+        self.width = width
+        self.height = height
+        self.puzzle = np.empty((height, width), dtype=PuzzleElement)
+
+        self.player = None
+        self.puzzle_size = None
+        self.pad_x = 0
+        self.pad_y = 0
+        self.path = path or f"levels/lvl{level}.dat"
+
+        # Only the base class loads from disk here; the exact type test keeps
+        # subclasses from triggering the file-based loader.
+        if path and type(self) == Game:
+            self.load_puzzle()
+
+    def get_matrix(self):
+        """Return the unpadded puzzle as a ``<U1`` array of cell characters."""
+        n_rows, n_cols = self.puzzle_size[0], self.puzzle_size[1]
+        window = self.puzzle[
+            self.pad_y : self.pad_y + n_rows,
+            self.pad_x : self.pad_x + n_cols,
+        ]
+        chars = np.empty((self.puzzle_size), dtype="<U1")
+        for (r, c), elem in np.ndenumerate(window):
+            chars[r, c] = elem.char
+        return chars
+
+    def get_curr_state(self):
+        """Return the current board flattened into a state string."""
+        return get_state(self.get_matrix())
+
+    def print_puzzle(self):
+        """Print the whole padded grid, using a blank for unset cells."""
+        for h in range(self.height):
+            for w in range(self.width):
+                cell = self.puzzle[h, w]
+                print(cell.char if cell else " ", end=" ")
+            print(" ")
+
+    def is_level_complete(self):
+        """Return True when no cell shows an unplaced box (``@``)."""
+        return not any(
+            self.puzzle[h, w] and self.puzzle[h, w].char == "@"
+            for h in range(self.height)
+            for w in range(self.width)
+        )
+
+    def load_puzzle(self):
+        """Load the puzzle from ``self.path``; errors are printed, not raised."""
+        try:
+            with open(self.path) as f:
+                # One whitespace-separated token per cell.
+                rows = [line.strip().split() for line in f]
+                self._process_puzzle_data(rows)
+        except (OSError, ValueError) as e:
+            print(f"{e}")
+
+    def load_puzzle_matrix(self, matrix):
+        """Load the puzzle from a nested list or numpy array; ValueError is printed."""
+        try:
+            rows = matrix.tolist() if isinstance(matrix, np.ndarray) else matrix
+            self._process_puzzle_data(rows)
+        except ValueError as e:
+            print(f"{e}")
+
+    def _process_puzzle_data(self, data):
+        """Place ``data`` on the padded grid and create the matching game objects.
+
+        Raises:
+            ValueError: if a cell holds a character outside the known set.
+        """
+        n_rows = len(data)
+        n_cols = len(data[0]) if n_rows > 0 else 0
+        self.puzzle_size = (n_rows, n_cols)
+        pad_x = (self.width - n_cols - 2) // 2  # -2 matches original file-based logic
+        pad_y = (self.height - n_rows) // 2
+        self.pad_x, self.pad_y = pad_x, pad_y
+
+        for i, row in enumerate(data):
+            for j, c in enumerate(row):
+                gx, gy = j + pad_x, i + pad_y
+                elem = PuzzleElement(c)
+                self.puzzle[gy, gx] = elem
+
+                # Object standing on the cell.
+                if c == "+":
+                    elem.obj = Obstacle(x=gx, y=gy)
+                elif c in ("@", "$"):
+                    elem.obj = Box(x=gx, y=gy, game=self)
+                elif c in ("*", "%"):
+                    elem.obj = Player(x=gx, y=gy, game=self)
+                    self.player = elem.obj
+                elif c != "X" and c not in " -":
+                    raise ValueError(f"Invalid character in puzzle: {c}")
+
+                # Ground under the cell: goals, with or without something on top.
+                if c in ("X", "$", "%"):
+                    elem.ground = Goal(x=gx, y=gy)
+
+
+class ReverseGame(Game):
+    """Board variant whose player is a ``ReversePlayer`` driven by ``rng``."""
+
+    def __init__(self, rng: Random, width=19, height=10, level=None):
+        super().__init__(width, height, level)
+        self.rng = rng
+        self.pad_x = self.pad_y = 0
+
+    def load_puzzle(self, puzzle):
+        """Place ``puzzle`` on the padded grid and create the matching game objects."""
+        n_rows = len(puzzle)
+        self.puzzle_size = (n_rows, len(puzzle[0]) if n_rows > 0 else 0)
+        pad_x = (self.width - len(puzzle[0]) - 2) // 2
+        pad_y = (self.height - n_rows) // 2
+        self.pad_x, self.pad_y = pad_x, pad_y
+        for i, row in enumerate(puzzle):
+            for j, c in enumerate(row):
+                gx, gy = j + pad_x, i + pad_y
+                elem = PuzzleElement(c)
+                self.puzzle[gy, gx] = elem
+                # Object standing on the cell.
+                if c == "+":
+                    elem.obj = Obstacle(x=gx, y=gy)
+                elif c in ("@", "$"):
+                    elem.obj = Box(x=gx, y=gy, game=self)
+                elif c in ("*", "%"):
+                    elem.obj = ReversePlayer(rng=self.rng, x=gx, y=gy, game=self)
+                    self.player = elem.obj
+                # Ground under the cell: goals, with or without something on top.
+                if c in ("X", "$", "%"):
+                    elem.ground = Goal(x=gx, y=gy)
--- a/reasoning_gym/games/contrib/sokoban/src/generator.py
+++ b/reasoning_gym/games/contrib/sokoban/src/generator.py
@ -0,0 +1,107 @@
+from random import Random
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.astar import solve_astar
+from reasoning_gym.games.contrib.sokoban.src.game import Game, ReverseGame
+
+
+def num_boxes(puzzle_area, min_boxes, max_boxes, min_w, min_h, max_w, max_h):
+    """Linearly interpolate the number of boxes for a board of ``puzzle_area`` cells.
+
+    The line passes through ``(min_w * min_h, min_boxes)`` and
+    ``(max_w * max_h, max_boxes)``; the result is truncated with ``int``.
+
+    When the smallest and largest board areas coincide there is nothing to
+    interpolate, so ``min_boxes`` is returned instead of dividing by zero.
+    """
+    area_span = max_w * max_h - min_w * min_h
+    if area_span == 0:
+        return int(min_boxes)
+    m = (max_boxes - min_boxes) / area_span
+    b = min_boxes - m * min_w * min_h
+    return int(m * puzzle_area + b)
+
+
+def random_valid(rng: Random, width: int = 10, height: int = 10):
+    """Draw a random interior coordinate pair, excluding the outermost index on each axis.
+
+    The first value is drawn from ``[1, width - 2]`` and the second from
+    ``[1, height - 2]``, in that order.
+    """
+    first = rng.randrange(1, width - 1)
+    second = rng.randrange(1, height - 1)
+    return first, second
+
+
+def generate(
+    rng: Random,
+    debug: bool = False,
+    path: str = None,
+    min_w: int = 6,
+    min_h: int = 6,
+    max_w: int = 15,
+    max_h: int = 10,
+    min_boxes: int = 4,
+    max_boxes: int = 10,
+) -> tuple[str, str, dict]:
+    """
+    Generates a level with the given configuration parameters.
+
+    A board full of walls is seeded with the player and with boxes on goals,
+    then a reverse player walks it at random. The attempt is kept only when at
+    least half of the boxes ended up off their goals and the A* solver finds a
+    solution; otherwise generation starts over.
+
+    Parameters:
+        rng: Random number generator for reproducibility.
+        debug: Whether to print the generated puzzle and replay its solution.
+        path: Currently unused; kept for backward compatibility.
+        min_w: Minimum width of the puzzle.
+        min_h: Minimum height of the puzzle.
+        max_w: Maximum width of the puzzle.
+        max_h: Maximum height of the puzzle.
+        min_boxes: Minimum number of boxes.
+        max_boxes: Maximum number of boxes.
+    Returns:
+        puzzle_string, solution, difficulty, where difficulty is a dict with
+        the keys "size" (height, width) and "num_steps" (solution length).
+    """
+    while True:
+        width = rng.randint(min_w, max_w)
+        height = rng.randint(min_h, max_h)
+        puzzle = np.full((height, width), "+", dtype="<U1")
+        boxes = num_boxes(width * height, min_boxes, max_boxes, min_w, min_h, max_w, max_h)
+        # random_valid yields (x, y) here, hence the swapped index below.
+        player_pos = random_valid(rng, width, height)
+        puzzle_size = (height, width)
+        puzzle[player_pos[1], player_pos[0]] = "*"
+        boxes_created = 0
+        while boxes_created < boxes:
+            # Arguments are swapped so the result is directly a (row, col) index.
+            box_pos = random_valid(rng, height, width)
+            if puzzle[box_pos] == "+":
+                puzzle[box_pos] = "$"
+                boxes_created += 1
+        reverse_game = ReverseGame(rng=rng, level=0)
+        reverse_game.load_puzzle(puzzle)
+        player = reverse_game.player
+        counter = round(height * width * rng.uniform(1.8, 3.6))
+        while counter > 0:
+            reverse_game.player.update(puzzle_size)
+            # Stop early once the same board state has been visited 20 times.
+            if player.states[player.curr_state] >= 20:
+                break
+            counter -= 1
+        slice_x = slice(reverse_game.pad_x, reverse_game.pad_x + width)
+        slice_y = slice(reverse_game.pad_y, reverse_game.pad_y + height)
+        matrix = reverse_game.puzzle[slice_y, slice_x]
+        if debug:
+            player.print_puzzle(matrix)
+
+        out_of_place_boxes = np.sum([str(x) == "@" for x in matrix.flatten()])
+        if out_of_place_boxes < boxes // 2:
+            if debug:
+                print(f"Not enough boxes out of place, retrying generation... [{out_of_place_boxes}/{boxes}]")
+            continue
+
+        puzzle_str = player.puzzle_to_string(matrix)
+
+        grid_list = [list(line) for line in puzzle_str.replace(" ", "").strip().split("\n")]
+        grid_array = np.array(grid_list)
+        solution, _ = solve_astar(grid_array)
+
+        # The solver reports failure as None; retry instead of crashing on
+        # len(None) / enumerate(None) below.
+        if solution is None:
+            if debug:
+                print("Solver found no solution, retrying generation...")
+            continue
+
+        if debug:
+            print(f"solution={solution}")
+            game = Game()
+            game.load_puzzle_matrix(grid_array)
+
+            for step, move in enumerate(solution):
+                print(f"move #{step}: {move}")
+                game.player.update(key=move)
+                game.print_puzzle()
+
+        difficulty = {"size": puzzle_size, "num_steps": len(solution)}
+        return puzzle_str, solution, difficulty
+
+
+# Manual smoke run: generate one puzzle with an unseeded RNG and print the debug trace.
+if __name__ == "__main__":
+    generate(rng=Random(), debug=True)
--- a/reasoning_gym/games/contrib/sokoban/src/player.py
+++ b/reasoning_gym/games/contrib/sokoban/src/player.py
@ -0,0 +1,118 @@
+from collections import defaultdict
+from random import Random
+
+from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
+
+
+class Player:
+    """A player that can only push boxes"""
+
+    def __init__(self, x, y, game):
+        self.game = game
+        self.x = x
+        self.y = y
+
+    def update(self, key: str = None) -> int:
+        """Apply one move given as ``"U"``, ``"D"``, ``"L"`` or ``"R"``.
+
+        Returns 1 when the player moved (pushing a box if needed) and 0 when
+        the key is unknown or the move is blocked.
+        """
+        deltas = {"R": (1, 0), "L": (-1, 0), "U": (0, -1), "D": (0, 1)}
+        move = deltas.get(key) if key else None
+        if not move:
+            return 0
+
+        curr = self.y, self.x
+        # Deltas are (dx, dy) while the grid is indexed (row, col).
+        target = self.y + move[1], self.x + move[0]
+        target_elem = self.game.puzzle[target]
+        if target_elem and target_elem.obj and isinstance(target_elem.obj, Obstacle):
+            return 0
+
+        occupant = target_elem.obj
+        if isinstance(occupant, Box) and not occupant.can_move(move):
+            return 0
+
+        curr_elem = self.game.puzzle[curr]
+        self.y, self.x = target
+        curr_elem.char = "X" if curr_elem.ground else "-"
+        curr_elem.obj = None
+        target_elem.char = "%" if target_elem.ground else "*"
+        target_elem.obj = self
+        return 1
+
+
+class ReversePlayer(Player):
+    """A player that can only pull boxes"""
+
+    def __init__(self, rng: Random, x, y, game=None, puzzle=None):
+        super().__init__(x=x, y=y, game=game)
+        self.rng = rng
+        self.game = game
+        self.puzzle = puzzle
+        # String snapshot of the board taken at the start of the latest update().
+        self.curr_state = ""
+        # Number of update() calls that started from each board snapshot.
+        self.states = defaultdict(int)
+        # Move that is given a reduced weight on the next random draw.
+        self.prev_move = (0, 0)
+
+    def print_puzzle(self, matrix=None):
+        """Print the text rendering produced by puzzle_to_string()."""
+        print(self.puzzle_to_string(matrix=matrix))
+
+    def puzzle_to_string(self, matrix=None):
+        """Render ``matrix`` (default: the whole game grid) as space-separated rows.
+
+        Unset cells are rendered as ``F``. Each row ends with an extra space
+        and a newline, and the text ends with one more newline.
+        """
+        matrix = matrix if matrix is not None else self.game.puzzle
+        height, width = len(matrix), len(matrix[0])
+        ss = ""
+        for h in range(height):
+            for w in range(width):
+                if matrix[h, w]:
+                    ss = ss + str(matrix[h, w]) + " "
+                else:
+                    ss = ss + "F" + " "
+            ss = ss + " " + "\n"
+        ss = ss + "\n"
+        return ss
+
+    def get_state(self):
+        """Concatenate the characters of every set cell of the game grid, row by row."""
+        state = ""
+        height, width = len(self.game.puzzle), len(self.game.puzzle[0])
+        for row in range(height):
+            for col in range(width):
+                if self.game.puzzle[row, col]:
+                    state += str(self.game.puzzle[row, col])
+        return state
+
+    def update(self, puzzle_size):
+        """Take one random step, carving through walls and pulling a box if one is behind.
+
+        ``puzzle_size`` is ``(height, width)`` of the unpadded puzzle. The step
+        is abandoned when the target cell lies on the puzzle border or holds a
+        box.
+        """
+        height, width = puzzle_size
+        # Character a cell takes after the player leaves it (* and %), after
+        # the player enters it (+, - and X), or after a box is pulled off it (@ and $).
+        quick_chars = {
+            "*": "-",
+            "%": "X",
+            "+": "*",
+            "-": "*",
+            "X": "%",
+            "@": "-",
+            "$": "X",
+        }
+        # Moves are (d_row, d_col).
+        moves_tuples = [(1, 0), (-1, 0), (0, -1), (0, 1)]
+        # Draw a single move; prev_move gets a tenth of the weight of the others.
+        moves = self.rng.choices(moves_tuples, weights=[0.1 if m == self.prev_move else 1 for m in moves_tuples], k=1)
+        self.curr_state = self.get_state()
+        for move in moves:
+            self.states[self.curr_state] += 1
+            curr_pos = self.y, self.x
+            target = self.y + move[0], self.x + move[1]
+            # Cell on the opposite side of the player from the target.
+            reverse_target = self.y - move[0], self.x - move[1]
+            if (
+                target[1] == self.game.pad_x
+                or target[0] == self.game.pad_y
+                or target[1] >= self.game.pad_x + width - 1
+                or target[0] >= self.game.pad_y + height - 1
+                or (self.game.puzzle[target] and self.game.puzzle[target].char in "@$")
+            ):
+                # Blocked: remember this move so it is less likely to be drawn again.
+                self.prev_move = move
+                return
+            # Moved: make stepping straight back the less likely next draw.
+            self.prev_move = -move[0], -move[1]
+            self.game.puzzle[curr_pos].char = quick_chars[self.game.puzzle[curr_pos].char]
+            self.game.puzzle[curr_pos].obj = None
+            self.game.puzzle[target].char = quick_chars[self.game.puzzle[target].char]
+            self.game.puzzle[target].obj = self
+            if (c := self.game.puzzle[reverse_target].char) in "@$":
+                # Pull the box behind the player into the cell the player just left.
+                self.game.puzzle[reverse_target].char = quick_chars[c]
+                self.game.puzzle[reverse_target].obj.reverse_move(move)
+
+            self.y, self.x = target
--- a/reasoning_gym/games/contrib/sokoban/src/utils.py
+++ b/reasoning_gym/games/contrib/sokoban/src/utils.py
@ -0,0 +1,170 @@
+from heapq import heappop, heappush
+
+import numpy as np
+
+
+def print_state(state, shape):
+    """Print a flat puzzle state as a 2-D grid of characters.
+
+    Args:
+        state: Flat sequence of cell characters; nothing is printed when it is
+            empty.
+        shape: ``(rows, cols)`` used to reshape the state.
+    """
+    if state:
+        rows, cols = shape
+        grid = np.array(list(state)).reshape(rows, cols)
+        print(grid)
+
+
+def find_boxes_and_goals(state, shape):
+    """Locate boxes and goals in a flat puzzle state string.
+
+    Args:
+        state: Puzzle cells in row-major order, one character per cell.
+        shape: ``(height, width)``; only the width is used.
+
+    Returns:
+        A tuple ``(boxes, goals, boxes_on_goal)`` of ``(row, col)`` lists:
+        ``boxes`` are ``@`` cells, ``goals`` are ``X`` or ``%`` cells and
+        ``boxes_on_goal`` are ``$`` cells.
+    """
+    _, width = shape
+    boxes, goals, boxes_on_goal = [], [], []
+    # Each symbol of interest maps to the list that collects its coordinates.
+    bucket_for = {"@": boxes, "X": goals, "%": goals, "$": boxes_on_goal}
+    for index, symbol in enumerate(state):
+        bucket = bucket_for.get(symbol)
+        if bucket is not None:
+            bucket.append(divmod(index, width))
+    return boxes, goals, boxes_on_goal
+
+
+def get_state(matrix):
+    """Flatten a character matrix into a plain string.
+
+    The raw bytes of ``matrix`` are decoded as UTF-8 and every NUL character
+    is removed, so the array's padding bytes disappear from the result.
+    """
+    raw = matrix.tobytes()
+    text = raw.decode("utf-8")
+    return text.replace("\x00", "")
+
+
+def is_solved(state):
+    """Return True when ``state`` contains no ``@`` (a box that is not on a goal)."""
+    has_loose_box = "@" in state
+    return not has_loose_box
+
+
+def manhattan_sum(state, player_pos, shape):
+    """Manhattan-distance cost estimate for a puzzle state.
+
+    Every box that is not on a goal contributes ``height * width`` plus its
+    Manhattan distance to the nearest ``X``/``%`` cell. The distance from the
+    player to the nearest such box is added on top.
+
+    Args:
+        state: Flat puzzle string in row-major order.
+        player_pos: ``(row, col)`` of the player.
+        shape: ``(height, width)`` of the puzzle.
+
+    Returns:
+        The summed cost; 0 when no box is off its goal.
+
+    Raises:
+        ValueError: If a box is off its goal but no ``X``/``%`` cell exists.
+    """
+    rows, cols = shape
+    player_row, player_col = player_pos
+    loose_boxes, goals, _ = find_boxes_and_goals(state, shape)
+
+    def gap(row_a, col_a, row_b, col_b):
+        return abs(row_a - row_b) + abs(col_a - col_b)
+
+    total = len(loose_boxes) * rows * cols
+    for box_row, box_col in loose_boxes:
+        total += min(gap(box_row, box_col, goal_row, goal_col) for goal_row, goal_col in goals)
+    if loose_boxes:
+        total += min(gap(box_row, box_col, player_row, player_col) for box_row, box_col in loose_boxes)
+    return total
+
+
+def dijkstra(state, shape, box_pos=None, player_pos=None):
+    """Compute shortest step counts from a start cell to every interior cell.
+
+    The start is ``box_pos`` when given, otherwise ``player_pos``. Only cells
+    strictly inside the outer border are entered. When ``player_pos`` is
+    given, only ``+`` blocks movement; otherwise ``+``, ``@`` and ``$`` all do.
+
+    Args:
+        state: Flat puzzle string in row-major order.
+        shape: ``(height, width)`` of the puzzle.
+        box_pos: Optional ``(row, col)`` start of a box.
+        player_pos: Optional ``(row, col)`` start of the player.
+
+    Returns:
+        A ``height x width`` float array of distances; unreachable cells hold
+        ``inf``.
+    """
+    height, width = shape
+    start = box_pos or player_pos
+    grid = np.full((height, width), np.inf)
+    grid[start] = 0
+    blocked = "+" if player_pos else "+@$"
+    steps = ((1, 0), (-1, 0), (0, 1), (0, -1))
+    frontier = [(0, start)]
+    while frontier:
+        dist_here, here = heappop(frontier)
+        if dist_here > grid[here]:
+            # Stale queue entry: a shorter route to this cell was already found.
+            continue
+        for d_row, d_col in steps:
+            row, col = here[0] + d_row, here[1] + d_col
+            inside = 1 <= row < height - 1 and 1 <= col < width - 1
+            if not inside or state[row * width + col] in blocked:
+                continue
+            candidate = dist_here + 1
+            if candidate < grid[row, col]:
+                grid[row, col] = candidate
+                heappush(frontier, (candidate, (row, col)))
+    return grid
+
+
+def dijkstra_sum(state, player_pos, shape, distances):
+    """Path-distance cost estimate for a puzzle state.
+
+    A distance grid is computed with ``dijkstra`` for every box (on or off a
+    goal) and for the player, and stored in ``distances`` under the respective
+    ``(row, col)`` key. Every box that is off its goal then contributes
+    ``height * width`` plus its distance to the nearest ``X``/``%`` cell, and
+    the player's distance to the nearest such box is added.
+
+    Args:
+        state: Flat puzzle string in row-major order.
+        player_pos: ``(row, col)`` of the player.
+        shape: ``(height, width)`` of the puzzle.
+        distances: Mapping that is filled in place with the computed grids.
+
+    Returns:
+        The summed cost; 0 when no box is off its goal.
+    """
+    rows, cols = shape
+    loose_boxes, goals, parked_boxes = find_boxes_and_goals(state, shape)
+    for box in loose_boxes + parked_boxes:
+        distances[box] = dijkstra(state, shape, box)
+    distances[player_pos] = dijkstra(state, shape, player_pos=player_pos)
+
+    total = len(loose_boxes) * rows * cols
+    for box in loose_boxes:
+        total += min(distances[box][goal] for goal in goals)
+    if loose_boxes:
+        total += min(distances[player_pos][box] for box in loose_boxes)
+    return total
+
+
+def _has_excess_boxes(state, indices):
+    """Return True when the cells at ``indices`` hold more ``@`` than ``X``/``%``."""
+    boxes = goals = 0
+    for i in indices:
+        if state[i] == "@":
+            boxes += 1
+        elif state[i] in "X%":
+            goals += 1
+    return boxes > goals
+
+
+def is_deadlock(state, shape):
+    """Detect simple, cheap-to-test deadlocks in a puzzle state.
+
+    Three patterns are checked, in this order:
+
+    1. Corner: a box off its goal (``@``) has a wall (``+``) on one horizontal
+       side and on one vertical side.
+    2. Frozen 2x2 square: a 2x2 block containing the box consists only of
+       ``@``, ``$`` and ``+`` cells.
+    3. Crowded border line: the row or column directly inside the outer
+       border holds more ``@`` than ``X``/``%`` cells.
+
+    Args:
+        state: Flat puzzle string in row-major order.
+        shape: ``(height, width)`` of the puzzle.
+
+    Returns:
+        True if any pattern matches. False otherwise, and also when ``state``
+        is empty or its length does not equal ``height * width``.
+    """
+    height, width = shape
+    if not state or len(state) != height * width:
+        return False
+
+    # Flat indices of the boxes that are not on a goal.
+    boxes = [index for index, symbol in enumerate(state) if symbol == "@"]
+
+    for box in boxes:  # corner deadlock
+        walled_sideways = state[box - 1] == "+" or state[box + 1] == "+"
+        if walled_sideways and (state[box - width] == "+" or state[box + width] == "+"):
+            return True
+
+    # Offsets of the four 2x2 squares that contain the box itself (offset 0).
+    squares = (
+        (0, -1, -width, -width - 1),
+        (0, 1, -width, -width + 1),
+        (0, -1, width - 1, width),
+        (0, 1, width + 1, width),
+    )
+    immovable = {"@", "$", "+"}
+    for box in boxes:  # double box deadlock
+        for square in squares:
+            if all(state[box + offset] in immovable for offset in square):
+                return True
+
+    # Too many boxes deadlock: first/last interior row, then first/last
+    # interior column (the lines that touch the outer border).
+    border_lines = (
+        range(width + 1, 2 * width - 1),
+        range(width * (height - 2) + 1, width * (height - 2) + width - 1),
+        range(width + 1, width * (height - 1) + 1, width),
+        range(2 * width - 2, width * height - 2, width),
+    )
+    return any(_has_excess_boxes(state, line) for line in border_lines)
+
+
+def can_move(state, shape, player_pos, move):
+    """Apply one player step to a flat puzzle state.
+
+    Args:
+        state: Flat puzzle string in row-major order.
+        shape: ``(height, width)``; only the width is used.
+        player_pos: ``(row, col)`` of the player before the step.
+        move: ``(d_row, d_col)`` direction of the step.
+
+    Returns:
+        ``(new_state, cost)``. ``new_state`` is ``None`` when the step runs
+        into a wall or would push a box into a wall or another box. A plain
+        step costs 3, pushing a box onto a goal costs 0 and any other push
+        costs 2. If the cell ahead (or the cell behind a box) holds anything
+        else, the state is returned unchanged with cost 0.
+    """
+    cells = list(state)
+    row, col = player_pos
+    _, width = shape
+    step_row, step_col = move[0], move[1]
+
+    here = row * width + col
+    ahead = (row + step_row) * width + (col + step_col)
+    beyond = (row + step_row * 2) * width + (col + step_col * 2)
+
+    front = state[ahead]
+    if front == "+":
+        return None, 0
+
+    # What the player leaves behind: plain floor, or the goal it stood on.
+    vacated = "-" if cells[here] == "*" else "X"
+
+    if front in "-X":
+        cells[here] = vacated
+        cells[ahead] = "*" if front == "-" else "%"
+        return "".join(cells), 3
+
+    if front in "@$":
+        past = state[beyond]
+        if past in "+@$":
+            return None, 0
+        if past in "-X":
+            cells[beyond] = "@" if past == "-" else "$"
+            cells[ahead] = "*" if front == "@" else "%"
+            cells[here] = vacated
+            return "".join(cells), (0 if cells[beyond] == "$" else 2)
+
+    return "".join(cells), 0
--- a/reasoning_gym/games/sokoban.py
+++ b/reasoning_gym/games/sokoban.py
@ -0,0 +1,117 @@
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Dict, Optional
+
+import numpy as np
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class SokobanConfig:
+    """Settings for Sokoban puzzle generation.
+
+    NOTE(review): within this file only ``seed`` and ``size`` are read by
+    ``SokobanDataset``; confirm whether the width/height/box limits are meant
+    to be forwarded to the generator.
+    """
+
+    seed: Optional[int] = None  # Seed handed to the dataset base class.
+    size: int = 500  # Dataset size handed to the dataset base class.
+    min_w: int = 6  # Minimum width of the puzzle.
+    min_h: int = 6  # Minimum height of the puzzle.
+    max_w: int = 10  # Maximum width of the puzzle.
+    max_h: int = 10  # Maximum height of the puzzle.
+    min_boxes: int = 6  # Minimum number of boxes.
+    max_boxes: int = 10  # Maximum number of boxes.
+
+    def validate(self):
+        """Check that every minimum is less than or equal to its maximum."""
+        bounds = (
+            (self.min_w, self.max_w, "min_w must be lte max_w"),
+            (self.min_h, self.max_h, "min_h must be lte max_h"),
+            (self.min_boxes, self.max_boxes, "min_boxes must be lte max_boxes"),
+        )
+        for lower, upper, message in bounds:
+            assert lower <= upper, message
+
+
+class SokobanDataset(ProceduralDataset):
+    """Generates Sokoban games with configurable parameters"""
+
+    def __init__(self, config: SokobanConfig):
+        # NOTE(review): this template is not referenced anywhere in this class
+        # and describes a board-simulation task rather than Sokoban; it is kept
+        # only so that the attribute stays available.
+        self._prompt_templates = [
+            "What will this Sokoban board look like after {simulation_steps} steps of simulation?\n\n{board}"
+        ]
+
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+        # lazy loading of sokoban imports
+        from .contrib.sokoban.src.game import Game
+        from .contrib.sokoban.src.generator import generate
+        from .contrib.sokoban.src.utils import is_solved
+
+        self._Game = Game
+        self._generate = generate
+        self._is_solved = is_solved
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Sokoban task
+
+        The puzzle is produced by the generator from a ``Random`` seeded with
+        ``self.seed + idx``.
+
+        Returns:
+            dict with keys:
+                - question: str, the task description
+                - answer: str, a solution string
+                - metadata: dict with the puzzle string and its difficulty
+        """
+
+        # Make the Sokoban!
+        rng = Random(self.seed + idx)
+        gamestr, solution, difficulty = self._generate(rng=rng)
+
+        return {
+            "question": """You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
+"""
+            + gamestr,
+            "answer": solution,
+            "metadata": {"gamestr": gamestr, "difficulty": difficulty},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+        """Determine if the solution provided solves the Sokoban task.
+
+        The puzzle stored in ``entry["metadata"]["gamestr"]`` is rebuilt and
+        the answer is replayed on it one character at a time.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, Any]): The original dataset entry containing the puzzle.
+
+        Returns:
+            float: 1.0 if the replayed moves solve the puzzle, 0.1 if they
+            replay without error but do not solve it, 0.01 if rebuilding or
+            replaying raises, and 0.0 if no answer was given.
+        """
+
+        if answer is None:
+            return 0.0
+
+        try:
+            grid_list = [list(line) for line in entry["metadata"]["gamestr"].replace(" ", "").strip().split("\n")]
+            matrix = np.array(grid_list)
+
+            game = self._Game()
+            game.load_puzzle_matrix(matrix)
+
+            for move in answer:
+                game.player.update(key=move)
+
+            if self._is_solved(game.get_curr_state()):
+                return 1.0
+        except Exception:
+            # Deliberate best effort: a malformed answer or puzzle gets a
+            # token score instead of propagating the error.
+            return 0.01
+
+        return 0.1
+
+
+# Register the dataset and its config class under the name "sokoban"
+# (presumably so the factory can build it by name -- confirm in ..factory).
+register_dataset("sokoban", SokobanDataset, SokobanConfig)