Sokoban without pygame (#77)

* add minified version of https://github.com/xbandrade/sokoban-solver-generator

---------

Co-authored-by: Rich Jones <miserlou@gmail.com>
This commit is contained in:
Andreas Köpf 2025-02-07 11:57:53 +01:00 committed by GitHub
parent a8f9eafd43
commit 1b49713116
25 changed files with 1229 additions and 87 deletions

View file

@ -11,6 +11,7 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
from .maze import MazeConfig, MazeDataset
from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
from .n_queens import NQueensDataset
from .sokoban import SokobanConfig, SokobanDataset
from .sudoku import SudokuConfig, SudokuDataset
from .tower_of_hanoi import HanoiConfig, HanoiDataset
@ -21,6 +22,8 @@ __all__ = [
"MiniSudokuDataset",
"SudokuConfig",
"SudokuDataset",
"SokobanConfig",
"SokobanDataset",
"MazeConfig",
"MazeDataset",
"GameOfLifeConfig",

View file

View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Bruno Andrade
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,52 @@
# 📦 Sokoban Solver and Generator
This folder contains a minified version of Bruno Andrade's Sokoban game with all pygame dependencies stripped out.
The original version can be found here: [xbandrade/sokoban-solver-generator](https://github.com/xbandrade/sokoban-solver-generator)
This is a Sokoban puzzle generator and solver that uses BFS, A* and Dijkstra search algorithms.
`Sokoban` is a puzzle game in which the player pushes boxes around in a warehouse, trying to get every box to a goal.
### ❕Sokoban Puzzle
The puzzle states are stored in a matrix, and each element of the puzzle is represented by a single character in the matrix.
```
+ + + + + + +
+ * - @ - X +
+ + - @ - + +
+ X - - - $ +
+ + + + + + +
```
`*` - The player <br>
`%` - The player on a goal <br>
`@` - A box <br>
`X` - A goal <br>
`$` - A box on a goal <br>
`+` - A wall <br>
`-` - An empty position <br>
In the original pygame version, a box on a goal has its color changed to green in the game window; in this minified version it is shown only as `$`.
### ❕Sokoban Generator
The generator will initially create a puzzle with a random board size, then the player and the boxes on goals will be randomly placed on the board.
During the generation of a puzzle, the player can only pull boxes away from their positions, breaking every wall in their way, so the puzzle is guaranteed to have a valid solution.
### ❕ Sokoban Solver
The algorithms used to implement the Sokoban puzzle solvers were `Breadth-First Search(BFS)` and `A*`.
The `BFS` solver uses a queue to store the next states of the puzzle it needs to visit. A visited state is stored in a hashset, and BFS won't try to visit the same state twice.
The `A*` algorithm is similar to the BFS algorithm, but it uses a priority queue instead of a queue, and it prioritizes moves that are more likely to solve the problem.
It does so by setting costs to the puzzle state and the player's movements, punishing the player with high costs for a bad move and rewarding the player with lower costs for a good move.
The state costs are defined by heuristic functions, and this solver was implemented with two different heuristics: the `Manhattan Distance` function and `Dijkstra` distance function.
All three implementations check for possible deadlocks (states that are impossible to solve) before adding the new state to the queue.
More about Sokoban: [Wikipedia Article](https://en.wikipedia.org/wiki/Sokoban)

View file

@ -0,0 +1,10 @@
+ + + + + + +
+ - * - - - +
+ - - - $ - +
+ X - - @ - +
+ - - - - - +
+ $ - + - - +
+ + - - - - +
+ X @ - $ - +
+ + - - - - +
+ + + + + + +

View file

@ -0,0 +1,5 @@
+ + + + + + +
+ * - @ - X +
+ + - @ - + +
+ X - - - - +
+ + + + + + +

View file

@ -0,0 +1,6 @@
- - + + + + + +
- + + - - - * +
+ + - - - + X +
+ X - @ - @ @ +
+ X X @ - - - +
+ + + + + + + +

View file

@ -0,0 +1,7 @@
- + + + + + + - - -
- + X - - X + - - -
+ + - @ @ + + - - -
+ - - - - + + - - -
+ - @ - - * + + + +
+ + - - - - - - X +
- + + + + + + + + +

View file

@ -0,0 +1,7 @@
- + + + + + + - -
+ + X - @ - + + +
+ - - - - - - - +
+ - @ + + X - @ +
+ - - - @ - + - +
+ + + * - X - X +
- - + + + + + + +

View file

@ -0,0 +1,7 @@
- + + + + + + + -
+ + - - + - - + +
+ - @ - - - @ - +
+ - - X * X - - +
+ + @ + + - - + +
+ - - X - - - + -
+ + + + + + + + -

View file

@ -0,0 +1,9 @@
- - - + + + + + + + +
- - - + - - - - - - +
- - + + - - - - @ - +
- + + - - + + - + + +
+ + - - + - - X - - +
+ - - + X @ @ - - + +
+ * + X - - - - + + -
+ + - - - - - + + - -
+ + + + + + + + - - -

View file

@ -0,0 +1,6 @@
+ + + + + + + +
+ - - @ - X * +
+ - @ - - + X +
+ X X @ - @ @ +
+ X X @ - - - +
+ + + + + + + +

View file

@ -0,0 +1,81 @@
from collections import defaultdict
from heapq import heappop, heappush
import numpy as np
from reasoning_gym.games.contrib.sokoban.src.utils import (
can_move,
dijkstra_sum,
get_state,
is_deadlock,
is_solved,
manhattan_sum,
)
def astar(matrix, player_pos, debug=False, heuristic="manhattan"):
    """Search for a push sequence that solves the puzzle with a best-first search.

    Args:
        matrix: 2-D numpy array of single-character cells describing the puzzle.
        player_pos: ``(row, col)`` of the player inside ``matrix``.
        debug: when true, progress messages are printed.
        heuristic: ``"manhattan"`` selects ``manhattan_sum``; any other value
            selects ``dijkstra_sum``.

    Returns:
        ``(path, depth)`` where ``path`` is a string of ``U``/``D``/``L``/``R``
        moves and ``depth`` its length, or ``(None, -1)`` when the search
        space is exhausted without finding a solution.
    """
    heur = "[A*]" if heuristic == "manhattan" else "[Dijkstra]"
    shape = matrix.shape
    initial_state = get_state(matrix)
    initial_cost = curr_depth = 0
    if heuristic == "manhattan":
        curr_cost = manhattan_sum(initial_state, player_pos, shape)
    else:
        distances = defaultdict(list)
        curr_cost = dijkstra_sum(initial_state, player_pos, shape, distances)
    # ``None`` is what can_move() returns for an illegal move; keeping it in
    # ``seen`` lets the membership test below reject those moves as well.
    seen = {None}
    heap = []
    heappush(heap, (initial_cost, curr_cost, initial_state, player_pos, curr_depth, ""))
    moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
    direction = {
        (1, 0): "D",
        (-1, 0): "U",
        (0, -1): "L",
        (0, 1): "R",
    }
    while heap:
        # The second tuple entry (the heuristic value of the popped state) is
        # what gets combined with the move cost to prioritise its successors.
        _, curr_cost, state, pos, depth, path = heappop(heap)
        seen.add(state)
        for move in moves:
            new_state, move_cost = can_move(state, shape, pos, move)
            deadlock = is_deadlock(new_state, shape)
            if new_state in seen or deadlock:
                continue
            new_pos = pos[0] + move[0], pos[1] + move[1]
            if heuristic == "manhattan":
                new_cost = manhattan_sum(new_state, new_pos, shape)
            else:
                new_cost = dijkstra_sum(new_state, new_pos, shape, distances)
            if new_cost == float("inf"):
                # Some box cannot reach any goal from this state.
                continue
            heappush(
                heap,
                (
                    move_cost + curr_cost,
                    new_cost,
                    new_state,
                    new_pos,
                    depth + 1,
                    path + direction[move],
                ),
            )
            if is_solved(new_state):
                if debug:
                    print(f"{heur} Solution Found!\n{path + direction[move]}")
                return (path + direction[move], depth + 1)
        if debug:
            print(f"{heur} Solution Depth: {depth + 1}\n{path + direction[move]}")
    print(f"{heur} Solution not found!\n")
    if debug:
        print(f"{heur} Solution Not Found!\nDepth {depth + 1}")
    # The loop only ends once the heap is empty, so no depth can be reported.
    return (None, -1)
def solve_astar(puzzle, visualizer=False, heuristic="manhattan"):
    """Locate the player (``*`` or ``%``) in ``puzzle`` and run ``astar`` from there.

    ``visualizer`` is forwarded to ``astar`` as its ``debug`` flag.
    """
    player_mask = (puzzle == "*") | (puzzle == "%")
    found = np.where(player_mask)
    start = (found[0][0], found[1][0])
    return astar(puzzle, start, debug=visualizer, heuristic=heuristic)

View file

@ -0,0 +1,66 @@
import time
from collections import deque
import numpy as np
from reasoning_gym.games.contrib.sokoban.src.utils import can_move, get_state, is_deadlock, is_solved, print_state
def bfs(matrix, player_pos, debug=False):
    """Solve the puzzle with a breadth-first search over board states.

    Args:
        matrix: 2-D numpy array of single-character cells describing the puzzle.
        player_pos: ``(row, col)`` of the player inside ``matrix``.
        debug: when true, extra progress messages are printed.

    Returns:
        ``(path, depth)`` where ``path`` is a string of ``U``/``D``/``L``/``R``
        moves and ``depth`` its length, or ``(None, -1)`` when every reachable
        state has been explored without finding a solution.
    """
    print("Breadth-First Search")
    initial_state = get_state(matrix)
    shape = matrix.shape
    print_state(initial_state, shape)
    # States are marked as seen when they are enqueued rather than when they
    # are dequeued, so each state enters the queue at most once. The first
    # discovery order (and therefore the returned path) is unchanged.
    # ``None`` is what can_move() returns for an illegal move.
    seen = {None, initial_state}
    q = deque([(initial_state, player_pos, 0, "")])
    moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
    direction = {
        (1, 0): "D",
        (-1, 0): "U",
        (0, -1): "L",
        (0, 1): "R",
    }
    while q:
        state, pos, depth, path = q.popleft()
        for move in moves:
            new_state, _ = can_move(state, shape, pos, move)
            if new_state in seen or is_deadlock(new_state, shape):
                continue
            seen.add(new_state)
            q.append(
                (
                    new_state,
                    (pos[0] + move[0], pos[1] + move[1]),
                    depth + 1,
                    path + direction[move],
                )
            )
            if is_solved(new_state):
                print(f"[BFS] Solution found!\n\n{path + direction[move]}\nDepth {depth + 1}\n")
                if debug:
                    print(f"[BFS] Solution Found!\n{path + direction[move]}")
                return (path + direction[move], depth + 1)
        if debug:
            print(f"[BFS] Solution Depth: {depth + 1}\n{path + direction[move]}")
    print(f"[BFS] Solution not found!\n")
    if debug:
        print(f"[BFS] Solution Not Found!\nDepth {depth + 1}")
    # The loop only ends once the queue is empty, so no depth can be reported.
    return (None, -1)
def solve_bfs(puzzle, visualizer=False):
    """Locate the player (``*`` or ``%``) in ``puzzle`` and run ``bfs`` from there.

    ``visualizer`` is forwarded to ``bfs`` as its ``debug`` flag.
    """
    player_mask = (puzzle == "*") | (puzzle == "%")
    found = np.where(player_mask)
    start = (found[0][0], found[1][0])
    return bfs(puzzle, start, debug=visualizer)
if __name__ == "__main__":
    # Manual run: solve level 7 (path is relative to the working directory)
    # and report the wall-clock time taken.
    start = time.time()
    level = np.loadtxt("levels/lvl7.dat", dtype="<U1")
    root = solve_bfs(level)
    elapsed = time.time() - start
    print(f"Runtime: {elapsed} seconds")

View file

@ -0,0 +1,34 @@
class Box:
    """A pushable box at column ``x`` / row ``y`` of ``game.puzzle``."""

    def __init__(self, x, y, game=None):
        self.game = game
        self.x = x
        self.y = y

    def can_move(self, move):
        """Try to push the box by ``move`` given as ``(dx, dy)``.

        The push succeeds unless the destination cell holds another ``Box``
        instance. On success both cells are updated and ``True`` is returned;
        otherwise nothing changes and ``False`` is returned.
        """
        origin = (self.y, self.x)
        destination = (self.y + move[1], self.x + move[0])
        dest_elem = self.game.puzzle[destination]
        if isinstance(dest_elem.obj, Box):
            return False
        origin_elem = self.game.puzzle[origin]
        self.y, self.x = destination
        origin_elem.char = "X" if origin_elem.ground else "-"
        origin_elem.obj = None
        dest_elem.char = "$" if dest_elem.ground else "@"
        dest_elem.obj = self
        return True

    def reverse_move(self, move):
        """Relocate the box by ``move`` given as ``(dy, dx)`` without any checks."""
        board = self.game.puzzle
        old_pos = (self.y, self.x)
        new_pos = (self.y + move[0], self.x + move[1])
        board[old_pos].obj = None
        board[new_pos].obj = self
        self.y, self.x = new_pos
        # A cell with a ground object is a goal, which changes the glyph used.
        board[old_pos].char = "X" if board[old_pos].ground else "-"
        board[new_pos].char = "$" if board[new_pos].ground else "@"
class Obstacle(Box):
    """A wall; a ``Box`` subclass created without a ``game`` reference."""

    def __init__(self, x, y):
        super().__init__(x, y)

View file

@ -0,0 +1,173 @@
from random import Random
import numpy as np
from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
from reasoning_gym.games.contrib.sokoban.src.player import Player, ReversePlayer
from reasoning_gym.games.contrib.sokoban.src.utils import get_state
class Floor:
    """A ground tile identified by its ``x`` and ``y`` coordinates."""

    def __init__(self, x, y):
        self.x, self.y = x, y
class Goal(Floor):
    """A ground tile that marks a target position for a box."""

    def __init__(self, x, y):
        super().__init__(x, y)
class PuzzleElement:
    """One board cell: its display character, ground tile and occupying object."""

    def __init__(self, char: str, obj=None, ground=None):
        self.char, self.ground, self.obj = char, ground, obj

    def __str__(self) -> str:
        # A cell is rendered as its single display character.
        return self.char
class Game:
    """A Sokoban board on which a ``Player`` pushes boxes.

    The puzzle is stored inside a ``height`` x ``width`` object array of
    ``PuzzleElement`` cells. ``pad_x`` / ``pad_y`` hold the offset of the
    puzzle's top-left corner inside that array and ``puzzle_size`` its
    ``(rows, cols)`` extent.
    """

    def __init__(self, width=19, height=10, level=None, path=None):
        self.level = level
        self.width = width
        self.height = height
        self.puzzle = np.empty((height, width), dtype=PuzzleElement)
        self.player = None
        self.puzzle_size = None
        self.pad_x = 0
        self.pad_y = 0
        self.path = path or f"levels/lvl{level}.dat"
        # Only the base class loads from ``path`` automatically; subclasses
        # populate the board themselves.
        if path and type(self) is Game:
            self.load_puzzle()

    def get_matrix(self):
        """Return the puzzle region as a ``<U1`` character matrix."""
        rows, cols = self.puzzle_size
        region = self.puzzle[self.pad_y : self.pad_y + rows, self.pad_x : self.pad_x + cols]
        matrix = np.empty((self.puzzle_size), dtype="<U1")
        for h in range(len(region)):
            for w in range(len(region[0])):
                matrix[h, w] = region[h, w].char
        return matrix

    def get_curr_state(self):
        """Return the current puzzle region flattened by ``get_state``."""
        return get_state(self.get_matrix())

    def print_puzzle(self):
        """Print the whole board, leaving unpopulated cells blank."""
        for h in range(self.height):
            for w in range(self.width):
                if self.puzzle[h, w]:
                    print(self.puzzle[h, w].char, end=" ")
                else:
                    print(" ", end=" ")
            print(" ")

    def is_level_complete(self):
        """Return ``True`` when no cell shows a box that is off a goal (``@``)."""
        boxes_left = 0
        for h in range(self.height):
            for w in range(self.width):
                if self.puzzle[h, w] and self.puzzle[h, w].char == "@":
                    boxes_left += 1
        return boxes_left == 0

    def load_puzzle(self):
        """Load the puzzle from the whitespace-separated file at ``self.path``.

        I/O and validation errors are printed rather than raised.
        """
        try:
            with open(self.path) as f:
                data = [line.strip().split() for line in f]
            self._process_puzzle_data(data)
        except (OSError, ValueError) as e:
            print(f"{e}")
            return

    def load_puzzle_matrix(self, matrix):
        """Load the puzzle directly from a matrix (list of lists or numpy array).

        Validation errors are printed rather than raised.
        """
        try:
            # Numpy arrays are converted to plain lists of lists first.
            if isinstance(matrix, np.ndarray):
                data = matrix.tolist()
            else:
                data = matrix
            self._process_puzzle_data(data)
        except ValueError as e:
            print(f"{e}")
            return

    def _process_puzzle_data(self, data):
        """Populate the board from ``data``, a sequence of rows of characters.

        Raises:
            ValueError: if a cell holds a character outside the puzzle alphabet.
        """
        rows = len(data)
        cols = len(data[0]) if rows > 0 else 0
        self.puzzle_size = (rows, cols)
        # The padding below goes negative when the puzzle does not fit, which
        # makes cells wrap around to the far edge or overflow the array.
        # Grow the board so that a larger puzzle is stored correctly instead.
        if cols + 2 > self.width or rows > self.height:
            self.width = max(self.width, cols + 2)
            self.height = max(self.height, rows)
            self.puzzle = np.empty((self.height, self.width), dtype=PuzzleElement)
        pad_x = (self.width - cols - 2) // 2  # -2 matches original file-based logic
        pad_y = (self.height - rows) // 2
        self.pad_x, self.pad_y = pad_x, pad_y
        for i, row in enumerate(data):
            for j, c in enumerate(row):
                new_elem = PuzzleElement(c)
                self.puzzle[i + pad_y, j + pad_x] = new_elem
                # Create game objects based on characters
                if c == "+":  # Wall
                    new_elem.obj = Obstacle(x=j + pad_x, y=i + pad_y)
                elif c == "@":  # Box
                    new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self)
                elif c == "*":  # Player
                    new_elem.obj = Player(x=j + pad_x, y=i + pad_y, game=self)
                    self.player = new_elem.obj
                elif c == "X":  # Goal
                    new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
                elif c == "$":  # Box on goal
                    new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
                    new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self)
                elif c == "%":  # Player on goal
                    new_elem.obj = Player(x=j + pad_x, y=i + pad_y, game=self)
                    new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
                    self.player = new_elem.obj
                elif c not in " -":  # Validation
                    raise ValueError(f"Invalid character in puzzle: {c}")
class ReverseGame(Game):
    """Board variant used during generation; its player is a ``ReversePlayer``."""

    def __init__(self, rng: Random, width=19, height=10, level=None):
        super().__init__(width, height, level)
        self.rng = rng
        self.pad_x = 0
        self.pad_y = 0

    def load_puzzle(self, puzzle):
        """Populate the board from ``puzzle``, an indexable grid of characters."""
        n_rows = len(puzzle)
        self.puzzle_size = (n_rows, len(puzzle[0]) if n_rows > 0 else 0)
        pad_x = (self.width - len(puzzle[0]) - 2) // 2
        pad_y = (self.height - n_rows) // 2
        self.pad_x, self.pad_y = pad_x, pad_y
        for i, row in enumerate(puzzle):
            for j, c in enumerate(row):
                col, line = j + pad_x, i + pad_y
                cell = PuzzleElement(c)
                self.puzzle[line, col] = cell
                if c == "+":  # wall
                    cell.obj = Obstacle(x=col, y=line)
                elif c == "@":  # box
                    cell.obj = Box(x=col, y=line, game=self)
                elif c == "*":  # player
                    cell.obj = ReversePlayer(rng=self.rng, x=col, y=line, game=self)
                    self.player = cell.obj
                elif c == "X":  # goal
                    cell.ground = Goal(x=col, y=line)
                elif c == "$":  # box on goal
                    cell.ground = Goal(x=col, y=line)
                    cell.obj = Box(x=col, y=line, game=self)
                elif c == "%":  # player on goal
                    cell.obj = ReversePlayer(rng=self.rng, x=col, y=line, game=self)
                    cell.ground = Goal(x=col, y=line)
                    self.player = cell.obj

View file

@ -0,0 +1,107 @@
from random import Random
import numpy as np
from reasoning_gym.games.contrib.sokoban.src.astar import solve_astar
from reasoning_gym.games.contrib.sokoban.src.game import Game, ReverseGame
def num_boxes(puzzle_area, min_boxes, max_boxes, min_w, min_h, max_w, max_h):
    """Return the number of boxes for a puzzle of ``puzzle_area`` cells.

    The count is interpolated linearly between ``min_boxes`` at the smallest
    allowed area (``min_w * min_h``) and ``max_boxes`` at the largest
    (``max_w * max_h``), then truncated to an int.
    """
    min_area = min_w * min_h
    area_span = max_w * max_h - min_area
    if area_span == 0:
        # Only one board size is possible, so there is nothing to interpolate
        # over (and the slope below would divide by zero).
        return int(min_boxes)
    m = (max_boxes - min_boxes) / area_span
    b = min_boxes - m * min_area
    return int(m * puzzle_area + b)
def random_valid(rng: Random, width: int = 10, height: int = 10):
    """Draw a random pair: first in ``[1, width - 2]``, then in ``[1, height - 2]``."""
    first = rng.randrange(1, width - 1)
    second = rng.randrange(1, height - 1)
    return first, second
def generate(
    rng: Random,
    debug: bool = False,
    path: str = None,
    min_w: int = 6,
    min_h: int = 6,
    max_w: int = 15,
    max_h: int = 10,
    min_boxes: int = 4,
    max_boxes: int = 10,
) -> tuple[str, str, dict]:
    """
    Generates a level with the given configuration parameters.

    Boxes are first placed on goals inside a board filled with walls, then a
    reverse player walks randomly, carving floor and pulling boxes off their
    goals. Generation is retried until enough boxes end up off their goals
    and the A* solver finds a solution.

    Parameters:
        rng: Random number generator for reproducibility.
        debug: Whether to print the generated puzzle and replay its solution.
        path: Currently unused; kept for backward compatibility (saving the
            level to a file is disabled).
        min_w: Minimum width of the puzzle.
        min_h: Minimum height of the puzzle.
        max_w: Maximum width of the puzzle.
        max_h: Maximum height of the puzzle.
        min_boxes: Minimum number of boxes.
        max_boxes: Maximum number of boxes.
    Returns:
        puzzle_string, solution, difficulty - where difficulty is a dict with
        the puzzle ``size`` as (height, width) and the solution's ``num_steps``.
    """
    while True:
        width = rng.randint(min_w, max_w)
        height = rng.randint(min_h, max_h)
        puzzle = np.full((height, width), "+", dtype="<U1")
        boxes = num_boxes(width * height, min_boxes, max_boxes, min_w, min_h, max_w, max_h)
        # Never ask for more boxes than there are free interior cells (one is
        # taken by the player); the placement loop below would never finish.
        boxes = min(boxes, (width - 2) * (height - 2) - 1)
        player_pos = random_valid(rng, width, height)
        puzzle_size = (height, width)
        puzzle[player_pos[1], player_pos[0]] = "*"
        boxes_created = 0
        while boxes_created < boxes:
            box_pos = random_valid(rng, height, width)
            if puzzle[box_pos] == "+":
                puzzle[box_pos] = "$"
                boxes_created += 1
        reverse_game = ReverseGame(rng=rng, level=0)
        reverse_game.load_puzzle(puzzle)
        player = reverse_game.player
        counter = round(height * width * rng.uniform(1.8, 3.6))
        while counter > 0:
            reverse_game.player.update(puzzle_size)
            # Stop early once the walk keeps revisiting the same board state.
            if player.states[player.curr_state] >= 20:
                break
            counter -= 1
        slice_x = slice(reverse_game.pad_x, reverse_game.pad_x + width)
        slice_y = slice(reverse_game.pad_y, reverse_game.pad_y + height)
        matrix = reverse_game.puzzle[slice_y, slice_x]
        # Optionally print the puzzle:
        if debug:
            player.print_puzzle(matrix)
        out_of_place_boxes = np.sum([str(x) == "@" for x in matrix.flatten()])
        if out_of_place_boxes >= boxes // 2:
            puzzle_str = player.puzzle_to_string(matrix)
            grid_list = [list(line) for line in puzzle_str.replace(" ", "").strip().split("\n")]
            grid_array = np.array(grid_list)
            solution, _ = solve_astar(grid_array)
            if solution is None:
                # The solver can exhaust its search without a result; start
                # over instead of failing on the missing solution below.
                if debug:
                    print("Solver found no solution, retrying generation...")
                continue
            if debug:
                print(f"solution={solution}")
                game = Game()
                game.load_puzzle_matrix(grid_array)
                for step, move in enumerate(solution):
                    print(f"move #{step}: {move}")
                    game.player.update(key=move)
                    game.print_puzzle()
            difficulty = {"size": puzzle_size, "num_steps": len(solution)}
            return puzzle_str, solution, difficulty
        else:
            if debug:
                print(f"Not enough boxes out of place, retrying generation... [{out_of_place_boxes}/{boxes}]")
if __name__ == "__main__":
    # Manual run: generate one unseeded level and print the debug output.
    generate(Random(), debug=True)

View file

@ -0,0 +1,118 @@
from collections import defaultdict
from random import Random
from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
class Player:
    """A player that can only push boxes"""

    def __init__(self, x, y, game):
        self.game = game
        self.x = x
        self.y = y

    def update(self, key: str = None) -> int:
        """Try to move one cell in the direction named by ``key``.

        ``key`` is one of ``"R"``, ``"L"``, ``"U"`` or ``"D"``; anything else
        is ignored. Returns 1 when the player moved (pushing a box if needed)
        and 0 otherwise.
        """
        steps = {"R": (1, 0), "L": (-1, 0), "U": (0, -1), "D": (0, 1)}
        step = steps.get(key) if key else None
        if not step:
            return 0
        origin = (self.y, self.x)
        destination = (self.y + step[1], self.x + step[0])
        dest_elem = self.game.puzzle[destination]
        if dest_elem and dest_elem.obj and isinstance(dest_elem.obj, Obstacle):
            return 0
        # A box in the way has to be pushable, otherwise the move fails.
        if isinstance(dest_elem.obj, Box) and not dest_elem.obj.can_move(step):
            return 0
        origin_elem = self.game.puzzle[origin]
        self.y, self.x = destination
        origin_elem.char = "X" if origin_elem.ground else "-"
        origin_elem.obj = None
        dest_elem.char = "%" if dest_elem.ground else "*"
        dest_elem.obj = self
        return 1
class ReversePlayer(Player):
    """A player that can only pull boxes"""

    def __init__(self, rng: Random, x, y, game=None, puzzle=None):
        super().__init__(x=x, y=y, game=game)
        self.rng = rng
        self.game = game
        self.puzzle = puzzle
        self.curr_state = ""
        # How many times each board state has been observed by update().
        self.states = defaultdict(int)
        self.prev_move = (0, 0)

    def print_puzzle(self, matrix=None):
        """Print the text rendering produced by ``puzzle_to_string``."""
        print(self.puzzle_to_string(matrix=matrix))

    def puzzle_to_string(self, matrix=None):
        """Render ``matrix`` (default: the whole board) as space-separated rows.

        Unpopulated cells are rendered as ``F``.
        """
        grid = self.game.puzzle if matrix is None else matrix
        n_rows, n_cols = len(grid), len(grid[0])
        rendered = []
        for r in range(n_rows):
            cells = [(str(grid[r, c]) if grid[r, c] else "F") + " " for c in range(n_cols)]
            rendered.append("".join(cells) + " " + "\n")
        return "".join(rendered) + "\n"

    def get_state(self):
        """Concatenate the characters of every populated cell, row by row."""
        board = self.game.puzzle
        n_rows, n_cols = len(board), len(board[0])
        return "".join(str(board[r, c]) for r in range(n_rows) for c in range(n_cols) if board[r, c])

    def update(self, puzzle_size):
        """Take one random step, carving through walls and pulling a box if one is behind.

        ``puzzle_size`` is ``(height, width)`` of the puzzle region. The step is
        abandoned when it would leave the interior or run into a box.
        """
        height, width = puzzle_size
        # Character a cell turns into when the player enters or leaves it.
        quick_chars = {
            "*": "-",
            "%": "X",
            "+": "*",
            "-": "*",
            "X": "%",
            "@": "-",
            "$": "X",
        }
        candidates = [(1, 0), (-1, 0), (0, -1), (0, 1)]
        weights = [0.1 if m == self.prev_move else 1 for m in candidates]
        (move,) = self.rng.choices(candidates, weights=weights, k=1)
        self.curr_state = self.get_state()
        self.states[self.curr_state] += 1
        board = self.game.puzzle
        here = (self.y, self.x)
        ahead = (self.y + move[0], self.x + move[1])
        behind = (self.y - move[0], self.x - move[1])
        if (
            ahead[1] == self.game.pad_x
            or ahead[0] == self.game.pad_y
            or ahead[1] >= self.game.pad_x + width - 1
            or ahead[0] >= self.game.pad_y + height - 1
            or (board[ahead] and board[ahead].char in "@$")
        ):
            self.prev_move = move
            return
        self.prev_move = (-move[0], -move[1])
        board[here].char = quick_chars[board[here].char]
        board[here].obj = None
        board[ahead].char = quick_chars[board[ahead].char]
        board[ahead].obj = self
        pulled = board[behind].char
        if pulled in "@$":
            # Drag the box behind the player into the cell just vacated.
            board[behind].char = quick_chars[pulled]
            board[behind].obj.reverse_move(move)
        self.y, self.x = ahead

View file

@ -0,0 +1,170 @@
from heapq import heappop, heappush
import numpy as np
def print_state(state, shape):
    """Print ``state`` reshaped to ``shape``; an empty state prints nothing."""
    if state:
        rows, cols = shape
        grid = np.array(list(state))
        print(grid.reshape(rows, cols))
def find_boxes_and_goals(state, shape):
    """Collect ``(row, col)`` positions of boxes, free goals and boxes on goals.

    Returns three lists: boxes off a goal (``@``), goals without a box
    (``X`` or ``%``) and boxes standing on a goal (``$``).
    """
    _, width = shape
    boxes, goals, boxes_on_goal = [], [], []
    bucket_for = {"@": boxes, "X": goals, "%": goals, "$": boxes_on_goal}
    for index, cell in enumerate(state):
        bucket = bucket_for.get(cell)
        if bucket is not None:
            bucket.append(divmod(index, width))
    return boxes, goals, boxes_on_goal
def get_state(matrix):
    """Flatten a character matrix into one string.

    The array's raw bytes are decoded and the NUL bytes removed.
    """
    raw = matrix.tobytes()
    decoded = raw.decode("utf-8")
    return decoded.replace("\x00", "")
def is_solved(state):
    """Return ``True`` when ``state`` contains no box that is off a goal (``@``)."""
    has_loose_box = "@" in state
    return not has_loose_box
def manhattan_sum(state, player_pos, shape):
    """Heuristic cost of ``state`` based on Manhattan distances.

    Each box that is off a goal contributes a fixed penalty of
    ``height * width`` plus its distance to the nearest free goal; the
    player's distance to the nearest such box is added on top.
    """
    height, width = shape
    p_row, p_col = player_pos
    boxes, goals, _ = find_boxes_and_goals(state, shape)
    total = len(boxes) * height * width
    for b_row, b_col in boxes:
        total += min(abs(b_row - g_row) + abs(b_col - g_col) for g_row, g_col in goals)
    nearest_box = min((abs(b_row - p_row) + abs(b_col - p_col) for b_row, b_col in boxes), default=0)
    return total + nearest_box
def dijkstra(state, shape, box_pos=None, player_pos=None):
    """Compute shortest step counts from a start cell to every interior cell.

    The start is ``box_pos`` when given, otherwise ``player_pos``. When
    ``player_pos`` is given only walls block movement; otherwise boxes block
    it too. Returns a ``height`` x ``width`` float array in which unreachable
    cells hold ``inf``.
    """
    height, width = shape
    start = box_pos or player_pos
    dist = np.full((height, width), float("inf"))
    dist[start] = 0
    blocked = "+" if player_pos else "+@$"
    frontier = [(0, start)]
    while frontier:
        travelled, (row, col) = heappop(frontier)
        if travelled > dist[row, col]:
            # Stale entry: a shorter route to this cell was already processed.
            continue
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            n_row, n_col = row + d_row, col + d_col
            inside = 1 <= n_row < height - 1 and 1 <= n_col < width - 1
            if not inside or state[n_row * width + n_col] in blocked:
                continue
            candidate = travelled + 1
            if candidate < dist[n_row, n_col]:
                dist[n_row, n_col] = candidate
                heappush(frontier, (candidate, (n_row, n_col)))
    return dist
def dijkstra_sum(state, player_pos, shape, distances):
    """Heuristic cost of ``state`` based on ``dijkstra`` distance maps.

    ``distances`` is a mapping that is filled with one distance map per box
    and one for the player. Each box that is off a goal contributes a fixed
    penalty of ``height * width`` plus its distance to the nearest free goal;
    the player's distance to the nearest such box is added on top.
    """
    height, width = shape
    boxes, goals, boxes_on_goal = find_boxes_and_goals(state, shape)
    for box in boxes + boxes_on_goal:
        distances[box] = dijkstra(state, shape, box)
    distances[player_pos] = dijkstra(state, shape, player_pos=player_pos)
    total = len(boxes) * height * width
    for box in boxes:
        total += min(distances[box][goal] for goal in goals)
    nearest_box = min((distances[player_pos][box] for box in boxes), default=0)
    return total + nearest_box
def is_deadlock(state, shape):
    """Return ``True`` when ``state`` matches one of the known dead-end patterns.

    Three patterns are checked, in order: a box with walls on two adjacent
    sides, a 2x2 square made only of boxes and walls, and an outermost
    interior row or column holding more loose boxes than free goals. An
    empty state, or one whose length does not match ``shape``, is never
    reported as a deadlock.
    """
    height, width = shape
    if not state or len(state) != height * width:
        return False
    boxes, _, _ = find_boxes_and_goals(state, shape)
    box_indices = [row * width + col for row, col in boxes]

    # Corner deadlock: one horizontal and one vertical neighbour are walls.
    wall_pairs = ((-1, -width), (1, width), (1, -width), (-1, width))
    for index in box_indices:
        if any(state[index + a] == "+" and state[index + b] == "+" for a, b in wall_pairs):
            return True

    # Double box deadlock: the box is part of a 2x2 block of boxes and walls.
    squares = (
        (0, -1, -width, -width - 1),
        (0, 1, -width, -width + 1),
        (0, -1, width - 1, width),
        (0, 1, width + 1, width),
    )
    frozen_sets = ({"@", "+"}, {"@"}, {"@", "$"}, {"@", "$", "+"})
    for index in box_indices:
        for square in squares:
            if {state[index + offset] for offset in square} in frozen_sets:
                return True

    # Too many boxes deadlock along the first/last interior row and column.
    edge_lines = (
        range(width + 1, 2 * width - 1),
        range(width * (height - 2) + 1, width * (height - 2) + width - 1),
        range(width + 1, width * (height - 1) + 1, width),
        range(2 * width - 2, width * height - 2, width),
    )
    for line in edge_lines:
        loose_boxes = sum(1 for i in line if state[i] == "@")
        free_goals = sum(1 for i in line if state[i] in "X%")
        if loose_boxes > free_goals:
            return True
    return False
def can_move(state, shape, player_pos, move):
    """Apply ``move`` to the player and return ``(new_state, move_cost)``.

    ``new_state`` is ``None`` when the move runs into a wall or would push a
    box into a wall or another box. A plain step costs 3, pushing a box onto
    a goal costs 0 and any other push costs 2.
    """
    cells = list(state)
    row, col = player_pos
    _, width = shape
    here = row * width + col
    ahead = (row + move[0]) * width + (col + move[1])
    beyond = (row + move[0] * 2) * width + (col + move[1] * 2)
    cost = 0
    ahead_char = state[ahead]
    if ahead_char == "+":
        return None, cost
    if ahead_char in "-X":
        # Plain step onto floor or a free goal.
        cells[here] = "-" if cells[here] == "*" else "X"
        cells[ahead] = "*" if ahead_char == "-" else "%"
        cost = 3
    elif ahead_char in "@$":
        beyond_char = state[beyond]
        if beyond_char in "+@$":
            return None, cost
        if beyond_char in "-X":
            # Push the box one cell further and step into its old cell.
            cells[beyond] = "@" if beyond_char == "-" else "$"
            cells[ahead] = "*" if ahead_char == "@" else "%"
            cells[here] = "-" if cells[here] == "*" else "X"
            cost = 0 if cells[beyond] == "$" else 2
    return "".join(cells), cost

View file

@ -0,0 +1,117 @@
from dataclasses import dataclass
from random import Random
from typing import Dict, Optional
import numpy as np
from ..factory import ProceduralDataset, register_dataset
@dataclass
class SokobanConfig:
    """Configuration for sokoban puzzle generation"""

    seed: Optional[int] = None
    size: int = 500
    min_w: int = 6  # Minimum width of the puzzle.
    min_h: int = 6  # Minimum height of the puzzle.
    max_w: int = 10  # Maximum width of the puzzle.
    max_h: int = 10  # Maximum height of the puzzle.
    min_boxes: int = 6  # Minimum number of boxes.
    max_boxes: int = 10  # Maximum number of boxes.

    def validate(self):
        """Validate configuration parameters"""
        # Every minimum must not exceed its matching maximum.
        bounds = (("min_w", "max_w"), ("min_h", "max_h"), ("min_boxes", "max_boxes"))
        for low, high in bounds:
            assert getattr(self, low) <= getattr(self, high), f"{low} must be lte {high}"
class SokobanDataset(ProceduralDataset):
    """Generates Sokoban games with configurable parameters"""

    def __init__(self, config: SokobanConfig):
        # NOTE(review): this template is not used anywhere in this class.
        self._prompt_templates = [
            "What will this Sokoban board look like after {simulation_steps} steps of simulation?\n\n{board}"
        ]
        # Keep our own reference so that generation can honour the settings.
        self._sokoban_config = config
        super().__init__(config=config, seed=config.seed, size=config.size)
        # lazy loading of sokoban imports
        from .contrib.sokoban.src.game import Game
        from .contrib.sokoban.src.generator import generate
        from .contrib.sokoban.src.utils import is_solved

        self._Game = Game
        self._generate = generate
        self._is_solved = is_solved

    def __getitem__(self, idx: int) -> dict:
        """Generate a single Sokoban task

        The puzzle is generated from a random generator seeded with
        ``self.seed + idx`` and sized according to the dataset's config.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: str, a solution string
                - metadata: dict with generation parameters
        """
        # Make the Sokoban!
        rng = Random(self.seed + idx)
        cfg = self._sokoban_config
        gamestr, solution, difficulty = self._generate(
            rng=rng,
            min_w=cfg.min_w,
            min_h=cfg.min_h,
            max_w=cfg.max_w,
            max_h=cfg.max_h,
            min_boxes=cfg.min_boxes,
            max_boxes=cfg.max_boxes,
        )
        return {
            "question": """You are going to solve a 'sokoban' puzzle.
* - The player
% - The player on a goal
@ - A box
X - A goal
$ - A box on a goal
+ - A wall
- - An empty position
Your solution must be a string of characters, ex: LDURRUDL.
Here is your puzzle:
"""
            + gamestr,
            "answer": solution,
            "metadata": {"gamestr": gamestr, "difficulty": difficulty},
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
        """Determine if the solution provided solves the Sokoban task.

        The answer is replayed move by move on the puzzle stored in the
        entry's metadata. The function awards 1.0 when the board is solved
        afterwards, 0.1 when it is not, 0.01 when replaying raises an error
        and 0.0 when no answer was given.

        Args:
            answer (Optional[str]): The user's answer.
            entry (Dict[str, any]): The original dataset entry containing the correct answer.
        Returns:
            float: The computed score between 0.0 and 1.0.
        """
        if answer is None:
            return 0.0
        try:
            grid_list = [list(line) for line in entry["metadata"]["gamestr"].replace(" ", "").strip().split("\n")]
            matrix = np.array(grid_list)
            game = self._Game()
            game.load_puzzle_matrix(matrix)
            for move in answer:
                game.player.update(key=move)
            if self._is_solved(game.get_curr_state()):
                return 1.0
        except Exception:
            # Scoring is best-effort: an answer that cannot be replayed gets a
            # minimal score instead of aborting the evaluation.
            return 0.01
        return 0.1
# Register the dataset class and its config class under the name "sokoban".
register_dataset("sokoban", SokobanDataset, SokobanConfig)