mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-28 17:29:39 +00:00
Sokoban without pygame (#77)
* add minified version of https://github.com/xbandrade/sokoban-solver-generator --------- Co-authored-by: Rich Jones <miserlou@gmail.com>
This commit is contained in:
parent
a8f9eafd43
commit
1b49713116
25 changed files with 1229 additions and 87 deletions
|
|
@ -11,6 +11,7 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
|
|||
from .maze import MazeConfig, MazeDataset
|
||||
from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
|
||||
from .n_queens import NQueensDataset
|
||||
from .sokoban import SokobanConfig, SokobanDataset
|
||||
from .sudoku import SudokuConfig, SudokuDataset
|
||||
from .tower_of_hanoi import HanoiConfig, HanoiDataset
|
||||
|
||||
|
|
@ -21,6 +22,8 @@ __all__ = [
|
|||
"MiniSudokuDataset",
|
||||
"SudokuConfig",
|
||||
"SudokuDataset",
|
||||
"SokobanConfig",
|
||||
"SokobanDataset",
|
||||
"MazeConfig",
|
||||
"MazeDataset",
|
||||
"GameOfLifeConfig",
|
||||
|
|
|
|||
0
reasoning_gym/games/contrib/__init__.py
Normal file
0
reasoning_gym/games/contrib/__init__.py
Normal file
21
reasoning_gym/games/contrib/sokoban/LICENSE
Normal file
21
reasoning_gym/games/contrib/sokoban/LICENSE
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2023 Bruno Andrade
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
52
reasoning_gym/games/contrib/sokoban/README.md
Normal file
52
reasoning_gym/games/contrib/sokoban/README.md
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# 📦 Sokoban Solver and Generator
|
||||
|
||||
This folder contains a minified version of Bruno Andrade's Sokoban game; all pygame dependencies have been stripped.
|
||||
|
||||
The original version can be found here: [xbandrade/sokoban-solver-generator](https://github.com/xbandrade/sokoban-solver-generator)
|
||||
|
||||
|
||||
This is a Sokoban puzzle generator and solver that uses BFS, A* and Dijkstra search algorithms.
|
||||
|
||||
`Sokoban` is a puzzle game in which the player pushes boxes around in a warehouse, trying to get every box to a goal.
|
||||
|
||||
|
||||
### ❕Sokoban Puzzle
|
||||
The puzzle states are stored in a matrix, and each element of the puzzle is represented by a single character in the matrix.
|
||||
```
|
||||
+ + + + + + +
|
||||
+ * - @ - X +
|
||||
+ + - @ - + +
|
||||
+ X - - - $ +
|
||||
+ + + + + + +
|
||||
```
|
||||
`*` - The player </br>
|
||||
`%` - The player on a goal </br>
|
||||
`@` - A box </br>
|
||||
`X` - A goal </br>
|
||||
`$` - A box on a goal </br>
|
||||
`+` - A wall </br>
|
||||
`-` - An empty position </br>
|
||||
|
||||
A box on a goal will have its color changed to green on the game window.
|
||||
|
||||
|
||||
### ❕Sokoban Generator
|
||||
|
||||
The generator will initially create a puzzle with a random board size, then the player and the boxes on goals will be randomly placed on the board.
|
||||
During generation the player can only pull boxes away from their positions, breaking every wall in their way, so the resulting puzzle is guaranteed to have a valid solution.
|
||||
|
||||
|
||||
### ❕ Sokoban Solver
|
||||
|
||||
The algorithms used to implement the Sokoban puzzle solvers were `Breadth-First Search(BFS)` and `A*`.
|
||||
|
||||
The `BFS` solver uses a queue to store the next states of the puzzle it needs to visit. A visited state is stored in a hashset, and BFS won't try to visit the same state twice.
|
||||
|
||||
The `A*` algorithm is similar to the BFS algorithm, but it uses a priority queue instead of a queue, and it prioritizes moves that are more likely to solve the problem.
|
||||
It does so by setting costs to the puzzle state and the player's movements, punishing the player with high costs for a bad move and rewarding the player with lower costs for a good move.
|
||||
The state costs are defined by heuristic functions, and this solver was implemented with two different heuristics: the `Manhattan Distance` function and `Dijkstra` distance function.
|
||||
|
||||
All three implementations check for possible deadlocks (states that are impossible to solve) before adding the new state to the queue.
|
||||
|
||||
|
||||
More about Sokoban: [Wikipedia Article](https://en.wikipedia.org/wiki/Sokoban)
|
||||
0
reasoning_gym/games/contrib/sokoban/__init__.py
Normal file
0
reasoning_gym/games/contrib/sokoban/__init__.py
Normal file
10
reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
Normal file
10
reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
+ + + + + + +
|
||||
+ - * - - - +
|
||||
+ - - - $ - +
|
||||
+ X - - @ - +
|
||||
+ - - - - - +
|
||||
+ $ - + - - +
|
||||
+ + - - - - +
|
||||
+ X @ - $ - +
|
||||
+ + - - - - +
|
||||
+ + + + + + +
|
||||
5
reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
Normal file
5
reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
+ + + + + + +
|
||||
+ * - @ - X +
|
||||
+ + - @ - + +
|
||||
+ X - - - - +
|
||||
+ + + + + + +
|
||||
6
reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
Normal file
6
reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
- - + + + + + +
|
||||
- + + - - - * +
|
||||
+ + - - - + X +
|
||||
+ X - @ - @ @ +
|
||||
+ X X @ - - - +
|
||||
+ + + + + + + +
|
||||
7
reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
Normal file
7
reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
- + + + + + + - - -
|
||||
- + X - - X + - - -
|
||||
+ + - @ @ + + - - -
|
||||
+ - - - - + + - - -
|
||||
+ - @ - - * + + + +
|
||||
+ + - - - - - - X +
|
||||
- + + + + + + + + +
|
||||
7
reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
Normal file
7
reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
- + + + + + + - -
|
||||
+ + X - @ - + + +
|
||||
+ - - - - - - - +
|
||||
+ - @ + + X - @ +
|
||||
+ - - - @ - + - +
|
||||
+ + + * - X - X +
|
||||
- - + + + + + + +
|
||||
7
reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
Normal file
7
reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
- + + + + + + + -
|
||||
+ + - - + - - + +
|
||||
+ - @ - - - @ - +
|
||||
+ - - X * X - - +
|
||||
+ + @ + + - - + +
|
||||
+ - - X - - - + -
|
||||
+ + + + + + + + -
|
||||
9
reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
Normal file
9
reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
- - - + + + + + + + +
|
||||
- - - + - - - - - - +
|
||||
- - + + - - - - @ - +
|
||||
- + + - - + + - + + +
|
||||
+ + - - + - - X - - +
|
||||
+ - - + X @ @ - - + +
|
||||
+ * + X - - - - + + -
|
||||
+ + - - - - - + + - -
|
||||
+ + + + + + + + - - -
|
||||
6
reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
Normal file
6
reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
+ + + + + + + +
|
||||
+ - - @ - X * +
|
||||
+ - @ - - + X +
|
||||
+ X X @ - @ @ +
|
||||
+ X X @ - - - +
|
||||
+ + + + + + + +
|
||||
0
reasoning_gym/games/contrib/sokoban/src/__init__.py
Normal file
0
reasoning_gym/games/contrib/sokoban/src/__init__.py
Normal file
81
reasoning_gym/games/contrib/sokoban/src/astar.py
Normal file
81
reasoning_gym/games/contrib/sokoban/src/astar.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
from collections import defaultdict
|
||||
from heapq import heappop, heappush
|
||||
|
||||
import numpy as np
|
||||
|
||||
from reasoning_gym.games.contrib.sokoban.src.utils import (
|
||||
can_move,
|
||||
dijkstra_sum,
|
||||
get_state,
|
||||
is_deadlock,
|
||||
is_solved,
|
||||
manhattan_sum,
|
||||
)
|
||||
|
||||
|
||||
def astar(matrix, player_pos, debug=False, heuristic="manhattan"):
    """Best-first search for a move sequence that solves a Sokoban board.

    Parameters:
        matrix: 2-D numpy character array describing the board; it is flattened
            to a state string with ``get_state``.
        player_pos: ``(row, col)`` of the player inside ``matrix``.
        debug: When true, progress and result messages are printed.
        heuristic: ``"manhattan"`` scores states with ``manhattan_sum``; any
            other value scores them with ``dijkstra_sum``.

    Returns:
        ``(path, depth)`` where ``path`` is a string over ``"UDLR"`` and
        ``depth`` is its length, or ``(None, -1)`` when the frontier is
        exhausted without reaching a solved state. A board that is already
        solved yields ``("", 0)``.
    """
    heur = "[A*]" if heuristic == "manhattan" else "[Dijkstra]"
    use_manhattan = heuristic == "manhattan"
    shape = matrix.shape
    initial_state = get_state(matrix)

    # Nothing to do for a solved board; without this guard the search would
    # return an arbitrary one-move path (or "not found" if the player is boxed in).
    if is_solved(initial_state):
        return ("", 0)

    # Scratch dict handed to dijkstra_sum, which stores per-position distance grids in it.
    distances = defaultdict(list)

    def estimate(state, pos):
        if use_manhattan:
            return manhattan_sum(state, pos, shape)
        return dijkstra_sum(state, pos, shape, distances)

    # can_move's "no move" result is pre-seeded so it is skipped like a visited state.
    seen = {None}
    heap = []
    heappush(heap, (0, estimate(initial_state, player_pos), initial_state, player_pos, 0, ""))
    moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
    direction = {
        (1, 0): "D",
        (-1, 0): "U",
        (0, -1): "L",
        (0, 1): "R",
    }
    depth = 0
    while heap:
        _, curr_cost, state, pos, depth, path = heappop(heap)
        seen.add(state)
        for move in moves:
            new_state, move_cost = can_move(state, shape, pos, move)
            # Membership test first: the deadlock scan is only needed for unseen states.
            if new_state in seen or is_deadlock(new_state, shape):
                continue
            new_pos = pos[0] + move[0], pos[1] + move[1]
            new_cost = estimate(new_state, new_pos)
            if new_cost == float("inf"):
                continue
            new_path = path + direction[move]
            # Priority is the move cost plus the heuristic value of the parent state.
            heappush(heap, (move_cost + curr_cost, new_cost, new_state, new_pos, depth + 1, new_path))
            if is_solved(new_state):
                if debug:
                    print(f"{heur} Solution Found!\n{new_path}", 20)
                return (new_path, depth + 1)
        if debug:
            print(f"{heur} Solution Depth: {depth + 1}\n{path + direction[move]}", 20)

    # Failure is reported only in debug mode so library callers get no stdout noise.
    if debug:
        print(f"{heur} Solution not found!\n")
        print(f"{heur} Solution Not Found!\nDepth {depth + 1}", 20)

    # The loop only exits once the heap is empty, so the reported depth is always -1.
    return (None, -1)
|
||||
|
||||
|
||||
def solve_astar(puzzle, visualizer=False, heuristic="manhattan"):
    """Locate the player in ``puzzle`` and run ``astar`` from that cell.

    The player is the first cell holding ``"*"`` or ``"%"``. ``visualizer`` is
    forwarded as ``astar``'s ``debug`` flag and ``heuristic`` is passed through.
    Returns whatever ``astar`` returns.
    """
    is_player = (puzzle == "*") | (puzzle == "%")
    hits = np.where(is_player)
    start = (hits[0][0], hits[1][0])
    return astar(puzzle, start, debug=visualizer, heuristic=heuristic)
|
||||
66
reasoning_gym/games/contrib/sokoban/src/bfs.py
Normal file
66
reasoning_gym/games/contrib/sokoban/src/bfs.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
import time
|
||||
from collections import deque
|
||||
|
||||
import numpy as np
|
||||
|
||||
from reasoning_gym.games.contrib.sokoban.src.utils import can_move, get_state, is_deadlock, is_solved, print_state
|
||||
|
||||
|
||||
def bfs(matrix, player_pos, debug=False):
    """Breadth-first search for a move sequence that solves a Sokoban board.

    Prints a banner and the initial board, then expands states in FIFO order.
    Returns ``(path, depth)`` for the first solved state generated (``path`` is
    a string over ``"UDLR"``), or ``(None, -1)`` once the queue is exhausted.
    Extra messages are printed when ``debug`` is true.
    """
    print("Breadth-First Search")
    initial_state = get_state(matrix)
    shape = matrix.shape
    print_state(initial_state, shape)

    letters = {(1, 0): "D", (-1, 0): "U", (0, -1): "L", (0, 1): "R"}
    # can_move's "no move" result is pre-seeded so it is skipped like a visited state.
    visited = {None}
    frontier = deque()
    frontier.append((initial_state, player_pos, 0, ""))

    while frontier:
        state, pos, depth, path = frontier.popleft()
        visited.add(state)
        for move in ((1, 0), (-1, 0), (0, -1), (0, 1)):
            new_state, _ = can_move(state, shape, pos, move)
            deadlock = is_deadlock(new_state, shape)
            if deadlock or new_state in visited:
                continue
            next_pos = (pos[0] + move[0], pos[1] + move[1])
            next_path = path + letters[move]
            frontier.append((new_state, next_pos, depth + 1, next_path))
            if not is_solved(new_state):
                continue
            print(f"[BFS] Solution found!\n\n{next_path}\nDepth {depth + 1}\n")
            if debug:
                print(f"[BFS] Solution Found!\n{next_path}", 20)
            return (next_path, depth + 1)
        if debug:
            print(f"[BFS] Solution Depth: {depth + 1}\n{path + letters[move]}", 20)

    print("[BFS] Solution not found!\n")
    if debug:
        print(f"[BFS] Solution Not Found!\nDepth {depth + 1}", 20)
    return (None, depth + 1 if frontier else -1)
|
||||
|
||||
|
||||
def solve_bfs(puzzle, visualizer=False):
    """Locate the player in ``puzzle`` and run ``bfs`` from that cell.

    The player is the first cell holding ``"*"`` or ``"%"``. ``visualizer`` is
    forwarded as ``bfs``'s ``debug`` flag. Returns whatever ``bfs`` returns.
    """
    is_player = (puzzle == "*") | (puzzle == "%")
    hits = np.where(is_player)
    start = (hits[0][0], hits[1][0])
    return bfs(puzzle, start, debug=visualizer)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: solve a bundled level (path is relative to the working directory)
    # and report the wall-clock time taken.
    start = time.time()
    level = np.loadtxt("levels/lvl7.dat", dtype="<U1")
    root = solve_bfs(level)
    elapsed = time.time() - start
    print(f"Runtime: {elapsed} seconds")
|
||||
34
reasoning_gym/games/contrib/sokoban/src/box.py
Normal file
34
reasoning_gym/games/contrib/sokoban/src/box.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
class Box:
    """A pushable box that keeps its own position in sync with the game grid.

    ``game`` must expose a ``puzzle`` mapping indexed by ``(row, col)`` whose
    cells carry ``char``, ``obj`` and ``ground`` attributes.
    """

    def __init__(self, x, y, game=None):
        self.game = game
        self.x = x
        self.y = y

    def can_move(self, move):
        """Try to push the box by ``move`` given as ``(dx, dy)``.

        The push succeeds unless the target cell is unpopulated (``None``) or
        holds another ``Box`` (walls are ``Obstacle`` instances, which subclass
        ``Box``). On success the grid characters and object references of both
        cells are updated and ``True`` is returned; otherwise nothing changes
        and ``False`` is returned.
        """
        target = self.y + move[1], self.x + move[0]
        curr = self.y, self.x
        target_elem = self.game.puzzle[target]
        # An unpopulated cell is not part of the level, so the box cannot enter it.
        if target_elem is None or isinstance(target_elem.obj, Box):
            return False
        curr_elem = self.game.puzzle[curr]
        self.y, self.x = target
        # A cell with a goal underneath shows the goal again once the box leaves.
        curr_elem.char = "X" if curr_elem.ground else "-"
        curr_elem.obj = None
        target_elem.char = "$" if target_elem.ground else "@"
        target_elem.obj = self
        return True

    def reverse_move(self, move):
        """Relocate the box by ``move`` given as ``(dy, dx)`` without any checks.

        Note the axis order differs from ``can_move``. The source and target
        cells get their object references and characters rewritten.
        """
        target = self.y + move[0], self.x + move[1]
        curr_pos = self.y, self.x
        puzzle = self.game.puzzle
        puzzle[curr_pos].obj = None
        puzzle[target].obj = self
        self.y, self.x = target
        puzzle[curr_pos].char = "X" if puzzle[curr_pos].ground else "-"
        puzzle[target].char = "$" if puzzle[target].ground else "@"


class Obstacle(Box):
    """A wall cell; it is a ``Box`` with no game reference, so ``isinstance(obj, Box)`` checks also block on walls."""

    def __init__(self, x, y):
        super().__init__(x=x, y=y)
|
||||
173
reasoning_gym/games/contrib/sokoban/src/game.py
Normal file
173
reasoning_gym/games/contrib/sokoban/src/game.py
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
from random import Random
|
||||
|
||||
import numpy as np
|
||||
|
||||
from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
|
||||
from reasoning_gym.games.contrib.sokoban.src.player import Player, ReversePlayer
|
||||
from reasoning_gym.games.contrib.sokoban.src.utils import get_state
|
||||
|
||||
|
||||
class Floor:
    """Ground tile identified by its ``x`` / ``y`` grid coordinates."""

    def __init__(self, x, y):
        self.x, self.y = x, y


class Goal(Floor):
    """Ground tile marking a goal; constructed exactly like ``Floor``."""
|
||||
|
||||
|
||||
class PuzzleElement:
    """One grid cell: its display character, the object on it and the ground under it."""

    def __init__(self, char: str, obj=None, ground=None):
        # char   - single character shown for this cell
        # obj    - object occupying the cell, or None
        # ground - ground marker under the cell, or None
        self.char, self.obj, self.ground = char, obj, ground

    def __str__(self) -> str:
        """Return the cell's display character."""
        return self.char
|
||||
|
||||
|
||||
class Game:
    """Sokoban board stored as a ``height`` x ``width`` grid of ``PuzzleElement`` cells.

    A loaded puzzle is placed into the grid at an offset of
    ``(pad_y, pad_x)``; cells that were never written stay ``None``.
    """

    def __init__(self, width=19, height=10, level=None, path=None):
        self.level = level
        self.width, self.height = width, height
        self.puzzle = np.empty((height, width), dtype=PuzzleElement)
        self.player = None
        self.puzzle_size = None
        self.pad_x = self.pad_y = 0
        self.path = path or f"levels/lvl{level}.dat"
        # Loading happens only for an explicit path, and only for Game itself
        # (subclasses are skipped).
        if path and type(self) is Game:
            self.load_puzzle()

    def get_matrix(self):
        """Return the loaded puzzle region as a ``<U1`` character array."""
        rows, cols = self.puzzle_size[0], self.puzzle_size[1]
        window = self.puzzle[self.pad_y : self.pad_y + rows, self.pad_x : self.pad_x + cols]
        matrix = np.empty((self.puzzle_size), dtype="<U1")
        for (h, w), cell in np.ndenumerate(window):
            matrix[h, w] = cell.char
        return matrix

    def get_curr_state(self):
        """Return the puzzle region flattened to a state string via ``get_state``."""
        return get_state(self.get_matrix())

    def print_puzzle(self):
        """Print the whole grid, one row per line; unpopulated cells print as a blank."""
        for h in range(self.height):
            cells = (self.puzzle[h, w] for w in range(self.width))
            line = "".join(f"{cell.char} " if cell else "  " for cell in cells)
            print(line + " ")

    def is_level_complete(self):
        """Return True when no cell shows ``"@"`` (a box that is not on a goal)."""
        return not any(
            self.puzzle[h, w] and self.puzzle[h, w].char == "@"
            for h in range(self.height)
            for w in range(self.width)
        )

    def load_puzzle(self):
        """Load the puzzle from ``self.path``; read or validation errors are printed, not raised."""
        try:
            with open(self.path) as f:
                # Each line is a row of whitespace-separated cell characters.
                data = [line.strip().split() for line in f]
                self._process_puzzle_data(data)
        except (OSError, ValueError) as e:
            print(f"{e}")
            return

    def load_puzzle_matrix(self, matrix):
        """Load the puzzle from a list of rows or a numpy array; validation errors are printed, not raised."""
        try:
            data = matrix.tolist() if isinstance(matrix, np.ndarray) else matrix
            self._process_puzzle_data(data)
        except ValueError as e:
            print(f"{e}")
            return

    def _process_puzzle_data(self, data):
        """Place ``data`` (rows of cell characters) into the grid and build the cell objects.

        Raises:
            ValueError: on a character other than ``+ @ * X $ %``, ``-`` or a space.
        """
        rows = len(data)
        cols = len(data[0]) if rows > 0 else 0
        self.puzzle_size = (rows, cols)
        # The extra -2 on the x axis matches the original file-based logic.
        pad_x = (self.width - cols - 2) // 2
        pad_y = (self.height - rows) // 2
        self.pad_x, self.pad_y = pad_x, pad_y

        for i, row in enumerate(data):
            y = i + pad_y
            for j, c in enumerate(row):
                x = j + pad_x
                cell = PuzzleElement(c)
                self.puzzle[y, x] = cell

                on_goal = c in ("X", "$", "%")
                if c == "+":  # wall
                    cell.obj = Obstacle(x=x, y=y)
                elif c in ("@", "$"):  # box, possibly on a goal
                    cell.obj = Box(x=x, y=y, game=self)
                elif c in ("*", "%"):  # player, possibly on a goal
                    cell.obj = Player(x=x, y=y, game=self)
                    self.player = cell.obj
                elif not on_goal and c not in " -":
                    raise ValueError(f"Invalid character in puzzle: {c}")
                if on_goal:
                    cell.ground = Goal(x=x, y=y)
|
||||
|
||||
|
||||
class ReverseGame(Game):
    """Board variant used for generation: its player is a ``ReversePlayer`` driven by ``rng``."""

    def __init__(self, rng: Random, width=19, height=10, level=None):
        super().__init__(width, height, level)
        self.rng = rng
        self.pad_x = self.pad_y = 0

    def load_puzzle(self, puzzle):
        """Place ``puzzle`` (rows of cell characters) into the grid and build the cell objects.

        Unlike ``Game``, unknown characters are stored without validation.
        """
        rows = len(puzzle)
        self.puzzle_size = (rows, len(puzzle[0]) if rows > 0 else 0)
        pad_x = (self.width - len(puzzle[0]) - 2) // 2
        pad_y = (self.height - rows) // 2
        self.pad_x, self.pad_y = pad_x, pad_y

        for i, row in enumerate(puzzle):
            y = i + pad_y
            for j, c in enumerate(row):
                x = j + pad_x
                cell = PuzzleElement(c)
                self.puzzle[y, x] = cell

                if c == "+":  # wall
                    cell.obj = Obstacle(x=x, y=y)
                elif c in ("@", "$"):  # box, possibly on a goal
                    cell.obj = Box(x=x, y=y, game=self)
                elif c in ("*", "%"):  # player, possibly on a goal
                    cell.obj = ReversePlayer(rng=self.rng, x=x, y=y, game=self)
                    self.player = cell.obj
                if c in ("X", "$", "%"):
                    cell.ground = Goal(x=x, y=y)
|
||||
107
reasoning_gym/games/contrib/sokoban/src/generator.py
Normal file
107
reasoning_gym/games/contrib/sokoban/src/generator.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
from random import Random
|
||||
|
||||
import numpy as np
|
||||
|
||||
from reasoning_gym.games.contrib.sokoban.src.astar import solve_astar
|
||||
from reasoning_gym.games.contrib.sokoban.src.game import Game, ReverseGame
|
||||
|
||||
|
||||
def num_boxes(puzzle_area, min_boxes, max_boxes, min_w, min_h, max_w, max_h):
    """Scale the box count linearly with the board area.

    The line passes through ``(min_w * min_h, min_boxes)`` and
    ``(max_w * max_h, max_boxes)``; the value at ``puzzle_area`` is truncated
    to an int. When the minimum and maximum areas coincide there is no range
    to interpolate over, so ``min_boxes`` is returned instead of dividing by zero.
    """
    min_area = min_w * min_h
    area_span = max_w * max_h - min_area
    if area_span == 0:
        return int(min_boxes)
    m = (max_boxes - min_boxes) / area_span
    b = min_boxes - m * min_area
    return int(m * puzzle_area + b)
|
||||
|
||||
|
||||
def random_valid(rng: Random, width: int = 10, height: int = 10):
    """Draw a random interior coordinate pair.

    The first value is drawn from ``[1, width - 1)`` and the second from
    ``[1, height - 1)``, in that order, using ``rng.randrange``.
    """
    first = rng.randrange(1, width - 1)
    second = rng.randrange(1, height - 1)
    return first, second
|
||||
|
||||
|
||||
def generate(
    rng: Random,
    debug: bool = False,
    path: str = None,
    min_w: int = 6,
    min_h: int = 6,
    max_w: int = 15,
    max_h: int = 10,
    min_boxes: int = 4,
    max_boxes: int = 10,
) -> tuple[str, str, dict]:
    """
    Generates a level with the given configuration parameters.

    A board full of walls gets a player and boxes-on-goals placed at random
    interior cells; a ``ReversePlayer`` then walks randomly, carving floor and
    pulling boxes off their goals. A candidate is accepted when at least half
    of the boxes ended up off a goal and the A* solver finds a solution;
    otherwise generation is retried with a new random board.

    Parameters:
        rng: Random number generator for reproducibility.
        debug: Whether to print the generated puzzle, its solution and a replay.
        path: Currently unused (saving the level to a file is disabled); kept
            for backward compatibility.
        min_w: Minimum width of the puzzle.
        min_h: Minimum height of the puzzle.
        max_w: Maximum width of the puzzle.
        max_h: Maximum height of the puzzle.
        min_boxes: Minimum number of boxes.
        max_boxes: Maximum number of boxes.
    Returns:
        ``(puzzle_string, solution, difficulty)`` where ``difficulty`` is a
        dict with the keys ``"size"`` (``(height, width)``) and ``"num_steps"``.
    """
    while True:
        width = rng.randint(min_w, max_w)
        height = rng.randint(min_h, max_h)
        puzzle_size = (height, width)
        puzzle = np.full(puzzle_size, "+", dtype="<U1")
        boxes = num_boxes(width * height, min_boxes, max_boxes, min_w, min_h, max_w, max_h)

        # random_valid returns (x, y) here ...
        player_pos = random_valid(rng, width, height)
        puzzle[player_pos[1], player_pos[0]] = "*"

        boxes_created = 0
        while boxes_created < boxes:
            # ... and (row, col) here because the arguments are swapped.
            box_pos = random_valid(rng, height, width)
            if puzzle[box_pos] == "+":
                puzzle[box_pos] = "$"
                boxes_created += 1

        # The board keeps its default 19x10 size for ordinary puzzles but grows
        # when needed, so the padding computed on load never becomes negative.
        board_w = max(19, width + 2)
        board_h = max(10, height)
        reverse_game = ReverseGame(rng=rng, width=board_w, height=board_h, level=0)
        reverse_game.load_puzzle(puzzle)
        player = reverse_game.player

        counter = round(height * width * rng.uniform(1.8, 3.6))
        while counter > 0:
            player.update(puzzle_size)
            # Stop early once the walk keeps revisiting the same board state.
            if player.states[player.curr_state] >= 20:
                break
            counter -= 1

        slice_x = slice(reverse_game.pad_x, reverse_game.pad_x + width)
        slice_y = slice(reverse_game.pad_y, reverse_game.pad_y + height)
        matrix = reverse_game.puzzle[slice_y, slice_x]
        if debug:
            player.print_puzzle(matrix)

        out_of_place_boxes = sum(str(cell) == "@" for cell in matrix.flatten())
        if out_of_place_boxes < boxes // 2:
            if debug:
                print(f"Not enough boxes out of place, retrying generation... [{out_of_place_boxes}/{boxes}]")
            continue

        puzzle_str = player.puzzle_to_string(matrix)
        grid_list = [list(line) for line in puzzle_str.replace(" ", "").strip().split("\n")]
        grid_array = np.array(grid_list)
        solution, _ = solve_astar(grid_array)

        # The solver reports failure as None; retry instead of crashing on len(None).
        if solution is None:
            if debug:
                print("Solver found no solution, retrying generation...")
            continue

        if debug:
            print(f"solution={solution}")
            game = Game(width=board_w, height=board_h)
            game.load_puzzle_matrix(grid_array)
            for step, move in enumerate(solution):
                print(f"move #{step}: {move}")
                game.player.update(key=move)
                game.print_puzzle()

        difficulty = {"size": puzzle_size, "num_steps": len(solution)}
        return puzzle_str, solution, difficulty


if __name__ == "__main__":
    generate(rng=Random(), debug=True)
|
||||
118
reasoning_gym/games/contrib/sokoban/src/player.py
Normal file
118
reasoning_gym/games/contrib/sokoban/src/player.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
from collections import defaultdict
|
||||
from random import Random
|
||||
|
||||
from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
|
||||
|
||||
|
||||
class Player:
    """A player that can only push boxes"""

    # Key letter -> (dx, dy) step on the grid.
    _MOVES = {"R": (1, 0), "L": (-1, 0), "U": (0, -1), "D": (0, 1)}

    def __init__(self, x, y, game):
        self.game = game
        self.x = x
        self.y = y

    def update(self, key: str = None) -> int:
        """Apply one move and report whether the player actually moved.

        Parameters:
            key: One of ``"R"``, ``"L"``, ``"U"``, ``"D"``; anything else is ignored.

        Returns:
            1 if the player moved (pushing a box when one was in the way and
            could move), 0 otherwise. The move is rejected when the target cell
            is unpopulated (``None``), holds an ``Obstacle``, or holds a box
            that cannot be pushed.
        """
        move = self._MOVES.get(key) if isinstance(key, str) else None
        if move is None:
            return 0

        curr = self.y, self.x
        target = self.y + move[1], self.x + move[0]
        target_elem = self.game.puzzle[target]
        # An unpopulated cell is not part of the level; stepping onto it used to
        # fail with AttributeError on ``None.obj``.
        if target_elem is None:
            return 0
        if isinstance(target_elem.obj, Obstacle):
            return 0
        if isinstance(target_elem.obj, Box) and not target_elem.obj.can_move(move):
            return 0

        curr_elem = self.game.puzzle[curr]
        self.y, self.x = target
        # A cell with a goal underneath shows the goal again once the player leaves.
        curr_elem.char = "X" if curr_elem.ground else "-"
        curr_elem.obj = None
        target_elem.char = "%" if target_elem.ground else "*"
        target_elem.obj = self
        return 1
|
||||
|
||||
|
||||
class ReversePlayer(Player):
    """A player that can only pull boxes"""

    def __init__(self, rng: Random, x, y, game=None, puzzle=None):
        super().__init__(x=x, y=y, game=game)
        self.game = game
        self.rng = rng
        self.puzzle = puzzle
        # Board state seen at the start of the latest update, and how often each state was seen.
        self.curr_state = ""
        self.states = defaultdict(int)
        self.prev_move = (0, 0)

    def print_puzzle(self, matrix=None):
        """Print the text rendering produced by ``puzzle_to_string``."""
        print(self.puzzle_to_string(matrix=matrix))

    def puzzle_to_string(self, matrix=None):
        """Render ``matrix`` (default: the whole game grid) as text.

        Every cell is followed by a space, unpopulated cells render as ``F``,
        each row ends with an extra space and a newline, and one more newline
        closes the string.
        """
        grid = self.game.puzzle if matrix is None else matrix
        height, width = len(grid), len(grid[0])
        rows = []
        for h in range(height):
            cells = (grid[h, w] for w in range(width))
            text = "".join((str(cell) if cell else "F") + " " for cell in cells)
            rows.append(text + " \n")
        return "".join(rows) + "\n"

    def get_state(self):
        """Return the characters of all populated grid cells, row by row, as one string."""
        board = self.game.puzzle
        height, width = len(board), len(board[0])
        return "".join(
            str(board[row, col]) for row in range(height) for col in range(width) if board[row, col]
        )

    def update(self, puzzle_size):
        """Take one random step, carving floor and pulling an adjacent box along.

        ``puzzle_size`` is ``(height, width)`` of the puzzle region. The step is
        a ``(dy, dx)`` pair; the move stored in ``prev_move`` is drawn with a
        reduced weight. A step onto the border of the puzzle region or onto a
        box is rejected. After a successful step, a box on the cell directly
        behind the player's old position is pulled onto that old position.
        """
        height, width = puzzle_size
        # Character a cell shows after the player leaves it, enters it, or a box is pulled off it.
        quick_chars = {
            "*": "-",
            "%": "X",
            "+": "*",
            "-": "*",
            "X": "%",
            "@": "-",
            "$": "X",
        }
        moves_tuples = [(1, 0), (-1, 0), (0, -1), (0, 1)]
        weights = [0.1 if m == self.prev_move else 1 for m in moves_tuples]
        (move,) = self.rng.choices(moves_tuples, weights=weights, k=1)

        self.curr_state = self.get_state()
        self.states[self.curr_state] += 1

        board = self.game.puzzle
        left, top = self.game.pad_x, self.game.pad_y
        curr_pos = self.y, self.x
        target = self.y + move[0], self.x + move[1]
        reverse_target = self.y - move[0], self.x - move[1]

        blocked = (
            target[1] == left
            or target[0] == top
            or target[1] >= left + width - 1
            or target[0] >= top + height - 1
            or (board[target] and board[target].char in "@$")
        )
        if blocked:
            self.prev_move = move
            return

        # Remember the opposite direction as the one to be down-weighted next time.
        self.prev_move = -move[0], -move[1]

        here = board[curr_pos]
        here.char = quick_chars[here.char]
        here.obj = None

        there = board[target]
        there.char = quick_chars[there.char]
        there.obj = self

        behind = board[reverse_target]
        if (c := behind.char) in "@$":
            behind.char = quick_chars[c]
            behind.obj.reverse_move(move)

        self.y, self.x = target
|
||||
170
reasoning_gym/games/contrib/sokoban/src/utils.py
Normal file
170
reasoning_gym/games/contrib/sokoban/src/utils.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
from heapq import heappop, heappush
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def print_state(state, shape):
    """Print ``state`` as a character matrix of the given ``(rows, cols)`` shape.

    Nothing is printed for an empty or ``None`` state.
    """
    if state:
        rows, cols = shape
        grid = np.array(list(state))
        print(grid.reshape(rows, cols))
|
||||
|
||||
|
||||
def find_boxes_and_goals(state, shape):
    """Collect the ``(row, col)`` positions of boxes and goals in a flat state string.

    Returns:
        ``(boxes, goals, boxes_on_goal)``: cells holding ``"@"``, cells holding
        ``"X"`` or ``"%"``, and cells holding ``"$"``, each in scan order.
    """
    _, width = shape
    boxes, goals, boxes_on_goal = [], [], []
    for pos, char in enumerate(state):
        if char == "@":
            bucket = boxes
        elif char in "X%":
            bucket = goals
        elif char == "$":
            bucket = boxes_on_goal
        else:
            continue
        # Flat index -> (row, col)
        bucket.append(divmod(pos, width))
    return boxes, goals, boxes_on_goal
|
||||
|
||||
|
||||
def get_state(matrix):
    """Flatten a character matrix into a state string.

    The array's raw bytes are decoded as UTF-8 and all NUL characters are
    removed. NOTE(review): for the ``<U1`` arrays used by the callers this
    presumably relies on the cells being ASCII, so that the padding bytes of
    each character are exactly the NULs that get stripped - confirm before
    using non-ASCII symbols.
    """
    raw = matrix.tobytes()
    text = raw.decode("utf-8")
    return text.replace("\x00", "")
|
||||
|
||||
|
||||
def is_solved(state):
    """Return True when the state contains no ``"@"`` (no box is off its goal)."""
    has_loose_box = "@" in state
    return not has_loose_box
|
||||
|
||||
|
||||
def manhattan_sum(state, player_pos, shape):
    """Heuristic cost of a state based on Manhattan distances.

    Each box that is off its goal costs ``height * width`` plus its Manhattan
    distance to the nearest free goal; the distance from the player to the
    nearest such box is added once.

    Parameters:
        state: Flat state string of the board.
        player_pos: ``(row, col)`` of the player.
        shape: ``(height, width)`` of the board.

    Returns:
        0 when no box is off its goal; ``float("inf")`` when a box is off its
        goal but no free goal exists (previously a ``ValueError`` from
        ``min()``); otherwise the summed cost.
    """
    height, width = shape
    player_row, player_col = player_pos
    boxes, goals, _ = find_boxes_and_goals(state, shape)
    if not boxes:
        return 0

    unreachable = float("inf")
    boxes_cost = len(boxes) * height * width
    for box_row, box_col in boxes:
        boxes_cost += min(
            (abs(box_row - goal_row) + abs(box_col - goal_col) for goal_row, goal_col in goals),
            default=unreachable,
        )
    # Independent of the loop above, so it is computed once.
    player_cost = min(abs(box_row - player_row) + abs(box_col - player_col) for box_row, box_col in boxes)
    return boxes_cost + player_cost
|
||||
|
||||
|
||||
def dijkstra(state, shape, box_pos=None, player_pos=None):
    """Compute step distances from a start cell to every interior cell.

    The start is ``box_pos`` when given, otherwise ``player_pos``. Only cells
    strictly inside the outer border are entered. Walls always block; boxes
    (``"@"`` and ``"$"``) block too unless ``player_pos`` is given.

    Returns:
        A ``height`` x ``width`` float array with ``inf`` for cells that were
        not reached.
    """
    height, width = shape
    start = box_pos or player_pos
    dist = np.full((height, width), float("inf"))
    dist[start] = 0
    blocked = "+" if player_pos else "+@$"
    frontier = [(0, start)]
    steps = ((1, 0), (-1, 0), (0, 1), (0, -1))

    while frontier:
        travelled, here = heappop(frontier)
        # Stale queue entry: a shorter route to this cell was already recorded.
        if travelled > dist[here]:
            continue
        for d_row, d_col in steps:
            row, col = here[0] + d_row, here[1] + d_col
            if not (1 <= row < height - 1 and 1 <= col < width - 1):
                continue
            if state[row * width + col] in blocked:
                continue
            candidate = travelled + 1
            if candidate < dist[row, col]:
                dist[row, col] = candidate
                heappush(frontier, (candidate, (row, col)))
    return dist
|
||||
|
||||
|
||||
def dijkstra_sum(state, player_pos, shape, distances):
    """Heuristic cost of a state based on shortest-path distances.

    Distance maps are computed with ``dijkstra`` for every box (on or off a
    goal) and for the player, and stored in ``distances`` keyed by position.
    The cost is then the sum of:
      * ``height * width`` for every box that is not yet on a goal,
      * for each such box, the distance to its nearest goal,
      * the distance from the player to the nearest such box (0 if none).

    Args:
        state: flattened board string.
        player_pos: ``(row, col)`` of the player.
        shape: ``(height, width)`` of the board.
        distances: mutable mapping that is updated in place with the freshly
            computed distance maps.

    Returns:
        The heuristic cost described above.
    """
    rows, cols = shape
    loose_boxes, goals, parked_boxes = find_boxes_and_goals(state, shape)

    # Refresh the distance maps: boxes first, then the player.
    for box in loose_boxes + parked_boxes:
        distances[box] = dijkstra(state, shape, box)
    distances[player_pos] = dijkstra(state, shape, player_pos=player_pos)

    penalty = len(loose_boxes) * rows * cols
    total = sum((min(distances[box][goal] for goal in goals) for box in loose_boxes), penalty)

    if loose_boxes:
        player_map = distances[player_pos]
        nearest_box = min(player_map[box] for box in loose_boxes)
    else:
        nearest_box = 0
    return total + nearest_box
|
||||
|
||||
|
||||
def _outnumbers_goals(state, indices):
    """Return True when the cells at ``indices`` hold more ``"@"`` boxes than ``"X"``/``"%"`` goals."""
    cells = [state[i] for i in indices]
    loose_boxes = cells.count("@")
    open_goals = sum(1 for cell in cells if cell in "X%")
    return loose_boxes > open_goals


def is_deadlock(state, shape):
    """Detect a few simple dead-end patterns in a board state.

    Three checks are run in order, each looking only at boxes that are not on
    a goal (``"@"``):
      1. corner deadlock: the box has a wall on one horizontal side and a
         wall on one vertical side;
      2. double box deadlock: a 2x2 square containing the box consists only
         of boxes and walls;
      3. too many boxes: row 1, row ``height - 2``, column 1 or column
         ``width - 2`` contains more ``"@"`` cells than goal cells
         (``"X"`` / ``"%"``).

    Args:
        state: flattened board string, indexed as ``row * width + col``.
        shape: ``(height, width)`` of the board.

    Returns:
        bool: True if any pattern matches; False otherwise, and also False
        when ``state`` is empty or its length does not match ``shape``.
    """
    height, width = shape
    if not state or len(state) != height * width:
        return False

    loose_boxes, _, _ = find_boxes_and_goals(state, shape)

    # 1. corner deadlock — (horizontal offset, vertical offset) pairs to test for walls
    corner_pairs = ((-1, -width), (1, width), (1, -width), (-1, width))
    for row, col in loose_boxes:
        anchor = row * width + col
        if any(state[anchor + side] == "+" and state[anchor + vert] == "+" for side, vert in corner_pairs):
            return True

    # 2. double box deadlock — the four 2x2 squares that include the box
    squares = (
        (0, -1, -width, -width - 1),
        (0, 1, -width, -width + 1),
        (0, -1, width - 1, width),
        (0, 1, width + 1, width),
    )
    immovable = {"@", "$", "+"}
    for row, col in loose_boxes:
        anchor = row * width + col
        for square in squares:
            symbols = {state[anchor + offset] for offset in square}
            if "@" in symbols and symbols <= immovable:
                return True

    # 3. too many boxes deadlock — inner top row, inner bottom row,
    #    inner left column, inner right column
    last_row_start = width * (height - 2)
    border_lines = (
        range(width + 1, 2 * width - 1),
        range(last_row_start + 1, last_row_start + width - 1),
        range(width + 1, width * (height - 1) + 1, width),
        range(2 * width - 2, width * height - 2, width),
    )
    for line in border_lines:
        if _outnumbers_goals(state, line):
            return True
    return False
|
||||
|
||||
|
||||
def can_move(state, shape, player_pos, move):
    """Apply one player move to a state and report its cost.

    Cell symbols: ``*`` player, ``%`` player on goal, ``@`` box, ``$`` box on
    goal, ``X`` goal, ``-`` empty, ``+`` wall.

    Args:
        state: flattened board string, indexed as ``row * width + col``.
        shape: ``(height, width)`` of the board; only the width is used.
        player_pos: ``(row, col)`` of the player.
        move: ``(d_row, d_col)`` step to take.

    Returns:
        tuple: ``(new_state, cost)``.
          * ``(None, 0)`` when the step runs into a wall, or into a box that
            cannot be pushed (wall or another box behind it);
          * cost 3 for a plain step onto an empty cell or goal;
          * cost 2 for pushing a box onto an empty cell, 0 for pushing it
            onto a goal;
          * the unchanged state with cost 0 if the cell ahead (or the cell
            behind a box) holds any other symbol.
    """
    cells = list(state)
    row, col = player_pos
    _, width = shape

    here = row * width + col
    ahead = (row + move[0]) * width + (col + move[1])
    beyond = (row + move[0] * 2) * width + (col + move[1] * 2)

    ahead_cell = state[ahead]
    if ahead_cell == "+":
        return None, 0

    # What the player leaves behind: an empty cell, or the goal it stood on.
    vacated = "-" if cells[here] == "*" else "X"
    cost = 0

    if ahead_cell in "-X":
        cells[here] = vacated
        cells[ahead] = "*" if ahead_cell == "-" else "%"
        cost = 3
    elif ahead_cell in "@$":
        beyond_cell = state[beyond]
        if beyond_cell in "+@$":
            return None, 0
        if beyond_cell in "-X":
            landed_on_goal = beyond_cell != "-"
            cells[beyond] = "$" if landed_on_goal else "@"
            cells[ahead] = "*" if ahead_cell == "@" else "%"
            cells[here] = vacated
            cost = 0 if landed_on_goal else 2

    return "".join(cells), cost
|
||||
117
reasoning_gym/games/sokoban.py
Normal file
117
reasoning_gym/games/sokoban.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
from dataclasses import dataclass
from random import Random
from typing import Any, Dict, Optional

import numpy as np

from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
||||
@dataclass
class SokobanConfig:
    """Settings used when generating Sokoban puzzles."""

    seed: Optional[int] = None  # Seed handed to the dataset base class.
    size: int = 500  # Dataset size handed to the dataset base class.
    min_w: int = 6  # Smallest allowed puzzle width.
    min_h: int = 6  # Smallest allowed puzzle height.
    max_w: int = 10  # Largest allowed puzzle width.
    max_h: int = 10  # Largest allowed puzzle height.
    min_boxes: int = 6  # Fewest boxes in a puzzle.
    max_boxes: int = 10  # Most boxes in a puzzle.

    def validate(self):
        """Check that every ``min_*`` limit does not exceed its ``max_*`` partner.

        Raises:
            AssertionError: if a minimum is greater than the matching maximum.
        """
        for suffix in ("w", "h", "boxes"):
            lower = getattr(self, f"min_{suffix}")
            upper = getattr(self, f"max_{suffix}")
            assert lower <= upper, f"min_{suffix} must be lte max_{suffix}"
|
||||
|
||||
|
||||
class SokobanDataset(ProceduralDataset):
    """Generates Sokoban games with configurable parameters"""

    def __init__(self, config: SokobanConfig):
        """Set up the dataset and lazily import the Sokoban engine.

        Args:
            config: generation settings; ``seed`` and ``size`` are forwarded
                to the base class.
        """
        # NOTE(review): this template is not referenced anywhere in this class
        # and its wording does not match the Sokoban task — confirm whether it
        # can be dropped.
        self._prompt_templates = [
            "What will this Sokoban board look like after {simulation_steps} steps of simulation?\n\n{board}"
        ]

        super().__init__(config=config, seed=config.seed, size=config.size)

        # lazy loading of sokoban imports
        from .contrib.sokoban.src.game import Game
        from .contrib.sokoban.src.generator import generate
        from .contrib.sokoban.src.utils import is_solved

        self._Game = Game
        self._generate = generate
        self._is_solved = is_solved

    def __getitem__(self, idx: int) -> dict:
        """Generate a single Sokoban task

        The puzzle is produced from a ``Random`` seeded with
        ``self.seed + idx``.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: str, a solution string
                - metadata: dict with the board string and its difficulty
        """

        # Make the Sokoban!
        rng = Random(self.seed + idx)
        # NOTE(review): only the RNG is passed to the generator; the size and
        # box limits from the config are not forwarded here — confirm intended.
        gamestr, solution, difficulty = self._generate(rng=rng)

        return {
            "question": """You are going to solve a 'sokoban' puzzle.

* - The player
% - The player on a goal
@ - A box
X - A goal
$ - A box on a goal
+ - A wall
- - An empty position

Your solution must be a string of characters, ex: LDURRUDL.

Here is your puzzle:
"""
            + gamestr,
            "answer": solution,
            "metadata": {"gamestr": gamestr, "difficulty": difficulty},
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
        """Determine if the solution provided solves the Sokoban task.

        The board stored in the entry's metadata is rebuilt and every
        character of ``answer`` is fed to the player as one move.

        Args:
            answer (Optional[str]): The user's answer.
            entry (Dict[str, Any]): The original dataset entry; its
                ``metadata["gamestr"]`` holds the board to replay on.

        Returns:
            float: 1.0 if the board is solved after replaying the answer,
            0.1 if the replay finishes without solving it, 0.01 if
            replaying raised an exception, 0.0 if no answer was given.
        """

        if answer is None:
            return 0.0

        try:
            grid_list = [list(line) for line in entry["metadata"]["gamestr"].replace(" ", "").strip().split("\n")]
            matrix = np.array(grid_list)

            game = self._Game()
            game.load_puzzle_matrix(matrix)

            for move in answer:
                game.player.update(key=move)

            if self._is_solved(game.get_curr_state()):
                return 1.0
        except Exception:
            # Deliberately broad: any failure while replaying an answer
            # (malformed entry, bad move, ...) is scored instead of crashing.
            return 0.01

        return 0.1
|
||||
|
||||
|
||||
# Register the dataset class together with its config class under the name "sokoban".
register_dataset("sokoban", SokobanDataset, SokobanConfig)
|
||||
Loading…
Add table
Add a link
Reference in a new issue