mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
This is a variant of the Game of Life task which, rather than testing algorithmic simulation, tests the model's ability to reason explanatorily about the board. The idea is that a model with good explanatory reasoning will be able to see that a game will not halt without simulating it into the future. The task presents a GoL board, and the model is asked to predict whether the board will halt (die, i.e. all cells zero) after n steps. Sometimes the board is made up of 'oscillators', isolated structures which never die. Other times it is filled with non-oscillators, structures which will always die after a few steps. The model should deduce which of the two cases the presented board falls into.
146 lines
5.1 KiB
Python
146 lines
5.1 KiB
Python
import json
|
|
from dataclasses import dataclass
|
|
from random import Random
|
|
from typing import Any, Optional
|
|
|
|
import cellpylib as cpl
|
|
|
|
from ..factory import ProceduralDataset, register_dataset
|
|
|
|
|
|
@dataclass
class GameOfLifeConfig:
    """Configuration for Game of Life puzzle generation"""

    # Board dimensions; each must lie in [3, 999] (enforced by validate()).
    grid_size_x: int = 10
    grid_size_y: int = 10
    # Number of random cell placements. Placements may land on the same cell
    # more than once, so this is an upper bound on live cells, not an exact count.
    filled_cells: int = 100  # actually a max
    # Number of Game of Life steps to apply before reading off the answer.
    simulation_steps: int = 1
    # Base seed for the dataset; None defers to the base class's handling.
    seed: Optional[int] = None
    # Forwarded to the dataset base class as `size` (presumably the number of items).
    size: int = 500

    def validate(self):
        """Validate configuration parameters.

        Raises:
            AssertionError: if a grid dimension is outside [3, 999], if
                ``simulation_steps`` is negative, or if ``filled_cells``
                exceeds the number of cells on the board.
        """
        # The messages state the bound that is actually enforced (3, not 0).
        assert 3 <= self.grid_size_x <= 999, "grid_size_x must be between 3 and 999"
        assert 3 <= self.grid_size_y <= 999, "grid_size_y must be between 3 and 999"
        assert self.simulation_steps >= 0, "simulation_steps must be gte 0"
        assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
|
|
|
|
|
|
class GameOfLifeDataset(ProceduralDataset):
    """Generates Game of Life games with configurable parameters"""

    def __init__(self, config: GameOfLifeConfig):
        """Store the prompt templates and initialise the base dataset.

        Args:
            config: generation parameters; its ``seed`` and ``size`` are
                forwarded to the base class.
        """
        self._prompt_templates = [
            "What will this Game of Life board look like after {simulation_steps} steps of simulation? Assume a Moore neighborhood and wrapping topology. Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
        ]

        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single GameOfLife task

        The item is derived from ``self.seed + idx``, so the same index
        yields the same board for a given dataset seed.

        Args:
            idx: index of the item to generate.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: str, a solution string (the final grid as compact JSON)
                - metadata: dict with generation parameters
        """
        rng = Random(self.seed + idx)

        # Make the board
        board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
        # Clear whatever cells the initialiser set so the board starts empty.
        board[:, :, :] = 0

        # Add the cells. Coordinates are drawn independently each time, so the
        # same cell may be chosen more than once; filled_cells is an upper bound.
        for _ in range(self.config.filled_cells):
            rx = rng.randint(0, self.config.grid_size_x - 1)
            ry = rng.randint(0, self.config.grid_size_y - 1)
            board[:, rx, ry] = 1

        # Simulate the result to get the answer.
        # NOTE(review): the "+ 1" presumably accounts for cellpylib counting the
        # initial state as a timestep - confirm against the cellpylib docs.
        evolved = cpl.evolve2d(
            board,
            timesteps=self.config.simulation_steps + 1,
            apply_rule=cpl.game_of_life_rule,
            memoize="recursive",
        )

        # Render the initial board one row per line for the prompt.
        rows = [json.dumps(board[0, i].tolist(), separators=(",", ":")) for i in range(board.shape[1])]
        board_str = "[" + ",\n ".join(rows) + "]"

        # The last entry of the evolution is the expected answer.
        final_step = evolved[-1]
        final_step_list = final_step.tolist()
        result_str = json.dumps(final_step_list, separators=(",", ":"))

        return {
            "question": rng.choice(self._prompt_templates).format(
                simulation_steps=self.config.simulation_steps, board=board_str
            ),
            "answer": result_str,
            "metadata": {
                "grid_size_x": self.config.grid_size_x,
                "grid_size_y": self.config.grid_size_y,
                "filled_cells": self.config.filled_cells,
                "simulation_steps": self.config.simulation_steps,
            },
        }

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        """Determine how well the solution provided solves the GoL task.

        The score is the fraction of cells of the expected grid that the
        answer reproduces at the same position, so a fully correct answer
        scores 1.0 and partially correct answers receive partial credit.
        Cells the answer lacks (too few rows/columns, or an answer that is
        not a nested array at all) count as incorrect.

        Args:
            answer (Optional[str]): The user's answer.
            entry (dict[str, Any]): The original dataset entry containing the correct answer.

        Returns:
            float: The computed score between 0.0 and 1.0. 0.0 is returned when
            the answer is None, when either answer fails to parse as JSON, or
            when the expected grid has no cells.
        """
        if answer is None:
            return 0.0

        try:
            ans_arr = json.loads(answer)
            correct_arr = json.loads(entry["answer"])
        except Exception:
            # Deliberately broad: any malformed answer simply scores zero.
            return 0.0

        total_cells = 0
        correct_cells = 0

        # Determine if the array is 2D (i.e. a list of lists)
        is_2d = correct_arr and isinstance(correct_arr[0], list)

        # Valid JSON is not necessarily a list: indexing an object raises
        # KeyError and indexing a number/bool/null raises TypeError. All of
        # these mean "cell missing" and must not crash the scorer.
        missing_cell_errors = (IndexError, KeyError, TypeError)

        if is_2d:
            # Iterate over rows and columns of the expected grid.
            for i, expected_row in enumerate(correct_arr):
                for j, expected_value in enumerate(expected_row):
                    total_cells += 1
                    try:
                        if ans_arr[i][j] == expected_value:
                            correct_cells += 1
                    except missing_cell_errors:
                        # Either the row or the cell is missing, treat as incorrect.
                        pass
        else:
            # 1D array case.
            for i, expected_value in enumerate(correct_arr):
                total_cells += 1
                try:
                    if ans_arr[i] == expected_value:
                        correct_cells += 1
                except missing_cell_errors:
                    pass

        # If for some reason there are no cells, return 0.0.
        if total_cells == 0:
            return 0.0

        # Each cell contributes equally.
        return correct_cells / total_cells
|
|
|
|
|
|
# Register the dataset class and its config class with the factory under the
# name "game_of_life" (presumably so it can be looked up by that name).
register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)
|