reasoning-gym/reasoning_gym/algorithmic/game_of_life.py
Rich Jones 07dc01ad87
[Env] Game of Life Halting Prediction (#272)
This is a variant of the Game of Life task, which rather than trying to test the algorithmic simulation, tests the ability of the model to do explanatory reasoning of the board. The idea is that a model with good explanatory reasoning will be able to see that a game will not halt without simulating it into the future.

The task presents a GoL board, and the model is asked to predict if the board will halt (die, all cells zero) after n steps. Sometimes, the board will be made up of 'oscillators', isolated structures which never die. Othertimes, it is filled with non-oscillators, structures which will always die after a few steps. The model should deduce which case the presented board is.
2025-03-07 10:05:12 +01:00

146 lines
5.1 KiB
Python

import json
from dataclasses import dataclass
from random import Random
from typing import Any, Optional
import cellpylib as cpl
from ..factory import ProceduralDataset, register_dataset
@dataclass
class GameOfLifeConfig:
"""Configuration for Game of Life puzzle generation"""
grid_size_x: int = 10
grid_size_y: int = 10
filled_cells: int = 100 # actually a max
simulation_steps: int = 1
seed: Optional[int] = None
size: int = 500
def validate(self):
"""Validate configuration parameters"""
assert 3 <= self.grid_size_x <= 999, "grid_size_x must be between 0 and 999"
assert 3 <= self.grid_size_y <= 999, "grid_size_y must be between 0 and 999"
assert self.simulation_steps >= 0, "simulation_steps must be gte 0"
assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
class GameOfLifeDataset(ProceduralDataset):
"""Generates Game of Life games with configurable parameters"""
def __init__(self, config: GameOfLifeConfig):
self._prompt_templates = [
"What will this Game of Life board look like after {simulation_steps} steps of simulation? Assume a Moore neighborhood and wrapping topology. Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
]
super().__init__(config=config, seed=config.seed, size=config.size)
def __getitem__(self, idx: int) -> dict:
"""Generate a single GameOfLife task
Returns:
dict with keys:
- question: str, the task description
- answer: str, a solution string
- metadata: dict with generation parameters
"""
rng = Random(self.seed + idx)
# Make the board
board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
board[:, :, :] = 0
# Add the cells
for i in range(0, self.config.filled_cells):
rx = rng.randint(0, self.config.grid_size_x - 1)
ry = rng.randint(0, self.config.grid_size_y - 1)
board[:, rx, ry] = 1
# Simulate the result to get the answer
evolved = cpl.evolve2d(
board,
timesteps=self.config.simulation_steps + 1,
apply_rule=cpl.game_of_life_rule,
memoize="recursive",
)
rows = [json.dumps(board[0, i].tolist(), separators=(",", ":")) for i in range(board.shape[1])]
board_str = "[" + ",\n ".join(rows) + "]"
final_step = evolved[-1]
final_step_list = final_step.tolist()
result_str = json.dumps(final_step_list, separators=(",", ":"))
return {
"question": rng.choice(self._prompt_templates).format(
simulation_steps=self.config.simulation_steps, board=board_str
),
"answer": result_str,
"metadata": {
"grid_size_x": self.config.grid_size_x,
"grid_size_y": self.config.grid_size_y,
"filled_cells": self.config.filled_cells,
"simulation_steps": self.config.simulation_steps,
},
}
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
"""Determine if the solution provided solves the GoL task.
The function awards 1.0 for a correct answer.
Args:
answer (Optional[str]): The user's answer.
entry (dict[str, Any]): The original dataset entry containing the correct answer.
Returns:
float: The computed score between 0.0 and 1.0.
"""
if answer == None:
return 0.0
try:
ans_arr = json.loads(answer)
correct_arr = json.loads(entry["answer"])
except Exception:
return 0.0
total_cells = 0
correct_cells = 0
# Determine if the array is 2D (i.e. a list of lists)
is_2d = correct_arr and isinstance(correct_arr[0], list)
if is_2d:
# Iterate over rows and columns of the expected grid.
for i, expected_row in enumerate(correct_arr):
for j, expected_value in enumerate(expected_row):
total_cells += 1
try:
if ans_arr[i][j] == expected_value:
correct_cells += 1
except (IndexError, TypeError):
# Either the row or the cell is missing, treat as incorrect.
pass
else:
# 1D array case.
for i, expected_value in enumerate(correct_arr):
total_cells += 1
try:
if ans_arr[i] == expected_value:
correct_cells += 1
except IndexError:
pass
# If for some reason there are no cells, return 0.0.
if total_cells == 0:
return 0.0
# Each cell contributes equally.
return correct_cells / total_cells
register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)