reasoning-gym/reasoning_gym/algorithmic/game_of_life_halting.py
Zafir Stojanovski dced3bfc45
fix(curriculum): Make boundaries in curriculum more sensible (#407)
* init

* fix tests

* unify codeio

* filtered for libraries not present in reasoning-gym

* fix more bounds

* puzzle24

* knight swap curriculum

* fix number sorting

* fix attributes

* add validation of config in creation of dataset

* dry run for instantiating and validating the datasets

* remove unused imports

* fix curriculum tests to reference newly updated attribute names
2025-04-04 20:24:14 +02:00

445 lines
15 KiB
Python

from dataclasses import dataclass
from random import Random
from typing import Dict, List, Optional
import cellpylib as cpl
from ..coaching import BaseCurriculum, ScalarAttributeDefinition
from ..factory import ProceduralDataset, register_dataset
DATASET_NAME = "game_of_life_halting"
@dataclass
class GameOfLifeHaltingConfig:
"""Configuration for Game of Life halting problems generation"""
grid_size_x: int = 12
grid_size_y: int = 12
difficulty: int = 1
num_oscillators: int = 5
max_simulation_steps: int = 20
seed: Optional[int] = None
size: int = 500
def validate(self):
"""Validate configuration parameters"""
assert self.difficulty in (1, 2, 3), "difficulty must be one of (1, 2, 3)"
if self.difficulty == 1:
assert self.grid_size_x >= 7, "grid_size_x must be gte 7 (difficulty 1)"
assert self.grid_size_y >= 7, "grid_size_y must be gte 7 (difficulty 1)"
if self.difficulty == 2:
assert self.grid_size_x >= 13, "grid_size_x must be gte 13 (difficulty 2)"
assert self.grid_size_y >= 13, "grid_size_y must be gte 13 (difficulty 2)"
if self.difficulty == 3:
assert self.grid_size_x >= 25, "grid_size_x must be gte 25 (difficulty 3)"
assert self.grid_size_y >= 25, "grid_size_y must be gte 25 (difficulty 3)"
class GameOfLifeHaltingDataset(ProceduralDataset):
"""Generates Game of Life games with configurable parameters
This is a variant of the Game of Life task, which rather than trying to test the algorithmic simulation, tests
the ability of the model to do explanatory reasoning of the board. The idea is that a model with good
explanatory reasoning will be able to see that a game will not halt without simulating it into the future.
The task presents a GoL board, and the model is asked to predict if the board will halt (die, all cells zero)
after n steps. Sometimes, the board will be made up of 'oscillators', isolated structures which never die.
Othertimes, it is filled with non-oscillators, structures which will always die after a few steps. The model
should deduce which case the presented board is.
"""
# via this great wiki https://conwaylife.com/wiki/oscillator
OSCILLATORS = [
# Easy
{
"name": "blinker",
"size_x": 3,
"size_y": 3,
"period": 2,
"difficulty": 1,
"cells": [
[0, 0, 0],
[1, 1, 1],
[0, 0, 0],
],
},
{
"name": "toad",
"size_x": 4,
"size_y": 4,
"period": 2,
"difficulty": 1,
"cells": [
[0, 1, 1, 0],
[1, 0, 0, 0],
[0, 0, 0, 1],
[0, 1, 1, 0],
],
},
{
"name": "clock",
"size_x": 4,
"size_y": 4,
"period": 2,
"difficulty": 1,
"cells": [
[0, 0, 1, 0],
[1, 0, 1, 0],
[0, 1, 0, 1],
[0, 1, 0, 0],
],
},
{
"name": "bipole",
"size_x": 5,
"size_y": 5,
"period": 2,
"difficulty": 1,
"cells": [
[0, 0, 0, 1, 1],
[0, 0, 1, 0, 1],
[0, 0, 0, 0, 0],
[1, 0, 1, 0, 0],
[1, 1, 0, 0, 0],
],
},
{
"name": "tripole",
"size_x": 6,
"size_y": 6,
"period": 2,
"difficulty": 1,
"cells": [
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0],
[0, 1, 0, 1, 0, 0],
[1, 0, 0, 0, 0, 0],
[1, 1, 0, 0, 0, 0],
],
},
# Medium
{
"name": "caterer",
"size_x": 6,
"size_y": 9,
"period": 3,
"difficulty": 2,
"cells": [
[0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 1, 0],
[0, 1, 1, 0, 0, 0],
[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
},
{
"name": "mold",
"size_x": 6,
"size_y": 6,
"period": 4,
"difficulty": 2,
"cells": [
[0, 0, 0, 1, 1, 0],
[0, 0, 1, 0, 0, 1],
[1, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 1, 0],
[1, 0, 1, 1, 0, 0],
[0, 1, 0, 0, 0, 0],
],
},
{
"name": "pinwheel",
"size_x": 12,
"size_y": 12,
"period": 4,
"difficulty": 2,
"cells": [
[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
[1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0],
[1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1],
[0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1],
[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
],
},
# Hard
{
"name": "pentadecathlon",
"size_x": 16,
"size_y": 9,
"period": 15,
"difficulty": 3,
"cells": [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0],
[1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1],
[0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
],
},
]
NON_OSCILLATORS = [
{
"size_x": 3,
"size_y": 3,
"cells": [
[1, 0, 0],
[0, 1, 0],
[0, 0, 1],
],
},
{
"size_x": 3,
"size_y": 3,
"cells": [
[0, 0, 1],
[0, 1, 0],
[1, 0, 0],
],
},
{
"size_x": 3,
"size_y": 3,
"cells": [
[1, 0, 0],
[0, 1, 0],
[1, 0, 0],
],
},
{
"size_x": 3,
"size_y": 3,
"cells": [
[0, 0, 1],
[0, 1, 0],
[0, 0, 1],
],
},
{
"size_x": 4,
"size_y": 4,
"cells": [
[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1],
],
},
{
"size_x": 5,
"size_y": 5,
"cells": [
[1, 0, 0, 0, 0],
[0, 1, 0, 0, 0],
[0, 0, 1, 0, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 0, 1],
],
},
{
"size_x": 6,
"size_y": 6,
"cells": [
[1, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 1],
],
},
]
def __init__(self, config: GameOfLifeHaltingConfig):
super().__init__(config=config, seed=config.seed, size=config.size)
def __getitem__(self, idx: int) -> dict:
"""Generate a single GameOfLife task
Returns:
dict with keys:
- question: str, the task description
- answer: str, a solution string
- metadata: dict with generation parameters
"""
# Create a reproducible random generator for this index.
rng = Random(self.seed + idx)
# Flip a coin to decide if we should oscillate.
should_oscillate = rng.choice([True, False])
# Get dimensions for convenience.
grid_x = self.config.grid_size_x
grid_y = self.config.grid_size_y
# Initialize the board.
# Note: cpl.init_simple2d returns an array with shape
# (timesteps, grid_size_x, grid_size_y)
board = cpl.init_simple2d(grid_x, grid_y)
board[:, :, :] = 0 # reset all cells to dead
# We will place patterns on the initial board (timestep 0).
initial_board = board[0]
# Create an occupancy grid to keep track of which cells (and their 1-cell buffer)
# are already occupied by a pattern.
occupancy = [[False for _ in range(grid_y)] for _ in range(grid_x)]
# Determine which set of patterns to use based on should_oscillate.
if should_oscillate:
valid_patterns = [osc for osc in self.OSCILLATORS if osc["difficulty"] == self.config.difficulty]
else:
valid_patterns = self.NON_OSCILLATORS
placed_patterns: List[Dict] = []
# Place the requested number of patterns.
for _ in range(self.config.num_oscillators):
pattern = rng.choice(valid_patterns)
height = pattern["size_y"]
width = pattern["size_x"]
# Ensure the pattern (plus a 1-cell border) fits in the grid.
# Valid top-left positions (i,j) must satisfy:
# 1 <= i <= grid_x - height - 1 and 1 <= j <= grid_y - width - 1.
attempts = 1000
placed = False
while attempts > 0 and not placed:
i = rng.randint(1, grid_x - height - 1)
j = rng.randint(1, grid_y - width - 1)
# Check if the region from (i-1, j-1) to (i+height, j+width) is free.
valid = True
for x in range(i - 1, i + height + 1):
for y in range(j - 1, j + width + 1):
if occupancy[x][y]:
valid = False
break
if not valid:
break
if valid:
# Mark the region (including the 1-cell border) as occupied.
for x in range(i - 1, i + height + 1):
for y in range(j - 1, j + width + 1):
occupancy[x][y] = True
# Place the pattern on the initial board.
for dx in range(height):
for dy in range(width):
initial_board[i + dx, j + dy] = pattern["cells"][dx][dy]
placed = True
placed_patterns.append({"name": pattern.get("name", "non-oscillator"), "position": (i, j)})
attempts -= 1
# If no valid placement is found after many attempts, we skip this pattern.
# Convert the initial board state to string
board_str = str(initial_board)
# Create the question string.
question = (
f"This is a 'Game of Life' grid. We consider a game halted if there are no cells alive.\n"
f"Will this game halt at or before {self.config.max_simulation_steps} steps? Assume a Moore neighborhood and wrapping topology. If it will halt, reply 'True'. If it won't halt, reply 'False'.\n\n"
f"Initial board:\n{board_str}"
)
return {
"question": question,
"answer": str(not should_oscillate),
"metadata": {
"source_dataset": DATASET_NAME,
"source_index": idx,
"grid_size_x": grid_x,
"grid_size_y": grid_y,
"placed_patterns": placed_patterns,
"simulation_steps": self.config.max_simulation_steps,
"should_oscillate": should_oscillate,
"difficulty": {
"grid_size_x": self.config.grid_size_x,
"grid_size_y": self.config.grid_size_y,
"difficulty": self.config.difficulty,
"num_oscillators": self.config.num_oscillators,
"max_simulation_steps": self.config.max_simulation_steps,
},
},
}
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
"""Determine if the solution provided solves the GoL task.
The function awards 1.0 for a correct answer.
Args:
answer (Optional[str]): The user's answer.
entry (Dict[str, any]): The original dataset entry containing the correct answer.
Returns:
float: The computed score between 0.0 and 1.0.
"""
if answer is not None and bool(answer) == bool(entry["answer"]):
# python's bool conversion is very tolerant and normally doesn't raise exceptions
return 1.0
return 0.0
class GameOfLifeHaltingCurriculum(BaseCurriculum):
"""Curriculum for Game of Life Halting dataset"""
def __init__(self):
super().__init__(GameOfLifeHaltingCurriculum.__name__, GameOfLifeHaltingConfig)
# Define attributes
self._define_attributes(
ScalarAttributeDefinition(
name="grid_size_x",
field_name="grid_size_x",
levels=[10, 25, 50, 100],
description="Grid size in the x direction",
),
ScalarAttributeDefinition(
name="grid_size_y",
field_name="grid_size_y",
levels=[10, 25, 50, 100],
description="Grid size in the y direction",
),
ScalarAttributeDefinition(
name="difficulty",
field_name="difficulty",
levels=[1, 2, 3],
description="Oscillator type difficulty",
),
ScalarAttributeDefinition(
name="num_oscillators",
field_name="num_oscillators",
levels=[3, 7, 10, 20],
description="Number of oscillators to place",
),
ScalarAttributeDefinition(
name="max_simulation_steps",
field_name="max_simulation_steps",
levels=[20, 50, 100, 200],
description="Number of simulation steps to query",
),
)
register_dataset(DATASET_NAME, GameOfLifeHaltingDataset, GameOfLifeHaltingConfig, GameOfLifeHaltingCurriculum)