mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
* remove min_value from AttributeDefinition
* remove type from AttributeDefinition
* Add CurriculumContext
* add ensure_interval option for RangeAttributes
* docs: Add legend explaining curriculum indicators in dataset gallery
* update GALLERY.md
190 lines
6.9 KiB
Python
190 lines
6.9 KiB
Python
import json
|
|
from dataclasses import dataclass
|
|
from random import Random
|
|
from typing import Any, Optional
|
|
|
|
import cellpylib as cpl
|
|
|
|
from ..coaching import BaseCurriculum, ScalarAttributeDefinition
|
|
from ..factory import ProceduralDataset, register_dataset
|
|
|
|
|
|
@dataclass
class GameOfLifeConfig:
    """Configuration for Game of Life puzzle generation.

    Attributes:
        grid_size_x: Grid extent along x; ``validate`` requires 3..999.
        grid_size_y: Grid extent along y; ``validate`` requires 3..999.
        filled_cells_weights: Fraction of the grid used to derive the default
            of ``filled_cells``.
        filled_cells: Number of cell placements attempted per board. Placements
            may collide, so this is an upper bound on live cells.
        simulation_steps: Number of generations to simulate (>= 0).
        seed: Optional base seed for the dataset.
        size: Dataset size handed to the dataset base class.
    """

    grid_size_x: int = 10
    grid_size_y: int = 10
    filled_cells_weights: float = 0.1
    # NOTE: this default is evaluated exactly once, while the class body runs,
    # from the default values above (0.1 * 10 * 10 -> 10). It is NOT recomputed
    # when an instance overrides grid_size_x / grid_size_y / filled_cells_weights;
    # pass filled_cells explicitly in that case.
    filled_cells: int = int(filled_cells_weights * grid_size_x * grid_size_y)  # actually a max
    simulation_steps: int = 1
    seed: Optional[int] = None
    size: int = 500

    def validate(self) -> None:
        """Validate configuration parameters.

        Raises:
            AssertionError: If a grid dimension is outside 3..999, if
                ``simulation_steps`` is negative, or if ``filled_cells``
                exceeds the number of cells in the grid.
        """
        # The messages state the bounds that are actually enforced (3..999).
        assert 3 <= self.grid_size_x <= 999, "grid_size_x must be between 3 and 999"
        assert 3 <= self.grid_size_y <= 999, "grid_size_y must be between 3 and 999"
        assert self.simulation_steps >= 0, "simulation_steps must be gte 0"
        assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
|
|
|
|
|
|
class GameOfLifeDataset(ProceduralDataset):
    """Generates Game of Life games with configurable parameters"""

    def __init__(self, config: GameOfLifeConfig):
        """Store the prompt templates and initialise the base dataset.

        Args:
            config: Generation parameters; ``config.seed`` and ``config.size``
                are forwarded to the base class.
        """
        self._prompt_templates = [
            "What will this Game of Life board look like after {simulation_steps} steps of simulation? Assume a Moore neighborhood and wrapping topology. Reply as array of arrays representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
        ]

        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single GameOfLife task

        The item is derived from a ``Random`` instance seeded with
        ``self.seed + idx``, so the same index yields the same task.

        Args:
            idx: Index of the item to generate.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: str, the final board serialised as compact JSON
                - metadata: dict with generation parameters
        """
        rng = Random(self.seed + idx)

        # Make the board. init_simple2d pre-sets a cell (the centre one, per the
        # cellpylib docs), so wipe everything before placing our own cells.
        board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
        board[:, :, :] = 0

        # Add the cells. Positions may repeat, so filled_cells is only an upper
        # bound on the number of live cells.
        for _ in range(self.config.filled_cells):
            rx = rng.randint(0, self.config.grid_size_x - 1)
            ry = rng.randint(0, self.config.grid_size_y - 1)
            board[:, rx, ry] = 1

        # Simulate the result to get the answer.
        # NOTE(review): the +1 presumably accounts for the initial board being
        # counted as the first timestep - confirm against cellpylib's evolve2d.
        evolved = cpl.evolve2d(
            board,
            timesteps=self.config.simulation_steps + 1,
            apply_rule=cpl.game_of_life_rule,
            memoize="recursive",
        )

        # Render the initial board one JSON row per line for the prompt.
        rows = [json.dumps(board[0, i].tolist(), separators=(",", ":")) for i in range(board.shape[1])]
        board_str = "[" + ",\n ".join(rows) + "]"

        final_step = evolved[-1]
        final_step_list = final_step.tolist()
        result_str = json.dumps(final_step_list, separators=(",", ":"))

        return {
            "question": rng.choice(self._prompt_templates).format(
                simulation_steps=self.config.simulation_steps, board=board_str
            ),
            "answer": result_str,
            "metadata": {
                "grid_size_x": self.config.grid_size_x,
                "grid_size_y": self.config.grid_size_y,
                "filled_cells": self.config.filled_cells,
                "simulation_steps": self.config.simulation_steps,
                "difficulty": {
                    "grid_size_x": self.config.grid_size_x,
                    "grid_size_y": self.config.grid_size_y,
                    "filled_cells_weights": self.config.filled_cells_weights,
                    "simulation_steps": self.config.simulation_steps,
                },
            },
        }

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        """Score a submitted board against the expected one, cell by cell.

        Every cell of the expected grid counts equally; the score is the
        fraction of expected cells whose value the answer reproduces at the
        same position. A fully correct answer therefore scores 1.0 and a
        partially correct one receives partial credit.

        Args:
            answer (Optional[str]): The user's answer, expected to be JSON.
            entry (dict[str, Any]): The original dataset entry containing the correct answer.

        Returns:
            float: The computed score between 0.0 and 1.0. 0.0 is returned when
            the answer is None, when either side fails to parse as JSON, or
            when the expected grid has no cells.
        """
        if answer is None:
            return 0.0

        try:
            ans_arr = json.loads(answer)
            correct_arr = json.loads(entry["answer"])
        except Exception:
            return 0.0

        total_cells = 0
        correct_cells = 0

        # Determine if the array is 2D (i.e. a list of lists)
        is_2d = correct_arr and isinstance(correct_arr[0], list)

        if is_2d:
            # Iterate over rows and columns of the expected grid.
            for i, expected_row in enumerate(correct_arr):
                for j, expected_value in enumerate(expected_row):
                    total_cells += 1
                    try:
                        if ans_arr[i][j] == expected_value:
                            correct_cells += 1
                    except (IndexError, TypeError, KeyError):
                        # The row or cell is missing, or the answer is valid
                        # JSON of the wrong shape (scalar -> TypeError,
                        # object -> KeyError). Treat the cell as incorrect.
                        pass
        else:
            # 1D array case.
            for i, expected_value in enumerate(correct_arr):
                total_cells += 1
                try:
                    if ans_arr[i] == expected_value:
                        correct_cells += 1
                except (IndexError, TypeError, KeyError):
                    # Same reasoning as the 2D branch: a malformed answer must
                    # score 0 for this cell instead of raising.
                    pass

        # If for some reason there are no cells, return 0.0.
        if total_cells == 0:
            return 0.0

        # Each cell contributes equally.
        return correct_cells / total_cells
|
|
|
|
|
|
class GameOfLifeCurriculum(BaseCurriculum):
    """Curriculum for the Game of Life dataset.

    Declares four scalar attributes, each with four levels, that map directly
    onto the GameOfLifeConfig fields of the same name.
    """

    def __init__(self):
        """Initialise the base curriculum and declare its attributes."""
        super().__init__(GameOfLifeCurriculum.__name__, GameOfLifeConfig)

        # One row per attribute: (name, levels, description). The attribute
        # name doubles as the config field name it controls.
        attribute_specs = (
            ("grid_size_x", [10, 100, 500, 999], "Grid size in the x direction"),
            ("grid_size_y", [10, 100, 500, 999], "Grid size in the y direction"),
            # Fraction of grid_size_x * grid_size_y to fill: 10%, 20%, 50%, 80%.
            ("filled_cells_weights", [0.1, 0.2, 0.5, 0.8], "Percentage of filled cells in the grid"),
            ("simulation_steps", [1, 2, 5, 10], "Number of simulation steps to run"),
        )

        definitions = [
            ScalarAttributeDefinition(
                name=attr_name,
                field_name=attr_name,
                levels=attr_levels,
                description=attr_description,
            )
            for attr_name, attr_levels, attr_description in attribute_specs
        ]

        # Define attributes
        self._define_attributes(*definitions)
|
|
|
|
|
|
# Register the dataset class together with its config and curriculum classes
# under the name "game_of_life".
register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig, GameOfLifeCurriculum)
|