mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
This is a variant of the Game of Life task which, rather than testing algorithmic simulation, tests the model's ability to reason explanatorily about the board. The idea is that a model with good explanatory reasoning will be able to see that a game will not halt without simulating it into the future. The task presents a GoL board, and the model is asked to predict whether the board will halt (die, i.e. all cells zero) after n steps. Sometimes the board is made up of 'oscillators', isolated structures which never die. Other times it is filled with non-oscillators, structures which will always die after a few steps. The model should deduce which case the presented board belongs to.
27 lines
905 B
Python
27 lines
905 B
Python
import pytest
|
|
|
|
from reasoning_gym.algorithmic.game_of_life_halting import GameOfLifeHaltingConfig, GameOfLifeHaltingDataset
|
|
|
|
|
|
def test_game_of_life():
    """Check the structure and self-scoring of every generated dataset item.

    Builds a 10-item dataset from a fixed seed and, for each item, verifies
    that it is a dict exposing the expected top-level and metadata keys, and
    that ``score_answer`` yields 1.0 for the item's own answer and 0.0 when
    no answer is supplied.
    """
    # Easy configuration.
    cfg = GameOfLifeHaltingConfig(
        seed=42,
        size=10,
        difficulty=3,
        grid_size_x=25,
        grid_size_y=25,
        max_simulation_steps=99,
    )
    halting_dataset = GameOfLifeHaltingDataset(cfg)

    top_level_keys = ("question", "answer", "metadata")
    metadata_keys = ("grid_size_x", "grid_size_y")

    for entry in halting_dataset:
        assert isinstance(entry, dict)
        for key in top_level_keys:
            assert key in entry

        # Metadata must carry the grid dimensions.
        meta = entry["metadata"]
        for key in metadata_keys:
            assert key in meta

        # The reference answer scores full marks; a missing answer scores zero.
        reference_score = halting_dataset.score_answer(answer=entry["answer"], entry=entry)
        assert reference_score == 1.0
        missing_score = halting_dataset.score_answer(answer=None, entry=entry)
        assert missing_score == 0.0
|