mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
* init * fix tests * unify codeio * filtered for libraries not present in reasoning-gym * fix more bounds * puzzle24 * knight swap curriculum * fix number sorting * fix attributes * add validation of config in creation of dataset * dry run for instantiating and validating the datasets * remove unused imports * fix curriculum tests to reference newly updated attribute names
138 lines
4.9 KiB
Python
138 lines
4.9 KiB
Python
from dataclasses import dataclass
|
|
from random import Random
|
|
from typing import Optional
|
|
|
|
from ..coaching import BaseCurriculum, RangeAttributeDefinition
|
|
from ..dataset import ProceduralDataset
|
|
from ..factory import register_dataset
|
|
|
|
# Name passed to register_dataset at the bottom of this module; the same value
# is written to every generated item's metadata under "source_dataset".
DATASET_NAME = "arc_1d"
|
|
|
|
|
|
@dataclass
class Arc1DConfig:
    """Settings controlling how ARC 1D tasks are generated."""

    min_size: int = 10  # Lower bound (inclusive) for the sampled grid size
    max_size: int = 30  # Upper bound (inclusive) for the sampled grid size
    num_train: int = 3  # How many training examples each task shows
    seed: Optional[int] = None  # Forwarded to the dataset as its seed
    size: int = 500  # Forwarded to the dataset as its size

    def validate(self) -> None:
        """Check the configuration values.

        Raises:
            AssertionError: on the first violated constraint, checked in this
                order: ``min_size >= 8``, ``max_size >= min_size``,
                ``num_train > 0``, ``size > 0``.
        """
        smallest, largest = self.min_size, self.max_size
        assert 8 <= smallest, "min_size must be >= 8"
        assert smallest <= largest, "max_size must be >= min_size"
        assert 0 < self.num_train, "num_train must be positive"
        assert 0 < self.size, "size must be positive"
|
|
|
|
|
|
class Arc1DDataset(ProceduralDataset):
    """
    Procedural ARC 1D dataset: every item uses one randomly chosen task generator.

    The tasks are a procedural variant of the 1D-ARC dataset introduced in the paper
    `LLMs and the Abstraction and Reasoning Corpus: Successes, Failures, and the Importance
    of Object-based Representations` (https://arxiv.org/abs/2305.18354).

    Most of the ARC 1d task generators were originally written in rust by
    Ilya Sheprut (optozorax); for reasoning-gym they were machine-converted to python via Sonnet.

    The original rust code lives at: https://github.com/optozorax/arc_1d/
    """

    def __init__(self, config: Arc1DConfig):
        """Set up the dataset from ``config`` and load the task generator table."""
        # The task table is imported here, at construction time, not at module import.
        from .arc_1d_tasks import ARC_1D_TASKS

        super().__init__(config=config, seed=config.seed, size=config.size)
        self.task_names = list(ARC_1D_TASKS.keys())
        self.ARC_1D_TASKS = ARC_1D_TASKS

    def __getitem__(self, idx: int) -> dict:
        """Build the ARC 1D item for position ``idx``.

        Args:
            idx: Index of the item to generate

        Returns:
            dict with keys:
                - question: str, the instructions, the training examples and the test input
                - answer: str, the test output grid as space-separated values
                - metadata: dict with the task name, grid size, raw examples and difficulty
        """
        # Seeding from base seed + index gives each index its own deterministic RNG.
        item_rng = Random(self.seed + idx)

        # Draws happen in a fixed order: task, grid size, then the examples.
        task_name = item_rng.choice(self.task_names)
        generate, generate_kwargs = self.ARC_1D_TASKS[task_name]
        grid_size = item_rng.randint(self.config.min_size, self.config.max_size)

        def draw_example():
            # A generator call that returns None is simply retried.
            while True:
                candidate = generate(item_rng, grid_size, **generate_kwargs)
                if candidate is not None:
                    return candidate

        def render(cells):
            # Grids are shown as their cells separated by single spaces.
            return " ".join(str(cell) for cell in cells)

        train_examples = [draw_example() for _ in range(self.config.num_train)]
        test_example = draw_example()

        # Assemble the prompt: header, numbered examples, instructions, test input.
        prompt_parts = [
            "Find the common rule that maps an input grid to an output grid, given the examples below.\n\n"
        ]
        for number, example in enumerate(train_examples, 1):
            prompt_parts.append(
                f"Example {number}:\n"
                f"Input: {render(example['input'])}\n"
                f"Output: {render(example['output'])}\n\n"
            )
        prompt_parts.append(
            "Below is a test input grid. Predict the corresponding output grid by applying the rule you found. "
            "Describe how you derived the rule and your overall reasoning process in detail before you submit your answer. "
            "Your final answer should be just the test output grid itself.\n\n"
            "Input:\n"
        )
        prompt_parts.append(render(test_example["input"]))
        question = "".join(prompt_parts)

        metadata = {
            "source_dataset": DATASET_NAME,
            "source_index": idx,
            "task_name": task_name,
            "size": grid_size,
            "train_examples": train_examples,
            "test_example": test_example,
            "difficulty": {
                "size": (self.config.min_size, self.config.max_size),
            },
        }
        return {
            "question": question,
            "answer": render(test_example["output"]),
            "metadata": metadata,
        }
|
|
|
|
|
|
class Arc1DCurriculum(BaseCurriculum):
    """Curriculum for ARC 1D tasks, exposing a single ``size`` range attribute."""

    def __init__(self):
        """Initialise the base curriculum and declare the ``size`` attribute."""
        super().__init__(Arc1DCurriculum.__name__, Arc1DConfig)

        # The one attribute names min_size / max_size as its lower / upper config fields.
        grid_size_attribute = RangeAttributeDefinition(
            name="size",
            levels=[10, 25, 50, 100],
            lower_field_name="min_size",
            upper_field_name="max_size",
            description="Grid size",
        )
        self._define_attributes(grid_size_attribute)
|
|
|
|
|
|
# Register the dataset
|
|
# Runs when this module is imported: passes the dataset name together with the
# dataset, config and curriculum classes to the factory's register_dataset.
register_dataset(DATASET_NAME, Arc1DDataset, Arc1DConfig, Arc1DCurriculum)
|