refactor: Inherit NumberFilteringDataset and LetterCountingDataset from ProceduralDataset

This commit is contained in:
Andreas Koepf (aider) 2025-01-24 11:13:32 +01:00
parent 3e3aec1b37
commit 8b49af5535
2 changed files with 8 additions and 33 deletions

View file

@@ -5,6 +5,8 @@ from dataclasses import dataclass
from random import Random
from typing import List, Optional
from ..dataset import ProceduralDataset
from reasoning_gym.data import read_data_file
@@ -23,33 +25,19 @@ class LetterCountingConfig:
assert self.max_words >= self.min_words, "max_words must be >= min_words"
class LetterCountingDataset:
class LetterCountingDataset(ProceduralDataset):
"""Generates letter counting tasks from text spans"""
def __init__(self, config: LetterCountingConfig):
self.config = config
self.config.validate()
self.seed = config.seed if config.seed is not None else Random().randint(0, 2**32)
super().__init__(seed=config.seed, size=config.size)
# Load and preprocess text
text = read_data_file("in_the_year_2889.txt")
# Extract words and clean them to contain only alphanumeric characters
self.words = [word for word in re.findall(r"\b\w+\b", text) if word.isalnum()]
def __len__(self) -> int:
return self.config.size
def __iter__(self):
self._current_idx = 0
return self
def __next__(self):
if self._current_idx >= self.config.size:
raise StopIteration
item = self[self._current_idx]
self._current_idx += 1
return item
def __getitem__(self, idx: int) -> dict:
"""Generate a single letter counting task"""
rng = Random(self.seed + idx)

View file

@@ -1,10 +1,11 @@
"""Number filtering task generator"""
import random
from dataclasses import dataclass
from random import Random
from typing import List, Optional, Tuple
from ..dataset import ProceduralDataset
@dataclass
class NumberFilteringConfig:
@@ -28,27 +29,13 @@ class NumberFilteringConfig:
assert self.max_value > self.min_value, "max_value must be > min_value"
class NumberFilteringDataset:
class NumberFilteringDataset(ProceduralDataset):
"""Generates number filtering tasks"""
def __init__(self, config: NumberFilteringConfig):
self.config = config
self.config.validate()
self.seed = config.seed if config.seed is not None else Random().randint(0, 2**32)
def __len__(self) -> int:
return self.config.size
def __iter__(self):
self._current_idx = 0
return self
def __next__(self):
if self._current_idx >= self.config.size:
raise StopIteration
item = self[self._current_idx]
self._current_idx += 1
return item
super().__init__(seed=config.seed, size=config.size)
def _format_number(self, num: float, decimals: int) -> str:
"""Format a number with specified decimal places"""