pass config to ProceduralDataset base

This commit is contained in:
Andreas Koepf 2025-01-25 00:23:05 +01:00
parent df2b8d2809
commit e9549f2a63
20 changed files with 45 additions and 80 deletions

View file

@@ -21,7 +21,7 @@ class BasicArithmeticDatasetConfig:
format_style: Literal["simple", "natural"] = "simple"
whitespace: Literal["no_space", "single", "random"] = "single" # Whitespace style between terms
-def validate(self):
+def validate(self) -> None:
"""Validate configuration parameters"""
assert self.min_terms > 0, "min_terms must be positive"
assert self.max_terms >= self.min_terms, "max_terms must be >= min_terms"
@@ -63,9 +63,7 @@ class BasicArithmeticDataset(ProceduralDataset):
"""Dataset that generates basic arithmetic tasks with configurable complexity"""
def __init__(self, config: BasicArithmeticDatasetConfig):
-self.config = config
-self.config.validate()
-super().__init__(seed=config.seed, size=config.size)
+super().__init__(config=config, seed=config.seed, size=config.size)
def __getitem__(self, idx: int) -> dict[str, Any]:
"""Generate a single arithmetic task

View file

@@ -18,7 +18,7 @@ class ChainSumConfig:
seed: Optional[int] = None
size: int = 500
-def validate(self):
+def validate(self) -> None:
"""Validate configuration parameters"""
assert self.min_terms > 0, "min_terms must be positive"
assert self.max_terms >= self.min_terms, "max_terms must be >= min_terms"
@@ -34,9 +34,7 @@ class ChainSum(ProceduralDataset):
"""Generates simple arithmetic tasks using only + and - operators"""
def __init__(self, config: ChainSumConfig):
-self.config = config
-self.config.validate()
-super().__init__(seed=config.seed, size=config.size)
+super().__init__(config=config, seed=config.seed, size=config.size)
def __getitem__(self, idx: int) -> dict:
"""Generate a single chain sum task
@@ -145,5 +143,6 @@ def chain_sum_dataset(
)
return ChainSum(config)
# Register the dataset
register_dataset("chain_sum", ChainSum, ChainSumConfig)

View file

@@ -20,7 +20,7 @@ class FractionSimplificationConfig:
seed: Optional[int] = None
size: int = 500 # Virtual dataset size
-def validate(self):
+def validate(self) -> None:
"""Validate configuration parameters"""
assert self.min_value > 0, "min_value must be positive"
assert self.max_value > self.min_value, "max_value must be > min_value"
@@ -37,9 +37,7 @@ class FractionSimplificationDataset(ProceduralDataset):
"""Generates fraction simplification tasks"""
def __init__(self, config: FractionSimplificationConfig):
-self.config = config
-self.config.validate()
-super().__init__(seed=config.seed, size=config.size)
+super().__init__(config=config, seed=config.seed, size=config.size)
def _generate_fraction(self, rng: Random) -> Tuple[int, int, int, int]:
"""Generate a random fraction and its simplified form.

View file

@@ -20,7 +20,7 @@ class GCDConfig:
seed: Optional[int] = None
size: int = 500 # Virtual dataset size
-def validate(self):
+def validate(self) -> None:
"""Validate configuration parameters"""
assert self.min_numbers >= 2, "min_numbers must be at least 2"
assert self.max_numbers >= self.min_numbers, "max_numbers must be >= min_numbers"
@@ -32,9 +32,7 @@ class GCDDataset(ProceduralDataset):
"""Generates Greatest Common Divisor (GCD) tasks"""
def __init__(self, config: GCDConfig):
-self.config = config
-self.config.validate()
-super().__init__(seed=config.seed, size=config.size)
+super().__init__(config=config, seed=config.seed, size=config.size)
def _generate_numbers(self, rng: Random) -> Tuple[List[int], int]:
"""Generate a list of random positive integers and their GCD.

View file

@@ -20,7 +20,7 @@ class LCMConfig:
seed: Optional[int] = None
size: int = 500 # Virtual dataset size
-def validate(self):
+def validate(self) -> None:
"""Validate configuration parameters"""
assert self.min_numbers >= 2, "min_numbers must be at least 2"
assert self.max_numbers >= self.min_numbers, "max_numbers must be >= min_numbers"
@@ -32,9 +32,7 @@ class LCMDataset(ProceduralDataset):
"""Generates Least Common Multiple (LCM) tasks"""
def __init__(self, config: LCMConfig):
-self.config = config
-self.config.validate()
-super().__init__(seed=config.seed, size=config.size)
+super().__init__(config=config, seed=config.seed, size=config.size)
def _generate_numbers(self, rng: Random) -> Tuple[List[int], int]:
"""Generate a list of random positive integers and their LCM.

View file

@@ -65,7 +65,7 @@ class LegCountingConfig:
seed: Optional[int] = None
size: int = 500 # Virtual dataset size
-def validate(self):
+def validate(self) -> None:
"""Validate configuration parameters"""
assert self.min_animals > 0, "min_animals must be positive"
assert self.max_animals >= self.min_animals, "max_animals must be >= min_animals"
@@ -76,9 +76,7 @@ class LegCountingDataset(ProceduralDataset):
"""Generates leg counting arithmetic tasks"""
def __init__(self, config: LegCountingConfig):
-self.config = config
-self.config.validate()
-super().__init__(seed=config.seed, size=config.size)
+super().__init__(config=config, seed=config.seed, size=config.size)
def _generate_animals(self, rng: Random) -> Dict[str, int]:
"""Generate a random set of animals and their counts"""

View file

@@ -16,7 +16,7 @@ class PrimeFactorizationConfig:
seed: Optional[int] = None
size: int = 500 # Virtual dataset size
-def validate(self):
+def validate(self) -> None:
"""Validate configuration parameters"""
assert self.min_value >= 2, "min_value must be >= 2"
assert self.max_value >= self.min_value, "max_value must be >= min_value"
@@ -26,9 +26,7 @@ class PrimeFactorizationDataset(ProceduralDataset):
"""Generates prime factorization tasks"""
def __init__(self, config: PrimeFactorizationConfig):
-self.config = config
-self.config.validate()
-super().__init__(seed=config.seed, size=config.size)
+super().__init__(config=config, seed=config.seed, size=config.size)
def _prime_factors(self, n: int) -> List[int]:
"""Compute prime factors of a number"""