formatting

This commit is contained in:
Andreas Koepf 2025-01-24 10:34:07 +01:00
parent 98988c8481
commit 20069b2a7d
37 changed files with 504 additions and 666 deletions

View file

@ -1,21 +1,24 @@
"""Fraction simplification task generator"""
from dataclasses import dataclass
from random import Random
from typing import Optional, Tuple, Sequence
from ..dataset import ProceduralDataset
from math import gcd
from random import Random
from typing import Optional, Sequence, Tuple
from ..dataset import ProceduralDataset
@dataclass
class FractionSimplificationConfig:
"""Configuration for fraction simplification task generation"""
min_value: int = 1 # Minimum value for numerator/denominator
max_value: int = 1000 # Maximum value for numerator/denominator
min_factor: int = 1 # Minimum multiplication factor
max_factor: int = 100 # Maximum multiplication factor
min_value: int = 1 # Minimum value for numerator/denominator
max_value: int = 1000 # Maximum value for numerator/denominator
min_factor: int = 1 # Minimum multiplication factor
max_factor: int = 100 # Maximum multiplication factor
styles: Sequence[str] = ("plain", "latex_inline", "latex_frac", "latex_dfrac") # Allowed fraction formatting styles
seed: Optional[int] = None
size: int = 500 # Virtual dataset size
size: int = 500 # Virtual dataset size
def validate(self):
"""Validate configuration parameters"""
@ -23,7 +26,7 @@ class FractionSimplificationConfig:
assert self.max_value > self.min_value, "max_value must be > min_value"
assert self.min_factor >= 1, "min_factor must be at least 1"
assert self.max_factor >= self.min_factor, "max_factor must be >= min_factor"
# Validate styles
valid_styles = {"plain", "latex_inline", "latex_frac", "latex_dfrac"}
for style in self.styles:
@ -46,37 +49,38 @@ class FractionSimplificationDataset(ProceduralDataset):
# Generate the simplified fraction first
simplified_num = rng.randint(self.config.min_value, self.config.max_value)
simplified_den = rng.randint(self.config.min_value, self.config.max_value)
# Make sure they're coprime by dividing by their GCD
common = gcd(simplified_num, simplified_den)
simplified_num //= common
simplified_den //= common
# Check if simplified fraction is within bounds
if (self.config.min_value <= simplified_num <= self.config.max_value and
self.config.min_value <= simplified_den <= self.config.max_value):
if (
self.config.min_value <= simplified_num <= self.config.max_value
and self.config.min_value <= simplified_den <= self.config.max_value
):
# Ensure numerator is smaller than denominator
if simplified_num > simplified_den:
simplified_num, simplified_den = simplified_den, simplified_num
# Multiply both by a random factor to create the unsimplified version
factor = rng.randint(self.config.min_factor, self.config.max_factor)
numerator = simplified_num * factor
denominator = simplified_den * factor
return numerator, denominator, simplified_num, simplified_den
# If we failed to find a good fraction after max attempts,
# generate one that's guaranteed to be within bounds
simplified_num = rng.randint(self.config.min_value, self.config.max_value)
simplified_den = rng.randint(self.config.min_value, self.config.max_value)
# Ensure numerator is smaller than denominator
if simplified_num > simplified_den:
simplified_num, simplified_den = simplified_den, simplified_num
factor = rng.randint(self.config.min_factor, self.config.max_factor)
return (simplified_num * factor, simplified_den * factor,
simplified_num, simplified_den)
return (simplified_num * factor, simplified_den * factor, simplified_num, simplified_den)
def _format_fraction(self, num: int, den: int, style: str = "plain") -> str:
"""Format a fraction in various styles"""
@ -95,16 +99,16 @@ class FractionSimplificationDataset(ProceduralDataset):
def __getitem__(self, idx: int) -> dict:
"""Generate a single fraction simplification task"""
rng = Random(self.seed + idx)
num, den, simple_num, simple_den = self._generate_fraction(rng)
# Choose a random style from configured styles
style = self.config.styles[rng.randint(0, len(self.config.styles)-1)]
style = self.config.styles[rng.randint(0, len(self.config.styles) - 1)]
# Format both question and answer in the same style
question_fraction = self._format_fraction(num, den, style)
answer_fraction = self._format_fraction(simple_num, simple_den, style)
return {
"question": f"Simplify the fraction {question_fraction} to its lowest terms",
"answer": answer_fraction,
@ -114,8 +118,8 @@ class FractionSimplificationDataset(ProceduralDataset):
"simplified_numerator": simple_num,
"simplified_denominator": simple_den,
"reduction_factor": num // simple_num, # Will be same as den // simple_den
"style": style
}
"style": style,
},
}