Add Coach & ScoreBoard classes (result tracking) (#72)

* feat: Add Coach and ScoreBoard classes for performance tracking and difficulty adjustment
* feat: Add GroupedScores class to wrap aggregated scores
* refactor: Create ScoreStats class with tuple-based score statistics
* feat: Add unit test for Coach with CompositeDataset and multiple datasets
* fix: Add difficulty metadata to leg counting dataset
* feat: Add clear() method to ScoreBoard to reset all stored data
* feat: Add __len__ method to ScoreBoard to return number of scores
* feat: Add update_dataset_config method to CompositeDataset
* Clean up __init__ & imports
This commit is contained in:
Andreas Köpf 2025-02-06 23:15:28 +01:00 committed by GitHub
parent 05e2681ada
commit a607db79f7
18 changed files with 549 additions and 39 deletions

View file

@ -1,10 +1,5 @@
"""
Arithmetic tasks for training reasoning capabilities:
- Basic arithmetic
- Chain sums
- Word problems
- Leg counting
- Time intervals
"""
from .basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig
@ -21,13 +16,10 @@ from .time_intervals import TimeIntervalsConfig, TimeIntervalsDataset
__all__ = [
"BasicArithmeticDataset",
"BasicArithmeticDatasetConfig",
"basic_arithmetic_dataset",
"ChainSum",
"ChainSumConfig",
"CalendarArithmeticConfig",
"CalendarArithmeticDataset",
"Weekday",
"CalendarTask",
"FractionSimplificationConfig",
"FractionSimplificationDataset",
"GCDConfig",

View file

@ -3,7 +3,7 @@ import math
import random
from dataclasses import dataclass
from datetime import date, timedelta
from enum import Enum, auto
from enum import Enum, StrEnum, auto
from typing import Any, Dict, List, Optional, Tuple
from ..factory import ProceduralDataset, register_dataset
@ -38,7 +38,7 @@ class Weekday(Enum):
return self.name.capitalize()
class CalendarTask(Enum):
class CalendarTask(StrEnum):
WEEKDAY_OFFSET = "weekday_offset"
WEEKDAY_OF_DATE = "weekday_of_date"
WEEKDAY_OF_DATE_FROM_FIRST_DATE = "weekday_of_date_from_first_day"

View file

@ -65,8 +65,10 @@ class ChainSum(ProceduralDataset):
"question": f"{expression} =",
"answer": str(result),
"metadata": {
"num_terms": num_terms,
"num_digits": num_digits,
"difficulty": {
"num_terms": num_terms,
"num_digits": num_digits,
},
"expression": expression,
},
}

View file

@ -111,7 +111,13 @@ class LegCountingDataset(ProceduralDataset):
return {
"question": question,
"answer": str(total_legs),
"metadata": {"animals": animals, "total_legs": total_legs},
"metadata": {
"difficulty": {
"num_animals": len(animals),
},
"animals": animals,
"total_legs": total_legs,
},
}

View file

@ -2,7 +2,7 @@
from dataclasses import dataclass
from random import Random
from typing import List, Optional, Tuple
from typing import List, Optional
from ..factory import ProceduralDataset, register_dataset