Add Coach & ScoreBoard classes (result tracking) (#72)

* feat: Add Coach and ScoreBoard classes for performance tracking and difficulty adjustment
* feat: Add GroupedScores class to wrap aggregated scores
* refactor: Create ScoreStats class with tuple-based score statistics
* feat: Add unit test for Coach with CompositeDataset and multiple datasets
* fix: Add difficulty metadata to leg counting dataset
* feat: Add clear() method to ScoreBoard to reset all stored data
* feat: Add __len__ method to ScoreBoard to return number of scores
* feat: Add update_dataset_config method to CompositeDataset
* Clean up __init__ & imports
This commit is contained in:
Andreas Köpf 2025-02-06 23:15:28 +01:00 committed by GitHub
parent 7c08c05b1e
commit 3f6b2fc807
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 549 additions and 39 deletions

View file

@ -1,10 +1,5 @@
"""
Arithmetic tasks for training reasoning capabilities:
- Basic arithmetic
- Chain sums
- Word problems
- Leg counting
- Time intervals
"""
from .basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig
@ -21,13 +16,10 @@ from .time_intervals import TimeIntervalsConfig, TimeIntervalsDataset
__all__ = [
"BasicArithmeticDataset",
"BasicArithmeticDatasetConfig",
"basic_arithmetic_dataset",
"ChainSum",
"ChainSumConfig",
"CalendarArithmeticConfig",
"CalendarArithmeticDataset",
"Weekday",
"CalendarTask",
"FractionSimplificationConfig",
"FractionSimplificationDataset",
"GCDConfig",

View file

@ -3,7 +3,7 @@ import math
import random
from dataclasses import dataclass
from datetime import date, timedelta
from enum import Enum, auto
from enum import Enum, StrEnum, auto
from typing import Any, Dict, List, Optional, Tuple
from ..factory import ProceduralDataset, register_dataset
@ -38,7 +38,7 @@ class Weekday(Enum):
return self.name.capitalize()
class CalendarTask(Enum):
class CalendarTask(StrEnum):
WEEKDAY_OFFSET = "weekday_offset"
WEEKDAY_OF_DATE = "weekday_of_date"
WEEKDAY_OF_DATE_FROM_FIRST_DATE = "weekday_of_date_from_first_day"

View file

@ -65,8 +65,10 @@ class ChainSum(ProceduralDataset):
"question": f"{expression} =",
"answer": str(result),
"metadata": {
"num_terms": num_terms,
"num_digits": num_digits,
"difficulty": {
"num_terms": num_terms,
"num_digits": num_digits,
},
"expression": expression,
},
}

View file

@ -111,7 +111,13 @@ class LegCountingDataset(ProceduralDataset):
return {
"question": question,
"answer": str(total_legs),
"metadata": {"animals": animals, "total_legs": total_legs},
"metadata": {
"difficulty": {
"num_animals": len(animals),
},
"animals": animals,
"total_legs": total_legs,
},
}

View file

@ -2,7 +2,7 @@
from dataclasses import dataclass
from random import Random
from typing import List, Optional, Tuple
from typing import List, Optional
from ..factory import ProceduralDataset, register_dataset