mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
Add Coaching & ScoreBoard class (result tracking) (#72)
* feat: Add Coach and ScoreBoard classes for performance tracking and difficulty adjustment
* feat: Add GroupedScores class to wrap aggregated scores
* refactor: Create ScoreStats class with tuple-based score statistics
* feat: Add unit test for Coach with CompositeDataset and multiple datasets
* fix: Add difficulty metadata to leg counting dataset
* feat: Add clear() method to ScoreBoard to reset all stored data
* feat: Add __len__ method to ScoreBoard to return number of scores
* feat: Add update_dataset_config method to CompositeDataset
* cleanup __init__ & imports
This commit is contained in:
parent
05e2681ada
commit
a607db79f7
18 changed files with 549 additions and 39 deletions
|
|
@@ -1,10 +1,5 @@
|
|||
"""
|
||||
Arithmetic tasks for training reasoning capabilities:
|
||||
- Basic arithmetic
|
||||
- Chain sums
|
||||
- Word problems
|
||||
- Leg counting
|
||||
- Time intervals
|
||||
"""
|
||||
|
||||
from .basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig
|
||||
|
|
@@ -21,13 +16,10 @@ from .time_intervals import TimeIntervalsConfig, TimeIntervalsDataset
|
|||
__all__ = [
|
||||
"BasicArithmeticDataset",
|
||||
"BasicArithmeticDatasetConfig",
|
||||
"basic_arithmetic_dataset",
|
||||
"ChainSum",
|
||||
"ChainSumConfig",
|
||||
"CalendarArithmeticConfig",
|
||||
"CalendarArithmeticDataset",
|
||||
"Weekday",
|
||||
"CalendarTask",
|
||||
"FractionSimplificationConfig",
|
||||
"FractionSimplificationDataset",
|
||||
"GCDConfig",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue