fix(envs): Add source dataset and index to metadata (#388)

* add source dataset and index to metadata

* fix typo

* fix coach class and its test
This commit is contained in:
Zafir Stojanovski 2025-03-20 12:12:14 +01:00 committed by GitHub
parent 7475a20700
commit ce0a6c4878
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
104 changed files with 549 additions and 146 deletions

View file

@@ -9,6 +9,8 @@ from typing import Any, Optional
from ..coaching import BaseCurriculum, ScalarAttributeDefinition
from ..factory import ProceduralDataset, register_dataset
DATASET_NAME = "calendar_arithmetic"
class Weekday(Enum):
MONDAY = auto()
@@ -126,6 +128,8 @@ class CalendarArithmeticDataset(ProceduralDataset):
rng = random.Random(self.seed + idx)
task = rng.choice(self.tasks)
question, answer, metadata = task(rng)
metadata["source_dataset"] = DATASET_NAME
metadata["source_index"] = idx
metadata["difficulty"] = {
"task_complexity": self.tasks.index(task),
"date_range": self.config.offset_upper_bound,
@@ -523,6 +527,4 @@ class CalendarArithmeticCurriculum(BaseCurriculum):
)
register_dataset(
"calendar_arithmetic", CalendarArithmeticDataset, CalendarArithmeticConfig, CalendarArithmeticCurriculum
)
register_dataset(DATASET_NAME, CalendarArithmeticDataset, CalendarArithmeticConfig, CalendarArithmeticCurriculum)