fix(envs): Add source dataset and index to metadata (#388)

* add source dataset and index to metadata

* fix typo

* fix coach class and its test
This commit is contained in:
Zafir Stojanovski 2025-03-20 12:12:14 +01:00 committed by GitHub
parent c6d01541aa
commit 4c47527130
104 changed files with 549 additions and 146 deletions

View file

@@ -5,6 +5,8 @@ from typing import Any, Optional
from ..coaching import BaseCurriculum, RangeAttributeDefinition
from ..factory import ProceduralDataset, register_dataset
+DATASET_NAME = "needle_haystack"
@dataclass
class NeedleHaystackConfig:
@@ -104,6 +106,8 @@ class NeedleHaystackDataset(ProceduralDataset):
"question": full_text,
"answer": stack["needle"][0],
"metadata": {
+"source_dataset": DATASET_NAME,
+"source_index": idx,
"question": question,
"num_statements": num_statements,
"difficulty": {
@@ -153,4 +157,4 @@ class NeedleHaystackCurriculum(BaseCurriculum):
# Register the dataset
-register_dataset("needle_haystack", NeedleHaystackDataset, NeedleHaystackConfig, NeedleHaystackCurriculum)
+register_dataset(DATASET_NAME, NeedleHaystackDataset, NeedleHaystackConfig, NeedleHaystackCurriculum)