fix(envs): Add source dataset and index to metadata (#388)

* add source dataset and index to metadata

* fix typo

* fix coach class and its test
This commit is contained in:
Zafir Stojanovski 2025-03-20 12:12:14 +01:00 committed by GitHub
parent c6d01541aa
commit 4c47527130
104 changed files with 549 additions and 146 deletions

View file

@ -12,6 +12,8 @@ from reasoning_gym.factory import ProceduralDataset, register_dataset
from .blicket import config_control, dist_control, final_parse, serialize
from .const import ALL_CONFIG_SIZE, ATTR_CONFIG_SIZE
DATASET_NAME = "acre"
# Create blicket questions
@dataclass
@ -88,7 +90,14 @@ What is the detector light status?"""
prompt_input = ", ".join(" ".join(x) for x in input["question"]["input"])
answer = input["question"]["output"]
question = self.prompt_template.format(examples=formatted_examples, input=prompt_input)
return {"question": question, "answer": answer, "metadata": {}}
return {
"question": question,
"answer": answer,
"metadata": {
"source_dataset": DATASET_NAME,
"source_index": idx,
},
}
register_dataset("acre", ACREDataset, ACREDatasetConfig)
register_dataset(DATASET_NAME, ACREDataset, ACREDatasetConfig)

View file

@ -6,6 +6,8 @@ from typing import Any, Callable, Optional
from reasoning_gym.factory import ProceduralDataset, register_dataset
DATASET_NAME = "list_functions"
@dataclass
class ListFunctionsDatasetConfig:
@ -75,7 +77,14 @@ Output:
Output {index + 1}: {examples[key]}
"""
question = self.prompt_template.format(examples=formatted_examples, input=input)
return {"question": question, "answer": output, "metadata": {}}
return {
"question": question,
"answer": output,
"metadata": {
"source_dataset": DATASET_NAME,
"source_index": idx,
},
}
register_dataset("list_functions", ListFunctionsDataset, ListFunctionsDatasetConfig)
register_dataset(DATASET_NAME, ListFunctionsDataset, ListFunctionsDatasetConfig)