fix(envs): Add source dataset and index to metadata (#388)

* add source dataset and index to metadata

* fix typo

* fix coach class and its test
This commit is contained in:
Zafir Stojanovski 2025-03-20 12:12:14 +01:00 committed by GitHub
parent c6d01541aa
commit 4c47527130
104 changed files with 549 additions and 146 deletions

View file

@@ -21,6 +21,8 @@ You are given the following list of prerequisites, where prerequisites[i] = (a_i
Return True if you can finish all courses considering the prerequisites, or False otherwise.
"""
DATASET_NAME = "course_schedule"
@dataclass
class CourseScheduleConfig:
@@ -132,6 +134,8 @@ class CourseScheduleDataset(ProceduralDataset):
),
"answer": str(answer),
"metadata": {
"source_dataset": DATASET_NAME,
"source_index": idx,
"courses": courses,
"prerequisites": prerequisites,
"solution": answer,
@@ -178,4 +182,4 @@ class CourseScheduleCurriculum(BaseCurriculum):
)
register_dataset("course_schedule", CourseScheduleDataset, CourseScheduleConfig, CourseScheduleCurriculum)
register_dataset(DATASET_NAME, CourseScheduleDataset, CourseScheduleConfig, CourseScheduleCurriculum)

View file

@@ -7,6 +7,8 @@ from typing import Any, Optional
from ..coaching import BaseCurriculum, RangeAttributeDefinition
from ..factory import ProceduralDataset, register_dataset
DATASET_NAME = "family_relationships"
class Gender(StrEnum):
MALE = "male"
@@ -201,6 +203,8 @@ class FamilyRelationshipsDataset(ProceduralDataset):
"question": f"{story}\n\n{question}",
"answer": relationship.value,
"metadata": {
"source_dataset": DATASET_NAME,
"source_index": idx,
"person1": person1.name,
"person2": person2.name,
"relationship": relationship.value,
@@ -386,6 +390,4 @@ class FamilyRelationshipsCurriculum(BaseCurriculum):
)
register_dataset(
"family_relationships", FamilyRelationshipsDataset, FamilyRelationshipsConfig, FamilyRelationshipsCurriculum
)
register_dataset(DATASET_NAME, FamilyRelationshipsDataset, FamilyRelationshipsConfig, FamilyRelationshipsCurriculum)

View file

@@ -23,6 +23,8 @@ The area of an island is the number of cells with a value 1 in the island.
Return the maximum area of an island in grid. If there is no island, return 0.
"""
DATASET_NAME = "largest_island"
@dataclass
class LargestIslandConfig:
@@ -139,6 +141,8 @@ class LargestIslandDataset(ProceduralDataset):
"question": QUESTION_TEMPLATE.format(rows=rows, cols=cols, grid=grid_str),
"answer": str(answer),
"metadata": {
"source_dataset": DATASET_NAME,
"source_index": idx,
"grid": grid,
"solution": answer,
"difficulty": {
@@ -188,4 +192,4 @@ class LargestIslandCurriculum(BaseCurriculum):
)
register_dataset("largest_island", LargestIslandDataset, LargestIslandConfig, LargestIslandCurriculum)
register_dataset(DATASET_NAME, LargestIslandDataset, LargestIslandConfig, LargestIslandCurriculum)

View file

@@ -7,6 +7,8 @@ from typing import Any, Optional
from ..coaching import BaseCurriculum, ScalarAttributeDefinition
from ..factory import ProceduralDataset, register_dataset
DATASET_NAME = "quantum_lock"
@dataclass
class QuantumLockConfig:
@@ -56,6 +58,8 @@ Buttons:
"question": self.format_puzzle(rng.choice(self._prompt_templates), puzzle=puzzle_data),
"answer": "".join(puzzle_data["solution"]),
"metadata": {
"source_dataset": DATASET_NAME,
"source_index": idx,
"solution_path": puzzle_data["solution"],
"target_value": puzzle_data["target_value"],
"buttons": puzzle_data["buttons"],
@@ -249,4 +253,4 @@ class QuantumLockCurriculum(BaseCurriculum):
# Register the dataset
register_dataset("quantum_lock", QuantumLockDataset, QuantumLockConfig, QuantumLockCurriculum)
register_dataset(DATASET_NAME, QuantumLockDataset, QuantumLockConfig, QuantumLockCurriculum)

View file

@@ -28,6 +28,8 @@ Now, find the length of the shortest path from * to # in the following grid:
{grid}
"""
DATASET_NAME = "shortest_path"
@dataclass
class ShortestPathConfig:
@@ -159,6 +161,8 @@ class ShortestPathDataset(ProceduralDataset):
"question": QUESTION_TEMPLATE.format(grid=matrix_str),
"answer": answer_str,
"metadata": {
"source_dataset": DATASET_NAME,
"source_index": idx,
"matrix": matrix,
"solution": answer,
"difficulty": {
@@ -192,4 +196,4 @@ class ShortestPathCurriculum(BaseCurriculum):
)
register_dataset("shortest_path", ShortestPathDataset, ShortestPathConfig, ShortestPathCurriculum)
register_dataset(DATASET_NAME, ShortestPathDataset, ShortestPathConfig, ShortestPathCurriculum)