add eval types

This commit is contained in:
hjc-puro 2025-07-11 03:36:55 +00:00
parent 290e087fc5
commit 0d4ce37b73

View file

@ -69,3 +69,41 @@ class GameStep(TypedDict):
# A full game trajectory: the ordered sequence of per-step records,
# represented as a plain list of GameStep entries.
GameHistory = List[GameStep]
# The "config_general" section of an evaluation-results payload, declared
# with TypedDict's functional form.
EvaluationConfigGeneral = TypedDict(
    "EvaluationConfigGeneral",
    {
        # NOTE(review): "secondes" looks misspelled, but this key presumably
        # has to match the name used in the serialized results JSON — confirm
        # with the writer before renaming. Stored as a string, not a number.
        "total_evaluation_time_secondes": str,
        # Name of the evaluated model, when known.
        "model_name": Optional[str],
        # Arbitrary generation settings as key/value pairs.
        "generation_parameters": Dict[str, Any],
    },
)
# The "results" section of the evaluation output. Only the "all" entry is
# declared here: metric name -> value, aggregated across every task.
# Functional syntax is used because "all" shadows a builtin name.
EvaluationResults = TypedDict("EvaluationResults", {"all": Dict[str, float]})
# Top-level shape of the evaluation-metrics JSON document: a general
# configuration section plus the results section.
EvaluationMetrics = TypedDict(
    "EvaluationMetrics",
    {
        "config_general": EvaluationConfigGeneral,
        "results": EvaluationResults,
    },
)
# One per-sample record as written to the JSONL output files.
# total=False keeps every key optional, since different evaluation
# scenarios emit different subsets of these fields.
EvaluationSample = TypedDict(
    "EvaluationSample",
    {
        # Presumably a chat transcript of {role, content}-style string
        # dicts — confirm against the caller that writes the JSONL.
        "messages": Optional[List[Dict[str, str]]],
        "question": Optional[str],
        "gold_answer": Optional[str],
        # Parsed/normalized forms of the gold and model answers.
        "gold_parsed": Optional[str],
        "model_parsed": Optional[str],
        "score": Optional[int],
        "correct": Optional[bool],
        "finish_reason": Optional[str],
        # Model response with any reasoning ("think") prefix stripped.
        "response_after_think": Optional[str],
    },
    total=False,
)