add eval types

2026-04-19 12:57:58 +00:00 · 2025-07-11 03:36:55 +00:00 · 2025-07-11 03:36:55 +00:00 · 0d4ce37b73
commit 0d4ce37b73
parent 290e087fc5
1 changed files with 38 additions and 0 deletions
--- a/atroposlib/type_definitions.py
+++ b/atroposlib/type_definitions.py
@@ -69,3 +69,41 @@ class GameStep(TypedDict):

 # GameHistory is represented as a list of game steps.
 GameHistory = List[GameStep]
+
+
+class EvaluationConfigGeneral(TypedDict):
+    """Configuration section of evaluation results."""
+
+    total_evaluation_time_secondes: str  # a string, not a number; keep key spelling
+    model_name: Optional[str]  # may be None
+    generation_parameters: Dict[str, Any]  # free-form mapping; values are untyped
+
+
+class EvaluationResults(TypedDict):
+    """Results section of evaluation metrics; only the ``all`` key is declared."""
+
+    all: Dict[str, float]  # Aggregated metrics across all tasks (str -> float)
+
+
+class EvaluationMetrics(TypedDict):
+    """Complete evaluation metrics JSON structure."""
+
+    config_general: EvaluationConfigGeneral  # general configuration section
+    results: EvaluationResults  # metric values section
+
+
+class EvaluationSample(TypedDict, total=False):
+    """Individual sample data written to JSONL files.
+
+    Every key is optional (total=False) to fit different evaluation scenarios.
+    """
+
+    messages: Optional[List[Dict[str, str]]]  # list of str -> str mappings
+    question: Optional[str]  # like every field here: may be absent or None
+    gold_answer: Optional[str]
+    gold_parsed: Optional[str]
+    model_parsed: Optional[str]
+    score: Optional[int]  # NOTE(review): int only; confirm no fractional scores
+    correct: Optional[bool]
+    finish_reason: Optional[str]
+    response_after_think: Optional[str]  # presumably post-reasoning text; confirm