Generate eval config tool (#240)

* feat: Add generate_config.py script to create eval configurations
2026-04-19 12:58:07 +00:00 · 2025-02-27 21:40:53 +01:00 · 2025-02-27 21:40:53 +01:00 · 5b8d1b5175
commit 5b8d1b5175
parent 850c1cf6f4
9 changed files with 858 additions and 338 deletions
--- a/eval/eval.py
+++ b/eval/eval.py
@@ -27,7 +27,6 @@ import logging
 import os
 import subprocess
 import sys
-from collections import OrderedDict
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Union
@@ -318,7 +317,7 @@ class AsyncModelEvaluator:

        return results

-    def generate_summary(self, results: dict[str, Any]) -> dict[str, Union[int, OrderedDict]]:
+    def generate_summary(self, results: dict[str, Any]) -> dict[str, Any]:
        """Generate a summary of evaluation results in the original configuration order.

        Args:
@@ -330,7 +329,7 @@ class AsyncModelEvaluator:
        summary = {
            "total_datasets": 0,
            "total_examples": 0,
-            "dataset_scores": OrderedDict(),
+            "dataset_scores": {},
        }

        # Iterate through categories and datasets in the original order from config