Generate eval config tool (#240)

* feat: Add generate_config.py script to create eval configurations
This commit is contained in:
Andreas Köpf 2025-02-27 21:40:53 +01:00 committed by GitHub
parent 850c1cf6f4
commit 5b8d1b5175
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 858 additions and 338 deletions

View file

@@ -27,7 +27,6 @@ import logging
import os
import subprocess
import sys
-from collections import OrderedDict
from datetime import datetime
from pathlib import Path
from typing import Any, Union
@@ -318,7 +317,7 @@ class AsyncModelEvaluator:
return results
-def generate_summary(self, results: dict[str, Any]) -> dict[str, Union[int, OrderedDict]]:
+def generate_summary(self, results: dict[str, Any]) -> dict[str, Any]:
"""Generate a summary of evaluation results in the original configuration order.
Args:
@@ -330,7 +329,7 @@ class AsyncModelEvaluator:
summary = {
"total_datasets": 0,
"total_examples": 0,
-"dataset_scores": OrderedDict(),
+"dataset_scores": {},
}
# Iterate through categories and datasets in the original order from config