mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
Generate eval config tool (#240)
* feat: Add generate_config.py script to create eval configurations
This commit is contained in:
parent
850c1cf6f4
commit
5b8d1b5175
9 changed files with 858 additions and 338 deletions
|
|
@@ -27,7 +27,6 @@ import logging
|
|||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Union
|
||||
|
|
@@ -318,7 +317,7 @@ class AsyncModelEvaluator:
|
|||
|
||||
return results
|
||||
|
||||
def generate_summary(self, results: dict[str, Any]) -> dict[str, Union[int, OrderedDict]]:
|
||||
def generate_summary(self, results: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Generate a summary of evaluation results in the original configuration order.
|
||||
|
||||
Args:
|
||||
|
|
@@ -330,7 +329,7 @@ class AsyncModelEvaluator:
|
|||
summary = {
|
||||
"total_datasets": 0,
|
||||
"total_examples": 0,
|
||||
"dataset_scores": OrderedDict(),
|
||||
"dataset_scores": {},
|
||||
}
|
||||
|
||||
# Iterate through categories and datasets in the original order from config
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue