mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
use native types List->list, Dict->dict, Set->set, Tuple->tuple
This commit is contained in:
parent
5d02064b5a
commit
3e7ff3b084
95 changed files with 754 additions and 760 deletions
|
|
@ -5,7 +5,7 @@ import logging
|
|||
import os
|
||||
from dataclasses import asdict
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
from eval_config import EvalConfig
|
||||
|
|
@ -32,7 +32,7 @@ class OpenRouterEvaluator:
|
|||
}
|
||||
self.semaphore = asyncio.Semaphore(10) # Control concurrency
|
||||
|
||||
def save_results(self, results: List[Dict[str, Any]], dataset, dataset_name) -> Dict[str, Any]:
|
||||
def save_results(self, results: list[dict[str, Any]], dataset, dataset_name) -> dict[str, Any]:
|
||||
file_name = f"{self.output_dir}/{dataset_name}.json"
|
||||
total_score = sum(r["score"] for r in results)
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ class OpenRouterEvaluator:
|
|||
json.dump(metrics, f, indent=2)
|
||||
return metrics
|
||||
|
||||
def prepare_messages(self, prompt: str) -> List[Dict[str, str]]:
|
||||
def prepare_messages(self, prompt: str) -> list[dict[str, str]]:
|
||||
return {
|
||||
"model": self.model,
|
||||
"messages": [
|
||||
|
|
@ -92,7 +92,7 @@ class OpenRouterEvaluator:
|
|||
|
||||
raise Exception("Failed to get valid response after retries")
|
||||
|
||||
async def process_entry(self, session: aiohttp.ClientSession, dataset: Any, entry: Any) -> Dict[str, Any]:
|
||||
async def process_entry(self, session: aiohttp.ClientSession, dataset: Any, entry: Any) -> dict[str, Any]:
|
||||
"""Process a single entry with concurrency control."""
|
||||
async with self.semaphore:
|
||||
response = await self.get_model_response(session, entry["question"])
|
||||
|
|
@ -108,7 +108,7 @@ class OpenRouterEvaluator:
|
|||
"metadata": str(entry["metadata"]),
|
||||
}
|
||||
|
||||
async def evaluate_dataset(self, session: aiohttp.ClientSession, dataset_name: str) -> Dict[str, Any]:
|
||||
async def evaluate_dataset(self, session: aiohttp.ClientSession, dataset_name: str) -> dict[str, Any]:
|
||||
"""Evaluate a single dataset asynchronously."""
|
||||
self.logger.info(f"\nEvaluating dataset: {dataset_name}")
|
||||
dataset = reasoning_gym.create_dataset(
|
||||
|
|
@ -119,7 +119,7 @@ class OpenRouterEvaluator:
|
|||
results = await asyncio.gather(*tasks)
|
||||
return self.save_results(results, dataset, dataset_name)
|
||||
|
||||
async def evaluate_datasets(self) -> List[Dict[str, Any]]:
|
||||
async def evaluate_datasets(self) -> list[dict[str, Any]]:
|
||||
"""Main async evaluation entry point."""
|
||||
all_results = []
|
||||
async with aiohttp.ClientSession(headers=self.headers) as session:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue