mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-29 17:35:16 +00:00
use native types List->list, Dict->dict, Set->set, Tuple->tuple
This commit is contained in:
parent
5d02064b5a
commit
3e7ff3b084
95 changed files with 754 additions and 760 deletions
|
|
@ -5,7 +5,7 @@ import os
|
|||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
from tqdm.asyncio import tqdm_asyncio
|
||||
|
|
@ -44,7 +44,7 @@ class AsyncOpenRouterEvaluator:
|
|||
match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
|
||||
return match.group(1).strip() if match else response
|
||||
|
||||
async def process_single_question(self, entry: Dict, dataset) -> Dict:
|
||||
async def process_single_question(self, entry: dict, dataset) -> dict:
|
||||
"""Process a single question and return the result."""
|
||||
response = await self.get_model_response(entry["question"])
|
||||
answer = self.parse_model_response(response)
|
||||
|
|
@ -58,7 +58,7 @@ class AsyncOpenRouterEvaluator:
|
|||
"metadata": entry["metadata"],
|
||||
}
|
||||
|
||||
async def evaluate_dataset(self, dataset_config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
async def evaluate_dataset(self, dataset_config: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Evaluate a single dataset with concurrent question processing."""
|
||||
dataset_name = dataset_config.pop("name")
|
||||
print(f"\nEvaluating dataset: {dataset_name}")
|
||||
|
|
@ -92,7 +92,7 @@ class AsyncOpenRouterEvaluator:
|
|||
print(f"Error evaluating dataset {dataset_name}: {str(e)}")
|
||||
return None
|
||||
|
||||
async def evaluate_datasets(self, dataset_configs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
async def evaluate_datasets(self, dataset_configs: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Evaluate multiple datasets concurrently."""
|
||||
tasks = [self.evaluate_dataset(config) for config in dataset_configs]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue