use native types List->list, Dict->dict, Set->set, Tuple->tuple

2026-04-23 16:55:05 +00:00 · 2025-02-21 15:13:19 +01:00 · 2025-02-21 15:13:19 +01:00 · 3e7ff3b084
commit 3e7ff3b084
parent 5d02064b5a
95 changed files with 754 additions and 760 deletions
--- a/eval/r1/eval.py
+++ b/eval/r1/eval.py
@@ -5,7 +5,7 @@ import logging
 import os
 from dataclasses import asdict
 from datetime import datetime
-from typing import Any, Dict, List
+from typing import Any

 import aiohttp
 from eval_config import EvalConfig
@@ -32,7 +32,7 @@ class OpenRouterEvaluator:
        }
        self.semaphore = asyncio.Semaphore(10)  # Control concurrency

-    def save_results(self, results: List[Dict[str, Any]], dataset, dataset_name) -> Dict[str, Any]:
+    def save_results(self, results: list[dict[str, Any]], dataset, dataset_name) -> dict[str, Any]:
        file_name = f"{self.output_dir}/{dataset_name}.json"
        total_score = sum(r["score"] for r in results)

@@ -52,7 +52,7 @@ class OpenRouterEvaluator:
            json.dump(metrics, f, indent=2)
        return metrics

-    def prepare_messages(self, prompt: str) -> List[Dict[str, str]]:
+    def prepare_messages(self, prompt: str) -> list[dict[str, str]]:
        return {
            "model": self.model,
            "messages": [
@@ -92,7 +92,7 @@ class OpenRouterEvaluator:

        raise Exception("Failed to get valid response after retries")

-    async def process_entry(self, session: aiohttp.ClientSession, dataset: Any, entry: Any) -> Dict[str, Any]:
+    async def process_entry(self, session: aiohttp.ClientSession, dataset: Any, entry: Any) -> dict[str, Any]:
        """Process a single entry with concurrency control."""
        async with self.semaphore:
            response = await self.get_model_response(session, entry["question"])
@@ -108,7 +108,7 @@ class OpenRouterEvaluator:
                "metadata": str(entry["metadata"]),
            }

-    async def evaluate_dataset(self, session: aiohttp.ClientSession, dataset_name: str) -> Dict[str, Any]:
+    async def evaluate_dataset(self, session: aiohttp.ClientSession, dataset_name: str) -> dict[str, Any]:
        """Evaluate a single dataset asynchronously."""
        self.logger.info(f"\nEvaluating dataset: {dataset_name}")
        dataset = reasoning_gym.create_dataset(
@@ -119,7 +119,7 @@ class OpenRouterEvaluator:
        results = await asyncio.gather(*tasks)
        return self.save_results(results, dataset, dataset_name)

-    async def evaluate_datasets(self) -> List[Dict[str, Any]]:
+    async def evaluate_datasets(self) -> list[dict[str, Any]]:
        """Main async evaluation entry point."""
        all_results = []
        async with aiohttp.ClientSession(headers=self.headers) as session: