Verify that the OPENROUTER_API_KEY environment variable is set before running the evaluation

2026-04-19 12:58:07 +00:00 · 2025-02-26 22:15:18 +01:00 · 2025-02-26 22:15:18 +01:00 · 477e1f85cc
commit 477e1f85cc
parent 941da618d8
2 changed files with 21 additions and 14 deletions
--- a/eval/eval.py
+++ b/eval/eval.py
@ -24,13 +24,13 @@ logging.basicConfig(


 class OpenRouterEvaluator:
-    def __init__(self, model: str, config: EvalConfig):
+    def __init__(self, model: str, config: EvalConfig, api_key: str):
        self.logger = logging.getLogger(f"OpenRouterEvaluator.{model}")
        self.config = config
        self.output_dir = f"{config.eval_dir}/{config.category}"
        os.makedirs(self.output_dir, exist_ok=True)
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"
-        self.api_key = os.getenv("OPENROUTER_API_KEY")
+        self.api_key = api_key
        self.model = model
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
@ -98,6 +98,8 @@ class OpenRouterEvaluator:
            model_answer = extract_answer(response)
            score = dataset.score_answer(answer=model_answer, entry=entry)

+            print(f"answer: {model_answer}, score: {score}")
+
            return {
                "question": entry["question"],
                "expected_answer": str(entry["answer"]),
@ -120,18 +122,23 @@ class OpenRouterEvaluator:

    async def evaluate_datasets(self) -> list[dict[str, Any]]:
        """Main async evaluation entry point."""
-        all_results = []
        async with aiohttp.ClientSession(headers=self.headers) as session:
            return await asyncio.gather(*(self.evaluate_dataset(session, name) for name in self.config.datasets))


 async def async_main():
+    api_key = os.getenv("OPENROUTER_API_KEY")
+    if not api_key:
+        print("Error: OPENROUTER_API_KEY environment variable is not set")
+        print("Please set it using: export OPENROUTER_API_KEY=your-api-key")
+        exit(1)
+
    parser = argparse.ArgumentParser(description="Evaluate models on reasoning datasets")
    parser.add_argument("--yaml", required=True, help="Path to YAML configuration file")
    args = parser.parse_args()

    config = EvalConfig.from_yaml(args.yaml)
-    evaluator = OpenRouterEvaluator(model=config.model, config=config)
+    evaluator = OpenRouterEvaluator(model=config.model, config=config, api_key=api_key)
    results = await evaluator.evaluate_datasets()

    output_dir = f"{config.eval_dir}/{config.category}"
--- a/eval/scripts/run_llama-3.3-70-instruct_all.sh
+++ b/eval/scripts/run_llama-3.3-70-instruct_all.sh
@ -1,12 +1,12 @@
 #!/bin/bash
 # Run this script from the parent directory (eval/), where eval.py and the yaml/ configs are located.
-./eval.py --yaml algebra.yaml
-./eval.py --yaml algorithmic.yaml
-./eval.py --yaml arc.yaml
-./eval.py --yaml arithmetic.yaml
-./eval.py --yaml code.yaml
-./eval.py --yaml cognition.yaml
-./eval.py --yaml games.yaml
-./eval.py --yaml geometry.yaml
-./eval.py --yaml graphs.yaml
-./eval.py --yaml logic.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/algebra.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/algorithmic.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/arc.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/arithmetic.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/code.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/cognition.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/games.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/geometry.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/graphs.yaml
+./eval.py --yaml yaml/llama-3.3-70b-instruct/logic.yaml