mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
verify that OPENROUTER_API_KEY env var is set
This commit is contained in:
parent
941da618d8
commit
477e1f85cc
2 changed files with 21 additions and 14 deletions
15
eval/eval.py
15
eval/eval.py
|
|
@@ -24,13 +24,13 @@ logging.basicConfig(
|
|||
|
||||
|
||||
class OpenRouterEvaluator:
|
||||
def __init__(self, model: str, config: EvalConfig):
|
||||
def __init__(self, model: str, config: EvalConfig, api_key: str):
|
||||
self.logger = logging.getLogger(f"OpenRouterEvaluator.{model}")
|
||||
self.config = config
|
||||
self.output_dir = f"{config.eval_dir}/{config.category}"
|
||||
os.makedirs(self.output_dir, exist_ok=True)
|
||||
self.base_url = "https://openrouter.ai/api/v1/chat/completions"
|
||||
self.api_key = os.getenv("OPENROUTER_API_KEY")
|
||||
self.api_key = api_key
|
||||
self.model = model
|
||||
self.headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
|
|
@@ -98,6 +98,8 @@ class OpenRouterEvaluator:
|
|||
model_answer = extract_answer(response)
|
||||
score = dataset.score_answer(answer=model_answer, entry=entry)
|
||||
|
||||
print(f"answer: {model_answer}, score: {score}")
|
||||
|
||||
return {
|
||||
"question": entry["question"],
|
||||
"expected_answer": str(entry["answer"]),
|
||||
|
|
@@ -120,18 +122,23 @@ class OpenRouterEvaluator:
|
|||
|
||||
async def evaluate_datasets(self) -> list[dict[str, Any]]:
|
||||
"""Main async evaluation entry point."""
|
||||
all_results = []
|
||||
async with aiohttp.ClientSession(headers=self.headers) as session:
|
||||
return await asyncio.gather(*(self.evaluate_dataset(session, name) for name in self.config.datasets))
|
||||
|
||||
|
||||
async def async_main():
|
||||
api_key = os.getenv("OPENROUTER_API_KEY")
|
||||
if not api_key:
|
||||
print("Error: OPENROUTER_API_KEY environment variable is not set")
|
||||
print("Please set it using: export OPENROUTER_API_KEY=your-api-key")
|
||||
exit(1)
|
||||
|
||||
parser = argparse.ArgumentParser(description="Evaluate models on reasoning datasets")
|
||||
parser.add_argument("--yaml", required=True, help="Path to YAML configuration file")
|
||||
args = parser.parse_args()
|
||||
|
||||
config = EvalConfig.from_yaml(args.yaml)
|
||||
evaluator = OpenRouterEvaluator(model=config.model, config=config)
|
||||
evaluator = OpenRouterEvaluator(model=config.model, config=config, api_key=api_key)
|
||||
results = await evaluator.evaluate_datasets()
|
||||
|
||||
output_dir = f"{config.eval_dir}/{config.category}"
|
||||
|
|
|
|||
|
|
@@ -1,12 +1,12 @@
|
|||
#!/bin/bash
|
||||
# run this script from the parent directory
|
||||
./eval.py --yaml algebra.yaml
|
||||
./eval.py --yaml algorithmic.yaml
|
||||
./eval.py --yaml arc.yaml
|
||||
./eval.py --yaml arithmetic.yaml
|
||||
./eval.py --yaml code.yaml
|
||||
./eval.py --yaml cognition.yaml
|
||||
./eval.py --yaml games.yaml
|
||||
./eval.py --yaml geometry.yaml
|
||||
./eval.py --yaml graphs.yaml
|
||||
./eval.py --yaml logic.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/algebra.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/algorithmic.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/arc.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/arithmetic.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/code.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/cognition.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/games.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/geometry.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/graphs.yaml
|
||||
./eval.py --yaml yaml/llama-3.3-70b-instruct/logic.yaml
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue