mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
[eval-basic] initial scripts for evaluating models on reasoning gym
This commit is contained in:
parent
8c4400b18a
commit
75cfd31ec2
11 changed files with 1306 additions and 0 deletions
30
eval/eval_basic.sh
Normal file
30
eval/eval_basic.sh
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#!/bin/bash
#
# Evaluate every model listed in MODELS by invoking eval_basic.py with the
# eval_basic.json config, passing OUTPUT_DIR as the output directory.
#
# Environment:
#   OPENROUTER_API_KEY  must be set and non-empty, otherwise the script aborts.
#
# Exit status:
#   0  every evaluation command succeeded
#   1  the API key is missing, setup failed, or at least one evaluation failed

# -u: treat unset variables as errors; pipefail: a pipeline fails if any stage
# fails. -e is deliberately not set: a failing evaluation must not abort the
# remaining models, so failures are handled explicitly below.
set -uo pipefail

# Check if OPENROUTER_API_KEY is set (":-" keeps the check safe under set -u)
if [[ -z "${OPENROUTER_API_KEY:-}" ]]; then
  echo "Error: OPENROUTER_API_KEY environment variable is not set" >&2
  echo "Please set it using: export OPENROUTER_API_KEY=your-api-key" >&2
  exit 1
fi

# eval_basic.py and eval_basic.json are referenced by relative path below, so
# run from the directory that contains this script regardless of where the
# caller invoked it from.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || exit 1
cd -- "$script_dir" || exit 1

# Configuration
readonly OUTPUT_DIR="results"

# List of models to evaluate
MODELS=(
  "google/gemini-2.0-flash-001"
)

# Create output directory
if ! mkdir -p -- "$OUTPUT_DIR"; then
  echo "Error: could not create output directory: $OUTPUT_DIR" >&2
  exit 1
fi

# Run evaluations. A failing model is reported and counted, and the loop moves
# on to the next model so one failure does not prevent the others from running.
failures=0
for model in "${MODELS[@]}"; do
  echo "Evaluating $model..."
  if ! python eval_basic.py \
    --model "$model" \
    --config "eval_basic.json" \
    --output-dir "$OUTPUT_DIR"; then
    echo "Error: evaluation failed for $model" >&2
    failures=$((failures + 1))
  fi
done

# Only claim success when every evaluation command exited with status 0.
if ((failures > 0)); then
  echo "Error: $failures of ${#MODELS[@]} evaluation(s) failed" >&2
  exit 1
fi

echo "All evaluations completed!"
|
||||
Loading…
Add table
Add a link
Reference in a new issue