[eval-basic] initial scripts for evaluating models on reasoning gym

2026-04-19 12:58:07 +00:00 · 2025-02-09 22:36:27 -08:00 · 2025-02-09 22:36:27 -08:00 · 75cfd31ec2
commit 75cfd31ec2
parent 8c4400b18a
11 changed files with 1306 additions and 0 deletions
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,3 +5,4 @@ isort>=5.13.2
 flake8>=7.1.1
 mypy>=1.14.1
 pre-commit>=4.1.0
+openai>=1.61.1