[eval-basic] initial scripts for evaluating models on reasoning gym

This commit is contained in:
rishabhranawat 2025-02-09 22:36:27 -08:00
parent 8c4400b18a
commit 75cfd31ec2
11 changed files with 1306 additions and 0 deletions

21
eval/eval_basic.json Normal file
View file

@ -0,0 +1,21 @@
[
{
"name": "letter_counting",
"min_words": 5,
"max_words": 15,
"size": 10,
"seed": 42
},
{
"name": "propositional_logic",
"size": 10,
"seed": 42
},
{
"name": "leg_counting",
"min_animals": 3,
"max_animals": 8,
"size": 10,
"seed": 42
}
]