[eval-v1] benchmark with 50 samples

This commit is contained in:
rishabhranawat 2025-02-10 22:05:09 -08:00
parent 06cabcfdee
commit 6e3d049fed
2 changed files with 74 additions and 3 deletions

View file

@@ -3,19 +3,29 @@
 "name": "letter_counting",
 "min_words": 5,
 "max_words": 15,
-"size": 10,
+"size": 50,
 "seed": 42
 },
 {
 "name": "propositional_logic",
-"size": 10,
+"size": 50,
 "seed": 42
 },
 {
 "name": "leg_counting",
 "min_animals": 3,
 "max_animals": 8,
-"size": 10,
+"size": 50,
 "seed": 42
+},
+{
+"name": "group_anagrams",
+"size": 50,
+"seed": 42
+},
+{
+"name": "spell_backward",
+"size": 50,
+"seed": 42
 }
 ]