mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
Merge remote-tracking branch 'origin/consolidate_eval_script' into fix/eval
This commit is contained in:
commit
56cc111ab3
6 changed files with 11 additions and 4 deletions
|
|
@ -7,7 +7,7 @@ datasets:
|
|||
- simple_equations
|
||||
- simple_integration
|
||||
- complex_arithmetic
|
||||
eval_dir: eval/r1
|
||||
eval_dir: results/r1
|
||||
dataset_size: 50
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ datasets:
|
|||
- figlet_font
|
||||
- number_sequence
|
||||
- rubiks_cube
|
||||
eval_dir: eval/r1
|
||||
eval_dir: results/r1
|
||||
dataset_size: 50
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ datasets:
|
|||
- self_reference
|
||||
- syllogism
|
||||
- zebra_puzzles
|
||||
eval_dir: eval/r1
|
||||
eval_dir: results/r1
|
||||
dataset_size: 50
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
|
|
|
|||
8
eval/yaml/r1/test.yaml
Normal file
8
eval/yaml/r1/test.yaml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
model: deepseek/deepseek-r1
|
||||
category: test
|
||||
datasets:
|
||||
- YOUR_DATASET_NAME
|
||||
eval_dir: results/r1
|
||||
dataset_size: 10
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
Loading…
Add table
Add a link
Reference in a new issue