Merge remote-tracking branch 'origin/consolidate_eval_script' into fix/eval

This commit is contained in:
joesharratt1229 2025-02-25 18:10:07 +00:00
commit 56cc111ab3
6 changed files with 11 additions and 4 deletions

View file

@@ -7,7 +7,7 @@ datasets:
- simple_equations
- simple_integration
- complex_arithmetic
-eval_dir: eval/r1
+eval_dir: results/r1
dataset_size: 50
dataset_seed: 42
developer_role: system

View file

@@ -5,7 +5,7 @@ datasets:
- figlet_font
- number_sequence
- rubiks_cube
-eval_dir: eval/r1
+eval_dir: results/r1
dataset_size: 50
dataset_seed: 42
developer_role: system

View file

@@ -5,7 +5,7 @@ datasets:
- self_reference
- syllogism
- zebra_puzzles
-eval_dir: eval/r1
+eval_dir: results/r1
dataset_size: 50
dataset_seed: 42
developer_role: system

8
eval/yaml/r1/test.yaml Normal file
View file

@@ -0,0 +1,8 @@
model: deepseek/deepseek-r1
category: test
datasets:
- YOUR_DATASET_NAME
eval_dir: results/r1
dataset_size: 10
dataset_seed: 42
developer_role: system