mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
use results folder name for eval results
This commit is contained in:
parent
ffe60ef112
commit
791f16ec0f
4 changed files with 12 additions and 12 deletions
|
|
@ -42,7 +42,7 @@ category: category-name
|
||||||
datasets:
|
datasets:
|
||||||
- dataset1
|
- dataset1
|
||||||
- dataset2
|
- dataset2
|
||||||
eval_dir: eval/r1
|
eval_dir: results/model-name
|
||||||
dataset_size: 50
|
dataset_size: 50
|
||||||
dataset_seed: 42
|
dataset_seed: 42
|
||||||
developer_role: system
|
developer_role: system
|
||||||
|
|
@ -81,7 +81,7 @@ datasets:
|
||||||
- word_ladder
|
- word_ladder
|
||||||
- word_sequence_reversal
|
- word_sequence_reversal
|
||||||
- word_sorting
|
- word_sorting
|
||||||
eval_dir: eval/r1
|
eval_dir: results/deepseek-r1
|
||||||
dataset_size: 50
|
dataset_size: 50
|
||||||
dataset_seed: 45
|
dataset_seed: 45
|
||||||
developer_role: system
|
developer_role: system
|
||||||
|
|
@ -117,7 +117,7 @@ datasets:
|
||||||
- word_ladder
|
- word_ladder
|
||||||
- word_sequence_reversal
|
- word_sequence_reversal
|
||||||
- word_sorting
|
- word_sorting
|
||||||
eval_dir: eval/r1
|
eval_dir: results/claude-3.5-sonnet
|
||||||
dataset_size: 50
|
dataset_size: 50
|
||||||
dataset_seed: 45
|
dataset_seed: 45
|
||||||
developer_role: system
|
developer_role: system
|
||||||
|
|
|
||||||
8
eval/yaml/example.yaml
Normal file
8
eval/yaml/example.yaml
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
model: anthropic/claude-3.7-sonnet # find model id: https://openrouter.ai/models
|
||||||
|
category: test
|
||||||
|
datasets:
|
||||||
|
- YOUR_DATASET_NAME
|
||||||
|
eval_dir: results/test
|
||||||
|
dataset_size: 100
|
||||||
|
dataset_seed: 42
|
||||||
|
developer_role: system
|
||||||
|
|
@ -28,7 +28,7 @@ datasets:
|
||||||
- word_ladder
|
- word_ladder
|
||||||
- word_sequence_reversal
|
- word_sequence_reversal
|
||||||
- word_sorting
|
- word_sorting
|
||||||
eval_dir: eval/r1
|
eval_dir: results/r1
|
||||||
dataset_size: 50
|
dataset_size: 50
|
||||||
dataset_seed: 45
|
dataset_seed: 45
|
||||||
developer_role: system
|
developer_role: system
|
||||||
|
|
|
||||||
|
|
@ -1,8 +0,0 @@
|
||||||
model: deepseek/deepseek-r1
|
|
||||||
category: test
|
|
||||||
datasets:
|
|
||||||
- YOUR_DATASET_NAME
|
|
||||||
eval_dir: results/r1
|
|
||||||
dataset_size: 10
|
|
||||||
dataset_seed: 42
|
|
||||||
developer_role: system
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue