mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
consolidate eval scripts to have single eval.py
This commit is contained in:
parent
bea806fe3c
commit
e7ae82a831
12 changed files with 104 additions and 337 deletions
13
eval/yaml/algebra.yaml
Normal file
13
eval/yaml/algebra.yaml
Normal file
|
|
@@ -0,0 +1,13 @@
|
|||
model: deepseek/deepseek-r1
|
||||
category: algebra
|
||||
datasets:
|
||||
- intermediate_integration
|
||||
- polynomial_equations
|
||||
- polynomial_multiplication
|
||||
- simple_equations
|
||||
- simple_integration
|
||||
- complex_arithmetic
|
||||
eval_dir: eval/r1
|
||||
dataset_size: 50
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
24
eval/yaml/algorithmic.yaml
Normal file
24
eval/yaml/algorithmic.yaml
Normal file
|
|
@@ -0,0 +1,24 @@
|
|||
model: deepseek/deepseek-r1
|
||||
category: algorithmic
|
||||
datasets:
|
||||
- binary_matrix
|
||||
- caesar_cipher
|
||||
- group_anagrams
|
||||
- isomorphic_strings
|
||||
- letter_counting
|
||||
- letter_jumble
|
||||
- number_filtering
|
||||
- number_sorting
|
||||
- palindrome
|
||||
- ransom_note
|
||||
- rotate_matrix
|
||||
- sentence_reordering
|
||||
- spell_backward
|
||||
- spiral_matrix
|
||||
- word_ladder
|
||||
- word_sequence_reversal
|
||||
- word_sorting
|
||||
eval_dir: eval/r1
|
||||
dataset_size: 50
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
11
eval/yaml/cognition.yaml
Normal file
11
eval/yaml/cognition.yaml
Normal file
|
|
@@ -0,0 +1,11 @@
|
|||
model: deepseek/deepseek-r1
|
||||
category: cognition
|
||||
datasets:
|
||||
- color_cube_rotation
|
||||
- figlet_font
|
||||
- number_sequence
|
||||
- rubiks_cube
|
||||
eval_dir: eval/r1
|
||||
dataset_size: 50
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
11
eval/yaml/logic.yaml
Normal file
11
eval/yaml/logic.yaml
Normal file
|
|
@@ -0,0 +1,11 @@
|
|||
model: deepseek/deepseek-r1
|
||||
category: logic
|
||||
datasets:
|
||||
- propositional_logic
|
||||
- self_reference
|
||||
- syllogism
|
||||
- zebra_puzzles
|
||||
eval_dir: eval/r1
|
||||
dataset_size: 50
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
8
eval/yaml/test.yaml
Normal file
8
eval/yaml/test.yaml
Normal file
|
|
@@ -0,0 +1,8 @@
|
|||
model: deepseek/deepseek-r1
|
||||
category: test
|
||||
datasets:
|
||||
- YOUR_DATASET_NAME
|
||||
eval_dir: eval/r1
|
||||
dataset_size: 10
|
||||
dataset_seed: 42
|
||||
developer_role: system
|
||||
Loading…
Add table
Add a link
Reference in a new issue