mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
added o3 mini yaml
This commit is contained in:
parent
ffe60ef112
commit
7d7e44d1af
10 changed files with 168 additions and 0 deletions
14
eval/yaml/openai-o3/algebra.yaml
Normal file
14
eval/yaml/openai-o3/algebra.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
model: openai/o3-mini
|
||||
category: algebra
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- complex_arithmetic
|
||||
- intermediate_integration
|
||||
- polynomial_equations
|
||||
- polynomial_multiplication
|
||||
- simple_equations
|
||||
- simple_integration
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
37
eval/yaml/openai-o3/algorithmic.yaml
Normal file
37
eval/yaml/openai-o3/algorithmic.yaml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
model: openai/o3-mini
|
||||
category: algorithmic
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- ab
|
||||
- binary_alternation
|
||||
- base_conversion
|
||||
- binary_matrix
|
||||
- caesar_cipher
|
||||
- count_primes
|
||||
- cryptarithm
|
||||
- game_of_life
|
||||
- graph_color
|
||||
- group_anagrams
|
||||
- isomorphic_strings
|
||||
- letter_counting
|
||||
- letter_jumble
|
||||
- manipulate_matrix
|
||||
- number_filtering
|
||||
- number_sorting
|
||||
- palindrome
|
||||
- pool_matrix
|
||||
- ransom_note
|
||||
- rotate_matrix
|
||||
- sentence_reordering
|
||||
- spell_backward
|
||||
- spiral_matrix
|
||||
- string_insertion
|
||||
- string_manipulation
|
||||
- string_synthesis
|
||||
- word_ladder
|
||||
- word_sequence_reversal
|
||||
- word_sorting
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
11
eval/yaml/openai-o3/arc.yaml
Normal file
11
eval/yaml/openai-o3/arc.yaml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
model: openai/o3-mini
|
||||
category: arc
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- arc_1d
|
||||
- arc_agi
|
||||
- rearc
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
26
eval/yaml/openai-o3/arithmetic.yaml
Normal file
26
eval/yaml/openai-o3/arithmetic.yaml
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
model: openai/o3-mini
|
||||
category: arithmetic
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- basic_arithmetic
|
||||
- bitwise_arithmetic
|
||||
- calendar_arithmetic
|
||||
- chain_sum
|
||||
- count_bits
|
||||
- decimal_arithmetic
|
||||
- decimal_chain_sum
|
||||
- dice
|
||||
- fraction_simplification
|
||||
- gcd
|
||||
- gsm_symbolic
|
||||
- lcm
|
||||
- leg_counting
|
||||
- number_format
|
||||
- power_function
|
||||
- prime_factorization
|
||||
- products
|
||||
- time_intervals
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
9
eval/yaml/openai-o3/code.yaml
Normal file
9
eval/yaml/openai-o3/code.yaml
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
model: openai/o3-mini
|
||||
category: code
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- bf
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
14
eval/yaml/openai-o3/cognition.yaml
Normal file
14
eval/yaml/openai-o3/cognition.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
model: openai/o3-mini
|
||||
category: cognition
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- color_cube_rotation
|
||||
- figlet_font
|
||||
- needle_haystack
|
||||
- number_sequence
|
||||
- rectangle_count
|
||||
- rubiks_cube
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
19
eval/yaml/openai-o3/games.yaml
Normal file
19
eval/yaml/openai-o3/games.yaml
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
model: openai/o3-mini
|
||||
category: games
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- countdown
|
||||
- emoji_mystery
|
||||
- futoshiki
|
||||
- knight_swap
|
||||
- maze
|
||||
- mini_sudoku
|
||||
- n_queens
|
||||
- sokoban
|
||||
- sudoku
|
||||
- tower_of_hanoi
|
||||
- tsumego
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
10
eval/yaml/openai-o3/geometry.yaml
Normal file
10
eval/yaml/openai-o3/geometry.yaml
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
model: openai/o3-mini
|
||||
category: geometry
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- simple_geometry
|
||||
- advanced_geometry
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
14
eval/yaml/openai-o3/graphs.yaml
Normal file
14
eval/yaml/openai-o3/graphs.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
model: openai/o3-mini
|
||||
category: graphs
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- course_schedule
|
||||
- family_relationships
|
||||
- largest_island
|
||||
- list_functions
|
||||
- quantum_lock
|
||||
- shortest_path
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
14
eval/yaml/openai-o3/logic.yaml
Normal file
14
eval/yaml/openai-o3/logic.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
model: openai/o3-mini
|
||||
category: logic
|
||||
provider: OpenAI
|
||||
datasets:
|
||||
- aiw
|
||||
- circuit_logic
|
||||
- propositional_logic
|
||||
- self_reference
|
||||
- syllogism
|
||||
- zebra_puzzles
|
||||
eval_dir: results/openai-03
|
||||
dataset_size: 50
|
||||
dataset_seed: 45
|
||||
developer_role: system
|
||||
Loading…
Add table
Add a link
Reference in a new issue