From 7d7e44d1afa254eeaea4e5d2cf541fe411c33036 Mon Sep 17 00:00:00 2001 From: joesharratt1229 Date: Wed, 26 Feb 2025 08:09:12 +0000 Subject: [PATCH] added o3 mini yaml --- eval/yaml/openai-o3/algebra.yaml | 14 +++++++++++ eval/yaml/openai-o3/algorithmic.yaml | 37 ++++++++++++++++++++++++++++ eval/yaml/openai-o3/arc.yaml | 11 +++++++++ eval/yaml/openai-o3/arithmetic.yaml | 26 +++++++++++++++++++ eval/yaml/openai-o3/code.yaml | 9 +++++++ eval/yaml/openai-o3/cognition.yaml | 14 +++++++++++ eval/yaml/openai-o3/games.yaml | 19 ++++++++++++++ eval/yaml/openai-o3/geometry.yaml | 10 ++++++++ eval/yaml/openai-o3/graphs.yaml | 14 +++++++++++ eval/yaml/openai-o3/logic.yaml | 14 +++++++++++ 10 files changed, 168 insertions(+) create mode 100644 eval/yaml/openai-o3/algebra.yaml create mode 100644 eval/yaml/openai-o3/algorithmic.yaml create mode 100644 eval/yaml/openai-o3/arc.yaml create mode 100644 eval/yaml/openai-o3/arithmetic.yaml create mode 100644 eval/yaml/openai-o3/code.yaml create mode 100644 eval/yaml/openai-o3/cognition.yaml create mode 100644 eval/yaml/openai-o3/games.yaml create mode 100644 eval/yaml/openai-o3/geometry.yaml create mode 100644 eval/yaml/openai-o3/graphs.yaml create mode 100644 eval/yaml/openai-o3/logic.yaml diff --git a/eval/yaml/openai-o3/algebra.yaml b/eval/yaml/openai-o3/algebra.yaml new file mode 100644 index 00000000..aee04d20 --- /dev/null +++ b/eval/yaml/openai-o3/algebra.yaml @@ -0,0 +1,14 @@ +model: openai/o3-mini +category: algebra +provider: OpenAI +datasets: + - complex_arithmetic + - intermediate_integration + - polynomial_equations + - polynomial_multiplication + - simple_equations + - simple_integration +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/algorithmic.yaml b/eval/yaml/openai-o3/algorithmic.yaml new file mode 100644 index 00000000..295afc7c --- /dev/null +++ b/eval/yaml/openai-o3/algorithmic.yaml @@ -0,0 +1,37 @@ +model: openai/o3-mini +category: algorithmic +provider: OpenAI +datasets: + - ab + - binary_alternation + - base_conversion + - binary_matrix + - caesar_cipher + - count_primes + - cryptarithm + - game_of_life + - graph_color + - group_anagrams + - isomorphic_strings + - letter_counting + - letter_jumble + - manipulate_matrix + - number_filtering + - number_sorting + - palindrome + - pool_matrix + - ransom_note + - rotate_matrix + - sentence_reordering + - spell_backward + - spiral_matrix + - string_insertion + - string_manipulation + - string_synthesis + - word_ladder + - word_sequence_reversal + - word_sorting +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/arc.yaml b/eval/yaml/openai-o3/arc.yaml new file mode 100644 index 00000000..992fc8a9 --- /dev/null +++ b/eval/yaml/openai-o3/arc.yaml @@ -0,0 +1,11 @@ +model: openai/o3-mini +category: arc +provider: OpenAI +datasets: + - arc_1d + - arc_agi + - rearc +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/arithmetic.yaml b/eval/yaml/openai-o3/arithmetic.yaml new file mode 100644 index 00000000..67f56e59 --- /dev/null +++ b/eval/yaml/openai-o3/arithmetic.yaml @@ -0,0 +1,26 @@ +model: openai/o3-mini +category: arithmetic +provider: OpenAI +datasets: + - basic_arithmetic + - bitwise_arithmetic + - calendar_arithmetic + - chain_sum + - count_bits + - decimal_arithmetic + - decimal_chain_sum + - dice + - fraction_simplification + - gcd + - gsm_symbolic + - lcm + - leg_counting + - number_format + - power_function + - prime_factorization + - products + - time_intervals +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/code.yaml b/eval/yaml/openai-o3/code.yaml new file mode 100644 index 00000000..8fa83d73 --- /dev/null +++ b/eval/yaml/openai-o3/code.yaml @@ -0,0 +1,9 @@ +model: openai/o3-mini +category: code +provider: OpenAI +datasets: + - bf +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/cognition.yaml b/eval/yaml/openai-o3/cognition.yaml new file mode 100644 index 00000000..e5cb510d --- /dev/null +++ b/eval/yaml/openai-o3/cognition.yaml @@ -0,0 +1,14 @@ +model: openai/o3-mini +category: cognition +provider: OpenAI +datasets: + - color_cube_rotation + - figlet_font + - needle_haystack + - number_sequence + - rectangle_count + - rubiks_cube +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/games.yaml b/eval/yaml/openai-o3/games.yaml new file mode 100644 index 00000000..f01c13c1 --- /dev/null +++ b/eval/yaml/openai-o3/games.yaml @@ -0,0 +1,19 @@ +model: openai/o3-mini +category: games +provider: OpenAI +datasets: + - countdown + - emoji_mystery + - futoshuki + - knight_swap + - maze + - mini_sudoku + - n_queens + - sokoban + - sudoku + - tower_of_hanoi + - tsumego +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/geometry.yaml b/eval/yaml/openai-o3/geometry.yaml new file mode 100644 index 00000000..57a40a6d --- /dev/null +++ b/eval/yaml/openai-o3/geometry.yaml @@ -0,0 +1,10 @@ +model: openai/o3-mini +category: geometry +provider: OpenAI +datasets: + - simple_geometry + - advanced_geometry +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/graphs.yaml b/eval/yaml/openai-o3/graphs.yaml new file mode 100644 index 00000000..371b4459 --- /dev/null +++ b/eval/yaml/openai-o3/graphs.yaml @@ -0,0 +1,14 @@ +model: openai/o3-mini +category: graphs +provider: OpenAI +datasets: + - course_schedule + - family_relationships + - largest_island + - list_functions + - quantum_lock + - shortest_path +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/eval/yaml/openai-o3/logic.yaml b/eval/yaml/openai-o3/logic.yaml new file mode 100644 index 00000000..a941b54d --- /dev/null +++ b/eval/yaml/openai-o3/logic.yaml @@ -0,0 +1,14 @@ +model: openai/o3-mini +category: logic +provider: OpenAI +datasets: + - aiw + - circuit_logic + - propositional_logic + - self_reference + - syllogism + - zebra_puzzles +eval_dir: results/openai-03 +dataset_size: 50 +dataset_seed: 45 +developer_role: system