diff --git a/eval/scripts/run_llama-3.3-70-instruct_all.sh b/eval/scripts/run_llama-3.3-70-instruct_all.sh
new file mode 100755
index 00000000..083bb86b
--- /dev/null
+++ b/eval/scripts/run_llama-3.3-70-instruct_all.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Runs every llama-3.3-70b-instruct evaluation category, one eval.py call each.
+#
+# eval.py and the yaml/ configs are addressed relative to the parent directory
+# of scripts/, so the script switches there first. Starting it from the parent
+# directory (the previously documented usage) keeps working unchanged.
+#
+# NOTE(review): assumes eval.py opens --yaml as a path relative to the working
+# directory, and that algebra.yaml / algorithmic.yaml already exist next to the
+# other configs in CONFIG_DIR -- confirm both.
+
+cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 1
+
+# Directory holding this model's per-category configs.
+readonly CONFIG_DIR="yaml/llama-3.3-70b-instruct"
+
+readonly CATEGORIES=(
+  algebra
+  algorithmic
+  arc
+  arithmetic
+  code
+  cognition
+  games
+  geometry
+  graphs
+  logic
+)
+
+failed=()
+for category in "${CATEGORIES[@]}"; do
+  # Keep going after a failure so one broken category does not block the rest,
+  # but remember it so the exit status reflects the problem.
+  ./eval.py --yaml "${CONFIG_DIR}/${category}.yaml" || failed+=("${category}")
+done
+
+if (( ${#failed[@]} > 0 )); then
+  printf 'failed categories: %s\n' "${failed[*]}" >&2
+  exit 1
+fi
diff --git a/eval/yaml/llama-3.3-70b-instruct/arc.yaml b/eval/yaml/llama-3.3-70b-instruct/arc.yaml
new file mode 100644
index 00000000..50ca22b4
--- /dev/null
+++ b/eval/yaml/llama-3.3-70b-instruct/arc.yaml
@@ -0,0 +1,11 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: arc
+datasets:
+  - arc_1d
+  - arc_agi
+  - rearc
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
diff --git a/eval/yaml/llama-3.3-70b-instruct/arithmetic.yaml b/eval/yaml/llama-3.3-70b-instruct/arithmetic.yaml
new file mode 100644
index 00000000..63e8e53f
--- /dev/null
+++ b/eval/yaml/llama-3.3-70b-instruct/arithmetic.yaml
@@ -0,0 +1,26 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: arithmetic
+datasets:
+  - basic_arithmetic
+  - bitwise_arithmetic
+  - calendar_arithmetic
+  - chain_sum
+  - count_bits
+  - decimal_arithmetic
+  - decimal_chain_sum
+  - dice
+  - fraction_simplification
+  - gcd
+  - gsm_symbolic
+  - lcm
+  - leg_counting
+  - number_format
+  - power_function
+  - prime_factorization
+  - products
+  - time_intervals
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
diff --git a/eval/yaml/llama-3.3-70b-instruct/code.yaml b/eval/yaml/llama-3.3-70b-instruct/code.yaml
new file mode 100644
index 00000000..eb17fe1e
--- /dev/null
+++ b/eval/yaml/llama-3.3-70b-instruct/code.yaml
@@ -0,0 +1,9 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: code
+datasets:
+  - bf
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
diff --git a/eval/yaml/llama-3.3-70b-instruct/cognition.yaml b/eval/yaml/llama-3.3-70b-instruct/cognition.yaml
new file mode 100644
index 00000000..9ae33127
--- /dev/null
+++ b/eval/yaml/llama-3.3-70b-instruct/cognition.yaml
@@ -0,0 +1,14 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: cognition
+datasets:
+  - color_cube_rotation
+  - figlet_font
+  - needle_haystack
+  - number_sequence
+  - rectangle_count
+  - rubiks_cube
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
diff --git a/eval/yaml/llama-3.3-70b-instruct/games.yaml b/eval/yaml/llama-3.3-70b-instruct/games.yaml
new file mode 100644
index 00000000..1620da14
--- /dev/null
+++ b/eval/yaml/llama-3.3-70b-instruct/games.yaml
@@ -0,0 +1,19 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: games
+datasets:
+  - countdown
+  - emoji_mystery
+  - futoshuki  # NOTE(review): puzzle is usually spelled "futoshiki" -- confirm this matches the registered dataset name
+  - knight_swap
+  - maze
+  - mini_sudoku
+  - n_queens
+  - sokoban
+  - sudoku
+  - tower_of_hanoi
+  - tsumego
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
diff --git a/eval/yaml/llama-3.3-70b-instruct/geometry.yaml b/eval/yaml/llama-3.3-70b-instruct/geometry.yaml
new file mode 100644
index 00000000..a3b8fd0d
--- /dev/null
+++ b/eval/yaml/llama-3.3-70b-instruct/geometry.yaml
@@ -0,0 +1,10 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: geometry
+datasets:
+  - simple_geometry
+  - advanced_geometry
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
diff --git a/eval/yaml/llama-3.3-70b-instruct/graphs.yaml b/eval/yaml/llama-3.3-70b-instruct/graphs.yaml
new file mode 100644
index 00000000..c9414474
--- /dev/null
+++ b/eval/yaml/llama-3.3-70b-instruct/graphs.yaml
@@ -0,0 +1,14 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: graphs
+datasets:
+  - course_schedule
+  - family_relationships
+  - largest_island
+  - list_functions
+  - quantum_lock
+  - shortest_path
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
diff --git a/eval/yaml/llama-3.3-70b-instruct/logic.yaml b/eval/yaml/llama-3.3-70b-instruct/logic.yaml
new file mode 100644
index 00000000..9d2e126d
--- /dev/null
+++ b/eval/yaml/llama-3.3-70b-instruct/logic.yaml
@@ -0,0 +1,14 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: logic
+datasets:
+  - aiw
+  - circuit_logic
+  - propositional_logic
+  - self_reference
+  - syllogism
+  - zebra_puzzles
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system