add llama-3.3-70b-instruct eval yaml files

2026-04-19 12:58:07 +00:00 · 2025-02-26 20:54:07 +01:00 · 2025-02-26 20:54:07 +01:00 · acb2d7eb53
commit acb2d7eb53
parent 6511725711
9 changed files with 129 additions and 0 deletions
--- a/eval/yaml/llama-3.3-70b-instruct/arc.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/arc.yaml
@ -0,0 +1,11 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: arc
+datasets:
+  - arc_1d
+  - arc_agi
+  - rearc
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
--- a/eval/yaml/llama-3.3-70b-instruct/arithmetic.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/arithmetic.yaml
@ -0,0 +1,26 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: arithmetic
+datasets:
+  - basic_arithmetic
+  - bitwise_arithmetic
+  - calendar_arithmetic
+  - chain_sum
+  - count_bits
+  - decimal_arithmetic
+  - decimal_chain_sum
+  - dice
+  - fraction_simplification
+  - gcd
+  - gsm_symbolic
+  - lcm
+  - leg_counting
+  - number_format
+  - power_function
+  - prime_factorization
+  - products
+  - time_intervals
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
--- a/eval/yaml/llama-3.3-70b-instruct/code.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/code.yaml
@ -0,0 +1,9 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: code
+datasets:
+  - bf
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
--- a/eval/yaml/llama-3.3-70b-instruct/cognition.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/cognition.yaml
@ -0,0 +1,14 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: cognition
+datasets:
+  - color_cube_rotation
+  - figlet_font
+  - needle_haystack
+  - number_sequence
+  - rectangle_count
+  - rubiks_cube
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
--- a/eval/yaml/llama-3.3-70b-instruct/games.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/games.yaml
@ -0,0 +1,19 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: games
+datasets:
+  - countdown
+  - emoji_mystery
+  - futoshiki
+  - knight_swap
+  - maze
+  - mini_sudoku
+  - n_queens
+  - sokoban
+  - sudoku
+  - tower_of_hanoi
+  - tsumego
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
--- a/eval/yaml/llama-3.3-70b-instruct/geometry.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/geometry.yaml
@ -0,0 +1,10 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: geometry
+datasets:
+  - simple_geometry
+  - advanced_geometry
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
--- a/eval/yaml/llama-3.3-70b-instruct/graphs.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/graphs.yaml
@ -0,0 +1,14 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: graphs
+datasets:
+  - course_schedule
+  - family_relationships
+  - largest_island
+  - list_functions
+  - quantum_lock
+  - shortest_path
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
--- a/eval/yaml/llama-3.3-70b-instruct/logic.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/logic.yaml
@ -0,0 +1,14 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: logic
+datasets:
+  - aiw
+  - circuit_logic
+  - propositional_logic
+  - self_reference
+  - syllogism
+  - zebra_puzzles
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system