add llama-3.3-70b-instruct eval yaml files

This commit is contained in:
Andreas Koepf 2025-02-26 20:54:07 +01:00
parent 6511725711
commit acb2d7eb53
9 changed files with 129 additions and 0 deletions

View file

@ -0,0 +1,11 @@
# Eval config: meta-llama/llama-3.3-70b-instruct on the "arc" category.
model: meta-llama/llama-3.3-70b-instruct
# NOTE(review): presumably the inference provider serving the model - confirm.
provider: Hyperbolic
category: arc

# Datasets listed for this category.
datasets:
  - arc_1d
  - arc_agi
  - rearc

# NOTE(review): presumably where results are written - confirm with the runner.
eval_dir: results/llama-3.3-70b-instruct

# NOTE(review): presumably examples per dataset and the RNG seed - confirm.
dataset_size: 50
dataset_seed: 45

# NOTE(review): presumably the chat role used for the instruction prompt - confirm.
developer_role: system

View file

@ -0,0 +1,26 @@
# Eval config: meta-llama/llama-3.3-70b-instruct on the "arithmetic" category.
model: meta-llama/llama-3.3-70b-instruct
# NOTE(review): presumably the inference provider serving the model - confirm.
provider: Hyperbolic
category: arithmetic

# Datasets listed for this category.
datasets:
  - basic_arithmetic
  - bitwise_arithmetic
  - calendar_arithmetic
  - chain_sum
  - count_bits
  - decimal_arithmetic
  - decimal_chain_sum
  - dice
  - fraction_simplification
  - gcd
  - gsm_symbolic
  - lcm
  - leg_counting
  - number_format
  - power_function
  - prime_factorization
  - products
  - time_intervals

# NOTE(review): presumably where results are written - confirm with the runner.
eval_dir: results/llama-3.3-70b-instruct

# NOTE(review): presumably examples per dataset and the RNG seed - confirm.
dataset_size: 50
dataset_seed: 45

# NOTE(review): presumably the chat role used for the instruction prompt - confirm.
developer_role: system

View file

@ -0,0 +1,9 @@
# Eval config: meta-llama/llama-3.3-70b-instruct on the "code" category.
model: meta-llama/llama-3.3-70b-instruct
# NOTE(review): presumably the inference provider serving the model - confirm.
provider: Hyperbolic
category: code

# Datasets listed for this category (a single entry).
datasets:
  - bf

# NOTE(review): presumably where results are written - confirm with the runner.
eval_dir: results/llama-3.3-70b-instruct

# NOTE(review): presumably examples per dataset and the RNG seed - confirm.
dataset_size: 50
dataset_seed: 45

# NOTE(review): presumably the chat role used for the instruction prompt - confirm.
developer_role: system

View file

@ -0,0 +1,14 @@
# Eval config: meta-llama/llama-3.3-70b-instruct on the "cognition" category.
model: meta-llama/llama-3.3-70b-instruct
# NOTE(review): presumably the inference provider serving the model - confirm.
provider: Hyperbolic
category: cognition

# Datasets listed for this category.
datasets:
  - color_cube_rotation
  - figlet_font
  - needle_haystack
  - number_sequence
  - rectangle_count
  - rubiks_cube

# NOTE(review): presumably where results are written - confirm with the runner.
eval_dir: results/llama-3.3-70b-instruct

# NOTE(review): presumably examples per dataset and the RNG seed - confirm.
dataset_size: 50
dataset_seed: 45

# NOTE(review): presumably the chat role used for the instruction prompt - confirm.
developer_role: system

View file

@ -0,0 +1,19 @@
# Eval config: meta-llama/llama-3.3-70b-instruct on the "games" category.
model: meta-llama/llama-3.3-70b-instruct
# NOTE(review): presumably the inference provider serving the model - confirm.
provider: Hyperbolic
category: games

# Datasets listed for this category.
datasets:
  - countdown
  - emoji_mystery
  # Spelled as the puzzle name "Futoshiki" (was "futoshuki").
  # NOTE(review): confirm this matches the registered dataset name.
  - futoshiki
  - knight_swap
  - maze
  - mini_sudoku
  - n_queens
  - sokoban
  - sudoku
  - tower_of_hanoi
  - tsumego

# NOTE(review): presumably where results are written - confirm with the runner.
eval_dir: results/llama-3.3-70b-instruct

# NOTE(review): presumably examples per dataset and the RNG seed - confirm.
dataset_size: 50
dataset_seed: 45

# NOTE(review): presumably the chat role used for the instruction prompt - confirm.
developer_role: system

View file

@ -0,0 +1,10 @@
# Eval config: meta-llama/llama-3.3-70b-instruct on the "geometry" category.
model: meta-llama/llama-3.3-70b-instruct
# NOTE(review): presumably the inference provider serving the model - confirm.
provider: Hyperbolic
category: geometry

# Datasets listed for this category.
datasets:
  - simple_geometry
  - advanced_geometry

# NOTE(review): presumably where results are written - confirm with the runner.
eval_dir: results/llama-3.3-70b-instruct

# NOTE(review): presumably examples per dataset and the RNG seed - confirm.
dataset_size: 50
dataset_seed: 45

# NOTE(review): presumably the chat role used for the instruction prompt - confirm.
developer_role: system

View file

@ -0,0 +1,14 @@
# Eval config: meta-llama/llama-3.3-70b-instruct on the "graphs" category.
model: meta-llama/llama-3.3-70b-instruct
# NOTE(review): presumably the inference provider serving the model - confirm.
provider: Hyperbolic
category: graphs

# Datasets listed for this category.
datasets:
  - course_schedule
  - family_relationships
  - largest_island
  - list_functions
  - quantum_lock
  - shortest_path

# NOTE(review): presumably where results are written - confirm with the runner.
eval_dir: results/llama-3.3-70b-instruct

# NOTE(review): presumably examples per dataset and the RNG seed - confirm.
dataset_size: 50
dataset_seed: 45

# NOTE(review): presumably the chat role used for the instruction prompt - confirm.
developer_role: system

View file

@ -0,0 +1,14 @@
# Eval config: meta-llama/llama-3.3-70b-instruct on the "logic" category.
model: meta-llama/llama-3.3-70b-instruct
# NOTE(review): presumably the inference provider serving the model - confirm.
provider: Hyperbolic
category: logic

# Datasets listed for this category.
datasets:
  - aiw
  - circuit_logic
  - propositional_logic
  - self_reference
  - syllogism
  - zebra_puzzles

# NOTE(review): presumably where results are written - confirm with the runner.
eval_dir: results/llama-3.3-70b-instruct

# NOTE(review): presumably examples per dataset and the RNG seed - confirm.
dataset_size: 50
dataset_seed: 45

# NOTE(review): presumably the chat role used for the instruction prompt - confirm.
developer_role: system