add llama-3.3-70b-instruct algebra, algorithmic eval configs

2026-04-19 12:58:07 +00:00 · 2025-02-25 23:07:46 +01:00 · 2025-02-25 23:07:46 +01:00 · 6d5168d1e5
commit 6d5168d1e5
parent 92c8be1699
9 changed files with 62 additions and 0 deletions
--- a/eval/yaml/example.yaml
+++ b/eval/yaml/example.yaml
@@ -1,4 +1,5 @@
 model: anthropic/claude-3.7-sonnet  # find model id: https://openrouter.ai/models
+provider: Anthropic
 category: test
 datasets:
  - YOUR_DATASET_NAME
--- a/eval/yaml/llama-3.3-70b-instruct/algebra.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/algebra.yaml
@@ -0,0 +1,14 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: algebra
+datasets:
+  - intermediate_integration
+  - polynomial_equations
+  - polynomial_multiplication
+  - simple_equations
+  - simple_integration
+  - complex_arithmetic
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 42
+developer_role: system
--- a/eval/yaml/llama-3.3-70b-instruct/algorithmic.yaml
+++ b/eval/yaml/llama-3.3-70b-instruct/algorithmic.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.3-70b-instruct
+provider: Hyperbolic
+category: algorithmic
+datasets:
+  - ab
+  - base_conversion
+  - binary_alternation
+  - binary_matrix
+  - caesar_cipher
+  - count_primes
+  - cryptarithm
+  - game_of_life
+  - graph_color
+  - group_anagrams
+  - isomorphic_strings
+  - jugs
+  - letter_counting
+  - letter_jumble
+  - manipulate_matrix
+  - number_filtering
+  - number_sorting
+  - palindrome
+  - palindrome_partitioning
+  - pool_matrix
+  - ransom_note
+  - rotate_matrix
+  - rotten_oranges
+  - sentence_reordering
+  - spell_backward
+  - spiral_matrix
+  - string_insertion
+  - string_manipulation
+  - string_splitting
+  - string_synthesis
+  - word_ladder
+  - word_sequence_reversal
+  - word_sorting
+eval_dir: results/llama-3.3-70b-instruct
+dataset_size: 50
+dataset_seed: 45
+developer_role: system
--- a/eval/yaml/r1/algebra.yaml
+++ b/eval/yaml/r1/algebra.yaml
@@ -1,4 +1,5 @@
 model: deepseek/deepseek-r1
+provider: Nebius
 category: algebra
 datasets:
  - intermediate_integration
--- a/eval/yaml/r1/algorithmic.yaml
+++ b/eval/yaml/r1/algorithmic.yaml
@@ -1,4 +1,5 @@
 model: deepseek/deepseek-r1
+provider: Nebius
 category: algorithmic
 datasets:
  - ab
--- a/eval/yaml/r1/cognition.yaml
+++ b/eval/yaml/r1/cognition.yaml
@@ -1,4 +1,5 @@
 model: deepseek/deepseek-r1
+provider: Nebius
 category: cognition
 datasets:
  -  color_cube_rotation
--- a/eval/yaml/r1/logic.yaml
+++ b/eval/yaml/r1/logic.yaml
@@ -1,4 +1,5 @@
 model: deepseek/deepseek-r1
+provider: Nebius
 category: logic
 datasets:
  - propositional_logic