mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-27 17:23:19 +00:00
add llama-3.3-70b-instruct algebra, algorithmic eval configs
This commit is contained in:
parent
92c8be1699
commit
6d5168d1e5
9 changed files with 62 additions and 0 deletions
1
eval/.gitignore
vendored
Normal file
1
eval/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
results/
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
model: anthropic/claude-3.7-sonnet # find model id: https://openrouter.ai/models
|
model: anthropic/claude-3.7-sonnet # find model id: https://openrouter.ai/models
|
||||||
|
provider: Anthropic
|
||||||
category: test
|
category: test
|
||||||
datasets:
|
datasets:
|
||||||
- YOUR_DATASET_NAME
|
- YOUR_DATASET_NAME
|
||||||
|
|
|
||||||
14
eval/yaml/llama-3.3-70b-instruct/algebra.yaml
Normal file
14
eval/yaml/llama-3.3-70b-instruct/algebra.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
model: meta-llama/llama-3.3-70b-instruct
|
||||||
|
provider: Hyperbolic
|
||||||
|
category: algebra
|
||||||
|
datasets:
|
||||||
|
- intermediate_integration
|
||||||
|
- polynomial_equations
|
||||||
|
- polynomial_multiplication
|
||||||
|
- simple_equations
|
||||||
|
- simple_integration
|
||||||
|
- complex_arithmetic
|
||||||
|
eval_dir: results/llama-3.3-70b-instruct
|
||||||
|
dataset_size: 50
|
||||||
|
dataset_seed: 42
|
||||||
|
developer_role: system
|
||||||
41
eval/yaml/llama-3.3-70b-instruct/algorithmic.yaml
Normal file
41
eval/yaml/llama-3.3-70b-instruct/algorithmic.yaml
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
model: meta-llama/llama-3.3-70b-instruct
|
||||||
|
provider: Hyperbolic
|
||||||
|
category: algorithmic
|
||||||
|
datasets:
|
||||||
|
- ab
|
||||||
|
- base_conversion
|
||||||
|
- binary_alternation
|
||||||
|
- binary_matrix
|
||||||
|
- caesar_cipher
|
||||||
|
- count_primes
|
||||||
|
- cryptarithm
|
||||||
|
- game_of_life
|
||||||
|
- graph_color
|
||||||
|
- group_anagrams
|
||||||
|
- isomorphic_strings
|
||||||
|
- jugs
|
||||||
|
- letter_counting
|
||||||
|
- letter_jumble
|
||||||
|
- manipulate_matrix
|
||||||
|
- number_filtering
|
||||||
|
- number_sorting
|
||||||
|
- palindrome
|
||||||
|
- palindrome_partitioning
|
||||||
|
- pool_matrix
|
||||||
|
- ransom_note
|
||||||
|
- rotate_matrix
|
||||||
|
- rotten_oranges
|
||||||
|
- sentence_reordering
|
||||||
|
- spell_backward
|
||||||
|
- spiral_matrix
|
||||||
|
- string_insertion
|
||||||
|
- string_manipulation
|
||||||
|
- string_splitting
|
||||||
|
- string_synthesis
|
||||||
|
- word_ladder
|
||||||
|
- word_sequence_reversal
|
||||||
|
- word_sorting
|
||||||
|
eval_dir: results/llama-3.3-70b-instruct
|
||||||
|
dataset_size: 50
|
||||||
|
dataset_seed: 45
|
||||||
|
developer_role: system
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
model: deepseek/deepseek-r1
|
model: deepseek/deepseek-r1
|
||||||
|
provider: Nebius
|
||||||
category: algebra
|
category: algebra
|
||||||
datasets:
|
datasets:
|
||||||
- intermediate_integration
|
- intermediate_integration
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
model: deepseek/deepseek-r1
|
model: deepseek/deepseek-r1
|
||||||
|
provider: Nebius
|
||||||
category: algorithmic
|
category: algorithmic
|
||||||
datasets:
|
datasets:
|
||||||
- ab
|
- ab
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
model: deepseek/deepseek-r1
|
model: deepseek/deepseek-r1
|
||||||
|
provider: Nebius
|
||||||
category: cognition
|
category: cognition
|
||||||
datasets:
|
datasets:
|
||||||
- color_cube_rotation
|
- color_cube_rotation
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
model: deepseek/deepseek-r1
|
model: deepseek/deepseek-r1
|
||||||
|
provider: Nebius
|
||||||
category: logic
|
category: logic
|
||||||
datasets:
|
datasets:
|
||||||
- propositional_logic
|
- propositional_logic
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue