mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
Eval script consolidation (#238)
The script now supports YAML and JSON configurations, dataset-specific parameters, overriding configuration via the command line, and detailed logging and error handling.
This commit is contained in:
parent
8a66d2a216
commit
850c1cf6f4
40 changed files with 1111 additions and 670 deletions
33
eval/example_config.yaml
Normal file
33
eval/example_config.yaml
Normal file
|
|
@@ -0,0 +1,33 @@
|
|||
# Example configuration for the evaluation script
model: "meta-llama/llama-3.3-70b-instruct"
provider: "Hyperbolic"
output_dir: "results"
max_concurrent: 10
default_size: 20  # Default size for all datasets
default_seed: 42  # Default seed for all datasets

categories:
  - category: "algebra"
    datasets:
      - dataset: "complex_arithmetic"  # no size/seed: defaults presumably apply
        params:  # dataset-specific parameters
          min_real: -10
          max_real: 10
          min_imag: -10
          max_imag: 10

  - category: "arithmetic"
    datasets:
      - dataset: "products"
        size: 10  # presumably overrides default_size for this dataset
        seed: 43  # presumably overrides default_seed for this dataset
        params:
          min_digits: 2
          allow_negation: true

      - dataset: "chain_sum"
        size: 12
        seed: 43
        params:
          min_digits: 2
          allow_negation: true
|
||||
Loading…
Add table
Add a link
Reference in a new issue