mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
Eval script consolidation (#238)
The script now supports:
- YAML and JSON configurations
- Dataset-specific parameters
- Overriding configuration via command line
- Detailed logging and error handling
This commit is contained in:
parent
8a66d2a216
commit
850c1cf6f4
40 changed files with 1111 additions and 670 deletions
47
eval/example_config.json
Normal file
47
eval/example_config.json
Normal file
|
|
@@ -0,0 +1,47 @@
{
  "model": "meta-llama/llama-3.3-70b-instruct",
  "provider": "Hyperbolic",
  "output_dir": "results",
  "max_concurrent": 10,
  "default_size": 20,
  "default_seed": 42,
  "categories": [
    {
      "category": "algebra",
      "datasets": [
        {
          "dataset": "complex_arithmetic",
          "params": {
            "min_real": -10,
            "max_real": 10,
            "min_imag": -10,
            "max_imag": 10
          }
        }
      ]
    },
    {
      "category": "arithmetic",
      "datasets": [
        {
          "dataset": "products",
          "size": 10,
          "seed": 43,
          "params": {
            "min_digits": 2,
            "allow_negation": true
          }
        },
        {
          "dataset": "chain_sum",
          "size": 12,
          "seed": 43,
          "params": {
            "min_digits": 2,
            "allow_negation": true
          }
        }
      ]
    }
  ]
}
Loading…
Add table
Add a link
Reference in a new issue