Eval script consolidation (#238)

The script now supports:
   - YAML and JSON configurations
   - Dataset-specific parameters
   - Overriding configuration via command line
   - Detailed logging and error handling
This commit is contained in:
Andreas Köpf 2025-02-27 17:39:14 +01:00 committed by GitHub
parent 8a66d2a216
commit 850c1cf6f4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 1111 additions and 670 deletions

47
eval/example_config.json Normal file
View file

@@ -0,0 +1,47 @@
{
  "model": "meta-llama/llama-3.3-70b-instruct",
  "provider": "Hyperbolic",
  "output_dir": "results",
  "max_concurrent": 10,
  "default_size": 20,
  "default_seed": 42,
  "categories": [
    {
      "category": "algebra",
      "datasets": [
        {
          "dataset": "complex_arithmetic",
          "params": {
            "min_real": -10,
            "max_real": 10,
            "min_imag": -10,
            "max_imag": 10
          }
        }
      ]
    },
    {
      "category": "arithmetic",
      "datasets": [
        {
          "dataset": "products",
          "size": 10,
          "seed": 43,
          "params": {
            "min_digits": 2,
            "allow_negation": true
          }
        },
        {
          "dataset": "chain_sum",
          "size": 12,
          "seed": 43,
          "params": {
            "min_digits": 2,
            "allow_negation": true
          }
        }
      ]
    }
  ]
}