Eval script consolidation (#238)

The script now supports:
- YAML and JSON configurations
- Dataset-specific parameters
- Overriding configuration via command line
- Detailed logging and error handling
2026-04-19 12:58:07 +00:00 · 2025-02-27 17:39:14 +01:00 · 2025-02-27 17:39:14 +01:00 · 850c1cf6f4
commit 850c1cf6f4
parent 8a66d2a216
40 changed files with 1111 additions and 670 deletions
--- a/eval/example_config.yaml
+++ b/eval/example_config.yaml
@@ -0,0 +1,33 @@
+# Example configuration for the evaluation script
+model: "meta-llama/llama-3.3-70b-instruct"
+provider: "Hyperbolic"  # NOTE(review): presumably the inference provider requests are routed to — confirm
+output_dir: "results"  # NOTE(review): presumably where result files are written — confirm
+max_concurrent: 10  # NOTE(review): presumably a cap on concurrent requests — confirm
+default_size: 20  # Default size for all datasets
+default_seed: 42  # Default seed for all datasets
+
+categories:  # Datasets grouped by category
+  - category: "algebra"
+    datasets:
+      - dataset: "complex_arithmetic"  # no size/seed here; presumably default_size/default_seed apply — confirm
+        params:  # Dataset-specific parameters
+          min_real: -10
+          max_real: 10
+          min_imag: -10
+          max_imag: 10
+
+  - category: "arithmetic"
+    datasets:
+      - dataset: "products"
+        size: 10  # presumably overrides default_size for this dataset — confirm
+        seed: 43  # presumably overrides default_seed for this dataset — confirm
+        params:  # Dataset-specific parameters
+          min_digits: 2
+          allow_negation: true
+
+      - dataset: "chain_sum"
+        size: 12  # presumably overrides default_size for this dataset — confirm
+        seed: 43  # presumably overrides default_seed for this dataset — confirm
+        params:  # Dataset-specific parameters
+          min_digits: 2
+          allow_negation: true