Eval sampling settings for generation (temperature, top-p, max_tokens) (#242)

* feat: Add sampling parameters to eval configuration and API call
* feat: Add support for system_prompt_id and optional system_prompt configuration
This commit is contained in:
Andreas Köpf 2025-02-28 11:48:37 +01:00 committed by GitHub
parent b1c8840129
commit b4207162ff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 83 additions and 22 deletions

View file

@@ -44,6 +44,11 @@ output_dir: "results"
max_concurrent: 10
default_size: 20 # Default size for all datasets
default_seed: 42 # Default seed for all datasets
max_tokens: 32768 # Maximum generation length (optional)
temperature: 0.6 # Generation temperature (optional)
top_p: 0.95 # Top-p sampling parameter (optional)
system_prompt_id: "default" # Use a predefined system prompt by ID (optional)
# system_prompt: "Your custom system prompt here" # Or specify a custom system prompt directly
categories:
- category: "algebra"