Add eval configs, small fixes to eval script & rush-hour score_answer

This commit is contained in:
Andreas Köpf 2025-03-16 09:18:05 +01:00 committed by GitHub
parent 27cdd25548
commit d6f399b8e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 283 additions and 22 deletions

View file

@@ -2,7 +2,7 @@ model: anthropic/claude-3.7-sonnet:thinking
provider: Anthropic
output_dir: results
max_concurrent: 10
-default_size: 50
+default_size: 5
default_seed: 45
categories:
- category: algebra