reasoning-gym/eval/yaml
2025-03-16 12:26:27 +01:00
..
claude-3.5-sonnet.yaml update eval yaml config files 2025-03-10 00:48:32 +01:00
claude-3.7-sonnet.yaml Add eval configs, small fixes to eval script & rush-hour score_answer 2025-03-16 09:18:05 +01:00
claude-3.7-sonnet_thinking.yaml Add eval configs, small fixes to eval script & rush-hour score_answer 2025-03-16 09:18:05 +01:00
deepseek-r1.yaml Eval N completions per prompt (#374) 2025-03-15 16:39:36 +01:00
google-gemma-3-27b-it.yaml add gemma-3-27b & qwq-32b configs 2025-03-15 20:47:51 +01:00
llama-3.1-8b-instruct.yaml feat: Add comprehensive visualization script for evaluation results (#376) 2025-03-16 12:26:27 +01:00
llama-3.3-70b-instruct.yaml Eval N completions per prompt (#374) 2025-03-15 16:39:36 +01:00
openai-o1.yaml Eval N completions per prompt (#374) 2025-03-15 16:39:36 +01:00
openai-o3-mini.yaml Eval N completions per prompt (#374) 2025-03-15 16:39:36 +01:00
qwen-qwq-32b.yaml add gemma-3-27b & qwq-32b configs 2025-03-15 20:47:51 +01:00