---
# Evaluation configuration: model location, sampling settings, output
# options, and the categories/datasets to evaluate.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML (": " inside a plain scalar, and the first " #" turned
# the remainder of the line into a comment). The nesting below is reconstructed
# from key order -- confirm it against the schema the consuming script expects.

model_path: ../utils/games
max_tokens: 2048
temperature: 0.6  # Lower temperature for more focused responses
top_p: 0.9  # From rollout top_p
developer_prompt: DeepSeekZero
developer_role: system  # Standard role for system prompts

output_dir: results
save_metadata: true
save_full_results: true
eval_repeats: 3

# Categories and datasets to evaluate
categories:
  - category: reasoning
    datasets:
      - dataset: tower_of_hanoi
        size: 100
        seed: 42
        params:
          min_disks: 3
          max_disks: 4
          min_pegs: 3
          max_pegs: 4