---
# Model configuration
model_path: ../utils/qwen_3b_arithmetic_100
max_tokens: 1024
temperature: 0.6
top_p: 0.9
developer_prompt: DeepSeekZero
developer_role: system  # Standard role for system prompts

# Output configuration
output_dir: results
save_metadata: true
save_full_results: true
eval_repeats: 3

# Categories and datasets to evaluate
categories:
  - category: reasoning
    datasets:
      # NOTE(review): size, seed and params are nested under the dataset entry
      # based on their order in the original text - confirm against the
      # consumer's schema.
      - dataset: prime_factorization
        size: 100
        seed: 42
        params:
          min_value: 2
          max_value: 1000