---
# Evaluation configuration: which model to sample from, how to sample,
# where results are written, and which datasets are evaluated.

# Model configuration
model_path: Qwen/Qwen2.5-3B-Instruct  # The smaller (3B) model variant
max_tokens: 1024  # Taken from max_response_length in the training config
temperature: 0.7  # Lower temperature for more focused responses
top_p: 0.9  # Taken from the rollout top_p setting
developer_prompt: DeepSeekZero
developer_role: system  # Standard role for system prompts

# Output configuration
output_dir: results
save_metadata: true
save_full_results: true
# NOTE(review): presumably the number of times each evaluation is repeated;
# confirm against the evaluation script that reads this file.
eval_repeats: 3

# Categories and datasets to evaluate
categories:
  - category: reasoning
    datasets:
      - dataset: decimal_chain_sum
        size: 100
        seed: 42
        # Generator parameters for this dataset. The names suggest inclusive
        # min/max bounds on term count, digits per term, and decimal places;
        # verify against the dataset's own configuration before changing.
        params:
          min_terms: 2
          max_terms: 4
          min_digits: 1
          max_digits: 3
          min_decimal_places: 1
          max_decimal_places: 4