# Config used for evaluating curriculum generalisation experiment models on
# Knights and Knaves.
#
# Models evaluated on this config:
#   - Qwen/Qwen2.5-3B-Instruct (original model)
#   - qwen3b_knights-knaves_noncurriculum
#     (original + 300 GRPO steps on non-curriculum Knights and Knaves data)
#   - qwen3b_knights-knaves_curriculum
#     (original + 300 GRPO steps on curriculum Knights and Knaves data)

# Model under evaluation. Uncomment exactly one of the alternatives below
# (and comment out the default) to evaluate a trained checkpoint instead.
model_path: Qwen/Qwen2.5-3B-Instruct  # Default model path
# model_path: /workspace/reasoning-gym/training/qwen3b_knights-knaves_noncurriculum
# model_path: /workspace/reasoning-gym/training/qwen3b_knights-knaves_curriculum

# Generation settings.
max_tokens: 2048  # From max_response_length in training config
top_p: 1.0
# NOTE(review): this was previously annotated "lower temperature for more
# focused responses", but 1.0 is not a reduced value -- confirm the intended
# setting. The value itself is left unchanged.
temperature: 1.0
dtype: bfloat16

# Prompt settings.
developer_prompt: DeepSeekZero
developer_role: system

# Output settings.
output_dir: results
save_metadata: true
save_full_results: true
eval_repeats: 3

# Datasets to evaluate, grouped by category.
categories:
  - category: logic
    datasets:
      - dataset: knights_knaves
        size: 100
        seed: 42
        params:
          n_people: 4
          depth_constraint: 3
          width_constraint: 3