diff --git a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml index 842f5ac7..037df31c 100644 --- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml +++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml @@ -90,7 +90,7 @@ actor_rollout_ref: grad_clip: 1.0 clip_ratio: 0.2 entropy_coeff: 0.001 - use_kl_loss: True # True for GRPO + use_kl_loss: False # KL loss is off in this config; set to True for GRPO kl_loss_coef: 0.001 # for grpo kl_loss_type: low_var_kl # for grpo ppo_epochs: 1