diff --git a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml index 546a8d7b..b8291b3f 100644 --- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml +++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml @@ -86,6 +86,7 @@ actor_rollout_ref: ppo_max_token_len_per_gpu: 49152 # n * ${data.max_prompt_length} + ${data.max_response_length} grad_clip: 1.0 clip_ratio: 0.2 + loss_agg_mode: "token-mean" # / "seq-mean-token-sum" / "seq-mean-token-mean" entropy_coeff: 0.001 use_kl_loss: False # True for GRPO kl_loss_coef: 0.001 # for grpo