add loss_agg_mode

2026-04-25 17:10:51 +00:00 · 2025-04-24 20:46:37 +01:00 · 2025-04-24 20:46:37 +01:00 · 37b88d194b
commit 37b88d194b
parent 1ee3b0bbb8
1 changed files with 1 additions and 0 deletions
--- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
+++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
@@ -86,6 +86,7 @@ actor_rollout_ref:
    ppo_max_token_len_per_gpu: 49152 # n * ${data.max_prompt_length} + ${data.max_response_length}
    grad_clip: 1.0
    clip_ratio: 0.2
+    loss_agg_mode: "token-mean" # other options: "seq-mean-token-sum" / "seq-mean-token-mean"
    entropy_coeff: 0.001
    use_kl_loss: False # True for GRPO
    kl_loss_coef: 0.001 # for GRPO