add loss_agg_mode

This commit is contained in:
Oliver 2025-04-24 20:46:37 +01:00
parent 1ee3b0bbb8
commit 37b88d194b

View file

@ -86,6 +86,7 @@ actor_rollout_ref:
ppo_max_token_len_per_gpu: 49152 # n * ${data.max_prompt_length} + ${data.max_response_length}
grad_clip: 1.0
clip_ratio: 0.2
loss_agg_mode: "token-mean" # options: "token-mean" / "seq-mean-token-sum" / "seq-mean-token-mean"
entropy_coeff: 0.001
use_kl_loss: False # True for GRPO
kl_loss_coef: 0.001 # for GRPO