This commit is contained in:
Oliver 2025-04-29 19:03:28 +01:00
parent 85f0675cca
commit 374760577e

View file

@@ -71,7 +71,7 @@ actor_rollout_ref:
clip_ratio_low: 0.2
clip_ratio_high: 0.2
loss_agg_mode: "token-mean" # / "seq-mean-token-sum" / "seq-mean-token-mean"
-entropy_coeff: 0.001
+entropy_coeff: 0.000
use_kl_loss: False # True for GRPO
kl_loss_coef: 0.001 # for grpo
kl_loss_type: low_var_kl # for grpo