This commit is contained in:
Oliver 2025-04-22 20:34:37 +01:00
parent 1ccd62bc1a
commit 1343bcf63e

View file

@@ -90,7 +90,7 @@ actor_rollout_ref:
grad_clip: 1.0
clip_ratio: 0.2
entropy_coeff: 0.001
-use_kl_loss: True # True for GRPO
+use_kl_loss: False # True for GRPO
kl_loss_coef: 0.001 # for grpo
kl_loss_type: low_var_kl # for grpo
ppo_epochs: 1