Add use_kl_in_reward param to the algorithm config

This commit is contained in:
Oliver 2025-04-24 20:42:57 +01:00
parent e39b6b5f27
commit 1ee3b0bbb8

View file

@@ -155,6 +155,7 @@ algorithm:
gamma: 1.0
lam: 1.0
adv_estimator: grpo
use_kl_in_reward: False
kl_penalty: kl # how to estimate kl divergence
kl_ctrl:
type: fixed