mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-25 17:10:51 +00:00
add use kl param
This commit is contained in:
parent
e39b6b5f27
commit
1ee3b0bbb8
1 changed file with 1 addition and 0 deletions
|
|
@@ -155,6 +155,7 @@ algorithm:
|
|||
gamma: 1.0
|
||||
lam: 1.0
|
||||
adv_estimator: grpo
|
||||
use_kl_in_reward: False
|
||||
kl_penalty: kl # how to estimate kl divergence
|
||||
kl_ctrl:
|
||||
type: fixed
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue