diff --git a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
index 277fff61..546a8d7b 100644
--- a/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
+++ b/training/configs/external_generalisation/math_curriculum_qwen_7b.yaml
@@ -155,6 +155,7 @@ algorithm:
   gamma: 1.0
   lam: 1.0
   adv_estimator: grpo
+  use_kl_in_reward: False  # presumably disables the in-reward KL penalty — TODO confirm against trainer
   kl_penalty: kl # how to estimate kl divergence
   kl_ctrl:
     type: fixed