mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-28 17:29:39 +00:00
fix
This commit is contained in:
parent
a5aaa89de4
commit
39b6f8f7e2
1 changed file with 1 addition and 1 deletion
|
|
@@ -330,7 +330,7 @@ class RayGRPOTrainer(RayPPOTrainer):
|
|||
batch.batch["token_level_scores"] = reward_tensor
|
||||
|
||||
# compute rewards. apply_kl_penalty if available
|
||||
- if config.algorithm.use_kl_in_reward:
|
||||
+ if self.config.algorithm.use_kl_in_reward:
|
||||
batch, kl_metrics = apply_kl_penalty(
|
||||
batch, kl_ctrl=self.kl_ctrl_in_reward, kl_penalty=self.config.algorithm.kl_penalty
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue