This commit is contained in:
Oliver 2025-04-28 23:08:11 +01:00
parent a5aaa89de4
commit 39b6f8f7e2

View file

@@ -330,7 +330,7 @@ class RayGRPOTrainer(RayPPOTrainer):
batch.batch["token_level_scores"] = reward_tensor
# compute rewards. apply_kl_penalty if available
-if config.algorithm.use_kl_in_reward:
+if self.config.algorithm.use_kl_in_reward:
batch, kl_metrics = apply_kl_penalty(
batch, kl_ctrl=self.kl_ctrl_in_reward, kl_penalty=self.config.algorithm.kl_penalty
)