From 39b6f8f7e2cb4367708faf7b1da414d6d4b3c68f Mon Sep 17 00:00:00 2001 From: Oliver Date: Mon, 28 Apr 2025 23:08:11 +0100 Subject: [PATCH] fix: read use_kl_in_reward from self.config in RayGRPOTrainer --- training/trainers/ray_grpo_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/trainers/ray_grpo_trainer.py b/training/trainers/ray_grpo_trainer.py index fecca6c1..6e793ccc 100644 --- a/training/trainers/ray_grpo_trainer.py +++ b/training/trainers/ray_grpo_trainer.py @@ -330,7 +330,7 @@ class RayGRPOTrainer(RayPPOTrainer): batch.batch["token_level_scores"] = reward_tensor # compute rewards. apply_kl_penalty if available - if config.algorithm.use_kl_in_reward: + if self.config.algorithm.use_kl_in_reward: batch, kl_metrics = apply_kl_penalty( batch, kl_ctrl=self.kl_ctrl_in_reward, kl_penalty=self.config.algorithm.kl_penalty )