From 8d0e7db20416a4fa97aa6a954ed51b3a6f2da541 Mon Sep 17 00:00:00 2001
From: joesharratt1229
Date: Tue, 1 Apr 2025 16:28:04 +0000
Subject: [PATCH] added preappend token

---
 training/configs/algorithmic_qwen_3b.yaml | 2 ++
 training/rewards/reward.py                | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/training/configs/algorithmic_qwen_3b.yaml b/training/configs/algorithmic_qwen_3b.yaml
index 49bbdcaa..99946ff7 100644
--- a/training/configs/algorithmic_qwen_3b.yaml
+++ b/training/configs/algorithmic_qwen_3b.yaml
@@ -41,6 +41,8 @@ reward:
       scaling_factor: 0.3
     - name: format
       scaling_factor: 0.2
+      kwargs:
+        preappend_thinking_token: False
 
 data:
   tokenizer: null
diff --git a/training/rewards/reward.py b/training/rewards/reward.py
index 4501b720..4e741483 100644
--- a/training/rewards/reward.py
+++ b/training/rewards/reward.py
@@ -60,6 +60,10 @@ def cosine_scaled_reward(solution_str, scaling_factor, **kwargs):
 @reward_registry.register("format")
 def compute_format_reward(solution_str: str, scaling_factor: float = 0.2, **kwargs) -> float:
     """Reward use of exactly one correctly structured <think> and <answer> block."""
+    preappend_thinking_token = kwargs.get("preappend_thinking_token", False)
+    if preappend_thinking_token:
+        solution_str = "<think>" + solution_str
+
     pattern = r"<think>\s*.*?</think>\s*<answer>.*?</answer>"
     if not re.match(pattern, solution_str, re.DOTALL):
         return 0.0