mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-30 17:40:45 +00:00
Add `preappend_thinking_token` option to the format reward
This commit is contained in:
parent
4b9c155cef
commit
8d0e7db204
2 changed files with 6 additions and 0 deletions
|
|
@@ -60,6 +60,10 @@ def cosine_scaled_reward(solution_str, scaling_factor, **kwargs):
|
|||
@reward_registry.register("format")
|
||||
def compute_format_reward(solution_str: str, scaling_factor: float = 0.2, **kwargs) -> float:
|
||||
"""Reward use of exactly one correctly structured <think> and <answer> block."""
|
||||
preappend_thinking_token = kwargs.get("preappend_thinking_token", False)
|
||||
if preappend_thinking_token:
|
||||
solution_str = "<think>" + solution_str
|
||||
|
||||
pattern = r"\s*<think>.*?</think>\s*<answer>.*?</answer>"
|
||||
if not re.match(pattern, solution_str, re.DOTALL):
|
||||
return 0.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue