Add explicit answer-format instructions to the countdown, maze and Tower of
Hanoi prompts.

- countdown: wrap the chosen prompt in QUESTION_FORMAT_TEMPLATE.
- maze: ask for the number of steps only.
- tower_of_hanoi: move the prompt into a module-level QUESTION_TEMPLATE and
  locate the move pattern with re.search (anywhere in the line) instead of
  re.match (start of the line only).

The Hanoi template relies on the real line breaks of the triple-quoted string;
it carries no "\n" escapes or per-line indentation, so the rendered prompt
keeps the single-spaced layout of the string it replaces.

diff --git a/reasoning_gym/games/countdown.py b/reasoning_gym/games/countdown.py
index 169fc5ee..4bb6215e 100644
--- a/reasoning_gym/games/countdown.py
+++ b/reasoning_gym/games/countdown.py
@@ -8,6 +8,17 @@ from sympy.parsing.sympy_parser import parse_expr
 
 from ..factory import ProceduralDataset, register_dataset
 
+# Wraps a rendered countdown question with instructions for the final answer format.
+QUESTION_FORMAT_TEMPLATE = """
+{question}
+Final answer format instructions:
+1. Provide your solution as an arithmetic expression (no '=' sign).
+2. Do not include the target number in the expression.
+3. Use '*' for multiplication.
+4. Use '/' for division.
+5. Do not include any other text or formatting.
+"""
+
 
 @dataclass
 class CountdownConfig:
@@ -67,8 +78,11 @@ class CountdownDataset(ProceduralDataset):
 
         numbers_str = ", ".join(map(str, numbers))
 
+        question = rng.choice(self._prompt_templates)
+        question = question.format(numbers=numbers_str, target=target)
+
         return {
-            "question": rng.choice(self._prompt_templates).format(numbers=numbers_str, target=target),
+            "question": QUESTION_FORMAT_TEMPLATE.format(question=question),
             "answer": expression,
             "metadata": {
                 "numbers": numbers,
diff --git a/reasoning_gym/games/maze.py b/reasoning_gym/games/maze.py
index cc110f42..c656755e 100644
--- a/reasoning_gym/games/maze.py
+++ b/reasoning_gym/games/maze.py
@@ -95,7 +95,8 @@ class MazeDataset(ProceduralDataset):
             + "\n```"
             + "\nLegend: "
             + f"'{self.wall_char}' = Wall, '{self.path_char}' = Passage\n\n"
-            + "What is the minimum number of steps to reach the goal?"
+            + "What is the minimum number of steps to reach the goal?\n"
+            + "Give only the number of steps as your final answer, no other text or formatting."
         )
 
         return {
diff --git a/reasoning_gym/games/tower_of_hanoi.py b/reasoning_gym/games/tower_of_hanoi.py
index e3adab5b..00e24f9d 100644
--- a/reasoning_gym/games/tower_of_hanoi.py
+++ b/reasoning_gym/games/tower_of_hanoi.py
@@ -8,6 +8,24 @@ from typing import Any, Dict, List, Optional, Tuple
 
 from ..factory import ProceduralDataset, register_dataset
 
+# Prompt template; the placeholders are filled in with str.format() when an item is generated.
+QUESTION_TEMPLATE = """Solve the Tower of Hanoi problem with {num_disks} disks and {num_pegs} pegs.
+Move all disks from {start_peg} to {target_peg} following the rules:
+- Only one disk can be moved at a time.
+- A larger disk cannot be placed on top of a smaller disk.
+- All disks must be on a peg at all times.
+Example:
+Move disk 1 from Peg 1 to Peg 3
+Move disk 2 from Peg 1 to Peg 2
+Move disk 1 from Peg 3 to Peg 2
+
+Provide the sequence of moves.
+Formatting guidelines:
+Each instruction should be placed on a single line.
+Each line should be formatted as 'Move disk X from Peg Y to Peg Z'
+Do not include any other text or formatting.
+"""
+
 
 @dataclass
 class HanoiConfig:
@@ -245,22 +263,13 @@ class HanoiDataset(ProceduralDataset):
         # Peg labels
         peg_labels = {peg: f"Peg {peg}" for peg in pegs}
 
-        question_str = (
-            f"Solve the Tower of Hanoi problem with {num_disks} disks and {num_pegs} pegs.\n"
-            f"Move all disks from {peg_labels[start_peg]} to {peg_labels[target_peg]} following the rules:\n"
-            "- Only one disk can be moved at a time.\n"
-            "- A larger disk cannot be placed on top of a smaller disk.\n"
-            "- All disks must be on a peg at all times.\n"
-            "Example:\n"
-            "Move disk 1 from Peg 1 to Peg 3\n"
-            "Move disk 2 from Peg 1 to Peg 2\n"
-            "Move disk 1 from Peg 3 to Peg 2\n"
-            "\n"
-            "Provide the sequence of moves."
-        )
-
         result = {
-            "question": question_str,
+            "question": QUESTION_TEMPLATE.format(
+                num_disks=num_disks,
+                num_pegs=num_pegs,
+                start_peg=peg_labels[start_peg],
+                target_peg=peg_labels[target_peg],
+            ),
             "answer": solution,
             "metadata": {
                 "num_disks": num_disks,
@@ -359,6 +368,6 @@ class HanoiDataset(ProceduralDataset):
             tuple: (disk, from_peg, to_peg)
         """
         pattern = r"Move disk (\d+) from Peg (\d+) to Peg (\d+)"
-        match = re.match(pattern, move)
+        match = re.search(pattern, move)
         if not match:
             raise ValueError(f"Unexpected move format: '{move}'")