diff --git a/reasoning_gym/algorithmic/count_primes.py b/reasoning_gym/algorithmic/count_primes.py index 0a553c7f..6317ded5 100644 --- a/reasoning_gym/algorithmic/count_primes.py +++ b/reasoning_gym/algorithmic/count_primes.py @@ -11,7 +11,11 @@ from typing import Optional from ..factory import ProceduralDataset, register_dataset -QUESTION_TEMPLATE = """Count how many prime numbers there are between {start} and {end} (inclusive) ?""" +QUESTION_TEMPLATE = """Please use python code to count how many prime numbers there are between {start} and {end} (inclusive)? +Please follow the instructions below: +# 1. Create and run the python code to return the count of the prime numbers. +# 2. Make sure to only report the count of the prime numbers as answer. +""" @dataclass diff --git a/reasoning_gym/algorithmic/game_of_life.py b/reasoning_gym/algorithmic/game_of_life.py index c43f8345..7565206f 100644 --- a/reasoning_gym/algorithmic/game_of_life.py +++ b/reasoning_gym/algorithmic/game_of_life.py @@ -32,7 +32,7 @@ class GameOfLifeDataset(ProceduralDataset): def __init__(self, config: GameOfLifeConfig): self._prompt_templates = [ - "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}." + "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. Let your answer (array of array) be on a single line. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}." 
] super().__init__(config=config, seed=config.seed, size=config.size) diff --git a/reasoning_gym/algorithmic/graph_color.py b/reasoning_gym/algorithmic/graph_color.py index 39244944..8f730b25 100644 --- a/reasoning_gym/algorithmic/graph_color.py +++ b/reasoning_gym/algorithmic/graph_color.py @@ -200,7 +200,7 @@ Vertices: {puzzle["vertices"]} Edges: {edges} Possible colors: {puzzle["color_options"]} -Return your solution as a JSON map of verteces to colors. (For example: {{0: 1, 1: 2, 2: 3}}) +Return your solution as a JSON map of vertices to colors. (For example: {{0: 1, 1: 2, 2: 3}}) """ return { diff --git a/reasoning_gym/algorithmic/word_ladder.py b/reasoning_gym/algorithmic/word_ladder.py index 3be99138..928a4c97 100644 --- a/reasoning_gym/algorithmic/word_ladder.py +++ b/reasoning_gym/algorithmic/word_ladder.py @@ -9,8 +9,8 @@ from ..data import get_data_file_path from ..factory import ProceduralDataset, register_dataset QUESTION_TEMPLATE = """Transform the word ladder '{start}' to '{end}' by changing one letter at a time. - Provide your answer as a comma-separated sequence of uppercase letters without spaces. - Each step must be a valid English word.""" +Provide your answer as a comma-separated sequence of uppercase letters without spaces. +Each step must be a valid English word.""" @dataclass diff --git a/reasoning_gym/arithmetic/basic_arithmetic.py b/reasoning_gym/arithmetic/basic_arithmetic.py index c72ff143..eed3bb98 100644 --- a/reasoning_gym/arithmetic/basic_arithmetic.py +++ b/reasoning_gym/arithmetic/basic_arithmetic.py @@ -64,6 +64,9 @@ class BasicArithmeticDataset(ProceduralDataset): def __init__(self, config: BasicArithmeticDatasetConfig): super().__init__(config=config, seed=config.seed, size=config.size) + self.added_instruction = ( + "Make sure to report the answer as an integer. Please do not add commas to the integer answers reported." 
+ ) def __getitem__(self, idx: int) -> dict[str, Any]: """Generate a single arithmetic task @@ -88,7 +91,7 @@ class BasicArithmeticDataset(ProceduralDataset): else: expression, result = self._generate_simple_task(rng, num_terms, num_digits) - question = self._format_question(rng, expression) + question = self._format_question(rng, expression) + self.added_instruction return { "question": question, @@ -224,14 +227,13 @@ class BasicArithmeticDataset(ProceduralDataset): def _format_question(self, rng: Random, expression: str) -> str: """Format the expression with clear answer positioning""" - answer_instruction = "Put your final answer after '=' without additional text." if self.config.format_style == "simple": - return f"{answer_instruction} Calculate {expression} =" + return f"Calculate {expression}. " else: - templates = ["What is {0} =", "Solve {0}=", "Compute {0} =", "Evaluate: {0} ="] + templates = ["What is {0}? ", "Solve {0}. ", "Compute {0}. ", "Evaluate: {0}. "] template = rng.choice(templates).format(expression) - return f"{answer_instruction} {template}" + return template # Register the dataset