diff --git a/reasoning_gym/algorithmic/game_of_life.py b/reasoning_gym/algorithmic/game_of_life.py index c43f8345..7565206f 100644 --- a/reasoning_gym/algorithmic/game_of_life.py +++ b/reasoning_gym/algorithmic/game_of_life.py @@ -32,7 +32,7 @@ class GameOfLifeDataset(ProceduralDataset): def __init__(self, config: GameOfLifeConfig): self._prompt_templates = [ - "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}." + "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. Let your answer(array of array be on a single line). (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}." ] super().__init__(config=config, seed=config.seed, size=config.size) diff --git a/reasoning_gym/algorithmic/graph_color.py b/reasoning_gym/algorithmic/graph_color.py index 39244944..8f730b25 100644 --- a/reasoning_gym/algorithmic/graph_color.py +++ b/reasoning_gym/algorithmic/graph_color.py @@ -200,7 +200,7 @@ Vertices: {puzzle["vertices"]} Edges: {edges} Possible colors: {puzzle["color_options"]} -Return your solution as a JSON map of verteces to colors. (For example: {{0: 1, 1: 2, 2: 3}}) +Return your solution as a JSON map of vertices to colors. (For example: {{0: 1, 1: 2, 2: 3}}) """ return { diff --git a/reasoning_gym/algorithmic/word_ladder.py b/reasoning_gym/algorithmic/word_ladder.py index 3be99138..928a4c97 100644 --- a/reasoning_gym/algorithmic/word_ladder.py +++ b/reasoning_gym/algorithmic/word_ladder.py @@ -9,8 +9,8 @@ from ..data import get_data_file_path from ..factory import ProceduralDataset, register_dataset QUESTION_TEMPLATE = """Transform the word ladder '{start}' to '{end}' by changing one letter at a time. 
- Provide your answer as a comma-separated sequence of uppercase letters without spaces. - Each step must be a valid English word.""" +Provide your answer as a comma-separated sequence of uppercase letters without spaces. +Each step must be a valid English word.""" @dataclass diff --git a/reasoning_gym/arithmetic/basic_arithmetic.py b/reasoning_gym/arithmetic/basic_arithmetic.py index c72ff143..e8a1cb94 100644 --- a/reasoning_gym/arithmetic/basic_arithmetic.py +++ b/reasoning_gym/arithmetic/basic_arithmetic.py @@ -64,6 +64,9 @@ class BasicArithmeticDataset(ProceduralDataset): def __init__(self, config: BasicArithmeticDatasetConfig): super().__init__(config=config, seed=config.seed, size=config.size) + self.added_instruction = ( + " Ensure to report the answer as an integer. Do not add commas to the integer answers reported." + ) def __getitem__(self, idx: int) -> dict[str, Any]: """Generate a single arithmetic task @@ -88,7 +91,7 @@ class BasicArithmeticDataset(ProceduralDataset): else: expression, result = self._generate_simple_task(rng, num_terms, num_digits) - question = self._format_question(rng, expression) + question = self._format_question(rng, expression) + self.added_instruction return { "question": question, @@ -223,15 +226,14 @@ class BasicArithmeticDataset(ProceduralDataset): return expression, result def _format_question(self, rng: Random, expression: str) -> str: - """Format the expression with clear answer positioning""" - answer_instruction = "Put your final answer after '=' without additional text." + """Format the question with the arithmetic expression""" if self.config.format_style == "simple": - return f"{answer_instruction} Calculate {expression} =" + return f"Calculate {expression}." 
else: - templates = ["What is {0} =", "Solve {0}=", "Compute {0} =", "Evaluate: {0} ="] - template = rng.choice(templates).format(expression) - return f"{answer_instruction} {template}" + templates = ["What is {0}?", "Solve {0}.", "Compute {0}.", "Evaluate: {0}."] + template = rng.choice(templates) + return template.format(expression) # Register the dataset diff --git a/tests/test_basic_arithmetic.py b/tests/test_basic_arithmetic.py index 406e4617..c1035af9 100644 --- a/tests/test_basic_arithmetic.py +++ b/tests/test_basic_arithmetic.py @@ -1,5 +1,3 @@ -from random import Random - import pytest from reasoning_gym.arithmetic.basic_arithmetic import ( @@ -64,11 +62,19 @@ def test_arithmetic_dataset_format_styles(): max_digits=2, ) dataset = BasicArithmeticDataset(config) - assert all(item["question"].endswith("=") for item in dataset) + assert all(item["question"].strip().endswith(".") for item in dataset) - config.format_style = "natural" + config = BasicArithmeticDatasetConfig( + size=10, + seed=42, + format_style="natural", + min_terms=2, + max_terms=3, # Keep expressions simple for testing + min_digits=1, + max_digits=2, + ) dataset = BasicArithmeticDataset(config) - assert all("=" in item["question"] for item in dataset) + assert all(item["question"].strip().endswith(".") for item in dataset) def test_arithmetic_dataset_iteration():