diff --git a/reasoning_gym/algorithmic/__init__.py b/reasoning_gym/algorithmic/__init__.py index 2581789f..fb6eabd2 100644 --- a/reasoning_gym/algorithmic/__init__.py +++ b/reasoning_gym/algorithmic/__init__.py @@ -16,6 +16,7 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset from .graph_color import GraphColorConfig, GraphColorDataset from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset from .isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset +from .jugs import JugsConfig, JugsDataset from .letter_counting import LetterCountingConfig, LetterCountingDataset from .letter_jumble import LetterJumbleConfig, LetterJumbleDataset from .manipulate_matrix import ManipulateMatrixConfig, ManipulateMatrixDataset @@ -99,4 +100,6 @@ __all__ = [ "StringSplittingDataset", "StringSynthesisConfig", "StringSynthesisDataset", + "JugsConfig", + "JugsDataset", ] diff --git a/reasoning_gym/algorithmic/jugs.py b/reasoning_gym/algorithmic/jugs.py new file mode 100644 index 00000000..43e97620 --- /dev/null +++ b/reasoning_gym/algorithmic/jugs.py @@ -0,0 +1,314 @@ +import json +import math +from collections import deque +from dataclasses import dataclass +from functools import reduce +from random import Random +from typing import Dict, List, Optional, Tuple + +from ..factory import ProceduralDataset, register_dataset + + +def min_moves_n(jug_capacities, target): + """ + Compute the minimum number of moves required to have exactly `target` gallons + in any one jug for a puzzle with multiple jugs. + The state is represented as a tuple (w1, w2, ..., wn), where each wi is the current + amount in jug i. + + Allowed moves: + - Fill jug i to its capacity. + - Empty jug i. + - Pour from jug i to jug j until jug i is empty or jug j is full. + + Returns the minimal move count if a solution exists, otherwise None. + """ + n = len(jug_capacities) + start = tuple([0] * n) + queue = deque([(start, 0)]) + visited = set([start]) + + while queue: + state, moves = queue.popleft() + + # Check if any jug has the target amount. + if any(w == target for w in state): + return moves + + # Generate next states. + next_states = [] + + # 1. Fill any jug. + for i in range(n): + new_state = list(state) + new_state[i] = jug_capacities[i] + next_states.append(tuple(new_state)) + + # 2. Empty any jug. + for i in range(n): + new_state = list(state) + new_state[i] = 0 + next_states.append(tuple(new_state)) + + # 3. Pour from one jug to another. + for i in range(n): + for j in range(n): + if i == j: + continue + if state[i] == 0 or state[j] == jug_capacities[j]: + continue + new_state = list(state) + # Maximum water that can be poured from i to j. + amount = min(state[i], jug_capacities[j] - state[j]) + new_state[i] -= amount + new_state[j] += amount + next_states.append(tuple(new_state)) + + # Add valid next states to the queue. + for ns in next_states: + if ns not in visited: + visited.add(ns) + queue.append((ns, moves + 1)) + + return None + + +def generate_puzzle(rng, num_jugs=3, difficulty=6, max_attempts=10000): + """ + Generate a multi-jug water puzzle. + + Parameters: + - num_jugs: number of jugs to use (>=2; default 3). + - difficulty: minimal required moves for a solution. + - max_attempts: maximum attempts to generate a puzzle meeting the difficulty. + + For a valid puzzle: + - Each jug gets a random capacity (between 3 and 3+difficulty). + - The target is chosen as one of the numbers 1 .. (max_capacity) that is a multiple + of the gcd of all jug capacities. + + Returns a dictionary with: + { "jug_capacities": [c1, c2, ...], + "target": target, + "min_moves": minimum moves required }. + + Raises a ValueError if no puzzle is generated after max_attempts. + """ + for _ in range(max_attempts): + # Generate capacities for each jug. + jug_capacities = [rng.randint(3, 3 + difficulty) for _ in range(num_jugs)] + max_cap = max(jug_capacities) + # Compute gcd of all jug capacities. + gcd_all = reduce(math.gcd, jug_capacities) + # Possible targets are between 1 and max_cap that are multiples of gcd_all. + possible_targets = [t for t in range(1, max_cap + 1) if t % gcd_all == 0] + if not possible_targets: + continue + target = rng.choice(possible_targets) + + moves = min_moves_n(jug_capacities, target) + if moves is not None and moves >= difficulty: + return {"jug_capacities": jug_capacities, "target": target, "min_moves": moves} + raise ValueError(f"Could not generate a puzzle with difficulty at least {difficulty} using {num_jugs} jugs.") + + +def verify_solution(puzzle, moves): + """ + Verify a given solution for a multi-jug puzzle. + + The puzzle is a dictionary with keys: + - "jug_capacities": list of capacities for each jug. + - "target": the target amount that must be in any one jug. + + Moves should be a list of strings in the following formats: + - "fill X": Fill jug X to its capacity. + - "empty X": Empty jug X. + - "pour X->Y": Pour water from jug X to jug Y. + + Jug labels are letters: jug 0 is "A", jug 1 is "B", etc. + + The function simulates the moves starting from all jugs empty. + + Returns a tuple (result, states) where: + - result is True if, after executing all moves, at least one jug has exactly + the target amount; otherwise False. + - states is a list of state tuples after each move. + """ + jug_capacities = puzzle["jug_capacities"] + target = puzzle["target"] + n = len(jug_capacities) + + # Map jug letters to indices (A->0, B->1, C->2, etc.) + jug_map = {chr(ord("A") + i): i for i in range(n)} + + state = tuple([0] * n) + states = [state] + + for move in moves: + tokens = move.split() + if tokens[0] == "fill": + # Move format: "fill X" + jug = tokens[1] + idx = jug_map[jug] + state = list(state) + state[idx] = jug_capacities[idx] + state = tuple(state) + elif tokens[0] == "empty": + # Move format: "empty X" + jug = tokens[1] + idx = jug_map[jug] + state = list(state) + state[idx] = 0 + state = tuple(state) + elif tokens[0] == "pour": + # Move format: "pour X->Y" + # Expect tokens[1] to be in the form "X->Y" + parts = tokens[1].split("->") + if len(parts) != 2: + raise ValueError(f"Invalid pour move format: {move}") + source, dest = parts + i = jug_map[source] + j = jug_map[dest] + state = list(state) + amount = min(state[i], jug_capacities[j] - state[j]) + state[i] -= amount + state[j] += amount + state = tuple(state) + else: + raise ValueError(f"Unknown move: {move}") + states.append(state) + + return (any(w == target for w in state), states) + + +def generate_jug_solution(jug_capacities: Tuple[int, int, int], target: int) -> List[str]: + """Solves the jug puzzle and returns a sequence of formatted steps.""" + capacities = list(jug_capacities) + initial_state = (0, 0, 0) + queue = deque([(initial_state, [])]) + visited = set() + + while queue: + (state, path) = queue.popleft() + + if target in state: + return path # Solution found + + if state in visited: + continue + visited.add(state) + + for i in range(3): # Iterate over each jug + # Fill jug i + new_state = list(state) + new_state[i] = capacities[i] + queue.append((tuple(new_state), path + [f"fill {chr(65 + i)}"])) + + # Empty jug i + new_state = list(state) + new_state[i] = 0 + queue.append((tuple(new_state), path + [f"empty {chr(65 + i)}"])) + + # Pour from jug i to jug j + for j in range(3): + if i != j: + new_state = list(state) + pour_amount = min(state[i], capacities[j] - state[j]) + new_state[i] -= pour_amount + new_state[j] += pour_amount + queue.append((tuple(new_state), path + [f"pour {chr(65 + i)}->{chr(65 + j)}"])) + + return ["No solution"] # No valid solution found + + +@dataclass +class JugsConfig: + """Configuration for Jugs puzzle generation""" + + num_jugs: int = 3 + difficulty: int = 10 + seed: Optional[int] = None + size: int = 500 + + def validate(self): + """Validate configuration parameters""" + assert self.num_jugs > 2, "edge_probability must be gt 2" + assert self.difficulty > 0, "edge_probability must be gt 0" + assert self.difficulty < 200, "edge_probability must be gt 200" + + +class JugsDataset(ProceduralDataset): + """Generates water jug puzzles inspired by [this scene from _Die Hard 3_](https://www.youtube.com/watch?v=6cAbgAaEOVE), with configurable parameters""" + + def __init__(self, config: JugsConfig): + super().__init__(config=config, seed=config.seed, size=config.size) + + def __getitem__(self, idx: int) -> dict: + """Generate a single Jugs task + + Returns: + dict with keys: + - question: str, the task description + - answer: str, a solution string + - metadata: dict with generation parameters + """ + rng = Random(self.seed + idx) + + puzzle = generate_puzzle(rng, num_jugs=self.config.num_jugs, difficulty=self.config.difficulty) + solution = generate_jug_solution(puzzle["jug_capacities"], puzzle["target"]) + + cap_str = ", ".join(f"{chr(ord('A')+i)}:{cap}" for i, cap in enumerate(puzzle["jug_capacities"])) + question = f""" +You are a police officer. A maniac has planted a bomb next to a public fountain. + +To defuse the bomb, you must solve a puzzle. The puzzle is solved when you fill any of the available jugs with the target amount of water. + +You have three move types: 'fill', 'empty' and 'pour'. + +To fill Jug A, you 'fill A'. +To empty Jug B, you 'empty B'. +To pour the contents of Jug A into Jug B, you 'pour A->B'. +All jugs are empty to begin with. + +The empty jugs hold this many litres of water: {cap_str} +And your target is: {puzzle['target']} litres. + +How do you defuse the bomb? + +Reply as a JSON-parsable list of moves which result in any of the jugs being filled with the target amount. + """ + + return { + "question": question, + "answer": None, + "metadata": {"possible_answer": solution, "puzzle": puzzle}, + } + + def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: + """Determine if the solution provided solves the Jugs task. + + The function awards 1.0 for a correct answer. + + Args: + answer (Optional[str]): The user's answer. + entry (Dict[str, any]): The original dataset entry containing the correct answer. + + Returns: + float: The computed score between 0.0 and 1.0. + """ + + if answer == None: + return 0.0 + + danswer = json.loads(answer) + try: + valid, states = verify_solution(entry["metadata"]["puzzle"], danswer) + if not valid: + return 0.01 + else: + return 1.0 # Yay + except Exception as e: + return 0.01 + + +register_dataset("jugs", JugsDataset, JugsConfig) diff --git a/tests/test_jugs.py b/tests/test_jugs.py new file mode 100644 index 00000000..dc5f0476 --- /dev/null +++ b/tests/test_jugs.py @@ -0,0 +1,22 @@ +import json + +import pytest + +from reasoning_gym.algorithmic.jugs import JugsConfig, JugsDataset + + +def test_jugs(): + """Test basic properties and solution of generated items""" + config = JugsConfig(seed=42, size=10, num_jugs=3, difficulty=20) + dataset = JugsDataset(config) + + # easy + for item in dataset: + assert isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + # Test the scoring + assert dataset.score_answer(answer=json.dumps(item["metadata"]["possible_answer"]), entry=item) == 1.0 + assert dataset.score_answer(answer=None, entry=item) == 0.0