basic jugs

2026-04-23 16:55:05 +00:00 · 2025-02-20 15:24:46 +01:00 · 2025-02-20 15:24:46 +01:00 · 6f00690ae1
commit 6f00690ae1
parent e25973b118
3 changed files with 339 additions and 0 deletions
--- a/reasoning_gym/algorithmic/jugs.py
+++ b/reasoning_gym/algorithmic/jugs.py
@ -0,0 +1,314 @@
import json
import math
from collections import deque
from dataclasses import dataclass
from functools import reduce
from random import Random
from typing import Any, Dict, List, Optional, Sequence, Tuple

from ..factory import ProceduralDataset, register_dataset
+
+
def _target_is_provably_unreachable(capacities, target):
    """Return True when `target` can be ruled out without running a search.

    Only well-formed puzzles (non-negative integer capacities and an integer
    target) are analysed. For any other input the function answers False so
    that the caller falls back to the exhaustive search.
    """
    if not capacities:
        # With no jugs there is nothing that could ever hold the target.
        return True
    if not isinstance(target, int):
        return False
    if not all(isinstance(cap, int) and cap >= 0 for cap in capacities):
        return False
    # A jug never holds less than nothing or more than its own capacity.
    if target < 0 or target > max(capacities):
        return True
    # Fill, empty and pour only ever produce multiples of the gcd of the capacities.
    divisor = reduce(math.gcd, capacities)
    return divisor != 0 and target % divisor != 0


def _successor_states(state, capacities):
    """Yield every state that differs from `state` by exactly one legal move."""
    for i, cap in enumerate(capacities):
        # Fill jug i (a no-op when it is already full).
        if state[i] != cap:
            yield state[:i] + (cap,) + state[i + 1 :]
        # Empty jug i (a no-op when it is already empty).
        if state[i] != 0:
            yield state[:i] + (0,) + state[i + 1 :]
    for i in range(len(capacities)):
        if state[i] == 0:
            continue
        for j, dest_cap in enumerate(capacities):
            if i == j or state[j] == dest_cap:
                continue
            # Pour until the source is empty or the destination is full.
            amount = min(state[i], dest_cap - state[j])
            poured = list(state)
            poured[i] -= amount
            poured[j] += amount
            yield tuple(poured)


def min_moves_n(jug_capacities, target) -> Optional[int]:
    """
    Compute the minimum number of moves required to have exactly `target` gallons
    in any one jug for a puzzle with multiple jugs.

    The state is a tuple (w1, w2, ..., wn), where each wi is the current amount in
    jug i. All jugs start empty.

    Allowed moves:
      - Fill jug i to its capacity.
      - Empty jug i.
      - Pour from jug i to jug j until jug i is empty or jug j is full.

    Parameters:
      - jug_capacities: capacities of the jugs, one entry per jug.
      - target: amount that must end up in at least one jug.

    Returns the minimal move count if a solution exists, otherwise None. A target
    of 0 needs no moves as long as there is at least one jug.
    """
    capacities = tuple(jug_capacities)

    # Cheap arithmetic checks avoid exhausting the whole state space for
    # targets that can never be produced.
    if _target_is_provably_unreachable(capacities, target):
        return None

    start = (0,) * len(capacities)
    if target in start:
        return 0

    visited = {start}
    queue = deque([(start, 0)])

    # Breadth-first search: the first time the target shows up, the move count is minimal.
    while queue:
        state, moves = queue.popleft()
        for successor in _successor_states(state, capacities):
            if successor in visited:
                continue
            if target in successor:
                return moves + 1
            visited.add(successor)
            queue.append((successor, moves + 1))

    return None
+
+
def generate_puzzle(rng, num_jugs=3, difficulty=6, max_attempts=10000):
    """
    Build a random multi-jug water puzzle by rejection sampling.

    Parameters:
      - rng: random source providing `randint` and `choice`.
      - num_jugs: how many jugs the puzzle uses (default 3).
      - difficulty: lower bound on the number of moves of the shortest solution;
        it also widens the capacity range.
      - max_attempts: how many random candidates to try before giving up.

    Each attempt:
      - draws one capacity per jug, uniformly from 3 .. 3 + difficulty;
      - picks the target among the amounts 1 .. max capacity that are multiples
        of the gcd of all capacities;
      - keeps the candidate only if its shortest solution (per `min_moves_n`)
        exists and takes at least `difficulty` moves.

    Returns a dictionary:
       { "jug_capacities": [c1, c2, ...],
         "target": target,
         "min_moves": minimum moves required }.

    Raises ValueError when no candidate was accepted within max_attempts.
    """
    for _attempt in range(max_attempts):
        capacities = [rng.randint(3, 3 + difficulty) for _jug in range(num_jugs)]
        largest = max(capacities)
        common_divisor = reduce(math.gcd, capacities)

        # Only multiples of the common divisor can ever be measured out.
        candidates = [amount for amount in range(1, largest + 1) if not amount % common_divisor]
        if len(candidates) == 0:
            continue
        goal = rng.choice(candidates)

        fewest = min_moves_n(capacities, goal)
        if fewest is None or fewest < difficulty:
            # Unsolvable or too easy: draw a fresh candidate.
            continue
        return {"jug_capacities": capacities, "target": goal, "min_moves": fewest}

    raise ValueError(f"Could not generate a puzzle with difficulty at least {difficulty} using {num_jugs} jugs.")
+
+
def verify_solution(puzzle, moves):
    """
    Verify a given solution for a multi-jug puzzle.

    The puzzle is a dictionary with keys:
      - "jug_capacities": list of capacities for each jug.
      - "target": the target amount that must be in any one jug.

    Moves should be an iterable of strings in the following formats:
      - "fill X": Fill jug X to its capacity.
      - "empty X": Empty jug X.
      - "pour X->Y": Pour water from jug X to jug Y until X is empty or Y is full.

    Jug labels are letters: jug 0 is "A", jug 1 is "B", etc.

    The function simulates the moves starting from all jugs empty.

    Returns a tuple (result, states) where:
      - result is True if, after executing all moves, at least one jug has exactly
        the target amount; otherwise False.
      - states is a list of state tuples: the initial state followed by the state
        after each move.

    Raises ValueError for any malformed move: a non-string entry, an empty string,
    an unknown action, a missing argument, a badly formed pour, or a jug label
    that does not exist in the puzzle.
    """
    jug_capacities = puzzle["jug_capacities"]
    target = puzzle["target"]
    n = len(jug_capacities)

    # Map jug letters to indices (A->0, B->1, C->2, etc.)
    jug_map = {chr(ord("A") + i): i for i in range(n)}

    def jug_index(label, move):
        # Report unknown labels as ValueError, like every other malformed move.
        if label not in jug_map:
            raise ValueError(f"Unknown jug {label!r} in move: {move}")
        return jug_map[label]

    levels = [0] * n
    states = [tuple(levels)]

    for move in moves:
        if not isinstance(move, str):
            raise ValueError(f"Move must be a string, got: {move!r}")
        tokens = move.split()
        if not tokens:
            raise ValueError(f"Empty move: {move!r}")
        action = tokens[0]
        if action not in ("fill", "empty", "pour"):
            raise ValueError(f"Unknown move: {move}")
        if len(tokens) < 2:
            raise ValueError(f"Missing jug in move: {move}")

        if action == "fill":
            # Move format: "fill X"
            idx = jug_index(tokens[1], move)
            levels[idx] = jug_capacities[idx]
        elif action == "empty":
            # Move format: "empty X"
            levels[jug_index(tokens[1], move)] = 0
        else:
            # Move format: "pour X->Y" (no spaces around the arrow).
            parts = tokens[1].split("->")
            if len(parts) != 2:
                raise ValueError(f"Invalid pour move format: {move}")
            src = jug_index(parts[0], move)
            dst = jug_index(parts[1], move)
            amount = min(levels[src], jug_capacities[dst] - levels[dst])
            levels[src] -= amount
            levels[dst] += amount
        states.append(tuple(levels))

    return (any(w == target for w in levels), states)
+
+
def _labelled_successors(state, capacities):
    """Yield (move, next_state) pairs for every action available in `state`.

    Actions are produced jug by jug: fill, empty, then a pour into every other
    jug in index order. This order decides which of several equally short
    solutions the search reports, so it must stay stable.
    """
    for i, cap in enumerate(capacities):
        src = chr(ord("A") + i)

        filled = list(state)
        filled[i] = cap
        yield f"fill {src}", tuple(filled)

        emptied = list(state)
        emptied[i] = 0
        yield f"empty {src}", tuple(emptied)

        for j, dest_cap in enumerate(capacities):
            if i == j:
                continue
            # Pour until the source is empty or the destination is full.
            amount = min(state[i], dest_cap - state[j])
            poured = list(state)
            poured[i] -= amount
            poured[j] += amount
            yield f"pour {src}->{chr(ord('A') + j)}", tuple(poured)


def generate_jug_solution(jug_capacities: Sequence[int], target: int) -> List[str]:
    """Solve the jug puzzle and return a shortest sequence of formatted steps.

    Works for any number of jugs; jug i is labelled with the i-th capital letter
    ("A", "B", ...). All jugs start empty.

    Args:
        jug_capacities: capacity of each jug, one entry per jug.
        target: amount that must end up in at least one jug.

    Returns:
        A list of moves ("fill X", "empty X", "pour X->Y") that is as short as
        possible, an empty list when the target is already present at the start
        (target 0 with at least one jug), or ["No solution"] when the target
        cannot be reached.
    """
    capacities = list(jug_capacities)
    start = (0,) * len(capacities)
    if target in start:
        return []

    # Parent pointers instead of per-state path copies: state -> (previous state, move).
    came_from = {start: None}
    frontier = deque([start])

    while frontier:
        state = frontier.popleft()
        for move, successor in _labelled_successors(state, capacities):
            if successor in came_from:
                continue
            came_from[successor] = (state, move)
            if target in successor:
                # Walk the parent pointers back to the start to recover the moves.
                path = []
                link = came_from[successor]
                while link is not None:
                    previous, step = link
                    path.append(step)
                    link = came_from[previous]
                path.reverse()
                return path
            frontier.append(successor)

    return ["No solution"]  # No valid solution found
+
+
@dataclass
class JugsConfig:
    """Configuration for Jugs puzzle generation"""

    # Number of jugs in every generated puzzle.
    num_jugs: int = 3
    # Minimum number of moves the shortest solution must need.
    difficulty: int = 10
    # Base seed of the dataset; None leaves the choice to the dataset base class.
    seed: Optional[int] = None
    # Number of items the dataset exposes.
    size: int = 500

    def validate(self):
        """Validate configuration parameters.

        Raises:
            AssertionError: if `num_jugs` is not greater than 2 or `difficulty`
                is not strictly between 0 and 200.
        """
        assert self.num_jugs > 2, "num_jugs must be gt 2"
        assert self.difficulty > 0, "difficulty must be gt 0"
        assert self.difficulty < 200, "difficulty must be lt 200"
+
+
class JugsDataset(ProceduralDataset):
    """Generates water jug puzzles inspired by [this scene from _Die Hard 3_](https://www.youtube.com/watch?v=6cAbgAaEOVE), with configurable parameters"""

    def __init__(self, config: JugsConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single Jugs task

        The item is derived from a random generator seeded with `self.seed + idx`.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: None (many move sequences are valid, so no single
                  reference answer is given)
                - metadata: dict with one possible solution ("possible_answer")
                  and the generated puzzle ("puzzle")
        """
        rng = Random(self.seed + idx)

        puzzle = generate_puzzle(rng, num_jugs=self.config.num_jugs, difficulty=self.config.difficulty)
        solution = generate_jug_solution(puzzle["jug_capacities"], puzzle["target"])

        cap_str = ", ".join(f"{chr(ord('A')+i)}:{cap}" for i, cap in enumerate(puzzle["jug_capacities"]))
        question = f"""
You are a police officer. A maniac has planted a bomb next to a public fountain.

To defuse the bomb, you must solve a puzzle. The puzzle is solved when you fill any of the available jugs with the target amount of water.

You have three move types: 'fill', 'empty' and 'pour'.

To fill Jug A, you 'fill A'.
To empty Jug B, you 'empty B'.
To pour the contents of Jug A into Jug B, you 'pour A->B'.
All jugs are empty to begin with.

The empty jugs hold this many litres of water: {cap_str}
And your target is: {puzzle['target']} litres.

How do you defuse the bomb?

Reply as a JSON-parsable list of moves which result in any of the jugs being filled with the target amount.
        """

        return {
            "question": question,
            "answer": None,
            "metadata": {"possible_answer": solution, "puzzle": puzzle},
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
        """Determine if the solution provided solves the Jugs task.

        The answer must be a JSON list of move strings. It is replayed against the
        puzzle stored in the entry's metadata.

        Args:
            answer (Optional[str]): The user's answer.
            entry (Dict[str, Any]): The original dataset entry; its
                metadata["puzzle"] holds the jug capacities and the target.

        Returns:
            float: 1.0 for a move list that leaves the target amount in some jug,
            0.0 when no answer was given, and 0.01 for anything else (invalid
            JSON, a non-list value, malformed moves, or a wrong final state).
        """
        if answer is None:
            return 0.0

        try:
            proposed_moves = json.loads(answer)
            if not isinstance(proposed_moves, list):
                return 0.01
            solved, _states = verify_solution(entry["metadata"]["puzzle"], proposed_moves)
        except Exception:
            # Scoring boundary: a malformed answer must earn a low score, never crash the run.
            return 0.01

        return 1.0 if solved else 0.01


# Make the dataset and its config available under the name "jugs".
register_dataset("jugs", JugsDataset, JugsConfig)