# reasoning-gym/reasoning_gym/games/tower_of_hanoi.py

# reasoning_gym/games/tower_of_hanoi.py

import math
import random
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

from ..factory import ProceduralDataset, register_dataset


@dataclass
class HanoiConfig:
    """
    Settings that control how Tower of Hanoi puzzles are generated.

    Attributes:
        min_disks: Smallest number of disks a puzzle may have.
        max_disks: Largest number of disks a puzzle may have.
        min_pegs: Smallest number of pegs a puzzle may have (at least 3).
        max_pegs: Largest number of pegs a puzzle may have.
        size: How many problem instances the dataset contains.
        seed: Optional seed for reproducible generation.
        visualize: When true, an ASCII rendering of the peg states is included.
    """

    # Disk-count range (inclusive on both ends).
    min_disks: int = 3
    max_disks: int = 7
    # Peg-count range (inclusive on both ends).
    min_pegs: int = 3
    max_pegs: int = 4
    # Dataset length and reproducibility.
    size: int = 50
    seed: Optional[int] = None
    # Toggle for ASCII visualizations in generated items.
    visualize: bool = False

    def validate(self) -> None:
        """
        Check that the disk and peg ranges are sensible.

        Raises:
            AssertionError: If a minimum is below its floor (1 disk, 3 pegs)
                or a maximum is smaller than its matching minimum.
        """
        fewest_disks, most_disks = self.min_disks, self.max_disks
        assert fewest_disks >= 1, "min_disks must be at least 1"
        assert most_disks >= fewest_disks, "max_disks must be >= min_disks"

        fewest_pegs, most_pegs = self.min_pegs, self.max_pegs
        assert fewest_pegs >= 3, "min_pegs must be at least 3"
        assert most_pegs >= fewest_pegs, "max_pegs must be >= min_pegs"


class MoveGenerator:
    """
    Produces a move list that transfers a stack of disks from one peg to another.

    With exactly three pegs the classic recursive solution is used; with more
    pegs the Frame-Stewart split is applied. Every emitted move is also applied
    to an internal model of the pegs (``pegs_state``), so trying to lift a disk
    from an empty peg is reported as a ``ValueError``.
    """

    def __init__(self, num_disks: int, pegs: List[int], start: int, target: int):
        """
        Set up the peg model with all disks stacked on ``start``.

        Args:
            num_disks: Number of disks; disk ``num_disks`` is the largest.
            pegs: Identifiers of every peg in the puzzle.
            start: Peg that initially holds all disks.
            target: Peg the disks must end up on.
        """
        self.num_disks = num_disks
        self.pegs = pegs
        self.start = start
        self.target = target

        endpoints = (start, target)
        self.auxiliary_pegs = [candidate for candidate in pegs if candidate not in endpoints]

        # Each peg is a stack: index 0 is the bottom, the last element is the top.
        self.pegs_state: Dict[int, List[int]] = {peg_id: [] for peg_id in pegs}
        initial_stack = list(range(num_disks, 0, -1))  # largest disk first, i.e. at the bottom
        if initial_stack:
            self.pegs_state[start].extend(initial_stack)

        self.moves: List[str] = []
        self.memo: Dict[Tuple[int, int], int] = {}  # cache for T(n, k), keyed by (n, k)

    def generate_moves(self) -> List[str]:
        """Solve the configured puzzle and return the accumulated move strings."""
        self.move(
            n=self.num_disks,
            source=self.start,
            target=self.target,
            auxiliary_pegs=self.auxiliary_pegs,
        )
        return self.moves

    def move(self, n: int, source: int, target: int, auxiliary_pegs: List[int]):
        """
        Recursively move the top ``n`` disks from ``source`` to ``target``.

        Args:
            n: Number of disks to transfer.
            source: Peg the disks are taken from.
            target: Peg the disks are moved to.
            auxiliary_pegs: Pegs that may be used as intermediate storage.

        Raises:
            ValueError: If more than one disk must move and fewer than three
                pegs (source, target and one auxiliary) are available.
        """
        if n == 0:
            return
        if n == 1:
            self._move_disk(source, target)
            return

        peg_count = len(auxiliary_pegs) + 2  # auxiliaries plus source and target
        if peg_count < 3:
            raise ValueError("At least 3 pegs are required.")

        if peg_count == 3:
            # Classic three-peg recursion.
            spare = auxiliary_pegs[0]
            self.move(n - 1, source, spare, [target])
            self._move_disk(source, target)
            self.move(n - 1, spare, target, [source])
            return

        # Frame-Stewart: park `split` disks, move the rest with one peg fewer,
        # then bring the parked disks over.
        split = self._best_split(n, peg_count)
        parking_peg = auxiliary_pegs[0]
        other_spares = auxiliary_pegs[1:]
        reduced_spares = [peg for peg in auxiliary_pegs if peg != parking_peg]

        # Phase 1: top `split` disks go to the parking peg using every peg.
        self.move(n=split, source=source, target=parking_peg, auxiliary_pegs=other_spares + [target])
        # Phase 2: remaining disks go to the target without touching the parking peg.
        self.move(n=n - split, source=source, target=target, auxiliary_pegs=reduced_spares)
        # Phase 3: parked disks go to the target using every peg again.
        self.move(n=split, source=parking_peg, target=target, auxiliary_pegs=other_spares + [source])

    def _best_split(self, n: int, k: int) -> int:
        """Return the first m in [1, n) minimizing 2*T(m, k) + T(n - m, k - 1); 1 if there is none."""

        def cost(m: int):
            return 2 * self._compute_T(m, k) + self._compute_T(n - m, k - 1)

        # min() keeps the first minimal candidate, so ties resolve to the smallest m.
        return min(range(1, n), key=cost, default=1)

    def _move_disk(self, from_peg: int, to_peg: int):
        """
        Move the top disk of ``from_peg`` onto ``to_peg`` and record the move.

        Raises:
            ValueError: If ``from_peg`` holds no disks.
        """
        origin_stack = self.pegs_state[from_peg]
        if not origin_stack:
            raise ValueError(f"No disks to move from Peg {from_peg}.")
        disk = origin_stack.pop()
        self.pegs_state[to_peg].append(disk)
        self.moves.append(f"Move disk {disk} from Peg {from_peg} to Peg {to_peg}")

    def _compute_T(self, n: int, k: int) -> int:
        """
        Return T(n, k), the Frame-Stewart move count for ``n`` disks on ``k`` pegs.

        Results for more than three pegs are cached in ``self.memo``.
        """
        if n == 0:
            return 0
        if n == 1:
            return 1
        if k == 3:
            return 2**n - 1

        key = (n, k)
        try:
            return self.memo[key]
        except KeyError:
            pass

        candidates = (
            2 * self._compute_T(m, k) + self._compute_T(n - m, k - 1)
            for m in range(1, n)
        )
        fewest = min(candidates, default=math.inf)
        self.memo[key] = fewest
        return fewest


class HanoiDataset(ProceduralDataset):
    """
    Generates Tower of Hanoi problems with solutions.

    Supports a variable number of pegs; solutions come from ``MoveGenerator``
    (classic recursion for three pegs, Frame-Stewart otherwise). Moves are
    exchanged as text of the form ``"Move disk X from Peg Y to Peg Z"``.
    """

    # Exact textual form of a single move; used with fullmatch so that leading
    # or trailing garbage is rejected.
    _MOVE_PATTERN = re.compile(r"Move disk (\d+) from Peg (\d+) to Peg (\d+)")

    # Width of one peg column in the ASCII rendering.
    # NOTE: "[" + 6 stars + "]" is 8 characters and str.center() never
    # truncates, so rows containing disks of size >= 6 are wider than the base
    # line. The width is kept at 7 so existing output stays unchanged.
    _COLUMN_WIDTH = 7

    def __init__(self, config: HanoiConfig):
        """Store the disk/peg ranges and visualization flag from ``config``."""
        super().__init__(config=config, seed=config.seed, size=config.size)
        self.min_pegs = config.min_pegs
        self.max_pegs = config.max_pegs
        self.min_disks = config.min_disks
        self.max_disks = config.max_disks
        self.visualize = config.visualize

    def __getitem__(self, idx: int) -> dict:
        """
        Generate a Tower of Hanoi problem instance.

        Args:
            idx: Index of the item; combined with the dataset seed (when one is
                set) so that the same index yields the same puzzle.

        Returns:
            dict with:
            - "question": Text describing the problem setup.
            - "answer": List of move strings that solve the puzzle.
            - "metadata": Puzzle parameters and the solution length.
            - "initial_state": (Only if visualize) ASCII rendering of the initial pegs.
            - "states": (Only if visualize) ASCII renderings of the initial state
              and of the state after each move.

        Raises:
            ValueError: If move generation fails, or (with visualize enabled)
                a generated move cannot be parsed or is illegal when replayed.
        """
        # self.seed is not assigned in this class; presumably it is set by
        # ProceduralDataset.__init__ from the seed passed above.
        rng = random.Random(self.seed + idx if self.seed is not None else None)

        # The order of RNG calls determines the generated puzzle: disks, pegs, endpoints.
        num_disks = rng.randint(self.min_disks, self.max_disks)
        num_pegs = rng.randint(self.min_pegs, self.max_pegs)

        # Pegs are identified by consecutive integers starting at 1.
        pegs = list(range(1, num_pegs + 1))
        start_peg, target_peg = rng.sample(pegs, 2)
        auxiliary_pegs = [peg for peg in pegs if peg not in (start_peg, target_peg)]

        solution = MoveGenerator(num_disks, pegs, start_peg, target_peg).generate_moves()

        question_str = (
            f"Solve the Tower of Hanoi problem with {num_disks} disks and {num_pegs} pegs.\n"
            f"Move all disks from Peg {start_peg} to Peg {target_peg} following the rules:\n"
            "- Only one disk can be moved at a time.\n"
            "- A larger disk cannot be placed on top of a smaller disk.\n"
            "- All disks must be on a peg at all times.\n"
            "Example:\n"
            "Move disk 1 from Peg 1 to Peg 3\n"
            "Move disk 2 from Peg 1 to Peg 2\n"
            "Move disk 1 from Peg 3 to Peg 2\n"
            "\n"
            "Provide the sequence of moves."
        )

        result = {
            "question": question_str,
            "answer": solution,
            "metadata": {
                "num_disks": num_disks,
                "num_pegs": num_pegs,
                "start_peg": start_peg,
                "target_peg": target_peg,
                "auxiliary_pegs": auxiliary_pegs,
                "solution_length": len(solution),
            },
        }

        if self.visualize:
            states = self._render_states(pegs, start_peg, num_disks, solution)
            result["initial_state"] = states[0]
            result["states"] = states  # initial state followed by the state after each move

        return result

    @staticmethod
    def _initial_pegs_state(pegs: List[int], start_peg: int, num_disks: int) -> Dict[int, List[int]]:
        """Return a peg mapping with all disks on ``start_peg``, largest at the bottom (index 0)."""
        pegs_state: Dict[int, List[int]] = {peg: [] for peg in pegs}
        pegs_state[start_peg].extend(range(num_disks, 0, -1))
        return pegs_state

    def _render_states(self, pegs: List[int], start_peg: int, num_disks: int, solution: List[str]) -> List[str]:
        """Replay ``solution`` from the initial position and return the ASCII rendering of every state."""
        pegs_state = self._initial_pegs_state(pegs, start_peg, num_disks)
        states = [self._visualize_state(pegs_state)]
        for move in solution:
            disk, from_peg, to_peg = self._parse_move(move)
            if not self._is_legal_move(pegs_state, disk, from_peg, to_peg):
                raise ValueError(f"Invalid move detected: {move}")
            pegs_state[from_peg].pop()
            pegs_state[to_peg].append(disk)
            states.append(self._visualize_state(pegs_state))
        return states

    def _visualize_state(self, pegs_state: Dict[int, List[int]]) -> str:
        """
        Create an ASCII visualization of the current state of the pegs.

        Args:
            pegs_state (dict): Dictionary mapping peg numbers to lists of disks,
                bottom disk first.

        Returns:
            str: One text row per level (top level first), a dashed base line
            and a row of peg labels, each terminated by a newline.
        """
        width = self._COLUMN_WIDTH
        # Number of rows equals the height of the tallest stack.
        max_height = max(len(disks) for disks in pegs_state.values())
        pegs = sorted(pegs_state.keys())

        lines = []
        for level in range(max_height, 0, -1):
            cells = []
            for peg in pegs:
                stack = pegs_state[peg]
                if len(stack) >= level:
                    cell = f"[{'*' * stack[level - 1]}]"
                else:
                    cell = "[ ]"
                cells.append(cell.center(width))
            lines.append("".join(cells))

        # Base line and peg labels.
        lines.append("-" * (width * len(pegs)))
        lines.append("".join(f"P{peg}".center(width) for peg in pegs))

        return "\n".join(lines) + "\n"

    @staticmethod
    def _is_legal_move(pegs_state: Dict[int, List[int]], disk: int, from_peg: int, to_peg: int) -> bool:
        """Return True if ``disk`` is on top of ``from_peg`` and may be placed on ``to_peg``."""
        source_stack = pegs_state.get(from_peg)
        target_stack = pegs_state.get(to_peg)
        if source_stack is None or target_stack is None:
            return False  # move refers to a peg that does not exist
        if not source_stack or source_stack[-1] != disk:
            return False  # the named disk is not the top disk of the source peg
        if target_stack and target_stack[-1] < disk:
            return False  # would place a larger disk on a smaller one
        return True

    def _validate_move(self, pegs_state: Dict[int, List[int]], move: str) -> bool:
        """
        Validate that a move adheres to the Tower of Hanoi rules.

        The move text is parsed with ``_parse_move`` so that validation and
        parsing always agree on what counts as a well-formed move.

        Args:
            pegs_state (dict): Current state of the pegs.
            move (str): Move instruction, e.g., "Move disk 2 from Peg 1 to Peg 3".

        Returns:
            bool: True if the move is well-formed and legal in ``pegs_state``,
            False otherwise (including unknown pegs or malformed text).
        """
        try:
            disk, from_peg, to_peg = self._parse_move(move)
        except (ValueError, TypeError):
            # ValueError: text does not match the move format;
            # TypeError: ``move`` is not a string.
            return False
        return self._is_legal_move(pegs_state, disk, from_peg, to_peg)

    def _parse_move(self, move: str) -> Tuple[int, int, int]:
        """
        Parse a move string and extract disk number, from peg, and to peg.

        The whole string must match the move format; extra text before or
        after the move is rejected.

        Args:
            move (str): Move instruction, e.g., "Move disk 2 from Peg 1 to Peg 3".

        Returns:
            tuple: (disk, from_peg, to_peg)

        Raises:
            ValueError: If ``move`` does not have the expected format.
        """
        match = self._MOVE_PATTERN.fullmatch(move)
        if not match:
            raise ValueError(f"Unexpected move format: '{move}'")

        disk, from_peg, to_peg = (int(group) for group in match.groups())
        return disk, from_peg, to_peg

    def score_answer(self, answer: Optional[str], metadata: Dict[str, Any]) -> float:
        """
        Score the user's solution for the Tower of Hanoi puzzle.

        The answer is expected to be a newline-separated sequence of moves in the format:
        "Move disk X from Peg Y to Peg Z"
        A list of such move strings is accepted as well.

        Expected behavior:
            - Correct answer (i.e. equivalent in length, or better, than the one provided in the dataset item) gives 1.0.
            - A correct solution that is suboptimal length gives a proportional reward of optimal_move_count/user_move_count
            - A badly formatted move, or a move that is illegal in the current state, gives a minimal reward (0.01).
            - An answer whose moves are all legal but which does not solve the puzzle gives a partial reward (0.05).
            - An empty string gives 0.01.
            - None, or an answer that is neither a string nor a list, gives 0.0.

        Args:
            answer: The proposed solution.
            metadata: The "metadata" dict of the dataset item; "num_disks",
                "num_pegs", "start_peg" and "target_peg" are required,
                "solution_length" is used as the optimal move count when present.
        """
        if answer is None:
            return 0.0

        if answer == "":
            return 0.01

        # Normalize the answer into a list of non-empty, stripped move strings.
        if isinstance(answer, str):
            moves = [line.strip() for line in answer.strip().splitlines() if line.strip()]
        elif isinstance(answer, list):
            moves = [line.strip() for line in answer if isinstance(line, str) and line.strip()]
        else:
            return 0.0

        num_disks = metadata["num_disks"]
        num_pegs = metadata["num_pegs"]
        start_peg = metadata["start_peg"]
        target_peg = metadata["target_peg"]

        peg_state = self._initial_pegs_state(list(range(1, num_pegs + 1)), start_peg, num_disks)

        # Replay the moves, stopping at the first malformed or illegal one.
        for move in moves:
            try:
                disk, from_peg, to_peg = self._parse_move(move)
            except ValueError:
                return 0.01  # Invalid move format

            if not self._is_legal_move(peg_state, disk, from_peg, to_peg):
                return 0.01

            peg_state[from_peg].pop()
            peg_state[to_peg].append(disk)

        # Solved means every disk is on the target peg, largest at the bottom.
        if peg_state[target_peg] != list(range(num_disks, 0, -1)):
            return 0.05

        user_moves = len(moves)
        optimal_moves = metadata.get("solution_length", user_moves)
        if user_moves <= optimal_moves:
            return 1.0
        return optimal_moves / user_moves


# Register HanoiDataset together with its config class under the name
# "tower_of_hanoi" via the factory module's register_dataset (imported above).
# NOTE(review): presumably this is what lets the dataset be created by name;
# confirm against ..factory.
register_dataset("tower_of_hanoi", HanoiDataset, HanoiConfig)