diff --git a/reasoning_gym/games/rush_hour.py b/reasoning_gym/games/rush_hour.py index ba48f8a2..5b3fff68 100644 --- a/reasoning_gym/games/rush_hour.py +++ b/reasoning_gym/games/rush_hour.py @@ -149,7 +149,9 @@ class RushHourDataset(ProceduralDataset): instructions = ( "Move the red car (AA) to the exit on the right.\n" "Specify moves in the format: 'F+1 K+1 M-1 C+3 H+2 ...'\n" - "where the letter is the vehicle and +/- number is spaces to move right/left or down/up." + "where the letter is the vehicle and +/- number is spaces to move right/left or down/up.\n" + "Walls are marked with an 'x'. Cars cannot move through walls, and walls cannot be moved.\n" + "A car oriented vertically can only move up and down, a car oriented horizontally can only move left and right." ) return { @@ -182,7 +184,7 @@ class RushHourDataset(ProceduralDataset): board.perform_moves(answer) # Check if solved - return 1.0 if board.solved else 0.0 + return 1.0 if board.solved else 0.01 except (ValueError, IndexError, AttributeError) as e: # Handle malformed input gracefully @@ -326,9 +328,7 @@ class Board: move_ops = [(chars, int(num) if sign == "+" else -int(num)) for chars, sign, num in matches] for target, dir in move_ops: - print(target, dir) self.move(target, dir) - print(self.board_str()) @property def solved(self) -> bool: diff --git a/tests/test_rush_hour.py b/tests/test_rush_hour.py index b05ee78c..db25811e 100644 --- a/tests/test_rush_hour.py +++ b/tests/test_rush_hour.py @@ -30,7 +30,7 @@ def test_rush_hour_deterministic(): def test_rush_hour_items(): """Test basic properties of generated items""" - config = RushHourConfig(min_moves=1, max_moves=10, size=10, seed=42) + config = RushHourConfig(min_moves=1, max_moves=10, size=18000, seed=42) dataset = RushHourDataset(config) for i in range(len(dataset)): @@ -73,27 +73,31 @@ def test_score_answer(): # Test invalid answers assert dataset.score_answer(None, puzzle) == 0.0 assert dataset.score_answer("", puzzle) == 0.0 - assert 
dataset.score_answer("invalid", puzzle) == 0.0 - assert dataset.score_answer("A+1 B-2 INVALID", puzzle) == 0.0 + assert dataset.score_answer("invalid", puzzle) == 0.01 + assert dataset.score_answer("A+1 B-2 INVALID", puzzle) == 0.01 # Test incomplete solution - assert dataset.score_answer("A+1 B-2", puzzle) == 0.0 + assert dataset.score_answer("A+1 B-2", puzzle) == 0.01 def test_perform_moves(): b = Board("GBBoLoGHIoLMGHIAAMCCCKoMooJKDDEEJFFo") + assert not b.solved incomplete_moves = "F+1 K+1 M-1 C+3 H+2 J-1 E+1 G+3 B-1 I-1 A-3 I+1 L+1 B+3 I-1 A+2 G-3" b.perform_moves(incomplete_moves) assert not b.solved + incomplete_moves = "X+1 \n Y+22" + b.perform_moves(incomplete_moves) + assert not b.solved solution = "E-1 H-3 A-1 J+1 C-3 M+1 B+1 K-4 A+1 C+2 D-1 F-1 H+3 A-1 K+1 B-1 M-1 C+1 J-1 E+1 G+3 A-1 I+1 B-3 I-1 A+1 G-1 E-1 J+1 C-1 K-1 L-1 M+3 A+3" b.perform_moves(solution) assert b.solved def test_perform_moves_walls(): + # TODO: This test is incomplete (reason unknown); the assertions below are still commented out. b = Board("BBoIKxCCCIKoGAAJooGoHJDDooHEELoFFoxL") - print(b.board_str()) # assert sum(1 for p in b._pieces if p.fixed) == 2, "two walls expected" # assert not b.solved