diff --git a/reasoning_gym/games/rush_hour.py b/reasoning_gym/games/rush_hour.py
index ba48f8a2..5b3fff68 100644
--- a/reasoning_gym/games/rush_hour.py
+++ b/reasoning_gym/games/rush_hour.py
@@ -149,7 +149,9 @@ class RushHourDataset(ProceduralDataset):
         instructions = (
             "Move the red car (AA) to the exit on the right.\n"
             "Specify moves in the format: 'F+1 K+1 M-1 C+3 H+2 ...'\n"
-            "where the letter is the vehicle and +/- number is spaces to move right/left or down/up."
+            "where the letter is the vehicle and +/- number is spaces to move right/left or down/up.\n"
+            "Walls are marked with an 'x'. Cars cannot move through walls, and walls cannot be moved.\n"
+            "A car oriented vertically can only move up and down, a car oriented horizontally can only move left and right."
         )
 
         return {
@@ -182,7 +184,7 @@ class RushHourDataset(ProceduralDataset):
             board.perform_moves(answer)
 
             # Check if solved
-            return 1.0 if board.solved else 0.0
+            return 1.0 if board.solved else 0.01
 
         except (ValueError, IndexError, AttributeError) as e:
             # Handle malformed input gracefully
@@ -326,9 +328,7 @@ class Board:
         move_ops = [(chars, int(num) if sign == "+" else -int(num)) for chars, sign, num in matches]
 
         for target, dir in move_ops:
-            print(target, dir)
             self.move(target, dir)
-            print(self.board_str())
 
     @property
     def solved(self) -> bool:
diff --git a/tests/test_rush_hour.py b/tests/test_rush_hour.py
index b05ee78c..db25811e 100644
--- a/tests/test_rush_hour.py
+++ b/tests/test_rush_hour.py
@@ -30,7 +30,7 @@ def test_rush_hour_deterministic():
 
 def test_rush_hour_items():
     """Test basic properties of generated items"""
-    config = RushHourConfig(min_moves=1, max_moves=10, size=10, seed=42)
+    config = RushHourConfig(min_moves=1, max_moves=10, size=18000, seed=42)
     dataset = RushHourDataset(config)
 
     for i in range(len(dataset)):
@@ -73,27 +73,31 @@ def test_score_answer():
     # Test invalid answers
     assert dataset.score_answer(None, puzzle) == 0.0
     assert dataset.score_answer("", puzzle) == 0.0
-    assert dataset.score_answer("invalid", puzzle) == 0.0
-    assert dataset.score_answer("A+1 B-2 INVALID", puzzle) == 0.0
+    assert dataset.score_answer("invalid", puzzle) == 0.01
+    assert dataset.score_answer("A+1 B-2 INVALID", puzzle) == 0.01
 
     # Test incomplete solution
-    assert dataset.score_answer("A+1 B-2", puzzle) == 0.0
+    assert dataset.score_answer("A+1 B-2", puzzle) == 0.01
 
 
 def test_perform_moves():
     b = Board("GBBoLoGHIoLMGHIAAMCCCKoMooJKDDEEJFFo")
+
     assert not b.solved
     incomplete_moves = "F+1 K+1 M-1 C+3 H+2 J-1 E+1 G+3 B-1 I-1 A-3 I+1 L+1 B+3 I-1 A+2 G-3"
     b.perform_moves(incomplete_moves)
     assert not b.solved
+    incomplete_moves = "X+1 \n Y+22"  # X and Y are not letters on this board; the input also contains a newline
+    b.perform_moves(incomplete_moves)
+    assert not b.solved  # the moves above are expected to leave the board unsolved
     solution = "E-1 H-3 A-1 J+1 C-3 M+1 B+1 K-4 A+1 C+2 D-1 F-1 H+3 A-1 K+1 B-1 M-1 C+1 J-1 E+1 G+3 A-1 I+1 B-3 I-1 A+1 G-1 E-1 J+1 C-1 K-1 L-1 M+3 A+3"
     b.perform_moves(solution)
     assert b.solved
 
 
 def test_perform_moves_walls():
+    # TODO: this test is incomplete (reason unknown) -- the assertions below are commented out.
     b = Board("BBoIKxCCCIKoGAAJooGoHJDDooHEELoFFoxL")
-    print(b.board_str())
     # assert sum(1 for p in b._pieces if p.fixed) == 2, "two walls expected"
     # assert not b.solved