diff --git a/reasoning_gym/games/rush_hour.py b/reasoning_gym/games/rush_hour.py
index 98f0a5ad..9bf9efaa 100644
--- a/reasoning_gym/games/rush_hour.py
+++ b/reasoning_gym/games/rush_hour.py
@@ -156,6 +156,33 @@ class RushHourDataset(ProceduralDataset):
             },
         }
 
+    def score_answer(self, answer: Optional[str], entry: dict) -> float:
+        """Score a Rush Hour solution by simulating the moves.
+
+        Args:
+            answer: String of moves in format "F+1 K+1 M-1 C+3 H+2 ...".
+                None, an empty string, or a non-string value scores 0.0.
+            entry: The problem entry; entry["metadata"]["board_config"] is
+                handed to Board to rebuild the starting position.
+
+        Returns:
+            1.0 if the board reports solved after the moves, 0.0 otherwise.
+        """
+        # Reject anything that is not a non-empty string before touching the
+        # board, so an unexpected answer type is scored 0.0 rather than raising.
+        if not isinstance(answer, str) or not answer:
+            return 0.0
+
+        try:
+            # Replay the answer on a fresh board built from the entry's config.
+            board = Board(entry["metadata"]["board_config"])
+            board.perform_moves(answer)
+            return 1.0 if board.solved else 0.0
+        except (ValueError, IndexError, AttributeError):
+            # Malformed input is graded as a wrong answer instead of
+            # propagating the error to the caller.
+            return 0.0
+
 
 class Board:
     def __init__(self, desc: str):
diff --git a/tests/test_rush_hour.py b/tests/test_rush_hour.py
index 5123ebc5..bd439667 100644
--- a/tests/test_rush_hour.py
+++ b/tests/test_rush_hour.py
@@ -62,6 +62,24 @@ def test_rush_hour_move_filtering():
         assert 5 <= moves <= 10, f"Puzzle with {moves} moves outside configured range 5-10"
 
 
+def test_score_answer():
+    """score_answer must give 0.0 to missing, malformed and non-solving answers."""
+    dataset = RushHourDataset(RushHourConfig(min_moves=1, max_moves=50, size=10, seed=42))
+    entry = dataset[0]
+
+    # Every candidate below, from a missing answer to a well-formed but
+    # insufficient move list, is expected to be graded as unsolved.
+    rejected_answers = (
+        None,
+        "",
+        "invalid",
+        "A+1 B-2 INVALID",
+        "A+1 B-2",  # incomplete solution
+    )
+    for candidate in rejected_answers:
+        assert dataset.score_answer(candidate, entry) == 0.0
+
+
 def test_perform_moves():
     b = Board("GBBoLoGHIoLMGHIAAMCCCKoMooJKDDEEJFFo")
     assert not b.solved