atropos/environments/community/selcube/rubiks_cube_curriculum.py
Shannon Sands c360ee20e7 linting
2025-05-26 09:39:51 +10:00

321 lines
11 KiB
Python

#!/usr/bin/env python3
"""
RubiksCubeCurriculum: Curriculum learning utilities for Rubik's Cube environment
This module provides classes and functions to implement curriculum learning for
the Rubik's cube environment, where the difficulty gradually increases as the
model improves in solving simpler challenges.
"""
import logging
import random
from typing import Any, Dict, List
logger = logging.getLogger(__name__)
class CurriculumLevel:
"""Represents a curriculum learning level for Rubik's cube solving"""
def __init__(
self,
level: int,
min_scramble_moves: int,
max_scramble_moves: int,
max_steps: int,
reward_per_correctly_placed_cubie: float,
example_patterns: List[List[str]] = None,
description: str = None,
):
"""
Initialize a curriculum level
Args:
level: Level number (higher is more difficult)
min_scramble_moves: Minimum number of scramble moves
max_scramble_moves: Maximum number of scramble moves
max_steps: Maximum allowed steps to solve at this level
reward_per_correctly_placed_cubie: Reward multiplier for correctly placed cubies
example_patterns: Optional list of move sequences to learn at this level
description: Human-readable description of this level
"""
self.level = level
self.min_scramble_moves = min_scramble_moves
self.max_scramble_moves = max_scramble_moves
self.max_steps = max_steps
self.reward_per_correctly_placed_cubie = reward_per_correctly_placed_cubie
self.example_patterns = example_patterns or []
self.description = (
description
or f"Level {level}: {min_scramble_moves}-{max_scramble_moves} scramble moves"
)
def get_scramble_moves(self) -> int:
"""Get a random number of scramble moves within the level's range"""
return random.randint(self.min_scramble_moves, self.max_scramble_moves)
def __repr__(self) -> str:
return (
f"CurriculumLevel(level={self.level}, "
f"scramble_moves={self.min_scramble_moves}-{self.max_scramble_moves})"
)
class RubiksCubeCurriculum:
"""Manages curriculum progression for Rubik's cube solver training"""
def __init__(
self,
starting_level: int = 1,
max_level: int = 5,
auto_progress: bool = True,
success_threshold: float = 0.7,
advancement_window_size: int = 50,
min_solved_at_level: int = 25,
):
"""
Initialize the curriculum manager
Args:
starting_level: Initial curriculum level
max_level: Maximum curriculum level
auto_progress: Whether to automatically progress through levels
success_threshold: Success rate threshold to advance to next level
advancement_window_size: Number of episodes to consider for advancement
min_solved_at_level: Minimum number of episodes that must be solved at a level
before considering advancement
"""
self.current_level = starting_level
self.max_level = max_level
self.auto_progress = auto_progress
self.success_threshold = success_threshold
self.advancement_window_size = advancement_window_size
self.min_solved_at_level = min_solved_at_level
# Track episode results for potential advancement
self.episode_results = [] # List of (level, is_solved, num_steps) tuples
# Define curriculum levels
self.levels = self._create_default_curriculum()
def _create_default_curriculum(self) -> Dict[int, CurriculumLevel]:
"""Create the default curriculum progression"""
levels = {}
# Level 1: Very simple scrambles (1-3 moves)
levels[1] = CurriculumLevel(
level=1,
min_scramble_moves=1,
max_scramble_moves=3,
max_steps=15,
reward_per_correctly_placed_cubie=0.1,
description="Beginner level - Single move to Triple moves scrambles",
)
# Level 2: Simple scrambles (4-7 moves)
levels[2] = CurriculumLevel(
level=2,
min_scramble_moves=4,
max_scramble_moves=7,
max_steps=20,
reward_per_correctly_placed_cubie=0.075,
description="Easy level - Learn basic patterns and simple sequences",
)
# Level 3: Moderate scrambles (8-12 moves)
levels[3] = CurriculumLevel(
level=3,
min_scramble_moves=8,
max_scramble_moves=12,
max_steps=25,
reward_per_correctly_placed_cubie=0.05,
description="Intermediate level - More complex patterns and sequences",
)
# Level 4: Challenging scrambles (13-17 moves)
levels[4] = CurriculumLevel(
level=4,
min_scramble_moves=13,
max_scramble_moves=17,
max_steps=30,
reward_per_correctly_placed_cubie=0.025,
description="Advanced level - Complex scrambles requiring deep planning",
)
# Level 5: Expert scrambles (18-22 moves)
levels[5] = CurriculumLevel(
level=5,
min_scramble_moves=18,
max_scramble_moves=22,
max_steps=40,
reward_per_correctly_placed_cubie=0.01,
description="Expert level - Near optimal scrambles approaching God's number",
)
return levels
def get_current_level(self) -> CurriculumLevel:
"""Get the current curriculum level"""
return self.levels[self.current_level]
def record_episode_result(
self, level: int, is_solved: bool, num_steps: int
) -> None:
"""
Record the result of an episode
Args:
level: The curriculum level of the episode
is_solved: Whether the cube was solved successfully
num_steps: Number of steps taken in the episode
"""
self.episode_results.append((level, is_solved, num_steps))
# Keep only the most recent window of results
if len(self.episode_results) > self.advancement_window_size:
self.episode_results = self.episode_results[-self.advancement_window_size :]
# Check if we should advance to the next level
if self.auto_progress:
self._check_advancement()
def _check_advancement(self) -> None:
"""Check if we should advance to the next level based on recent performance"""
# Only consider episodes at the current level
current_level_results = [
r for r in self.episode_results if r[0] == self.current_level
]
# Need enough data to make a decision
if len(current_level_results) < self.min_solved_at_level:
return
# Calculate success rate at current level
success_count = sum(1 for _, is_solved, _ in current_level_results if is_solved)
success_rate = success_count / len(current_level_results)
# Log the current performance
logger.info(
f"Curriculum performance: Level {self.current_level}, "
f"Success rate: {success_rate:.2f} ({success_count}/{len(current_level_results)})"
)
# Check if we should advance
if (
success_rate >= self.success_threshold
and success_count >= self.min_solved_at_level
and self.current_level < self.max_level
):
self.current_level += 1
logger.info(
f"Advancing to curriculum level {self.current_level}: "
f"{self.levels[self.current_level].description}"
)
# Reset episode results after advancing
self.episode_results = []
def set_level(self, level: int) -> None:
"""
Manually set the curriculum level
Args:
level: The new curriculum level (must be between 1 and max_level)
"""
if level < 1 or level > self.max_level:
logger.warning(
f"Invalid curriculum level {level}. Must be between 1 and {self.max_level}. "
f"Keeping current level {self.current_level}."
)
return
self.current_level = level
logger.info(
f"Manually set curriculum to level {level}: {self.levels[level].description}"
)
# Reset episode results after manual level change
self.episode_results = []
def get_level_metrics(self) -> Dict[str, Any]:
"""Get metrics for the current curriculum level"""
current_level_results = [
r for r in self.episode_results if r[0] == self.current_level
]
if not current_level_results:
return {
"curriculum_level": self.current_level,
"curriculum_description": self.levels[self.current_level].description,
"level_success_rate": 0.0,
"level_episodes": 0,
"level_solved_count": 0,
"level_avg_steps": 0.0,
"progress_to_next_level": 0.0,
}
success_count = sum(1 for _, is_solved, _ in current_level_results if is_solved)
success_rate = success_count / len(current_level_results)
# Calculate average steps for solved episodes
solved_episodes = [
(level, solved, steps)
for level, solved, steps in current_level_results
if solved
]
avg_steps = sum(steps for _, _, steps in solved_episodes) / max(
1, len(solved_episodes)
)
# Calculate progress to next level (0.0 to 1.0)
if self.current_level >= self.max_level:
progress_to_next = 1.0
else:
progress_threshold = self.success_threshold * self.min_solved_at_level
current_progress = success_rate * len(current_level_results)
progress_to_next = min(1.0, current_progress / progress_threshold)
return {
"curriculum_level": self.current_level,
"curriculum_description": self.levels[self.current_level].description,
"level_success_rate": success_rate,
"level_episodes": len(current_level_results),
"level_solved_count": success_count,
"level_avg_steps": avg_steps,
"progress_to_next_level": progress_to_next,
}
# Example usage
if __name__ == "__main__":
# Set up logging
logging.basicConfig(level=logging.INFO)
# Create curriculum manager
curriculum = RubiksCubeCurriculum(
starting_level=1,
max_level=5,
auto_progress=True,
success_threshold=0.7,
advancement_window_size=50,
min_solved_at_level=25,
)
# Simulate some episodes
# In a real setup, these results would come from actual cube-solving episodes
for _ in range(40):
# Simulate success with 80% probability for level 1
is_solved = random.random() < 0.8
steps = random.randint(5, 15)
curriculum.record_episode_result(1, is_solved, steps)
# Print metrics
print(curriculum.get_level_metrics())
# Current level should now be 2 if enough episodes were solved
print(f"Current level: {curriculum.current_level}")
# Manually set to level 3
curriculum.set_level(3)
print(f"After manual set, current level: {curriculum.current_level}")