mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
321 lines
11 KiB
Python
321 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
RubiksCubeCurriculum: Curriculum learning utilities for Rubik's Cube environment
|
|
|
|
This module provides classes and functions to implement curriculum learning for
|
|
the Rubik's cube environment, where the difficulty gradually increases as the
|
|
model improves in solving simpler challenges.
|
|
"""
|
|
|
|
import logging
|
|
import random
|
|
from typing import Any, Dict, List
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class CurriculumLevel:
    """One difficulty tier of the Rubik's cube curriculum.

    A tier is a plain record: a scramble-length range, a step budget, a
    per-cubie reward value, optional example move sequences, and a
    human-readable description.
    """

    def __init__(
        self,
        level: int,
        min_scramble_moves: int,
        max_scramble_moves: int,
        max_steps: int,
        reward_per_correctly_placed_cubie: float,
        example_patterns: List[List[str]] = None,
        description: str = None,
    ):
        """
        Store the settings that define this tier.

        Args:
            level: Level number (higher is more difficult)
            min_scramble_moves: Lower bound (inclusive) on scramble length
            max_scramble_moves: Upper bound (inclusive) on scramble length
            max_steps: Maximum allowed steps to solve at this level
            reward_per_correctly_placed_cubie: Reward multiplier for correctly
                placed cubies; stored unchanged
            example_patterns: Optional list of move sequences to learn at this
                level; any falsy value is replaced by a new empty list
            description: Human-readable description of this level; any falsy
                value is replaced by a generated "Level N: a-b scramble moves"
                string
        """
        # Fill in the defaults up front so the assignments below are uniform.
        if not example_patterns:
            example_patterns = []
        if not description:
            description = (
                f"Level {level}: {min_scramble_moves}-{max_scramble_moves} "
                "scramble moves"
            )

        self.level = level
        self.min_scramble_moves = min_scramble_moves
        self.max_scramble_moves = max_scramble_moves
        self.max_steps = max_steps
        self.reward_per_correctly_placed_cubie = reward_per_correctly_placed_cubie
        self.example_patterns = example_patterns
        self.description = description

    def get_scramble_moves(self) -> int:
        """Return a random scramble length within this tier's inclusive range."""
        lowest, highest = self.min_scramble_moves, self.max_scramble_moves
        return random.randint(lowest, highest)

    def __repr__(self) -> str:
        """Return a short debug string with the level number and move range."""
        move_span = f"{self.min_scramble_moves}-{self.max_scramble_moves}"
        return f"CurriculumLevel(level={self.level}, scramble_moves={move_span})"
|
|
|
|
|
|
class RubiksCubeCurriculum:
    """Manages curriculum progression for Rubik's cube solver training.

    Episode outcomes are kept in a bounded list of the most recent results.
    With auto-progression enabled, each recorded episode triggers a check that
    moves to the next level once the results for the current level show both
    enough solved episodes and a high enough success rate.
    """

    def __init__(
        self,
        starting_level: int = 1,
        max_level: int = 5,
        auto_progress: bool = True,
        success_threshold: float = 0.7,
        advancement_window_size: int = 50,
        min_solved_at_level: int = 25,
    ):
        """
        Initialize the curriculum manager

        Args:
            starting_level: Initial curriculum level. Values outside the
                defined levels are clamped (with a warning).
            max_level: Maximum curriculum level. Values outside the defined
                levels are clamped (with a warning).
            auto_progress: Whether to automatically progress through levels
            success_threshold: Success rate threshold to advance to next level
            advancement_window_size: Number of most recent episodes kept for
                the advancement decision
            min_solved_at_level: Minimum number of episodes that must be solved
                at a level before considering advancement
        """
        self.current_level = starting_level
        self.max_level = max_level
        self.auto_progress = auto_progress
        self.success_threshold = success_threshold
        self.advancement_window_size = advancement_window_size
        self.min_solved_at_level = min_solved_at_level

        # Most recent results as (level, is_solved, num_steps) tuples.
        self.episode_results = []

        # Mapping of level number -> CurriculumLevel.
        self.levels = self._create_default_curriculum()

        # Every later lookup is ``self.levels[self.current_level]``; make sure
        # both bounds refer to levels that actually exist.
        self._normalize_level_bounds()

        # The window can never hold more results than its size, so a larger
        # solved-count requirement would silently block advancement forever.
        if auto_progress and min_solved_at_level > advancement_window_size:
            logger.warning(
                "min_solved_at_level (%s) exceeds advancement_window_size (%s); "
                "automatic advancement can never trigger.",
                min_solved_at_level,
                advancement_window_size,
            )

    def _normalize_level_bounds(self) -> None:
        """Clamp ``max_level`` and ``current_level`` onto the defined levels."""
        if not self.levels:
            return

        lowest, highest = min(self.levels), max(self.levels)

        if not lowest <= self.max_level <= highest:
            clamped_max = min(max(self.max_level, lowest), highest)
            logger.warning(
                "max_level %s is outside the defined levels %s-%s; using %s.",
                self.max_level,
                lowest,
                highest,
                clamped_max,
            )
            self.max_level = clamped_max

        if not lowest <= self.current_level <= self.max_level:
            clamped_start = min(max(self.current_level, lowest), self.max_level)
            logger.warning(
                "starting_level %s is outside the valid range %s-%s; using %s.",
                self.current_level,
                lowest,
                self.max_level,
                clamped_start,
            )
            self.current_level = clamped_start

    def _create_default_curriculum(self) -> "Dict[int, CurriculumLevel]":
        """Create the default five-level curriculum, keyed by level number."""
        # (level, min_moves, max_moves, max_steps, cubie_reward, description)
        level_specs = (
            (
                1, 1, 3, 15, 0.1,
                "Beginner level - Single move to Triple moves scrambles",
            ),
            (
                2, 4, 7, 20, 0.075,
                "Easy level - Learn basic patterns and simple sequences",
            ),
            (
                3, 8, 12, 25, 0.05,
                "Intermediate level - More complex patterns and sequences",
            ),
            (
                4, 13, 17, 30, 0.025,
                "Advanced level - Complex scrambles requiring deep planning",
            ),
            (
                5, 18, 22, 40, 0.01,
                "Expert level - Near optimal scrambles approaching God's number",
            ),
        )

        return {
            number: CurriculumLevel(
                level=number,
                min_scramble_moves=min_moves,
                max_scramble_moves=max_moves,
                max_steps=step_budget,
                reward_per_correctly_placed_cubie=cubie_reward,
                description=text,
            )
            for number, min_moves, max_moves, step_budget, cubie_reward, text
            in level_specs
        }

    def get_current_level(self) -> "CurriculumLevel":
        """Get the current curriculum level"""
        return self.levels[self.current_level]

    def record_episode_result(
        self, level: int, is_solved: bool, num_steps: int
    ) -> None:
        """
        Record the result of an episode

        Appends the result, trims the stored results to the most recent
        ``advancement_window_size`` entries and, when ``auto_progress`` is
        set, checks whether the curriculum should advance.

        Args:
            level: The curriculum level of the episode
            is_solved: Whether the cube was solved successfully
            num_steps: Number of steps taken in the episode
        """
        self.episode_results.append((level, is_solved, num_steps))

        # Drop the oldest entries beyond the window. The overflow is computed
        # explicitly because a ``[-size:]`` slice keeps the whole list when
        # size is 0.
        overflow = len(self.episode_results) - self.advancement_window_size
        if overflow > 0:
            self.episode_results = self.episode_results[overflow:]

        if self.auto_progress:
            self._check_advancement()

    def _check_advancement(self) -> None:
        """Check if we should advance to the next level based on recent performance"""
        # Only consider episodes at the current level
        current_level_results = [
            r for r in self.episode_results if r[0] == self.current_level
        ]
        attempts = len(current_level_results)

        # Need enough data to make a decision. The explicit zero check keeps
        # the success-rate division safe when min_solved_at_level <= 0.
        if attempts == 0 or attempts < self.min_solved_at_level:
            return

        success_count = sum(1 for _, is_solved, _ in current_level_results if is_solved)
        success_rate = success_count / attempts

        logger.info(
            "Curriculum performance: Level %s, Success rate: %.2f (%s/%s)",
            self.current_level,
            success_rate,
            success_count,
            attempts,
        )

        next_level = self.current_level + 1
        if (
            success_rate >= self.success_threshold
            and success_count >= self.min_solved_at_level
            and self.current_level < self.max_level
            # Guards against max_level being raised past the defined levels
            # after construction.
            and next_level in self.levels
        ):
            self.current_level = next_level
            logger.info(
                "Advancing to curriculum level %s: %s",
                self.current_level,
                self.levels[next_level].description,
            )

            # Reset episode results after advancing
            self.episode_results = []

    def set_level(self, level: int) -> None:
        """
        Manually set the curriculum level

        An invalid level is rejected with a warning and the current level is
        kept. A successful change clears the recorded episode results.

        Args:
            level: The new curriculum level (must be between 1 and max_level
                and be one of the defined levels)
        """
        if level < 1 or level > self.max_level or level not in self.levels:
            logger.warning(
                "Invalid curriculum level %s. Must be between 1 and %s. "
                "Keeping current level %s.",
                level,
                self.max_level,
                self.current_level,
            )
            return

        self.current_level = level
        logger.info(
            "Manually set curriculum to level %s: %s",
            level,
            self.levels[level].description,
        )

        # Reset episode results after manual level change
        self.episode_results = []

    def _progress_to_next_level(self, success_count: int, success_rate: float) -> float:
        """Return how close the advancement criteria are to being met (0.0-1.0).

        Advancement needs both ``success_count >= min_solved_at_level`` and
        ``success_rate >= success_threshold``, so progress is the smaller of
        the two fractional completions. A non-positive requirement counts as
        already satisfied, which also avoids dividing by zero.
        """
        if self.current_level >= self.max_level:
            return 1.0

        if self.min_solved_at_level > 0:
            count_progress = success_count / self.min_solved_at_level
        else:
            count_progress = 1.0

        if self.success_threshold > 0:
            rate_progress = success_rate / self.success_threshold
        else:
            rate_progress = 1.0

        return min(1.0, count_progress, rate_progress)

    def get_level_metrics(self) -> Dict[str, Any]:
        """Get metrics for the current curriculum level

        Returns:
            A dict with the keys ``curriculum_level``,
            ``curriculum_description``, ``level_success_rate``,
            ``level_episodes``, ``level_solved_count``, ``level_avg_steps``
            (mean steps over solved episodes only) and
            ``progress_to_next_level``. All numeric values are zero when no
            episode has been recorded for the current level.
        """
        current_level_results = [
            r for r in self.episode_results if r[0] == self.current_level
        ]
        description = self.levels[self.current_level].description

        if not current_level_results:
            return {
                "curriculum_level": self.current_level,
                "curriculum_description": description,
                "level_success_rate": 0.0,
                "level_episodes": 0,
                "level_solved_count": 0,
                "level_avg_steps": 0.0,
                "progress_to_next_level": 0.0,
            }

        attempts = len(current_level_results)
        success_count = sum(1 for _, is_solved, _ in current_level_results if is_solved)
        success_rate = success_count / attempts

        # Average steps over solved episodes only; 0.0 when nothing was solved.
        solved_steps = [steps for _, solved, steps in current_level_results if solved]
        avg_steps = sum(solved_steps) / max(1, len(solved_steps))

        return {
            "curriculum_level": self.current_level,
            "curriculum_description": description,
            "level_success_rate": success_rate,
            "level_episodes": attempts,
            "level_solved_count": success_count,
            "level_avg_steps": avg_steps,
            "progress_to_next_level": self._progress_to_next_level(
                success_count, success_rate
            ),
        }
|
|
|
|
|
|
# Demo: run this module directly to watch the curriculum react to fake episodes
if __name__ == "__main__":
    # Make the INFO-level curriculum messages visible
    logging.basicConfig(level=logging.INFO)

    # Same settings as the constructor defaults, spelled out for illustration
    demo_settings = {
        "starting_level": 1,
        "max_level": 5,
        "auto_progress": True,
        "success_threshold": 0.7,
        "advancement_window_size": 50,
        "min_solved_at_level": 25,
    }
    curriculum = RubiksCubeCurriculum(**demo_settings)

    # Feed in 40 synthetic level-1 episodes; a real run would report the
    # outcome of actual cube-solving episodes instead.
    for _ in range(40):
        # Each fake episode is solved with probability 0.8 ...
        is_solved = random.random() < 0.8
        # ... and takes between 5 and 15 steps.
        steps = random.randint(5, 15)
        curriculum.record_episode_result(level=1, is_solved=is_solved, num_steps=steps)

    # Show the metrics for whichever level is current now
    print(curriculum.get_level_metrics())

    # Expected to be 2 if enough of the simulated episodes were solved
    print(f"Current level: {curriculum.current_level}")

    # Override the level by hand
    curriculum.set_level(3)
    print(f"After manual set, current level: {curriculum.current_level}")
|