atropos/environments/infinimath/test_curriculum.py
2025-05-12 07:26:10 +10:00

265 lines
9.8 KiB
Python

#!/usr/bin/env python3
"""
Test script for the Infinite Math curriculum manager.
This script tests the core functionality of both the MathCurriculum and InfiniteMath classes.
"""
import os
import random
import sys
from typing import Any, Dict, List, Optional, Tuple
# Use relative imports
from .curriculum import MathCurriculum
from .infinimath import InfiniteMath
def test_curriculum_initialization():
"""Test that the curriculum initializes correctly with different levels."""
print("\n=== Testing Curriculum Initialization ===")
# Test default initialization
curriculum = MathCurriculum()
assert curriculum.get_current_level() == 1, "Default level should be 1"
print("✓ Default initialization successful")
# Test initialization with specific level
curriculum = MathCurriculum(starting_level=3)
assert curriculum.get_current_level() == 3, "Starting level should be 3"
print("✓ Custom level initialization successful")
# Test initialization with invalid level
try:
curriculum = MathCurriculum(starting_level=10)
print("✗ Invalid level initialization should fail")
except ValueError:
print("✓ Invalid level initialization correctly raises ValueError")
def test_problem_generation():
"""Test that problems are generated correctly at different difficulty levels."""
print("\n=== Testing Problem Generation ===")
# Test problem generation at different levels
for level in range(1, 8):
curriculum = MathCurriculum(starting_level=level)
problem, solution, generator_id = curriculum.get_problem()
# Verify we got a problem and solution
assert (
isinstance(problem, str) and len(problem) > 0
), f"Problem at level {level} should be a non-empty string"
assert solution is not None, f"Solution at level {level} should not be None"
# Verify the generator ID belongs to the correct level
assert (
generator_id in curriculum.DIFFICULTY_LEVELS[level]
), f"Generator ID {generator_id} should be in level {level}"
print(
f"✓ Level {level} problem generated: {problem[:50]}{'...' if len(problem) > 50 else ''}"
)
print(f" Solution: {solution}")
print(f" Generator ID: {generator_id}")
def test_performance_tracking():
"""Test performance tracking and level advancement."""
print("\n=== Testing Performance Tracking and Advancement ===")
# Create curriculum with test parameters
curriculum = MathCurriculum(
starting_level=1, progress_threshold=0.7, min_evaluations=3
)
# Record some correct answers (not enough to advance)
generator_id = curriculum.DIFFICULTY_LEVELS[1][0] # Get a generator from level 1
curriculum.record_performance(generator_id, True)
curriculum.record_performance(generator_id, True)
# Check if we advance (should not)
did_advance = curriculum.advance_difficulty()
assert not did_advance, "Should not advance with only 2 evaluations"
assert curriculum.get_current_level() == 1, "Level should still be 1"
print("✓ Correctly did not advance with insufficient evaluations")
# Add one more correct answer (now should advance)
curriculum.record_performance(generator_id, True)
did_advance = curriculum.advance_difficulty()
assert did_advance, "Should advance with 3 correct evaluations (100% success rate)"
assert curriculum.get_current_level() == 2, "Level should be 2 after advancement"
print("✓ Correctly advanced to level 2 after sufficient success")
# Test with too low success rate
curriculum = MathCurriculum(
starting_level=1, progress_threshold=0.7, min_evaluations=3
)
generator_id = curriculum.DIFFICULTY_LEVELS[1][0]
curriculum.record_performance(generator_id, True) # 1 correct
curriculum.record_performance(generator_id, False) # 1 incorrect
curriculum.record_performance(generator_id, False) # 1 incorrect
did_advance = curriculum.advance_difficulty()
assert (
not did_advance
), "Should not advance with 33% success rate when threshold is 70%"
print("✓ Correctly did not advance with insufficient success rate")
# Test advancement at the highest level
curriculum = MathCurriculum(
starting_level=7, progress_threshold=0.7, min_evaluations=3
)
generator_id = curriculum.DIFFICULTY_LEVELS[7][0]
curriculum.record_performance(generator_id, True)
curriculum.record_performance(generator_id, True)
curriculum.record_performance(generator_id, True)
did_advance = curriculum.advance_difficulty()
assert not did_advance, "Should not advance beyond the highest level"
print("✓ Correctly did not advance beyond the highest level")
def test_level_descriptions():
"""Test that level descriptions are correct."""
print("\n=== Testing Level Descriptions ===")
curriculum = MathCurriculum()
for level in range(1, 8):
description = curriculum.get_level_description(level)
assert (
isinstance(description, str) and len(description) > 0
), f"Description for level {level} should be a non-empty string"
print(f"✓ Level {level}: {description}")
def test_infinite_math_environment():
"""Test the InfiniteMath environment functionality."""
print("\n=== Testing InfiniteMath Environment ===")
# Initialize the environment
env = InfiniteMath(starting_level=1, progress_threshold=0.7, min_evaluations=3)
# Get the initial state
state = env.get_state()
assert "problem" in state, "State should include a problem"
assert "current_level" in state, "State should include current level"
print(
f"✓ Initial state: Level {state['current_level']}, Problem: {state['problem'][:50]}{'...' if len(state['problem']) > 50 else ''}"
)
# Test answering a problem incorrectly
result = env.submit_answer("wrong answer")
assert "is_correct" in result, "Result should indicate correctness"
assert not result["is_correct"], "Result should be incorrect"
print("✓ Incorrect answer handled correctly")
# Test answering a problem correctly
# Note: We can't predict the correct answer, so we'll get it from the environment
correct_solution = env.current_solution
result = env.submit_answer(correct_solution)
assert result["is_correct"], "Result should be correct"
print("✓ Correct answer handled successfully")
# Test resetting the environment
env.reset(level=3)
state = env.get_state()
assert state["current_level"] == 3, "After reset, level should be 3"
print(f"✓ Reset to level 3 successful")
# Test getting difficulty stats
stats = env.get_difficulty_stats()
assert len(stats) == 7, "Should have stats for all 7 difficulty levels"
print("✓ Difficulty statistics retrieved successfully")
def simulate_learning():
"""Simulate a learning agent improving performance over time."""
print("\n=== Simulating Learning Process ===")
env = InfiniteMath(starting_level=1, progress_threshold=0.7, min_evaluations=5)
episodes = 30
print(f"Starting simulation with {episodes} episodes")
print(f"Initial level: {env.get_state()['current_level']}")
for i in range(episodes):
state = env.get_state()
current_level = state["current_level"]
# Simulated agent gradually improves - higher chance of correct answer over time
# and higher chance at lower levels
success_probability = min(0.5 + (i / episodes) + (1 / current_level), 0.95)
# Simulate an answer
is_correct = random.random() < success_probability
# If we decide to be correct, use the actual solution
if is_correct:
answer = env.current_solution
else:
# Otherwise provide a wrong answer
answer = "wrong answer"
# Submit the answer
result = env.submit_answer(answer)
# Check for level advancement
if result.get("did_advance_level", False):
new_level = result["new_level"]
print(
f"Episode {i+1}: Advanced to level {new_level}! (success probability: {success_probability:.2f})"
)
elif i % 5 == 0: # Print status occasionally
print(
f"Episode {i+1}: Still at level {current_level} (success probability: {success_probability:.2f})"
)
# Print final stats
final_state = env.get_state()
print(f"\nFinal level: {final_state['current_level']}")
print(
f"Overall accuracy: {final_state['correct_problems'] / final_state['total_problems']:.2%}"
)
# Print level-by-level stats
stats = env.get_difficulty_stats()
print("\nPerformance by level:")
for level, level_stats in stats.items():
if level_stats["problems_attempted"] > 0:
success_rate = level_stats["success_rate"]
if success_rate is not None:
print(
f"Level {level}: {success_rate:.2%} success rate ({level_stats['problems_attempted']} problems)"
)
else:
print(
f"Level {level}: Not enough data ({level_stats['problems_attempted']} problems)"
)
def main():
"""Run all tests."""
print("=== Starting Curriculum Manager Tests ===")
try:
test_curriculum_initialization()
test_problem_generation()
test_performance_tracking()
test_level_descriptions()
test_infinite_math_environment()
simulate_learning()
print("\n=== All tests completed successfully! ===")
except AssertionError as e:
print(f"\n❌ Test failed: {e}")
return 1
except Exception as e:
print(f"\n❌ Unexpected error: {e}")
return 1
return 0
if __name__ == "__main__":
sys.exit(main())