diff --git a/environments/infinimath/curriculum.py b/environments/infinimath/curriculum.py
index 3a2d196a..c9c67cf9 100644
--- a/environments/infinimath/curriculum.py
+++ b/environments/infinimath/curriculum.py
@@ -16,151 +16,138 @@ class MathCurriculum:
     # Define difficulty levels and map generator IDs to each level
     DIFFICULTY_LEVELS = {
         # Level 1: Basic arithmetic operations
-        # Addition, Subtraction, Multiplication, 
-        # Division, Square, Factorial, Absolute difference, Percentage, IsPrime
         1: [
             0,  # Addition
             1,  # Subtraction
             2,  # Multiplication
             3,  # Division
             8,  # Square
-            31, # Factorial
-            71, # Absolute difference between two numbers
-            80, # Percentage of a number
-            90, # isprime
-        ],  
+            31,  # Factorial
+            71,  # Absolute difference between two numbers
+            80,  # Percentage of a number
+            90,  # isprime
+        ],
         # Level 2: Basic operations with fractions and pre-algebra
-        # Square Root, Basic Algebra, Fraction to Decimal, Fraction Division, 
-        # Fraction Multiplication, Compare Fractions, Cube Root, Exponentiation, 
-        # Power of Powers, Percentage difference/error, Is Composite
         2: [
             6,  # Square Root
-            11, # Basic Algebra
-            13, # Fraction to Decimal
-            16, # Fraction Division
-            28, # Fraction Multiplication
-            44, # Compare Fractions
-            47, # Cube Root
-            53, # Exponentiation
-            97, # Power of Powers
-            118,# Percentage difference
-            119,# Percentage error
-            124,# Is Composite
+            11,  # Basic Algebra
+            13,  # Fraction to Decimal
+            16,  # Fraction Division
+            28,  # Fraction Multiplication
+            44,  # Compare Fractions
+            47,  # Cube Root
+            53,  # Exponentiation
+            97,  # Power of Powers
+            118,  # Percentage difference
+            119,  # Percentage error
+            124,  # Is Composite
         ],
         # Level 3: Basic geometry and more algebra
-        # Area of Triangle, Triangle exists check, Third Angle of Triangle,
-        # Distance between 2 points, Pythagorean Theorem, 
-        # Fourth Angle of Quadrilateral, Sum of Angles of Polygon, 
-        # Area of a Sector, Perimeter of Polygons, Circumference, Arc length, 
-        # Area of Circle
         3: [
-            18, # Area of Triangle
-            19, # Triangle exists check
-            22, # Third Angle of Triangle
-            24, # Distance between 2 points
-            25, # Pythagorean Theorem
-            49, # Fourth Angle of Quadrilateral
-            58, # Sum of Angles of Polygon
-            75, # Area of a Sector
-            96, # Perimeter of Polygons
-            104,# Circumference
-            108,# Arc length of Angle
-            112,# Area of Circle
-            115,# Area of Circle given center and a point on circle
-        ],  
+            18,  # Area of Triangle
+            19,  # Triangle exists check
+            22,  # Third Angle of Triangle
+            24,  # Distance between 2 points
+            25,  # Pythagorean Theorem
+            49,  # Fourth Angle of Quadrilateral
+            58,  # Sum of Angles of Polygon
+            75,  # Area of a Sector
+            96,  # Perimeter of Polygons
+            104,  # Circumference
+            108,  # Arc length of Angle
+            112,  # Area of Circle
+            115,  # Area of Circle given center and a point on circle
+        ],
         # Level 4: More advanced algebra and basic statistics
-        # LCM, GCD, Midpoint, Factoring Quadratic, System of Equations, 
-        # Linear Equations, Common Factors, Intersection of Two Lines, Simple Interest, 
-        # Quadratic Equation, Mean and Median, Compound Interest, Combine Like terms
         4: [
             9,  # LCM (Least Common Multiple)
-            10, # GCD (Greatest Common Denominator)
-            20, # Midpoint of the two point
-            21, # Factoring Quadratic
-            23, # Solve a System of Equations in R^2
-            26, # Linear Equations
-            40, # Common Factors
-            41, # Intersection of Two Lines
-            45, # Simple Interest
-            50, # Quadratic Equation
-            76, # Mean and Median
-            78, # Compound Interest
-            105,# Combine Like terms
+            10,  # GCD (Greatest Common Divisor)
+            20,  # Midpoint of two points
+            21,  # Factoring Quadratic
+            23,  # Solve a System of Equations in R^2
+            26,  # Linear Equations
+            40,  # Common Factors
+            41,  # Intersection of Two Lines
+            45,  # Simple Interest
+            50,  # Quadratic Equation
+            76,  # Mean and Median
+            78,  # Compound Interest
+            105,  # Combine Like terms
         ],
         # Level 5: Vectors, matrices, and solid geometry
         5: [
-            17, # Integer Multiplication with 2x2 Matrix
-            32, # Surface Area of Cube
-            33, # Surface Area of Cuboid
-            34, # Surface Area of Cylinder
-            35, # Volume of Cube
-            36, # Volume of Cuboid
-            37, # Volume of cylinder
-            38, # Surface Area of cone
-            39, # Volume of cone
-            43, # Cross Product of 2 Vectors
-            46, # Multiplication of two matrices
-            60, # Surface Area of Sphere
-            61, # Volume of Sphere
-            70, # Angle between 2 vectors
-            72, # Dot Product of 2 Vectors
-            77, # Determinant to 2x2 Matrix
-            95, # Curved surface area of a cylinder
-            113,# Volume of frustum
-            117,# Volume of Hemisphere
-            122,# Volume of pyramid
-            123,# Surface area of pyramid
-        ],  # Matrix Multiplication, Surface Areas, Volumes, Vector operations, etc.
+            17,  # Integer Multiplication with 2x2 Matrix
+            32,  # Surface Area of Cube
+            33,  # Surface Area of Cuboid
+            34,  # Surface Area of Cylinder
+            35,  # Volume of Cube
+            36,  # Volume of Cuboid
+            37,  # Volume of cylinder
+            38,  # Surface Area of cone
+            39,  # Volume of cone
+            43,  # Cross Product of 2 Vectors
+            46,  # Multiplication of two matrices
+            60,  # Surface Area of Sphere
+            61,  # Volume of Sphere
+            70,  # Angle between 2 vectors
+            72,  # Dot Product of 2 Vectors
+            77,  # Determinant to 2x2 Matrix
+            95,  # Curved surface area of a cylinder
+            113,  # Volume of frustum
+            117,  # Volume of Hemisphere
+            122,  # Volume of pyramid
+            123,  # Surface area of pyramid
+        ],
         # Level 6: Advanced topics (calculus, statistics, computer science)
         6: [
             4,  # Binary Complement 1s
             5,  # Modulo Division
             7,  # Power Rule Differentiation
-            12, # Logarithm
-            14, # Decimal to Binary
-            15, # Binary to Decimal
-            27, # Prime Factorisation
-            30, # Combinations of Objects
-            42, # Permutations
-            48, # Power Rule Integration
-            52, # Probability of a certain sum appearing on faces of dice
-            54, # Confidence interval For sample S
-            55, # Comparing surds
-            56, # Fibonacci Series
-            59, # Mean,Standard Deviation,Variance
-            62, # nth Fibonacci number
-            64, # Binary to Hexidecimal
-            73, # Binary 2's Complement
-            79, # Decimal to Hexadecimal
-            84, # Converts decimal to octal
-            88, # Trigonometric Differentiation
-            89, # Definite Integral of Quadratic Equation
-            91, # Binary Coded Decimal to Integer
-            103,# Decimal to Binary Coded Decimal
-            107,# Conditional Probability
-            110,# Stationary Points
-        ],  # Binary operations, Calculus, Combinatorics, Probability, etc.
+            12,  # Logarithm
+            14,  # Decimal to Binary
+            15,  # Binary to Decimal
+            27,  # Prime Factorisation
+            30,  # Combinations of Objects
+            42,  # Permutations
+            48,  # Power Rule Integration
+            52,  # Probability of a certain sum appearing on faces of dice
+            54,  # Confidence interval For sample S
+            55,  # Comparing surds
+            56,  # Fibonacci Series
+            59,  # Mean,Standard Deviation,Variance
+            62,  # nth Fibonacci number
+            64,  # Binary to Hexadecimal
+            73,  # Binary 2's Complement
+            79,  # Decimal to Hexadecimal
+            84,  # Converts decimal to octal
+            88,  # Trigonometric Differentiation
+            89,  # Definite Integral of Quadratic Equation
+            91,  # Binary Coded Decimal to Integer
+            103,  # Decimal to Binary Coded Decimal
+            107,  # Conditional Probability
+            110,  # Stationary Points
+        ],
         # Level 7: Most complex topics
         7: [
-            65, # Multiplication of 2 complex numbers
-            66, # Geometric Progression
-            67, # Geometric Mean of N Numbers
-            68, # Harmonic Mean of N Numbers
-            69, # Euclidian norm or L2 norm of a vector
-            74, # Inverse of a Matrix
-            85, # Converts decimal to Roman Numerals
-            92, # Complex To Polar Form
-            93, # Union,Intersection,Difference of Two Sets
-            94, # Base Conversion
-            98, # Quotient of Powers with Same Base
-            99, # Quotient of Powers with Same Power
-            100,# complex Quadratic Equation
-            101,# Leap Year or Not
-            106,# signum function
-            109,# Binomial distribution
-            111,# Expanding Factored Binomial
-            121,# Product of scientific notations
-        ],  # Complex numbers, Advanced operations, etc.
+            65,  # Multiplication of 2 complex numbers
+            66,  # Geometric Progression
+            67,  # Geometric Mean of N Numbers
+            68,  # Harmonic Mean of N Numbers
+            69,  # Euclidean norm or L2 norm of a vector
+            74,  # Inverse of a Matrix
+            85,  # Converts decimal to Roman Numerals
+            92,  # Complex To Polar Form
+            93,  # Union,Intersection,Difference of Two Sets
+            94,  # Base Conversion
+            98,  # Quotient of Powers with Same Base
+            99,  # Quotient of Powers with Same Power
+            100,  # complex Quadratic Equation
+            101,  # Leap Year or Not
+            106,  # signum function
+            109,  # Binomial distribution
+            111,  # Expanding Factored Binomial
+            121,  # Product of scientific notations
+        ],
     }
 
     def __init__(
diff --git a/environments/infinimath/infinimath_env.py b/environments/infinimath/infinimath_env.py
index f4815904..7b0a5a35 100644
--- a/environments/infinimath/infinimath_env.py
+++ b/environments/infinimath/infinimath_env.py
@@ -3,7 +3,7 @@ import json
 import logging
 import random
 import re
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 from atroposlib.envs.base import BaseEnv, BaseEnvConfig, OpenaiConfig, ScoredDataGroup
 from atroposlib.utils.tokenize_for_trainer import tokenize_for_trainer
diff --git a/environments/infinimath/infinimath_local_server.py b/environments/infinimath/infinimath_local_server.py
index d170e443..84853db6 100644
--- a/environments/infinimath/infinimath_local_server.py
+++ b/environments/infinimath/infinimath_local_server.py
@@ -1,14 +1,11 @@
 #!/usr/bin/env python3
-import argparse
 import asyncio
 import logging
 import os
 
 from dotenv import load_dotenv
-from openai import OpenAI
 
 from atroposlib.envs.base import OpenaiConfig
-from atroposlib.utils.config_handler import ConfigHandler
 from environments.infinimath.infinimath_env import (
     InfiniteMathEnv,
     InfiniteMathEnvConfig,
@@ -20,182 +17,119 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 
-def parse_arguments():
-    parser = argparse.ArgumentParser(description="InfiniteMath environment server")
-    parser.add_argument(
-        "--config",
-        type=str,
-        default="infinimath",
-        help="Configuration file name (without .yaml extension or path for configs/envs/ directory, or full path)",
-    )
-    return parser.parse_args()
-
-
 async def main():
-    logger.info("Starting InfiniteMath environment server")
+    logger.info("Starting InfiniteMath environment local runner")
 
-    # Parse command line arguments
-    args = parse_arguments()
+    config = InfiniteMathEnvConfig(
+        tokenizer_name="NousResearch/Nous-Hermes-2-Yi-34B",
+        group_size=1,
+        use_wandb=False,
+        max_num_workers=1,
+        rollout_server_url="http://localhost:8000",
+        total_steps=1,
+        batch_size=1,
+        steps_per_eval=0,
+        max_token_length=2048,
+        wandb_name="infinite_math_local_debug",
+        ensure_scores_are_not_same=False,
+        starting_level=1,
+        progress_threshold=0.8,
+        min_evaluations=3,
+        correct_reward=1.0,
+        incorrect_reward=-0.5,
+        think_block_bonus=0.1,
+        boxed_answer_bonus=0.2,
+        apply_length_penalty=False,
+        length_threshold_ratio=0.6,
+        temperature=0.3,
+        top_p=0.9,
+    )
 
-    # Initialize config handler and load configuration
-    config_handler = ConfigHandler()
-
-    # Determine config path
-    if (
-        os.path.isabs(args.config)
-        or "/" in args.config
-        or args.config.endswith(".yaml")
-    ):
-        config_path = args.config
-    else:
-        # short form that defaults to the envs directory
-        config_path = os.path.join(
-            config_handler.config_dir, f"envs/{args.config}.yaml"
+    server_configs = [
+        OpenaiConfig(
+            model_name="NousResearch/Nous-Hermes-2-Yi-34B",
+            base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:9004/v1"),
+            api_key=os.getenv("OPENAI_API_KEY", "dummy-key"),
+            num_requests_for_eval=0,
         )
+    ]
+    
+    logger.info("Using hardcoded debug configuration.")
+    logger.debug(f"Env Config: {config}")
+    logger.debug(f"Server Configs: {server_configs}")
 
-    logger.info(f"Loading configuration from: {config_path}")
 
     try:
-        with open(config_path, "r") as f:
-            import yaml
-
-            raw_config = yaml.safe_load(f)
-            logger.info(f"Loaded configuration successfully")
-    except Exception as e:
-        logger.error(f"Error loading config directly: {e}")
-        logger.info("Falling back to default config handler")
-        raw_config = config_handler.load_config(args)
-
-    # Configure the InfiniteMath environment with values from config
-    config = InfiniteMathEnvConfig(
-        # Base environment parameters
-        tokenizer_name=raw_config.get(
-            "tokenizer_name", "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
-        ),
-        group_size=raw_config.get("group_size", 1),
-        use_wandb=raw_config.get("use_wandb", False),
-        max_num_workers=raw_config.get("max_num_workers", 1),
-        rollout_server_url=raw_config.get(
-            "rollout_server_url", "http://localhost:8000"
-        ),
-        total_steps=raw_config.get("total_steps", 1),
-        batch_size=raw_config.get("batch_size", 1),
-        steps_per_eval=raw_config.get("steps_per_eval", 2),
-        max_token_length=raw_config.get("max_token_length", 4096),
-        wandb_name=raw_config.get("wandb_name", "infinite_math_test"),
-        ensure_scores_are_not_same=raw_config.get("ensure_scores_are_not_same", False),
-        # InfiniteMath specific parameters
-        starting_level=raw_config.get("infinimath", {}).get("starting_level", 1),
-        progress_threshold=raw_config.get("infinimath", {}).get(
-            "progress_threshold", 0.7
-        ),
-        min_evaluations=raw_config.get("infinimath", {}).get("min_evaluations", 3),
-        correct_reward=raw_config.get("infinimath", {}).get("correct_reward", 1.0),
-        incorrect_reward=raw_config.get("infinimath", {}).get("incorrect_reward", -0.5),
-        apply_length_penalty=raw_config.get("infinimath", {}).get(
-            "apply_length_penalty", True
-        ),
-        length_threshold_ratio=raw_config.get("infinimath", {}).get(
-            "length_threshold_ratio", 0.6
-        ),
-        temperature=raw_config.get("infinimath", {}).get("temperature", 0.7),
-        top_p=raw_config.get("infinimath", {}).get("top_p", 0.9),
-        reward_functions=raw_config.get("infinimath", {}).get(
-            "reward_functions", ["accuracy", "format", "boxed"]
-        ),
-        accuracy_reward_weight=raw_config.get("infinimath", {}).get(
-            "accuracy_reward_weight", 1.0
-        ),
-        format_reward_weight=raw_config.get("infinimath", {}).get(
-            "format_reward_weight", 0.2
-        ),
-        boxed_reward_weight=raw_config.get("infinimath", {}).get(
-            "boxed_reward_weight", 0.3
-        ),
-    )
-
-    # Server configuration from config file or defaults
-    server_configs = []
-
-    if "server_configs" in raw_config:
-        for server_config in raw_config["server_configs"]:
-            api_key = server_config.get("api_key", os.environ.get("OPENAI_API_KEY"))
-            # Handle environment variable references like ${OPENAI_API_KEY}
-            if (
-                isinstance(api_key, str)
-                and api_key.startswith("${")
-                and api_key.endswith("}")
-            ):
-                env_var = api_key[2:-1]
-                api_key = os.environ.get(env_var, "")
-
-            server_configs.append(
-                OpenaiConfig(
-                    model_name=server_config.get("model_name", "gpt-4.1-nano"),
-                    base_url=server_config.get("base_url", None),
-                    api_key=api_key,
-                    num_requests_for_eval=server_config.get(
-                        "num_requests_for_eval", 70
-                    ),
-                )
-            )
-    else:
-        # Default configuration if not specified in config file
-        server_configs.append(
-            OpenaiConfig(
-                model_name="gpt-4.1-nano",
-                base_url=None,
-                api_key=os.environ.get("OPENAI_API_KEY"),
-                num_requests_for_eval=70,
-            )
+        env = InfiniteMathEnv(
+            config=config,
+            server_configs=server_configs,
+            slurm=False,
         )
+    except Exception as e:
+        logger.exception(f"Failed to initialize InfiniteMathEnv: {e}")
+        return
 
-    # Create the environment
-    env = InfiniteMathEnv(
-        config=config,
-        server_configs=server_configs,
-        slurm=False,
-    )
-
-    # Setup the environment
+    logger.info("Setting up environment...")
     await env.setup()
-    logger.info("Environment setup complete")
+    logger.info("Environment setup complete.")
 
-    # Log the number of evaluation problems
-    total_problems = sum(len(probs) for probs in env.eval_problems.values())
-    logger.info(
-        f"Using {total_problems} evaluation problems across {len(env.eval_problems)} difficulty levels"
-    )
-
-    # Get a math problem
+    logger.info("Getting a math problem...")
     item = await env.get_next_item()
     problem_prompt, solution, generator_id = item
 
-    logger.info(f"Problem: {dict(problem_prompt[0])['content']}")
-    logger.info(f"Solution: {solution}")
+    problem_content = dict(problem_prompt[0])['content']
+    logger.info(f"Problem (ID: {generator_id}, Level: {env.curriculum.get_current_level()}): {problem_content}")
+    logger.info(f"Expected Solution: {solution}")
 
-    # Collect trajectories
     logger.info("Collecting trajectories...")
     trajectories_data, backlog = await env.collect_trajectories(item)
+    
+    if not trajectories_data:
+        logger.error("No trajectories were collected.")
+        return
 
-    # Score the collected trajectories
+    logger.info(f"Collected {len(trajectories_data)} data points for scoring (should be 1 for group_size=1).")
+    
     logger.info("Scoring trajectories...")
     scored_data = await env.score(trajectories_data)
 
-    input("Press Enter to continue...")
-    # Print scores
-    logger.info(f"Scores: {scored_data['scores']}")
+    logger.info("\n========== Trajectory Summary ==========")
+    if scored_data and scored_data.get("messages") and scored_data.get("scores"):
+        for i, messages_list in enumerate(scored_data["messages"]):
+            assistant_response = ""
+            if messages_list and messages_list[-1].get("role") == "assistant":
+                assistant_response = messages_list[-1].get("content", "N/A")
+            
+            logger.info(f"--- Attempt {i+1} ---")
+            logger.info(f"Problem: {problem_content}")
+            logger.info(f"Full Assistant Response:\\n{assistant_response}")
+            logger.info(f"Score: {scored_data['scores'][i]}")
+            is_correct_task = env.check_answer(assistant_response, solution)
+            logger.info(f"Checked Correct by env.check_answer: {is_correct_task}")
 
-    # Log the correct/incorrect counts
-    correct_count = sum(1 for score in scored_data["scores"] if score > 0)
-    logger.info(f"Correct answers: {correct_count}/{len(scored_data['scores'])}")
 
-    # Test evaluation function specifically
+        correct_count_buffer = sum(env.percent_correct_buffer)
+        total_attempts_buffer = len(env.percent_correct_buffer)
+        
+        logger.info("\n--- Overall for this run ---")
+        logger.info(f"Expected Solution: {solution}")
+        logger.info(f"Score(s) from env.score: {scored_data['scores']}")
+        if total_attempts_buffer > 0:
+            logger.info(f"Correct based on internal buffer: {correct_count_buffer}/{total_attempts_buffer}")
+        else:
+            logger.info("No attempts recorded in percent_correct_buffer.")
+
+    else:
+        logger.error("Scored data is missing expected fields ('messages' or 'scores').")
+    
+    logger.info("=======================================")
+
+    # Re-add curriculum and evaluation testing
     logger.info("\n=== Testing Evaluation Function ===")
 
     # Record the current level
-    initial_level = env.curriculum.get_current_level()
-    logger.info(f"Current level before evaluation: {initial_level}")
+    initial_level_eval = env.curriculum.get_current_level()
+    logger.info(f"Current level before evaluation: {initial_level_eval}")
     logger.info(f"Level description: {env.curriculum.get_level_description()}")
     logger.info(f"Progress threshold: {env.curriculum.progress_threshold}")
     logger.info(f"Min evaluations needed: {env.curriculum.min_evaluations}")
@@ -205,65 +139,76 @@ async def main():
 
     # Display evaluation results
     logger.info("Evaluation metrics:")
-    for metric_name, metric_value in eval_metrics:
-        logger.info(f"  - {metric_name}: {metric_value}")
+    if eval_metrics:
+        for metric_name, metric_value in eval_metrics:
+            logger.info(f"  - {metric_name}: {metric_value}")
+    else:
+        logger.info("  No evaluation metrics returned.")
 
     # Check if the level advanced
-    new_level = env.curriculum.get_current_level()
-    if new_level > initial_level:
-        logger.info(f"Successfully advanced to level {new_level}!")
+    new_level_eval = env.curriculum.get_current_level()
+    if new_level_eval > initial_level_eval:
+        logger.info(f"Successfully advanced from level {initial_level_eval} to level {new_level_eval} during evaluation!")
         logger.info(f"New level description: {env.curriculum.get_level_description()}")
     else:
+        logger.info(f"Did not advance during evaluation. Remained at level {initial_level_eval}.")
         # Show current progress toward advancement
-        current_level = env.curriculum.get_current_level()
-        if current_level in env.curriculum.performance_history:
-            history = env.curriculum.performance_history[current_level]
+        current_level_desc = env.curriculum.get_current_level()
+        if current_level_desc in env.curriculum.performance_history:
+            history = env.curriculum.performance_history[current_level_desc]
             if len(history) >= env.curriculum.min_evaluations:
                 recent_history = history[-env.curriculum.min_evaluations :]
                 success_rate = sum(recent_history) / len(recent_history)
                 logger.info(
-                    f"Current success rate: {success_rate:.2f} (need {env.curriculum.progress_threshold} to advance)"
+                    f"Current success rate for level {current_level_desc}: {success_rate:.2f} (need {env.curriculum.progress_threshold} to advance)"
                 )
             else:
                 logger.info(
-                    f"Need more evaluations: {len(history)}/{env.curriculum.min_evaluations}"
+                    f"Need more evaluations for level {current_level_desc}: {len(history)}/{env.curriculum.min_evaluations}"
                 )
 
-    # Show all levels and their performance history
-    logger.info("\nPerformance history by level:")
-    for level in sorted(env.curriculum.performance_history.keys()):
-        history = env.curriculum.performance_history[level]
-        if history:
-            success_rate = sum(history) / len(history)
+    # Show all levels and their performance history after evaluation
+    logger.info("\nPerformance history by level (after evaluation run):")
+    for level_hist_key in sorted(env.curriculum.performance_history.keys()):
+        history_list = env.curriculum.performance_history[level_hist_key]
+        if history_list:
+            success_rate_hist = sum(history_list) / len(history_list)
             logger.info(
-                f"  Level {level}: {success_rate:.2f} ({sum(history)}/{len(history)} correct)"
+                f"  Level {level_hist_key}: {success_rate_hist:.2f} ({sum(history_list)}/{len(history_list)} correct)"
             )
         else:
-            logger.info(f"  Level {level}: No data")
+            logger.info(f"  Level {level_hist_key}: No data")
 
     # Test curriculum advancement with simulated performance history
-    logger.info("\n=== Testing Curriculum Advancement ===")
+    logger.info("\n=== Testing Curriculum Advancement Manually ===")
+    initial_level_manual_adv = env.curriculum.get_current_level()
+    logger.info(f"Starting manual advancement test from level: {initial_level_manual_adv}")
 
     # Simulate good performance at current level
-    for _ in range(env.config.min_evaluations):
-        # Get a problem from current level
-        item = await env.get_next_item()
-        _, _, generator_id = item
+    # Ensure we don't try to get items if curriculum is already at max level from previous eval
+    max_level_possible = max(env.curriculum.DIFFICULTY_LEVELS.keys())
+    if initial_level_manual_adv < max_level_possible:
+        logger.info(f"Simulating {config.min_evaluations} correct answers for level {initial_level_manual_adv}...")
+        for _ in range(config.min_evaluations): # Use config for min_evaluations
+            # Get a problem from current level to ensure generator_id is valid for the level
+            # The level might have changed due to the previous env.evaluate() call
+            problem_item_adv_test = await env.get_next_item() 
+            _, _, generator_id_adv_test = problem_item_adv_test
+            env.curriculum.record_performance(generator_id_adv_test, True)
+        
+        # Try to advance difficulty
+        did_advance = env.curriculum.advance_difficulty()
+        new_level_manual_adv = env.curriculum.get_current_level()
 
-        # Record positive performance
-        env.curriculum.record_performance(generator_id, True)
+        logger.info(f"Curriculum advancement test results:")
+        logger.info(f"  - Level before manual simulation: {initial_level_manual_adv}")
+        logger.info(f"  - Recorded {config.min_evaluations} correct answers manually.")
+        logger.info(f"  - Did advance: {did_advance}")
+        logger.info(f"  - Level after manual advancement attempt: {new_level_manual_adv}")
+    else:
+        logger.info(f"Skipping manual advancement simulation as current level {initial_level_manual_adv} is already max level {max_level_possible}.")
 
-    # Try to advance difficulty
-    did_advance = env.curriculum.advance_difficulty()
-    new_level = env.curriculum.get_current_level()
-
-    logger.info(f"Curriculum advancement test:")
-    logger.info(f"  - Starting level: {initial_level}")
-    logger.info(f"  - Recorded {env.config.min_evaluations} correct answers")
-    logger.info(f"  - Did advance: {did_advance}")
-    logger.info(f"  - New level: {new_level}")
-
-    logger.info("Test server completed successfully")
+    logger.info("InfiniteMath local runner completed successfully.")
 
 
 if __name__ == "__main__":