diff --git a/environments/infinimath/curriculum.py b/environments/infinimath/curriculum.py index 3a2d196a..c9c67cf9 100644 --- a/environments/infinimath/curriculum.py +++ b/environments/infinimath/curriculum.py @@ -16,151 +16,138 @@ class MathCurriculum: # Define difficulty levels and map generator IDs to each level DIFFICULTY_LEVELS = { # Level 1: Basic arithmetic operations - # Addition, Subtraction, Multiplication, - # Division, Square, Factorial, Absolute difference, Percentage, IsPrime 1: [ 0, # Addition 1, # Subtraction 2, # Multiplication 3, # Division 8, # Square - 31, # Factorial - 71, # Absolute difference between two numbers - 80, # Percentage of a number - 90, # isprime - ], + 31, # Factorial + 71, # Absolute difference between two numbers + 80, # Percentage of a number + 90, # isprime + ], # Level 2: Basic operations with fractions and pre-algebra - # Square Root, Basic Algebra, Fraction to Decimal, Fraction Division, - # Fraction Multiplication, Compare Fractions, Cube Root, Exponentiation, - # Power of Powers, Percentage difference/error, Is Composite 2: [ 6, # Square Root - 11, # Basic Algebra - 13, # Fraction to Decimal - 16, # Fraction Division - 28, # Fraction Multiplication - 44, # Compare Fractions - 47, # Cube Root - 53, # Exponentiation - 97, # Power of Powers - 118,# Percentage difference - 119,# Percentage error - 124,# Is Composite + 11, # Basic Algebra + 13, # Fraction to Decimal + 16, # Fraction Division + 28, # Fraction Multiplication + 44, # Compare Fractions + 47, # Cube Root + 53, # Exponentiation + 97, # Power of Powers + 118, # Percentage difference + 119, # Percentage error + 124, # Is Composite ], # Level 3: Basic geometry and more algebra - # Area of Triangle, Triangle exists check, Third Angle of Triangle, - # Distance between 2 points, Pythagorean Theorem, - # Fourth Angle of Quadrilateral, Sum of Angles of Polygon, - # Area of a Sector, Perimeter of Polygons, Circumference, Arc length, - # Area of Circle 3: [ - 18, # Area of Triangle - 19, # Triangle exists check - 22, # Third Angle of Triangle - 24, # Distance between 2 points - 25, # Pythagorean Theorem - 49, # Fourth Angle of Quadrilateral - 58, # Sum of Angles of Polygon - 75, # Area of a Sector - 96, # Perimeter of Polygons - 104,# Circumference - 108,# Arc length of Angle - 112,# Area of Circle - 115,# Area of Circle given center and a point on circle - ], + 18, # Area of Triangle + 19, # Triangle exists check + 22, # Third Angle of Triangle + 24, # Distance between 2 points + 25, # Pythagorean Theorem + 49, # Fourth Angle of Quadrilateral + 58, # Sum of Angles of Polygon + 75, # Area of a Sector + 96, # Perimeter of Polygons + 104, # Circumference + 108, # Arc length of Angle + 112, # Area of Circle + 115, # Area of Circle given center and a point on circle + ], # Level 4: More advanced algebra and basic statistics - # LCM, GCD, Midpoint, Factoring Quadratic, System of Equations, - # Linear Equations, Common Factors, Intersection of Two Lines, Simple Interest, - # Quadratic Equation, Mean and Median, Compound Interest, Combine Like terms 4: [ 9, # LCM (Least Common Multiple) - 10, # GCD (Greatest Common Denominator) - 20, # Midpoint of the two point - 21, # Factoring Quadratic - 23, # Solve a System of Equations in R^2 - 26, # Linear Equations - 40, # Common Factors - 41, # Intersection of Two Lines - 45, # Simple Interest - 50, # Quadratic Equation - 76, # Mean and Median - 78, # Compound Interest - 105,# Combine Like terms + 10, # GCD (Greatest Common Denominator) + 20, # Midpoint of the two point + 21, # Factoring Quadratic + 23, # Solve a System of Equations in R^2 + 26, # Linear Equations + 40, # Common Factors + 41, # Intersection of Two Lines + 45, # Simple Interest + 50, # Quadratic Equation + 76, # Mean and Median + 78, # Compound Interest + 105, # Combine Like terms ], # Level 5: Vectors, matrices, and solid geometry 5: [ - 17, # Integer Multiplication with 2x2 Matrix - 32, # Surface Area of Cube - 33, # Surface Area of Cuboid - 34, # Surface Area of Cylinder - 35, # Volume of Cube - 36, # Volume of Cuboid - 37, # Volume of cylinder - 38, # Surface Area of cone - 39, # Volume of cone - 43, # Cross Product of 2 Vectors - 46, # Multiplication of two matrices - 60, # Surface Area of Sphere - 61, # Volume of Sphere - 70, # Angle between 2 vectors - 72, # Dot Product of 2 Vectors - 77, # Determinant to 2x2 Matrix - 95, # Curved surface area of a cylinder - 113,# Volume of frustum - 117,# Volume of Hemisphere - 122,# Volume of pyramid - 123,# Surface area of pyramid - ], # Matrix Multiplication, Surface Areas, Volumes, Vector operations, etc. + 17, # Integer Multiplication with 2x2 Matrix + 32, # Surface Area of Cube + 33, # Surface Area of Cuboid + 34, # Surface Area of Cylinder + 35, # Volume of Cube + 36, # Volume of Cuboid + 37, # Volume of cylinder + 38, # Surface Area of cone + 39, # Volume of cone + 43, # Cross Product of 2 Vectors + 46, # Multiplication of two matrices + 60, # Surface Area of Sphere + 61, # Volume of Sphere + 70, # Angle between 2 vectors + 72, # Dot Product of 2 Vectors + 77, # Determinant to 2x2 Matrix + 95, # Curved surface area of a cylinder + 113, # Volume of frustum + 117, # Volume of Hemisphere + 122, # Volume of pyramid + 123, # Surface area of pyramid + ], # Level 6: Advanced topics (calculus, statistics, computer science) 6: [ 4, # Binary Complement 1s 5, # Modulo Division 7, # Power Rule Differentiation - 12, # Logarithm - 14, # Decimal to Binary - 15, # Binary to Decimal - 27, # Prime Factorisation - 30, # Combinations of Objects - 42, # Permutations - 48, # Power Rule Integration - 52, # Probability of a certain sum appearing on faces of dice - 54, # Confidence interval For sample S - 55, # Comparing surds - 56, # Fibonacci Series - 59, # Mean,Standard Deviation,Variance - 62, # nth Fibonacci number - 64, # Binary to Hexidecimal - 73, # Binary 2's Complement - 79, # Decimal to Hexadecimal - 84, # Converts decimal to octal - 88, # Trigonometric Differentiation - 89, # Definite Integral of Quadratic Equation - 91, # Binary Coded Decimal to Integer - 103,# Decimal to Binary Coded Decimal - 107,# Conditional Probability - 110,# Stationary Points - ], # Binary operations, Calculus, Combinatorics, Probability, etc. + 12, # Logarithm + 14, # Decimal to Binary + 15, # Binary to Decimal + 27, # Prime Factorisation + 30, # Combinations of Objects + 42, # Permutations + 48, # Power Rule Integration + 52, # Probability of a certain sum appearing on faces of dice + 54, # Confidence interval For sample S + 55, # Comparing surds + 56, # Fibonacci Series + 59, # Mean,Standard Deviation,Variance + 62, # nth Fibonacci number + 64, # Binary to Hexidecimal + 73, # Binary 2's Complement + 79, # Decimal to Hexadecimal + 84, # Converts decimal to octal + 88, # Trigonometric Differentiation + 89, # Definite Integral of Quadratic Equation + 91, # Binary Coded Decimal to Integer + 103, # Decimal to Binary Coded Decimal + 107, # Conditional Probability + 110, # Stationary Points + ], # Level 7: Most complex topics 7: [ - 65, # Multiplication of 2 complex numbers - 66, # Geometric Progression - 67, # Geometric Mean of N Numbers - 68, # Harmonic Mean of N Numbers - 69, # Euclidian norm or L2 norm of a vector - 74, # Inverse of a Matrix - 85, # Converts decimal to Roman Numerals - 92, # Complex To Polar Form - 93, # Union,Intersection,Difference of Two Sets - 94, # Base Conversion - 98, # Quotient of Powers with Same Base - 99, # Quotient of Powers with Same Power - 100,# complex Quadratic Equation - 101,# Leap Year or Not - 106,# signum function - 109,# Binomial distribution - 111,# Expanding Factored Binomial - 121,# Product of scientific notations - ], # Complex numbers, Advanced operations, etc. + 65, # Multiplication of 2 complex numbers + 66, # Geometric Progression + 67, # Geometric Mean of N Numbers + 68, # Harmonic Mean of N Numbers + 69, # Euclidian norm or L2 norm of a vector + 74, # Inverse of a Matrix + 85, # Converts decimal to Roman Numerals + 92, # Complex To Polar Form + 93, # Union,Intersection,Difference of Two Sets + 94, # Base Conversion + 98, # Quotient of Powers with Same Base + 99, # Quotient of Powers with Same Power + 100, # complex Quadratic Equation + 101, # Leap Year or Not + 106, # signum function + 109, # Binomial distribution + 111, # Expanding Factored Binomial + 121, # Product of scientific notations + ], } def __init__( diff --git a/environments/infinimath/infinimath_env.py b/environments/infinimath/infinimath_env.py index f4815904..7b0a5a35 100644 --- a/environments/infinimath/infinimath_env.py +++ b/environments/infinimath/infinimath_env.py @@ -3,7 +3,7 @@ import json import logging import random import re -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union from atroposlib.envs.base import BaseEnv, BaseEnvConfig, OpenaiConfig, ScoredDataGroup from atroposlib.utils.tokenize_for_trainer import tokenize_for_trainer diff --git a/environments/infinimath/infinimath_local_server.py b/environments/infinimath/infinimath_local_server.py index d170e443..84853db6 100644 --- a/environments/infinimath/infinimath_local_server.py +++ b/environments/infinimath/infinimath_local_server.py @@ -1,14 +1,11 @@ #!/usr/bin/env python3 -import argparse import asyncio import logging import os from dotenv import load_dotenv -from openai import OpenAI from atroposlib.envs.base import OpenaiConfig -from atroposlib.utils.config_handler import ConfigHandler from environments.infinimath.infinimath_env import ( InfiniteMathEnv, InfiniteMathEnvConfig, @@ -20,182 +17,119 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -def parse_arguments(): - parser = argparse.ArgumentParser(description="InfiniteMath environment server") - parser.add_argument( - "--config", - type=str, - default="infinimath", - help="Configuration file name (without .yaml extension or path for configs/envs/ directory, or full path)", - ) - return parser.parse_args() - - async def main(): - logger.info("Starting InfiniteMath environment server") + logger.info("Starting InfiniteMath environment local runner") - # Parse command line arguments - args = parse_arguments() + config = InfiniteMathEnvConfig( + tokenizer_name="NousResearch/Nous-Hermes-2-Yi-34B", + group_size=1, + use_wandb=False, + max_num_workers=1, + rollout_server_url="http://localhost:8000", + total_steps=1, + batch_size=1, + steps_per_eval=0, + max_token_length=2048, + wandb_name="infinite_math_local_debug", + ensure_scores_are_not_same=False, + starting_level=1, + progress_threshold=0.8, + min_evaluations=3, + correct_reward=1.0, + incorrect_reward=-0.5, + think_block_bonus=0.1, + boxed_answer_bonus=0.2, + apply_length_penalty=False, + length_threshold_ratio=0.6, + temperature=0.3, + top_p=0.9, + ) - # Initialize config handler and load configuration - config_handler = ConfigHandler() - - # Determine config path - if ( - os.path.isabs(args.config) - or "/" in args.config - or args.config.endswith(".yaml") - ): - config_path = args.config - else: - # short form that defaults to the envs directory - config_path = os.path.join( - config_handler.config_dir, f"envs/{args.config}.yaml" + server_configs = [ + OpenaiConfig( + model_name="NousResearch/Nous-Hermes-2-Yi-34B", + base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:9004/v1"), + api_key=os.getenv("OPENAI_API_KEY", "dummy-key"), + num_requests_for_eval=0, ) + ] + + logger.info("Using hardcoded debug configuration.") + logger.debug(f"Env Config: {config}") + logger.debug(f"Server Configs: {server_configs}") - logger.info(f"Loading configuration from: {config_path}") try: - with open(config_path, "r") as f: - import yaml - - raw_config = yaml.safe_load(f) - logger.info(f"Loaded configuration successfully") - except Exception as e: - logger.error(f"Error loading config directly: {e}") - logger.info("Falling back to default config handler") - raw_config = config_handler.load_config(args) - - # Configure the InfiniteMath environment with values from config - config = InfiniteMathEnvConfig( - # Base environment parameters - tokenizer_name=raw_config.get( - "tokenizer_name", "NousResearch/DeepHermes-3-Llama-3-8B-Preview" - ), - group_size=raw_config.get("group_size", 1), - use_wandb=raw_config.get("use_wandb", False), - max_num_workers=raw_config.get("max_num_workers", 1), - rollout_server_url=raw_config.get( - "rollout_server_url", "http://localhost:8000" - ), - total_steps=raw_config.get("total_steps", 1), - batch_size=raw_config.get("batch_size", 1), - steps_per_eval=raw_config.get("steps_per_eval", 2), - max_token_length=raw_config.get("max_token_length", 4096), - wandb_name=raw_config.get("wandb_name", "infinite_math_test"), - ensure_scores_are_not_same=raw_config.get("ensure_scores_are_not_same", False), - # InfiniteMath specific parameters - starting_level=raw_config.get("infinimath", {}).get("starting_level", 1), - progress_threshold=raw_config.get("infinimath", {}).get( - "progress_threshold", 0.7 - ), - min_evaluations=raw_config.get("infinimath", {}).get("min_evaluations", 3), - correct_reward=raw_config.get("infinimath", {}).get("correct_reward", 1.0), - incorrect_reward=raw_config.get("infinimath", {}).get("incorrect_reward", -0.5), - apply_length_penalty=raw_config.get("infinimath", {}).get( - "apply_length_penalty", True - ), - length_threshold_ratio=raw_config.get("infinimath", {}).get( - "length_threshold_ratio", 0.6 - ), - temperature=raw_config.get("infinimath", {}).get("temperature", 0.7), - top_p=raw_config.get("infinimath", {}).get("top_p", 0.9), - reward_functions=raw_config.get("infinimath", {}).get( - "reward_functions", ["accuracy", "format", "boxed"] - ), - accuracy_reward_weight=raw_config.get("infinimath", {}).get( - "accuracy_reward_weight", 1.0 - ), - format_reward_weight=raw_config.get("infinimath", {}).get( - "format_reward_weight", 0.2 - ), - boxed_reward_weight=raw_config.get("infinimath", {}).get( - "boxed_reward_weight", 0.3 - ), - ) - - # Server configuration from config file or defaults - server_configs = [] - - if "server_configs" in raw_config: - for server_config in raw_config["server_configs"]: - api_key = server_config.get("api_key", os.environ.get("OPENAI_API_KEY")) - # Handle environment variable references like ${OPENAI_API_KEY} - if ( - isinstance(api_key, str) - and api_key.startswith("${") - and api_key.endswith("}") - ): - env_var = api_key[2:-1] - api_key = os.environ.get(env_var, "") - - server_configs.append( - OpenaiConfig( - model_name=server_config.get("model_name", "gpt-4.1-nano"), - base_url=server_config.get("base_url", None), - api_key=api_key, - num_requests_for_eval=server_config.get( - "num_requests_for_eval", 70 - ), - ) - ) - else: - # Default configuration if not specified in config file - server_configs.append( - OpenaiConfig( - model_name="gpt-4.1-nano", - base_url=None, - api_key=os.environ.get("OPENAI_API_KEY"), - num_requests_for_eval=70, - ) + env = InfiniteMathEnv( + config=config, + server_configs=server_configs, + slurm=False, ) + except Exception as e: + logger.exception(f"Failed to initialize InfiniteMathEnv: {e}") + return - # Create the environment - env = InfiniteMathEnv( - config=config, - server_configs=server_configs, - slurm=False, - ) - - # Setup the environment + logger.info("Setting up environment...") await env.setup() - logger.info("Environment setup complete") + logger.info("Environment setup complete.") - # Log the number of evaluation problems - total_problems = sum(len(probs) for probs in env.eval_problems.values()) - logger.info( - f"Using {total_problems} evaluation problems across {len(env.eval_problems)} difficulty levels" - ) - - # Get a math problem + logger.info("Getting a math problem...") item = await env.get_next_item() problem_prompt, solution, generator_id = item - logger.info(f"Problem: {dict(problem_prompt[0])['content']}") - logger.info(f"Solution: {solution}") + problem_content = dict(problem_prompt[0])['content'] + logger.info(f"Problem (ID: {generator_id}, Level: {env.curriculum.get_current_level()}): {problem_content}") + logger.info(f"Expected Solution: {solution}") - # Collect trajectories logger.info("Collecting trajectories...") trajectories_data, backlog = await env.collect_trajectories(item) + + if not trajectories_data: + logger.error("No trajectories were collected.") + return - # Score the collected trajectories + logger.info(f"Collected {len(trajectories_data)} data points for scoring (should be 1 for group_size=1).") + logger.info("Scoring trajectories...") scored_data = await env.score(trajectories_data) - input("Press Enter to continue...") - # Print scores - logger.info(f"Scores: {scored_data['scores']}") + logger.info("\n========== Trajectory Summary ==========") + if scored_data and scored_data.get("messages") and scored_data.get("scores"): + for i, messages_list in enumerate(scored_data["messages"]): + assistant_response = "" + if messages_list and messages_list[-1].get("role") == "assistant": + assistant_response = messages_list[-1].get("content", "N/A") + + logger.info(f"--- Attempt {i+1} ---") + logger.info(f"Problem: {problem_content}") + logger.info(f"Full Assistant Response:\\n{assistant_response}") + logger.info(f"Score: {scored_data['scores'][i]}") + is_correct_task = env.check_answer(assistant_response, solution) + logger.info(f"Checked Correct by env.check_answer: {is_correct_task}") - # Log the correct/incorrect counts - correct_count = sum(1 for score in scored_data["scores"] if score > 0) - logger.info(f"Correct answers: {correct_count}/{len(scored_data['scores'])}") - # Test evaluation function specifically + correct_count_buffer = sum(env.percent_correct_buffer) + total_attempts_buffer = len(env.percent_correct_buffer) + + logger.info("\n--- Overall for this run ---") + logger.info(f"Expected Solution: {solution}") + logger.info(f"Score(s) from env.score: {scored_data['scores']}") + if total_attempts_buffer > 0: + logger.info(f"Correct based on internal buffer: {correct_count_buffer}/{total_attempts_buffer}") + else: + logger.info("No attempts recorded in percent_correct_buffer.") + + else: + logger.error("Scored data is missing expected fields ('messages' or 'scores').") + + logger.info("=======================================") + + # Re-add curriculum and evaluation testing logger.info("\n=== Testing Evaluation Function ===") # Record the current level - initial_level = env.curriculum.get_current_level() - logger.info(f"Current level before evaluation: {initial_level}") + initial_level_eval = env.curriculum.get_current_level() + logger.info(f"Current level before evaluation: {initial_level_eval}") logger.info(f"Level description: {env.curriculum.get_level_description()}") logger.info(f"Progress threshold: {env.curriculum.progress_threshold}") logger.info(f"Min evaluations needed: {env.curriculum.min_evaluations}") @@ -205,65 +139,76 @@ async def main(): # Display evaluation results logger.info("Evaluation metrics:") - for metric_name, metric_value in eval_metrics: - logger.info(f" - {metric_name}: {metric_value}") + if eval_metrics: + for metric_name, metric_value in eval_metrics: + logger.info(f" - {metric_name}: {metric_value}") + else: + logger.info(" No evaluation metrics returned.") # Check if the level advanced - new_level = env.curriculum.get_current_level() - if new_level > initial_level: - logger.info(f"Successfully advanced to level {new_level}!") + new_level_eval = env.curriculum.get_current_level() + if new_level_eval > initial_level_eval: + logger.info(f"Successfully advanced from level {initial_level_eval} to level {new_level_eval} during evaluation!") logger.info(f"New level description: {env.curriculum.get_level_description()}") else: + logger.info(f"Did not advance during evaluation. Remained at level {initial_level_eval}.") # Show current progress toward advancement - current_level = env.curriculum.get_current_level() - if current_level in env.curriculum.performance_history: - history = env.curriculum.performance_history[current_level] + current_level_desc = env.curriculum.get_current_level() + if current_level_desc in env.curriculum.performance_history: + history = env.curriculum.performance_history[current_level_desc] if len(history) >= env.curriculum.min_evaluations: recent_history = history[-env.curriculum.min_evaluations :] success_rate = sum(recent_history) / len(recent_history) logger.info( - f"Current success rate: {success_rate:.2f} (need {env.curriculum.progress_threshold} to advance)" + f"Current success rate for level {current_level_desc}: {success_rate:.2f} (need {env.curriculum.progress_threshold} to advance)" ) else: logger.info( - f"Need more evaluations: {len(history)}/{env.curriculum.min_evaluations}" + f"Need more evaluations for level {current_level_desc}: {len(history)}/{env.curriculum.min_evaluations}" ) - # Show all levels and their performance history - logger.info("\nPerformance history by level:") - for level in sorted(env.curriculum.performance_history.keys()): - history = env.curriculum.performance_history[level] - if history: - success_rate = sum(history) / len(history) + # Show all levels and their performance history after evaluation + logger.info("\nPerformance history by level (after evaluation run):") + for level_hist_key in sorted(env.curriculum.performance_history.keys()): + history_list = env.curriculum.performance_history[level_hist_key] + if history_list: + success_rate_hist = sum(history_list) / len(history_list) logger.info( - f" Level {level}: {success_rate:.2f} ({sum(history)}/{len(history)} correct)" + f" Level {level_hist_key}: {success_rate_hist:.2f} ({sum(history_list)}/{len(history_list)} correct)" ) else: - logger.info(f" Level {level}: No data") + logger.info(f" Level {level_hist_key}: No data") # Test curriculum advancement with simulated performance history - logger.info("\n=== Testing Curriculum Advancement ===") + logger.info("\n=== Testing Curriculum Advancement Manually ===") + initial_level_manual_adv = env.curriculum.get_current_level() + logger.info(f"Starting manual advancement test from level: {initial_level_manual_adv}") # Simulate good performance at current level - for _ in range(env.config.min_evaluations): - # Get a problem from current level - item = await env.get_next_item() - _, _, generator_id = item + # Ensure we don't try to get items if curriculum is already at max level from previous eval + max_level_possible = max(env.curriculum.DIFFICULTY_LEVELS.keys()) + if initial_level_manual_adv < max_level_possible: + logger.info(f"Simulating {config.min_evaluations} correct answers for level {initial_level_manual_adv}...") + for _ in range(config.min_evaluations): # Use config for min_evaluations + # Get a problem from current level to ensure generator_id is valid for the level + # The level might have changed due to the previous env.evaluate() call + problem_item_adv_test = await env.get_next_item() + _, _, generator_id_adv_test = problem_item_adv_test + env.curriculum.record_performance(generator_id_adv_test, True) + + # Try to advance difficulty + did_advance = env.curriculum.advance_difficulty() + new_level_manual_adv = env.curriculum.get_current_level() - # Record positive performance - env.curriculum.record_performance(generator_id, True) + logger.info(f"Curriculum advancement test results:") + logger.info(f" - Level before manual simulation: {initial_level_manual_adv}") + logger.info(f" - Recorded {config.min_evaluations} correct answers manually.") + logger.info(f" - Did advance: {did_advance}") + logger.info(f" - Level after manual advancement attempt: {new_level_manual_adv}") + else: + logger.info(f"Skipping manual advancement simulation as current level {initial_level_manual_adv} is already max level {max_level_possible}.") - # Try to advance difficulty - did_advance = env.curriculum.advance_difficulty() - new_level = env.curriculum.get_current_level() - - logger.info(f"Curriculum advancement test:") - logger.info(f" - Starting level: {initial_level}") - logger.info(f" - Recorded {env.config.min_evaluations} correct answers") - logger.info(f" - Did advance: {did_advance}") - logger.info(f" - New level: {new_level}") - - logger.info("Test server completed successfully") + logger.info("InfiniteMath local runner completed successfully.") if __name__ == "__main__":