# reasoning-gym/tests/test_polynomial_equations.py

import ast
import random
import unittest

from sympy import solve, Symbol, Eq, parse_expr

from reasoning_gym.curricula.algebra.polynomial_equations_curriculum import PolynomialEquationsCurriculum
from reasoning_gym.exercises.algebra.polynomial_equations import PolynomialEquationsExercise

class TestPolynomialEquationsParsing(unittest.TestCase):
    """Checks on the shape of ``executed_parts`` metadata for polynomial expressions.

    NOTE(review): both tests only inspect a dictionary literal that the test
    itself builds; ``self.exercise`` is created in ``setUp`` but never called,
    so no parsing code is exercised here yet.
    """

    def setUp(self):
        self.exercise = PolynomialEquationsExercise()

    def _check_executed_parts(self, metadata, terms, operators, variable):
        """Compare the ``executed_parts`` entry of ``metadata`` field by field."""
        parts = metadata['executed_parts']
        self.assertEqual(parts["terms"], terms)
        self.assertEqual(parts["operators"], operators)
        self.assertEqual(parts["variable"], variable)

    def test_parse_expression(self):
        """Metadata for ``2*x**2 + 3*x + 1`` exposes terms, operators and variable."""
        executed_parts = {
            'terms': ['2*x**2', '3*x', '1'],
            'operators': ['+', '+'],
            'variable': 'x'
        }
        test_metadata = {'type': 'direct', 'executed_parts': executed_parts}

        self._check_executed_parts(
            test_metadata,
            terms=["2*x**2", "3*x", "1"],
            operators=["+", "+"],
            variable="x",
        )

    def test_parse_negative_terms(self):
        """Metadata keeps the leading minus sign attached to a negative term."""
        executed_parts = {
            'terms': ['-2*x**2', '4*x'],
            'operators': ['+'],
            'variable': 'x'
        }
        test_metadata = {'type': 'direct', 'executed_parts': executed_parts}

        self._check_executed_parts(
            test_metadata,
            terms=["-2*x**2", "4*x"],
            operators=["+"],
            variable="x",
        )

class TestPolynomialEquationsEvaluation(unittest.TestCase):
    """Checks the string returned by ``_evaluate_expression`` for fixed polynomials in ``x``."""

    def setUp(self):
        self.exercise = PolynomialEquationsExercise()

    def _roots_of(self, terms, operators):
        """Feed the given parts (variable ``x``) to ``_evaluate_expression`` and return its result."""
        parsed = {
            "terms": terms,
            "operators": operators,
            "variable": "x"
        }
        return self.exercise._evaluate_expression(parsed)

    def test_quadratic_equation(self):
        """x^2 - 5x + 6 = 0 has roots at x = 2 and x = 3."""
        roots = self._roots_of(["x**2", "-5*x", "6"], ["+", "+"])
        self.assertEqual(roots, "[2.0, 3.0]")

    def test_linear_equation(self):
        """2x - 4 = 0 has a single root at x = 2."""
        roots = self._roots_of(["2*x", "-4"], ["+"])
        self.assertEqual(roots, "[2.0]")

    def test_no_real_solutions(self):
        """x^2 + 1 = 0 has no real solutions, so the result is an empty list."""
        roots = self._roots_of(["x**2", "1"], ["+"])
        self.assertEqual(roots, "[]")

class TestPolynomialEquationsGeneration(unittest.TestCase):
    """Structural tests for problems returned by ``PolynomialEquationsExercise.generate``.

    Every test draws from a curriculum whose ``rng`` attribute is a
    ``random.Random`` seeded with 42.
    """

    def setUp(self):
        self.curriculum = PolynomialEquationsCurriculum()
        self.exercise = PolynomialEquationsExercise()
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    def test_problem_structure(self):
        """Test that generated problems have the correct structure"""
        problem = self.exercise.generate(self.curriculum)

        # Check basic structure
        for key in ("question", "answer", "metadata"):
            self.assertIn(key, problem)

        # Check metadata structure
        metadata = problem["metadata"]
        self.assertEqual(metadata["type"], "direct")
        self.assertIn("executed_parts", metadata)
        executed_parts = metadata["executed_parts"]
        for key in ("terms", "operators", "variable"):
            self.assertIn(key, executed_parts)

    def test_term_generation(self):
        """Test generation of polynomial terms"""
        # Set curriculum to basic settings
        self.curriculum.set_attr_level("coefficient_value", 0)  # 1-10
        self.curriculum.set_attr_level("max_degree", 0)  # degree 1
        self.curriculum.set_attr_level("sign", 0)  # No signs

        problem = self.exercise.generate(self.curriculum)
        executed_parts = problem["metadata"]["executed_parts"]

        # Check we have at least one term
        self.assertGreater(len(executed_parts["terms"]), 0)

        # Check first term format: a number, optionally multiplied by the
        # variable, or the bare variable itself.  The variable name is read
        # from the metadata rather than assumed to be "x", because the
        # generator may pick a different name.
        variable = executed_parts["variable"]
        first_term = executed_parts["terms"][0]
        self.assertIsInstance(first_term, str)
        digits_only = first_term
        for token in ('*', variable, '-', '.'):
            digits_only = digits_only.replace(token, '')
        self.assertTrue(
            digits_only.isdigit() or first_term == variable,
            f"Unexpected term format: {first_term!r}",
        )

    def test_operator_generation(self):
        """Test generation of operators"""
        self.curriculum.set_attr_level("operators", 1)  # +, -
        self.curriculum.set_attr_level("num_terms", 0)  # 2 terms

        problem = self.exercise.generate(self.curriculum)
        executed_parts = problem["metadata"]["executed_parts"]
        operators = executed_parts["operators"]

        # Check we have operators for n-1 terms
        self.assertEqual(len(operators), len(executed_parts["terms"]) - 1)

        # Check every operator is valid, not only the first one
        for operator in operators:
            self.assertIn(operator, ["+", "-"])

class TestPolynomialEquationsComprehensive(unittest.TestCase):
    """Randomised end-to-end checks of generated polynomial equation problems.

    Each test samples problems from ``PolynomialEquationsExercise.generate``
    using a curriculum whose ``rng`` is a ``random.Random`` seeded with 42, and
    inspects the ``executed_parts`` metadata and the ``answer`` string.
    """

    # Maps a root count to the ``solution_stats`` counter it increments.
    _SOLUTION_BUCKETS = {0: 'no_solutions', 1: 'one_solution', 2: 'two_solutions'}

    def setUp(self):
        self.curriculum = PolynomialEquationsCurriculum()
        self.exercise = PolynomialEquationsExercise()
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    @staticmethod
    def _leading_coefficient(term):
        """Return the numeric factor before the first ``*`` in ``term``, or ``None``.

        ``None`` is returned when the term has no ``*``, when the leading part
        is empty or a bare sign, or when it is not a number (for example the
        ``x`` in ``x**2``), so callers never see a ``ValueError``.
        """
        head, star, _ = term.partition('*')
        if not star or head in ('', '-', '+'):
            return None
        try:
            return float(head)
        except ValueError:
            return None

    @staticmethod
    def _max_degree(terms, variable):
        """Return the highest exponent found in ``terms`` (0 when no term has the variable)."""
        highest = 0
        for term in terms:
            if "**" in term:
                highest = max(highest, int(term.split("**")[1]))
            elif variable in term:  # Variable without exponent means degree 1
                highest = max(highest, 1)
        return highest

    @staticmethod
    def _parse_answer(answer):
        """Turn the answer string into a Python object without executing code.

        The callers treat the result as a sequence of numbers; presumably the
        string is the repr of a list of floats such as ``[2.0, 3.0]`` -- confirm
        against the exercise implementation.
        """
        return ast.literal_eval(answer)

    @staticmethod
    def _bump(counter, key):
        """Increment ``counter[key]``, starting from zero for unseen keys."""
        counter[key] = counter.get(key, 0) + 1

    def _assert_solutions_satisfy(self, terms, operators, variable, solutions):
        """Assert that every value in ``solutions`` is a root of the rebuilt expression."""
        symbol = Symbol(variable)
        pieces = [terms[0]]
        for index, term in enumerate(terms[1:]):
            pieces.append(f"{operators[index]} {term}")
        # Bind the variable name explicitly so that sympy cannot resolve it to
        # one of its own predefined names.
        expression = parse_expr(" ".join(pieces), local_dict={variable: symbol})
        for solution in solutions:
            residual = abs(float(expression.subs(symbol, solution)))
            self.assertAlmostEqual(residual, 0, places=10)

    def test_variable_consistency(self):
        """Test that the same variable is used consistently throughout the equation"""
        num_samples = 50

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]
            var_name = executed_parts["variable"]

            # Check variable appears in question
            self.assertIn(var_name, problem["question"])

            # Check that no term introduces a symbol other than the declared variable
            symbol = Symbol(var_name)
            for term in executed_parts["terms"]:
                parsed = parse_expr(term, local_dict={var_name: symbol})
                self.assertLessEqual(
                    parsed.free_symbols, {symbol},
                    f"Term {term!r} uses a variable other than {var_name!r}",
                )

    def test_coefficient_ranges(self):
        """Test that coefficients are within expected ranges"""
        self.curriculum.set_attr_level("coefficient_value", 0)  # 1-10
        num_samples = 50

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]

            for term in executed_parts["terms"]:
                coefficient = self._leading_coefficient(term)
                if coefficient is None:  # No explicit numeric coefficient
                    continue
                self.assertLessEqual(abs(coefficient), 10)
                self.assertGreater(abs(coefficient), 0)

    def test_degree_constraints(self):
        """Test that polynomial degrees respect the curriculum settings"""
        self.curriculum.set_attr_level("max_degree", 0)  # Level 0 means max degree 1
        num_samples = 50

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]
            degree = self._max_degree(executed_parts["terms"], executed_parts["variable"])
            self.assertLessEqual(degree, 1)

    def test_solution_validity(self):
        """Test that generated solutions are valid"""
        num_samples = 50

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]

            # Parse the answer string to get solutions
            solutions = self._parse_answer(problem["answer"])

            if solutions:  # If there are real solutions
                self._assert_solutions_satisfy(
                    executed_parts["terms"],
                    executed_parts["operators"],
                    executed_parts["variable"],
                    solutions,
                )

    @staticmethod
    def _empty_stats():
        """Return a fresh statistics accumulator for the comprehensive test."""
        return {
            'operator_counts': {},      # Count of each operator used
            'degree_counts': {},        # Count of polynomial degrees
            'term_counts': {},          # Distribution of number of terms
            'variable_counts': {},      # Count of each variable used
            'coefficient_stats': {      # Track coefficient statistics
                'min': float('inf'),
                'max': float('-inf'),
                'total': 0,
                'count': 0,
                'unique': set()
            },
            'solution_stats': {         # Track solution statistics
                'no_solutions': 0,      # Count of equations with no real solutions
                'one_solution': 0,      # Count of equations with exactly one solution
                'two_solutions': 0,     # Count of equations with exactly two solutions
                'min': float('inf'),    # Minimum solution value
                'max': float('-inf'),   # Maximum solution value
            },
            'level_distribution': {     # Track curriculum level usage
                'max_degree': {},
                'num_terms': {},
                'coefficient_value': {},
                'operators': {},
                'sign': {},
                'var_name': {}
            }
        }

    @staticmethod
    def _print_statistics(stats, total_eqs):
        """Print the collected statistics; ``total_eqs`` is the number of sampled problems."""
        print("\nComprehensive Random Evaluation Statistics:")
        print("-" * 50)

        print("\nOperator Distribution:")
        total_ops = sum(stats['operator_counts'].values())
        for op, count in sorted(stats['operator_counts'].items()):
            print(f"  {op}: {count} ({count/total_ops*100:.1f}%)")

        print("\nDegree Distribution:")
        for degree, count in sorted(stats['degree_counts'].items()):
            print(f"  Degree {degree}: {count} ({count/total_eqs*100:.1f}%)")

        print("\nTerm Count Distribution:")
        for term_count, count in sorted(stats['term_counts'].items()):
            print(f"  {term_count} terms: {count} ({count/total_eqs*100:.1f}%)")

        print("\nVariable Distribution:")
        total_vars = sum(stats['variable_counts'].values())
        for name, count in sorted(stats['variable_counts'].items()):
            print(f"  {name}: {count} ({count/total_vars*100:.1f}%)")

        coefficient_stats = stats['coefficient_stats']
        print("\nCoefficient Statistics:")
        print(f"  Range: [{coefficient_stats['min']:.1f} to {coefficient_stats['max']:.1f}]")
        if coefficient_stats['count'] > 0:
            avg = coefficient_stats['total'] / coefficient_stats['count']
            print(f"  Average: {avg:.2f}")
            print(f"  Unique values: {len(coefficient_stats['unique'])}")

        solution_stats = stats['solution_stats']
        print("\nSolution Statistics:")
        print(f"  No real solutions: {solution_stats['no_solutions']} ({solution_stats['no_solutions']/total_eqs*100:.1f}%)")
        print(f"  One solution: {solution_stats['one_solution']} ({solution_stats['one_solution']/total_eqs*100:.1f}%)")
        print(f"  Two solutions: {solution_stats['two_solutions']} ({solution_stats['two_solutions']/total_eqs*100:.1f}%)")
        if solution_stats['min'] != float('inf'):
            print(f"  Solution range: [{solution_stats['min']:.2f} to {solution_stats['max']:.2f}]")

        print("\nCurriculum Level Distribution:")
        for attr, level_counts in sorted(stats['level_distribution'].items()):
            print(f"\n  {attr}:")
            for level, count in sorted(level_counts.items()):
                print(f"    Level {level}: {count} ({count/total_eqs*100:.1f}%)")

    def _assert_statistical_properties(self, stats):
        """Assert the aggregate properties expected of the sampled problems."""
        # 1. Check we see all operators when using operator level 1
        if 1 in stats['level_distribution']['operators']:
            self.assertTrue(all(op in stats['operator_counts'] for op in ["+", "-"]),
                            "Not all operators were generated")

        # 2. Check degree distribution matches curriculum settings
        max_possible_degree = max(stats['degree_counts'])
        self.assertLessEqual(max_possible_degree, 3, "Generated degree exceeds maximum allowed")

        # 3. Check term count constraints
        min_terms = min(stats['term_counts'])
        max_terms = max(stats['term_counts'])
        self.assertGreaterEqual(min_terms, 2, "Generated equations with too few terms")
        self.assertLessEqual(max_terms, 4, "Generated equations with too many terms")

        # 4. Check coefficient ranges
        coefficient_stats = stats['coefficient_stats']
        if coefficient_stats['count'] > 0:
            self.assertGreater(len(coefficient_stats['unique']), 3,
                               "Too few unique coefficients generated")
            self.assertGreater(coefficient_stats['min'], 0,
                               "Generated zero or negative coefficients")
            self.assertLessEqual(coefficient_stats['max'], 100,
                                 "Generated coefficients exceed maximum allowed")

        # 5. Check solution distribution
        solution_stats = stats['solution_stats']
        total_with_solutions = solution_stats['one_solution'] + solution_stats['two_solutions']
        if total_with_solutions > 0:
            self.assertGreater(solution_stats['one_solution'], 0,
                               "No equations with exactly one solution generated")
            self.assertGreater(solution_stats['two_solutions'], 0,
                               "No equations with exactly two solutions generated")

    def test_comprehensive_random_evaluation(self):
        """Test 1000 random problems across all levels to verify correct generation and evaluation"""
        num_samples = 1000
        stats = self._empty_stats()

        for _ in range(num_samples):
            # Randomly set curriculum levels
            levels = {
                'max_degree': self.rng.randint(0, 2),
                'num_terms': self.rng.randint(0, 2),
                'coefficient_value': self.rng.randint(0, 2),
                'operators': self.rng.randint(0, 1),
                'sign': self.rng.randint(0, 1),
                'var_name': self.rng.randint(0, 2)
            }

            # Update level distribution stats
            for attr, level in levels.items():
                self._bump(stats['level_distribution'][attr], level)

            # Set curriculum levels
            for attr, level in levels.items():
                self.curriculum.set_attr_level(attr, level)

            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]
            terms = executed_parts["terms"]
            operators = executed_parts["operators"]
            variable = executed_parts["variable"]

            # Update operator, term count, variable and degree statistics
            for op in operators:
                self._bump(stats['operator_counts'], op)
            self._bump(stats['term_counts'], len(terms))
            self._bump(stats['variable_counts'], variable)
            self._bump(stats['degree_counts'], self._max_degree(terms, variable))

            # Update coefficient statistics
            coefficient_stats = stats['coefficient_stats']
            for term in terms:
                coefficient = self._leading_coefficient(term)
                if coefficient is None:
                    # Skip terms without an explicit numeric coefficient
                    continue
                value = abs(coefficient)
                coefficient_stats['min'] = min(coefficient_stats['min'], value)
                coefficient_stats['max'] = max(coefficient_stats['max'], value)
                coefficient_stats['total'] += value
                coefficient_stats['count'] += 1
                coefficient_stats['unique'].add(value)

            # Update solution statistics
            solutions = self._parse_answer(problem["answer"])
            solution_stats = stats['solution_stats']
            bucket = self._SOLUTION_BUCKETS.get(len(solutions))
            if bucket is not None:
                solution_stats[bucket] += 1

            if solutions:  # If there are real solutions
                # Track the range for any number of roots, including three or more
                solution_stats['min'] = min(solution_stats['min'], min(solutions))
                solution_stats['max'] = max(solution_stats['max'], max(solutions))

                # Verify solution correctness
                self._assert_solutions_satisfy(terms, operators, variable, solutions)

        # Print comprehensive statistics
        self._print_statistics(stats, num_samples)

        # Verify statistical properties
        self._assert_statistical_properties(stats)

class TestPolynomialEquationsGenerate(unittest.TestCase):
    """Test the generate function with different curriculum settings"""

    def setUp(self):
        self.curriculum = PolynomialEquationsCurriculum()
        self.exercise = PolynomialEquationsExercise()
        self.rng = random.Random(42)  # Fixed seed for reproducibility
        self.curriculum.rng = self.rng

    @staticmethod
    def _leading_coefficient(term):
        """Return the numeric factor before the first ``*`` in ``term``, or ``None``.

        ``None`` is returned when the term has no ``*``, when the leading part
        is empty or a bare sign, or when it is not a number (for example the
        ``x`` in ``x**2``), so callers never see a ``ValueError``.
        """
        head, star, _ = term.partition('*')
        if not star or head in ('', '-', '+'):
            return None
        try:
            return float(head)
        except ValueError:
            return None

    def _sample_executed_parts(self, num_samples):
        """Yield the ``executed_parts`` metadata of ``num_samples`` freshly generated problems."""
        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            yield problem["metadata"]["executed_parts"]

    def test_generate_basic_linear(self):
        """Test generation of basic linear equations"""
        # Configure curriculum for simple linear equations
        self.curriculum.set_attr_level("max_degree", 0)  # Linear equations
        self.curriculum.set_attr_level("num_terms", 0)  # 2 terms
        self.curriculum.set_attr_level("coefficient_value", 0)  # Small coefficients
        self.curriculum.set_attr_level("sign", 0)  # No signs
        self.curriculum.set_attr_level("operators", 0)  # Only +

        problem = self.exercise.generate(self.curriculum)

        # Verify structure
        for key in ("question", "answer", "metadata"):
            self.assertIn(key, problem)

        # Verify terms and operators
        executed_parts = problem["metadata"]["executed_parts"]
        self.assertGreaterEqual(len(executed_parts["terms"]), 2, "Not enough terms generated")
        self.assertGreaterEqual(len(executed_parts["operators"]), 1, "No operators generated")

        # Verify operator is addition
        self.assertEqual(executed_parts["operators"][0], "+")

        # Verify terms have correct degree
        for term in executed_parts["terms"]:
            self.assertNotIn("**", term, "Term should not have exponent > 1")

    def test_generate_with_signs(self):
        """Test generation with positive/negative signs"""
        self.curriculum.set_attr_level("operators", 0)  # Only +
        self.curriculum.set_attr_level("num_terms", 0)  # 2 terms
        self.curriculum.set_attr_level("sign", 1)  # Allow -
        self.curriculum.set_attr_level("max_degree", 0)  # Linear equations

        terms_seen = []
        for executed_parts in self._sample_executed_parts(50):
            terms_seen.extend(executed_parts["terms"])

        # Check we see both positive and negative terms
        has_negative = any(term.startswith('-') for term in terms_seen)
        has_positive = any(not term.startswith('-') for term in terms_seen)
        self.assertTrue(has_positive, "No positive terms generated")
        self.assertTrue(has_negative, "No negative terms generated")

    def test_term_count_distribution(self):
        """Test that term counts follow the correct distribution"""
        self.curriculum.set_attr_level("num_terms", 2)  # 2-4 terms

        term_counts = set()
        for executed_parts in self._sample_executed_parts(100):
            term_count = len(executed_parts["terms"])
            term_counts.add(term_count)
            self.assertTrue(2 <= term_count <= 4, f"Term count {term_count} outside valid range [2,4]")

        # Verify we see different term counts
        self.assertGreater(len(term_counts), 1, "Only one term count generated")

    def test_operator_distribution(self):
        """Test distribution of operators"""
        self.curriculum.set_attr_level("operators", 1)  # +, -

        operators_seen = set()
        for executed_parts in self._sample_executed_parts(100):
            operators_seen.update(executed_parts["operators"])

        # Check we see both operators
        self.assertIn("+", operators_seen, "No + operators generated")
        self.assertIn("-", operators_seen, "No - operators generated")

    def test_variable_distribution(self):
        """Test distribution of variable names"""
        self.curriculum.set_attr_level("var_name", 0)  # x, y, z

        variables_seen = {
            executed_parts["variable"]
            for executed_parts in self._sample_executed_parts(100)
        }

        # Check we see multiple variables
        self.assertGreater(len(variables_seen), 1, "Only one variable name generated")
        # Compare whole names against the allowed set; a substring test against
        # "xyz" would also accept values such as "xy" or the empty string.
        self.assertTrue(variables_seen <= {"x", "y", "z"},
                        f"Invalid variables generated: {variables_seen}")

    def test_coefficient_distribution(self):
        """Test distribution of coefficient values"""
        self.curriculum.set_attr_level("coefficient_value", 0)  # 1-10

        coefficients = []
        for executed_parts in self._sample_executed_parts(100):
            for term in executed_parts["terms"]:
                coefficient = self._leading_coefficient(term)
                if coefficient is not None:
                    coefficients.append(abs(coefficient))

        # Check coefficient range
        self.assertTrue(all(1 <= c <= 10 for c in coefficients),
                        "Coefficients outside valid range [1,10]")
        # Check we see different values
        unique_coeffs = set(coefficients)
        self.assertGreater(len(unique_coeffs), 3,
                           f"Too few unique coefficients: {unique_coeffs}")

    def test_error_handling(self):
        """Test error handling in equation generation"""
        # Test with invalid attribute level
        with self.assertRaises(ValueError):
            self.curriculum.set_attr_level("max_degree", 999)

        # Test with invalid attribute name
        with self.assertRaises(KeyError):
            self.curriculum.set_attr_level("invalid_attr", 0)

# Run every test case defined in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()