# reasoning-gym/tests/test_simple_equations.py

from reasoning_gym.curricula.algebra.simple_equations_curriculum import SimpleEquationsCurriculum
from reasoning_gym.exercises.algebra.simple_equations import SimpleEquationsExercise
import unittest
import random
from sympy import solve, Symbol, Eq, parse_expr

class TestSimpleEquationsParsing(unittest.TestCase):
    """Check the layout of hand-written ``executed_parts`` metadata.

    Each test builds a metadata dict and asserts on the entries of its
    ``executed_parts`` sub-dict.  ``self.exercise`` is created in ``setUp``
    but no method of the exercise is invoked by these tests.
    """

    # Keys compared by ``_assert_parts``, in the order they are asserted.
    _PART_KEYS = ("lhs_terms", "rhs_terms", "lhs_operators", "rhs_operators", "variable")

    def setUp(self):
        self.exercise = SimpleEquationsExercise()

    def _assert_parts(self, metadata, expected):
        """Assert every entry of ``metadata['executed_parts']`` equals ``expected``."""
        parts = metadata['executed_parts']
        for key in self._PART_KEYS:
            self.assertEqual(parts[key], expected[key])

    def test_parse_expression(self):
        """Metadata for ``2*x + 3 = 5`` exposes the expected parts"""
        fixture = {
            'type': 'direct',
            'executed_parts': {
                'lhs_terms': ['2*x', '3'],
                'rhs_terms': ['5'],
                'lhs_operators': ['+'],
                'rhs_operators': [],
                'variable': 'x',
            },
        }

        self._assert_parts(fixture, {
            "lhs_terms": ["2*x", "3"],
            "rhs_terms": ["5"],
            "lhs_operators": ["+"],
            "rhs_operators": [],
            "variable": "x",
        })

    def test_parse_negative_terms(self):
        """Metadata for ``-2*x + 4 = -1`` keeps the negative signs on its terms"""
        fixture = {
            'type': 'direct',
            'executed_parts': {
                'lhs_terms': ['-2*x', '4'],
                'rhs_terms': ['-1'],
                'lhs_operators': ['+'],
                'rhs_operators': [],
                'variable': 'x',
            },
        }

        self._assert_parts(fixture, {
            "lhs_terms": ["-2*x", "4"],
            "rhs_terms": ["-1"],
            "lhs_operators": ["+"],
            "rhs_operators": [],
            "variable": "x",
        })

class TestSimpleEquationsEvaluation(unittest.TestCase):
    """Solve hand-written linear equations through ``_evaluate_expression``"""

    def setUp(self):
        self.exercise = SimpleEquationsExercise()

    def _solve(self, lhs_terms, lhs_operators, rhs_terms):
        """Evaluate an equation in ``x`` whose right-hand side has no operators.

        Returns whatever ``_evaluate_expression`` returns for the assembled
        parts dict.
        """
        equation = {
            "lhs_terms": lhs_terms,
            "rhs_terms": rhs_terms,
            "lhs_operators": lhs_operators,
            "rhs_operators": [],
            "variable": "x",
        }
        return self.exercise._evaluate_expression(equation)

    def test_basic_equation(self):
        """2x + 3 = 7 has solution x = 2"""
        answer = self._solve(["2*x", "3"], ["+"], ["7"])
        self.assertEqual(answer, "2.0")

    def test_negative_coefficients(self):
        """-2x + 4 = 0 has solution x = 2"""
        answer = self._solve(["-2*x", "4"], ["+"], ["0"])
        self.assertEqual(answer, "2.0")

    def test_multiple_terms(self):
        """x + 2 + 3 = 10 has solution x = 5"""
        answer = self._solve(["x", "2", "3"], ["+", "+"], ["10"])
        self.assertEqual(answer, "5.0")

class TestSimpleEquationsGeneration(unittest.TestCase):
    """Structural checks on problems returned by ``SimpleEquationsExercise.generate``"""

    def setUp(self):
        self.curriculum = SimpleEquationsCurriculum()
        self.exercise = SimpleEquationsExercise()
        # Fixed-seed generator installed as the curriculum's ``rng``.
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    def test_problem_structure(self):
        """Test that generated problems have the correct structure"""
        problem = self.exercise.generate(self.curriculum)

        # Check basic structure
        for key in ("question", "answer", "metadata"):
            self.assertIn(key, problem)

        # Check metadata structure
        metadata = problem["metadata"]
        self.assertEqual(metadata["type"], "direct")
        self.assertIn("executed_parts", metadata)
        executed_parts = metadata["executed_parts"]
        for key in ("lhs_terms", "rhs_terms", "lhs_operators", "rhs_operators", "variable"):
            self.assertIn(key, executed_parts)

    def test_term_generation(self):
        """Test generation of equation terms"""
        # Set curriculum to basic settings
        self.curriculum.set_attr_level("value", 0)  # 1-10
        self.curriculum.set_attr_level("sign", 0)  # No signs
        self.curriculum.set_attr_level("var_name", 0)  # Basic variables

        problem = self.exercise.generate(self.curriculum)
        executed_parts = problem["metadata"]["executed_parts"]

        # Check we have at least one term (assertGreater reports the actual
        # length on failure, unlike assertTrue on a pre-computed boolean).
        self.assertGreater(len(executed_parts["lhs_terms"]), 0)

        # Check first term format
        first_term = executed_parts["lhs_terms"][0]
        self.assertIsInstance(first_term, str)
        if '*' in first_term:
            # Text before the first '*' must be an optionally negated integer,
            # or empty / a lone minus sign.
            coeff = first_term.split('*')[0]
            self.assertTrue(
                coeff.replace('-', '').isdigit() or coeff in ['', '-'],
                f"Unexpected coefficient {coeff!r} in term {first_term!r}",
            )

    def test_operator_generation(self):
        """Test generation of operators"""
        self.curriculum.set_attr_level("operators", 1)  # +, -
        self.curriculum.set_attr_level("num_terms", 1)  # 3 terms

        problem = self.exercise.generate(self.curriculum)
        executed_parts = problem["metadata"]["executed_parts"]
        lhs_operators = executed_parts["lhs_operators"]

        # Check we have operators for n-1 terms
        self.assertEqual(len(lhs_operators), len(executed_parts["lhs_terms"]) - 1)

        # Check operator is valid
        if lhs_operators:
            self.assertIn(lhs_operators[0], ["+", "-"])

class TestSimpleEquationsComprehensive(unittest.TestCase):
    """Sampling-based checks on generated equations and their answers"""

    def setUp(self):
        self.curriculum = SimpleEquationsCurriculum()
        self.exercise = SimpleEquationsExercise()
        # Fixed-seed generator installed as the curriculum's ``rng``; the
        # tests below also draw from it so every run sees the same sequence.
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    @staticmethod
    def _join_side(terms, operators):
        """Return ``terms`` interleaved with ``operators`` as one expression string.

        ``operators[i]`` is placed between ``terms[i]`` and ``terms[i + 1]``.
        Indexing (rather than ``zip``) is deliberate: a missing operator
        raises ``IndexError`` instead of silently dropping terms.
        """
        pieces = [terms[0]]
        for index, term in enumerate(terms[1:]):
            pieces.append(f"{operators[index]} {term}")
        return " ".join(pieces)

    def _assert_answer_solves_equation(self, problem):
        """Assert that ``problem['answer']`` satisfies the equation in its metadata.

        Both sides are rebuilt from ``executed_parts``, parsed with sympy, and
        evaluated at the answer; the two values must agree to 10 places.
        """
        executed_parts = problem["metadata"]["executed_parts"]
        solution = float(problem["answer"])
        var_name = executed_parts["variable"]
        symbol = Symbol(var_name)

        lhs = self._join_side(executed_parts["lhs_terms"], executed_parts["lhs_operators"])
        rhs = self._join_side(executed_parts["rhs_terms"], executed_parts["rhs_operators"])

        # A fresh local_dict per call, so nothing leaks between the two parses.
        lhs_expr = parse_expr(lhs, local_dict={var_name: symbol})
        rhs_expr = parse_expr(rhs, local_dict={var_name: symbol})

        lhs_val = float(lhs_expr.subs(symbol, solution))
        rhs_val = float(rhs_expr.subs(symbol, solution))
        self.assertAlmostEqual(lhs_val, rhs_val, places=10)

    def test_variable_consistency(self):
        """Test that the same variable is used consistently throughout the equation"""
        num_samples = 50

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]
            var_name = executed_parts["variable"]

            # Check variable appears in question
            self.assertIn(var_name, problem["question"])

            # Check variable is used consistently in terms
            # NOTE(review): this assertion repeats its own guard condition and
            # therefore can never fail; a stricter check (e.g. "no other
            # identifier appears in any term") needs the term grammar confirmed.
            for term in executed_parts["lhs_terms"] + executed_parts["rhs_terms"]:
                if var_name in term:  # If term has a variable
                    self.assertIn(var_name, term)

    def test_coefficient_ranges(self):
        """Test that coefficients are within expected ranges"""
        self.curriculum.set_attr_level("value", 0)  # 1-10
        num_samples = 50

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]

            for term in executed_parts["lhs_terms"] + executed_parts["rhs_terms"]:
                # Extract coefficient if term has one
                if '*' in term:
                    coeff = term.split('*')[0]
                    if coeff and coeff != '-':  # Skip if empty or just a minus sign
                        coeff = float(coeff)
                        self.assertLessEqual(abs(coeff), 10)
                        self.assertGreater(abs(coeff), 0)

    def test_solution_validity(self):
        """Test that generated solutions are valid"""
        num_samples = 50

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            # Verify solution satisfies the equation
            self._assert_answer_solves_equation(problem)

    def test_comprehensive_random_evaluation(self):
        """Test 1000 random problems across all levels to verify correct generation and evaluation"""
        num_samples = 1000

        # Statistics tracking
        stats = {
            'operator_counts': {},      # Count of each operator used
            'term_counts': {},          # Distribution of number of terms
            'variable_counts': {},      # Count of each variable used
            'coefficient_stats': {      # Track coefficient statistics
                'min': float('inf'),
                'max': float('-inf'),
                'total': 0,
                'count': 0,
                'unique': set()
            },
            'solution_stats': {         # Track solution statistics
                'min': float('inf'),    # Minimum solution value
                'max': float('-inf'),   # Maximum solution value
                'total': 0,
                'count': 0
            },
            'var_side_stats': {        # Track which side variables appear on
                'lhs_only': 0,         # Variable only on left side
                'rhs_only': 0,         # Variable only on right side
                'both_sides': 0,       # Variable on both sides
                'total': 0
            },
            'level_distribution': {     # Track curriculum level usage
                'num_terms': {},
                'value': {},
                'operators': {},
                'sign': {},
                'var_name': {}
            }
        }
        coefficient_stats = stats['coefficient_stats']
        solution_stats = stats['solution_stats']
        var_side_stats = stats['var_side_stats']

        for _ in range(num_samples):
            # Randomly set curriculum levels.  Levels are drawn from the seeded
            # ``self.rng`` rather than the module-level ``random`` so that a
            # failing sample can be reproduced on the next run.
            for attr in self.curriculum.attributes:
                level = self.rng.randint(0, len(self.curriculum.attributes[attr].levels) - 1)
                self.curriculum.set_attr_level(attr, level)
                # setdefault tolerates attributes beyond the five listed above.
                level_counts = stats['level_distribution'].setdefault(attr, {})
                level_counts[level] = level_counts.get(level, 0) + 1

            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]
            all_terms = executed_parts["lhs_terms"] + executed_parts["rhs_terms"]

            # Update operator statistics
            for op in executed_parts["lhs_operators"] + executed_parts["rhs_operators"]:
                stats['operator_counts'][op] = stats['operator_counts'].get(op, 0) + 1

            # Update term count statistics (the larger of the two sides)
            max_side_terms = max(len(executed_parts["lhs_terms"]), len(executed_parts["rhs_terms"]))
            stats['term_counts'][max_side_terms] = stats['term_counts'].get(max_side_terms, 0) + 1

            # Update variable statistics
            var_name = executed_parts["variable"]
            stats['variable_counts'][var_name] = stats['variable_counts'].get(var_name, 0) + 1

            # Update variable side statistics
            var_in_lhs = any(var_name in term for term in executed_parts["lhs_terms"])
            var_in_rhs = any(var_name in term for term in executed_parts["rhs_terms"])

            if var_in_lhs and var_in_rhs:
                var_side_stats['both_sides'] += 1
            elif var_in_lhs:
                var_side_stats['lhs_only'] += 1
            elif var_in_rhs:
                var_side_stats['rhs_only'] += 1
            var_side_stats['total'] += 1

            # Update coefficient statistics
            for term in all_terms:
                if '*' not in term:
                    continue
                coeff = term.split('*')[0]
                if not coeff or coeff in ['-', '+']:
                    continue
                try:
                    value = abs(float(coeff))
                except ValueError:
                    # Non-numeric text before '*' is skipped, not an error.
                    continue
                coefficient_stats['min'] = min(coefficient_stats['min'], value)
                coefficient_stats['max'] = max(coefficient_stats['max'], value)
                coefficient_stats['total'] += value
                coefficient_stats['count'] += 1
                coefficient_stats['unique'].add(value)

            # Update solution statistics
            solution = float(problem["answer"])
            solution_stats['min'] = min(solution_stats['min'], solution)
            solution_stats['max'] = max(solution_stats['max'], solution)
            solution_stats['total'] += solution
            solution_stats['count'] += 1

            # Verify solution correctness
            self._assert_answer_solves_equation(problem)

        # Print comprehensive statistics
        print("\nComprehensive Random Evaluation Statistics:")
        print("-" * 50)

        print("\nOperator Distribution:")
        total_ops = sum(stats['operator_counts'].values())
        # When no operator was generated the dict is empty and the loop body
        # (and its division by total_ops) never runs.
        for op, count in sorted(stats['operator_counts'].items()):
            print(f"  {op}: {count} ({count/total_ops*100:.1f}%)")

        print("\nTerm Count Distribution (per side):")
        for terms, count in sorted(stats['term_counts'].items()):
            print(f"  {terms} terms: {count} ({count/num_samples*100:.1f}%)")

        print("\nVariable Distribution:")
        total_vars = sum(stats['variable_counts'].values())
        for name, count in sorted(stats['variable_counts'].items()):
            print(f"  {name}: {count} ({count/total_vars*100:.1f}%)")

        print("\nVariable Side Distribution:")
        total_eqs = var_side_stats['total']
        print(f"  Left side only: {var_side_stats['lhs_only']} ({var_side_stats['lhs_only']/total_eqs*100:.1f}%)")
        print(f"  Right side only: {var_side_stats['rhs_only']} ({var_side_stats['rhs_only']/total_eqs*100:.1f}%)")
        print(f"  Both sides: {var_side_stats['both_sides']} ({var_side_stats['both_sides']/total_eqs*100:.1f}%)")

        print("\nCoefficient Statistics:")
        print(f"  Range: [{coefficient_stats['min']:.1f} to {coefficient_stats['max']:.1f}]")
        if coefficient_stats['count'] > 0:
            avg = coefficient_stats['total'] / coefficient_stats['count']
            print(f"  Average: {avg:.2f}")
            print(f"  Unique values: {len(coefficient_stats['unique'])}")

        print("\nSolution Statistics:")
        print(f"  Range: [{solution_stats['min']:.2f} to {solution_stats['max']:.2f}]")
        if solution_stats['count'] > 0:
            avg = solution_stats['total'] / solution_stats['count']
            print(f"  Average: {avg:.2f}")

        print("\nCurriculum Level Distribution:")
        for attr, levels in sorted(stats['level_distribution'].items()):
            print(f"\n  {attr}:")
            for level, count in sorted(levels.items()):
                print(f"    Level {level}: {count} ({count/total_eqs*100:.1f}%)")

        # Verify statistical properties
        # 1. Check we see all operators when using operator level 1
        if 1 in stats['level_distribution']['operators']:
            self.assertTrue(all(op in stats['operator_counts'] for op in ["+", "-"]),
                          "Not all operators were generated")

        # 2. Check term count constraints (per side)
        min_terms = min(stats['term_counts'])
        max_terms = max(stats['term_counts'])
        self.assertGreaterEqual(min_terms, 1, "Generated equations with too few terms per side")
        self.assertLessEqual(max_terms, 4, "Generated equations with too many terms per side")

        # 3. Check coefficient ranges
        if coefficient_stats['count'] > 0:
            self.assertGreater(len(coefficient_stats['unique']), 3,
                             "Too few unique coefficients generated")
            self.assertGreater(coefficient_stats['min'], 0,
                             "Generated zero or negative coefficients")
            self.assertLessEqual(coefficient_stats['max'], 100,
                               "Generated coefficients exceed maximum allowed")

class TestSimpleEquationsGenerate(unittest.TestCase):
    """Test the generate function with different curriculum settings"""

    def setUp(self):
        self.curriculum = SimpleEquationsCurriculum()
        self.exercise = SimpleEquationsExercise()
        self.rng = random.Random(42)  # Fixed seed for reproducibility
        self.curriculum.rng = self.rng

    def test_generate_basic_equation(self):
        """Test generation of basic linear equations"""
        # Configure curriculum for simple equations
        self.curriculum.set_attr_level("num_terms", 0)  # 2 terms
        self.curriculum.set_attr_level("value", 0)  # Small values
        self.curriculum.set_attr_level("operators", 0)  # Only +
        self.curriculum.set_attr_level("sign", 0)  # No signs
        self.curriculum.set_attr_level("var_name", 0)  # Basic variables

        problem = self.exercise.generate(self.curriculum)

        # Verify structure
        for key in ("question", "answer", "metadata"):
            self.assertIn(key, problem)

        # Verify terms and operators
        executed_parts = problem["metadata"]["executed_parts"]
        self.assertGreaterEqual(len(executed_parts["lhs_terms"]), 1, "Not enough terms generated")
        self.assertGreaterEqual(len(executed_parts["rhs_terms"]), 1, "Not enough terms generated")

        # Verify every operator (not just the first on each side) is addition;
        # an empty operator list passes trivially.
        for side in ("lhs_operators", "rhs_operators"):
            for operator in executed_parts[side]:
                self.assertEqual(operator, "+")

    def test_coefficient_distribution(self):
        """Test distribution of coefficient values"""
        self.curriculum.set_attr_level("value", 0)  # 1-10
        num_samples = 100
        coefficients = []

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            executed_parts = problem["metadata"]["executed_parts"]

            for term in executed_parts["lhs_terms"] + executed_parts["rhs_terms"]:
                if '*' in term:
                    # Text before the first '*' is taken as the coefficient;
                    # empty or sign-only prefixes carry no numeric value.
                    coeff = term.split('*')[0]
                    if coeff and coeff not in ['-', '+']:
                        coefficients.append(abs(float(coeff)))

        # Check coefficient range; comparing against [] makes a failure list
        # the offending values instead of a bare "False is not true".
        out_of_range = [c for c in coefficients if not 1 <= c <= 10]
        self.assertEqual(out_of_range, [],
                         "Coefficients outside valid range [1,10]")
        # Check we see different values
        unique_coeffs = set(coefficients)
        self.assertGreater(len(unique_coeffs), 3,
                           f"Too few unique coefficients: {unique_coeffs}")

    def test_error_handling(self):
        """Test error handling in equation generation"""
        # Test with invalid attribute level
        with self.assertRaises(ValueError):
            self.curriculum.set_attr_level("value", 999)

        # Test with invalid attribute name
        with self.assertRaises(KeyError):
            self.curriculum.set_attr_level("invalid_attr", 0)

# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()