# reasoning-gym/tests/test_base_conversion.py

"""Unit tests for the base conversion exercise."""

from enum import verify
from reasoning_gym.curricula.algorithmic.base_conversion_curriculum import BaseConversionCurriculum
from reasoning_gym.exercises.algorithmic.base_conversion import BaseConversionExercise
import unittest
import random
from collections import defaultdict

class TestBaseConversionParsing(unittest.TestCase):
    """Test parsing of base conversion metadata"""

    def setUp(self):
        self.exercise = BaseConversionExercise()

    def test_parse_expression_basic(self):
        """Test parsing of basic base conversion metadata"""
        test_metadata = {
            "source_value": {"val": "1010"},
            "source_base": {"base": "binary"},
            "target_base": {"base": "hexadecimal", "hint": ""}
        }
        parsed = self.exercise._parse_expression(test_metadata)
        self.assertEqual(parsed["source_value"], "1010")
        self.assertEqual(parsed["source_base"], 2)
        self.assertEqual(parsed["target_base"], 16)

    def test_parse_base_names(self):
        """Test parsing of different base names"""
        test_cases = [
            ({"base": "binary"}, 2),
            ({"base": "octal"}, 8),
            ({"base": "decimal"}, 10),
            ({"base": "hexadecimal"}, 16),
            ({"base": "base-3"}, 3),
            ({"base": "base-36"}, 36)
        ]
        for base_dict, expected in test_cases:
            metadata = {
                "source_value": {"val": "0"},
                "source_base": base_dict,
                "target_base": {"base": "decimal", "hint": ""}
            }
            parsed = self.exercise._parse_expression(metadata)
            self.assertEqual(parsed["source_base"], expected)

    def test_invalid_base_name(self):
        """Test handling of invalid base names"""
        metadata = {
            "source_value": {"val": "0"},
            "source_base": {"base": "invalid"},
            "target_base": {"base": "decimal", "hint": ""}
        }
        with self.assertRaises(ValueError):
            self.exercise._parse_expression(metadata)

    def test_parse_with_hints(self):
        """Test parsing with different hint configurations"""
        test_cases = [
            ({"hint": ""}, ""),
            ({"hint": " (use lowercase letters a-z for digits above 9)"}, " (use lowercase letters a-z for digits above 9)"),
            ({"hint": " (hint: convert to decimal first)"}, " (hint: convert to decimal first)")
        ]
        for hint_dict, expected in test_cases:
            metadata = {
                "source_value": {"val": "0"},
                "source_base": {"base": "binary"},
                "target_base": {"base": "hexadecimal", "hint": hint_dict["hint"]}
            }
            parsed = self.exercise._parse_expression(metadata)
            self.assertEqual(parsed["source_base"], 2)
            self.assertEqual(parsed["target_base"], 16)

class TestBaseConversionEvaluation(unittest.TestCase):
    """Test evaluation of base conversion problems"""

    def setUp(self):
        self.exercise = BaseConversionExercise()

    def test_binary_to_decimal(self):
        """Test binary to decimal conversion"""
        test_cases = [
            ("1010", "10"),    # 10 in decimal
            ("1111", "15"),    # 15 in decimal
            ("10000", "16"),   # 16 in decimal
            ("0", "0"),        # 0 in any base is 0
            ("1", "1")         # 1 in any base is 1
        ]
        for binary, expected in test_cases:
            parsed = {
                "source_value": binary,
                "source_base": 2,
                "target_base": 10
            }
            result = self.exercise._evaluate_expression(parsed)
            self.assertEqual(result, expected)

    def test_decimal_to_hex(self):
        """Test decimal to hexadecimal conversion"""
        test_cases = [
            ("255", "ff"),     # Max 8-bit value
            ("16", "10"),      # Power of 16
            ("10", "a"),       # Single hex digit
            ("0", "0"),        # Zero
            ("4096", "1000")   # Power of 16
        ]
        for decimal, expected in test_cases:
            parsed = {
                "source_value": decimal,
                "source_base": 10,
                "target_base": 16
            }
            result = self.exercise._evaluate_expression(parsed)
            self.assertEqual(result, expected)

    def test_hex_to_octal(self):
        """Test hexadecimal to octal conversion"""
        test_cases = [
            ("ff", "377"),     # Max 8-bit value
            ("10", "20"),      # Simple conversion
            ("a5", "245"),     # Mixed digits and letters
            ("0", "0"),        # Zero
            ("100", "400")     # Power of 16
        ]
        for hex_val, expected in test_cases:
            parsed = {
                "source_value": hex_val,
                "source_base": 16,
                "target_base": 8
            }
            result = self.exercise._evaluate_expression(parsed)
            self.assertEqual(result, expected)

    def test_zero_value(self):
        """Test conversion of zero in any base"""
        bases = [2, 3, 8, 10, 16, 36]  # Test more bases
        for source_base in bases:
            for target_base in bases:
                parsed = {
                    "source_value": "0",
                    "source_base": source_base,
                    "target_base": target_base
                }
                result = self.exercise._evaluate_expression(parsed)
                self.assertEqual(result, "0")

    def test_invalid_digits(self):
        """Test handling of invalid digits for given base"""
        test_cases = [
            ("123", 2),    # Invalid binary
            ("9", 8),      # Invalid octal
            ("g", 16),     # Invalid hex
            ("z", 35)      # Invalid for base-35
        ]
        for value, base in test_cases:
            parsed = {
                "source_value": value,
                "source_base": base,
                "target_base": 10
            }
            result = self.exercise._evaluate_expression(parsed)
            self.assertTrue(result.startswith("Error"))

    def test_edge_cases(self):
        """Test edge cases and boundary values"""
        test_cases = [
            # Max values for different bases
            ("11111111", 2, 16, "ff"),          # Max 8-bit binary to hex
            ("77777777", 8, 16, "ffffff"),      # Large octal to hex
            ("ffffff", 16, 2, "111111111111111111111111"),  # Large hex to binary
            # Single digits
            ("1", 2, 36, "1"),
            ("z", 36, 2, "100011"),             # Corrected: 'z' in base-36 is 35, which is 100011 in binary
            # Alternating patterns
            ("101010", 2, 8, "52"),
            ("aaaaaa", 16, 10, "11184810")
        ]
        for value, source_base, target_base, expected in test_cases:
            parsed = {
                "source_value": value,
                "source_base": source_base,
                "target_base": target_base
            }
            result = self.exercise._evaluate_expression(parsed)
            self.assertEqual(result, expected)

class TestBaseConversionGeneration(unittest.TestCase):
    """Test problem generation"""

    def setUp(self):
        self.curriculum = BaseConversionCurriculum()
        self.exercise = BaseConversionExercise()
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    def test_problem_structure(self):
        """Test that generated problems have the correct structure"""
        problem = self.exercise.generate(self.curriculum)

        # Check basic structure
        self.assertIn("question", problem)
        self.assertIn("answer", problem)
        self.assertIn("metadata", problem)

        # Check metadata structure
        metadata = problem["metadata"]
        self.assertEqual(metadata["type"], "direct")
        self.assertIn("executed_parts", metadata)
        executed_parts = metadata["executed_parts"]
        self.assertIn("source_value", executed_parts)
        self.assertIn("source_base", executed_parts)
        self.assertIn("target_base", executed_parts)

    def test_value_ranges(self):
        """Test that generated values are within expected ranges"""
        # Test all value levels
        level_max_values = {0: 100, 1: 1000, 2: 10000}

        for level, max_value in level_max_values.items():
            self.curriculum.set_attr_level("value", level)
            problem = self.exercise.generate(self.curriculum)
            decimal_val = int(problem["metadata"]["executed_parts"]["source_value"],
                            problem["metadata"]["executed_parts"]["source_base"])
            self.assertLessEqual(decimal_val, max_value)

    def test_base_ranges(self):
        """Test that bases are within expected ranges"""
        # Test all base range levels
        level_max_bases = {0: 16, 1: 26, 2: 36}

        for level, max_base in level_max_bases.items():
            self.curriculum.set_attr_level("base_range", level)
            problem = self.exercise.generate(self.curriculum)
            source_base = problem["metadata"]["executed_parts"]["source_base"]
            target_base = problem["metadata"]["executed_parts"]["target_base"]
            self.assertLessEqual(source_base, max_base)
            self.assertLessEqual(target_base, max_base)
            self.assertGreaterEqual(source_base, 2)
            self.assertGreaterEqual(target_base, 2)

    def test_template_variation(self):
        """Test that different templates are used"""
        templates_seen = set()
        num_samples = 100

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            templates_seen.add(problem["question"].split(":")[0])  # Get the question pattern

        self.assertGreater(len(templates_seen), 1, "Not enough template variation")

class TestBaseConversionComprehensive(unittest.TestCase):
    """Comprehensive tests for base conversion"""

    def setUp(self):
        self.curriculum = BaseConversionCurriculum()
        self.exercise = BaseConversionExercise()
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    def _extract_base(self, text):
        """Helper method to extract base from problem text."""
        if "binary" in text.lower():
            return 2
        if "octal" in text.lower():
            return 8
        if "decimal" in text.lower():
            return 10
        if "hexadecimal" in text.lower():
            return 16

        # Try to find base-N pattern
        import re
        match = re.search(r'base-(\d+)', text.lower())
        if match:
            return int(match.group(1))
        return None

    def test_all_base_combinations(self):
        """Test conversion between all possible base combinations"""
        bases = [2, 8, 10, 16, 36]  # Test common bases
        test_values = ["10", "ff", "xyz", "777", "42"]  # Test values

        for source_base in bases:
            for target_base in bases:
                for value in test_values:
                    try:
                        # Skip if value is invalid for source base
                        int(value, min(source_base, 36))
                    except ValueError:
                        continue

                    parsed = {
                        "source_value": value,
                        "source_base": source_base,
                        "target_base": target_base
                    }
                    result = self.exercise._evaluate_expression(parsed)

                    # Verify result by converting back
                    try:
                        decimal = int(result, target_base)
                        original = int(value, source_base)
                        self.assertEqual(decimal, original)
                    except ValueError:
                        self.fail(f"Invalid conversion: {value} from base {source_base} to base {target_base}")

    def test_hint_inclusion(self):
        """Test that hints are included appropriately"""
        # Test with hints enabled
        self.curriculum.set_attr_level("hint", 0)
        problem = self.exercise.generate(self.curriculum)
        if problem["metadata"]["executed_parts"]["target_base"] > 10:
            self.assertIn("use lowercase letters", problem["question"].lower())

        # Test with hints disabled
        self.curriculum.set_attr_level("hint", 1)
        problem = self.exercise.generate(self.curriculum)
        self.assertNotIn("use lowercase letters", problem["question"].lower())

    def test_base_names(self):
        """Test that base names are used correctly"""
        # Test with basic names
        self.curriculum.set_attr_level("base_names", 0)
        problem = self.exercise.generate(self.curriculum)
        question = problem["question"].lower()
        self.assertTrue(any(name in question for name in ["binary", "hexadecimal", "base-"]))

        # Test with extended names
        self.curriculum.set_attr_level("base_names", 1)
        problem = self.exercise.generate(self.curriculum)
        question = problem["question"].lower()
        self.assertTrue(any(name in question for name in ["octal", "decimal", "base-"]))

    def test_comprehensive_random_evaluation(self):
        """Test random evaluation with all base combinations and track statistics."""
        self.rng = random.Random(42)  # Fixed seed for reproducibility
        self.curriculum.rng = self.rng

        # Track statistics
        base_name_usage = defaultdict(int)
        source_bases = defaultdict(int)
        target_bases = defaultdict(int)
        values = []
        hint_count = 0
        total_samples = 1000

        # Generate test cases
        for _ in range(total_samples):
            # Set random attribute levels
            for attr in ["value", "base_range"]:
                self.curriculum.set_attr_level(attr, self.rng.randint(0, 2))
            for attr in ["base_names", "hint"]:
                self.curriculum.set_attr_level(attr, self.rng.randint(0, 1))

            # Generate and evaluate a random problem
            problem = self.exercise.generate(self.curriculum)

            # Track statistics
            if "binary" in problem["question"].lower():
                base_name_usage["binary"] += 1
            elif "octal" in problem["question"].lower():
                base_name_usage["octal"] += 1
            elif "hexadecimal" in problem["question"].lower():
                base_name_usage["hexadecimal"] += 1
            elif "decimal" in problem["question"].lower():
                base_name_usage["decimal"] += 1
            else:
                base_name_usage["other"] += 1

            # Track source and target bases
            metadata = problem["metadata"]["executed_parts"]
            source_base = metadata["source_base"]
            target_base = metadata["target_base"]

            if source_base:
                source_bases[source_base] += 1
            if target_base:
                target_bases[target_base] += 1

            # Track if hints are included
            if "(use lowercase letters a-z for digits above 9)" in problem["question"]:
                hint_count += 1

            # Track value statistics
            try:
                value = int(metadata["source_value"], source_base)
                values.append(value)
            except ValueError:
                pass

        # Print statistics
        print("\nBase name usage:")
        for name, count in base_name_usage.items():
            print(f"  {name}: {count}")

        print("\nSource bases used (35 bases):")
        for base in range(2, 37):
            if source_bases[base] > 0:
                print(f"  base-{base}: {source_bases[base]}")

        print("\nTarget bases used (35 bases):")
        for base in range(2, 37):
            if target_bases[base] > 0:
                print(f"  base-{base}: {target_bases[base]}")

        print("\nValue statistics:")
        if values:
            print(f"  Min value: {min(values)}")
            print(f"  Max value: {max(values)}")
            print(f"  Average value: {sum(values) / len(values):.2f}")
        print(f"  Total samples with hints: {hint_count} / {total_samples}")

        # verify statistics
        self.assertTrue(base_name_usage["hexadecimal"] >= 4, "Hexadecimal base name was not used enough")
        self.assertTrue(len(source_bases) >= 10, "Not enough different source bases used")
        self.assertTrue(len(target_bases) >= 10, "Not enough different target bases used")
        self.assertTrue(hint_count > 0, "No hints were included")
        self.assertTrue(hint_count < total_samples, "Too many hints were included")

# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()