"""Examples of generated problems from the LetterJumble exercise.

This file demonstrates different types of letter jumble problems that can be generated
at various difficulty levels.
"""

import random

from reasoning_gym.curricula.algorithmic.letter_jumble_curriculum import LetterJumbleCurriculum
from reasoning_gym.exercises.algorithmic.letter_jumble import LetterJumbleExercise

_SEPARATOR = "\n========================================\n"

# Attributes in the order their levels are applied for every example.
# Level tables as declared by LetterJumbleCurriculum at the time of writing:
#   word_length       -> [7, 12, 64]
#   num_words         -> [3, 5, 20]
#   corruption_level  -> [0.1, 0.3, 0.9]
#   consecutive_words -> [True, False]
#   preserve_length   -> [4, 2]   (words no longer than this are left unscrambled)
_ATTRIBUTES = (
    "word_length",
    "num_words",
    "corruption_level",
    "consecutive_words",
    "preserve_length",
)


def _show_example(exercise, curriculum, title, levels):
    """Apply one level per attribute, generate a problem, then print it under ``title``.

    Args:
        exercise: Object exposing ``generate(curriculum)``.
        curriculum: Object exposing ``set_attr_level(name, level)``.
        title: Heading printed immediately before the problem.
        levels: One level index per entry of ``_ATTRIBUTES``, in the same order.
    """
    for attr_name, level in zip(_ATTRIBUTES, levels):
        curriculum.set_attr_level(attr_name, level)
    # Generate before printing the title so a failure does not leave a dangling heading.
    problem = exercise.generate(curriculum)
    print(title)
    print(problem)


def main():
    """Print sample letter jumble problems for fixed, random and special level settings."""
    # Initialize with fixed seed for reproducibility
    curriculum = LetterJumbleCurriculum()
    exercise = LetterJumbleExercise()
    curriculum.rng = random.Random(42)

    # Level tuples follow _ATTRIBUTES order:
    # (word_length, num_words, corruption_level, consecutive_words, preserve_length)
    graded_examples = [
        ("Level 0 (Basic Word Scrambling):", (0, 0, 0, 0, 0)),
        ("Level 1 (Medium Difficulty):", (1, 1, 1, 0, 0)),
        ("Level 2 (Advanced Scrambling):", (2, 2, 2, 1, 1)),
    ]
    for title, levels in graded_examples:
        print(_SEPARATOR)
        _show_example(exercise, curriculum, title, levels)

    print(_SEPARATOR)

    # Random Examples with Different Seeds
    print("Random Examples (Different Seeds):")
    for seed in range(10, 15):
        curriculum.rng = random.Random(seed)
        # Draw the levels from a generator seeded with the same value instead of
        # the unseeded module-level ``random``; otherwise "Seed N" would show a
        # different configuration on every run.
        level_rng = random.Random(seed)
        levels = (
            level_rng.randint(0, 2),
            level_rng.randint(0, 2),
            level_rng.randint(0, 2),
            level_rng.randint(0, 1),
            level_rng.randint(0, 1),
        )
        _show_example(exercise, curriculum, f"\nRandom Example (Seed {seed}):", levels)

    print(_SEPARATOR)

    # Special Cases
    print("Special Cases:")
    special_cases = [
        # Highest word_length level, lowest num_words level, heaviest corruption.
        # NOTE(review): the heading says "Single Word", but num_words level 0 is 3
        # in the curriculum's level table - confirm the intended wording.
        ("\nLong Single Word (Minimal Preservation):", (2, 0, 2, 0, 1)),
        # Lowest word_length level, highest num_words level, non-consecutive words.
        ("\nMany Short Words (Maximum Preservation):", (0, 2, 1, 1, 0)),
        # Middle word_length / num_words levels with the lightest corruption.
        ("\nMedium Words (Balanced Preservation):", (1, 1, 0, 0, 1)),
    ]
    for title, levels in special_cases:
        _show_example(exercise, curriculum, title, levels)


if __name__ == "__main__":
    main()
class LetterJumbleExercise:
    """Exercise generator for word jumbling tasks.

    The class holds no word data of its own: problems are produced by the
    template obtained from the curriculum, which is handed this object so it
    can use ``_parse_expression`` / ``_evaluate_expression``.
    """

    def __init__(self):
        # Curriculum used by the most recent generate() call; None until then.
        self.curriculum = None

    def generate(self, curriculum: Any) -> Dict[str, Any]:
        """Generate a word jumbling problem using the curriculum.

        Args:
            curriculum: Object exposing ``rng`` and ``get_template(rng)``; the
                returned template must expose ``eval(exercise, rng)``.

        Returns:
            Whatever ``template.eval(self, curriculum.rng)`` returns. It is
            expected to be a dict with ``question``, ``answer`` and
            ``metadata`` entries, but that shape is produced by the template
            system, not by this method.
        """
        self.curriculum = curriculum
        template = curriculum.get_template(curriculum.rng)
        return template.eval(self, curriculum.rng)

    def _parse_expression(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the scrambled and original words from template metadata.

        Expected metadata structure::

            {
                "scrambled": {
                    "scrambled_words": str | sequence of str,
                    "original_words": List[str]
                }
            }

        Args:
            metadata: Mapping holding the ``"scrambled"`` entry described above.

        Returns:
            A dictionary containing:
                - scrambled_words: List[str] of scrambled words
                - original_words: the original words, passed through unchanged

        Raises:
            KeyError: If ``"scrambled"`` or one of its two entries is missing.
        """
        template_data = metadata["scrambled"]
        raw_scrambled = template_data["scrambled_words"]
        # A whitespace-separated string is split into words; an already-split
        # sequence is copied as-is instead of failing on a missing .split().
        if isinstance(raw_scrambled, str):
            scrambled_words = raw_scrambled.split()
        else:
            scrambled_words = list(raw_scrambled)

        return {
            "scrambled_words": scrambled_words,
            "original_words": template_data["original_words"],
        }

    def _evaluate_expression(self, parsed_data: Dict[str, Any]) -> str:
        """Build the answer string from parsed data.

        Args:
            parsed_data: Dictionary as returned by ``_parse_expression``; only
                ``original_words`` is read.

        Returns:
            The original words joined by single spaces.
        """
        return " ".join(parsed_data["original_words"])
"""
Curriculum definition for letter jumble exercises.
"""

import re
from typing import Dict, Any

from reasoning_gym.core.base_curriculum import BaseCurriculum
from reasoning_gym.core.attributes import AttributeDefinition, AttributeType
from reasoning_gym.core.template import Template
from reasoning_gym.data import read_data_file


class LetterJumbleCurriculum(BaseCurriculum):
    """Curriculum describing how letter jumble problems are assembled.

    ``self.words`` holds every purely alphabetic token of
    ``in_the_year_2889.txt`` in text order; the symbolic structure built in
    ``_init_curriculum`` selects words from that pool and scrambles them.
    """

    def __init__(self):
        super().__init__("LetterJumbleCurriculum")
        text = read_data_file("in_the_year_2889.txt")
        # Keep alphabetic tokens only (drops numbers and tokens containing "_").
        self.words = [word for word in re.findall(r"\b\w+\b", text) if word.isalpha()]

    def _pick_words(self, refs):
        """Select the words to scramble for one problem.

        Draws ``num_words`` words from ``self.words``: a contiguous slice when
        ``consecutive_words`` is truthy, otherwise a random sample without
        replacement.

        NOTE(review): the ``word_length`` attribute is declared in
        ``_init_curriculum`` but is not read here or anywhere else in the
        symbolic structure, so the pool is not filtered by length. Confirm
        whether that is intended before relying on length bounds.
        """
        n_words = refs["num_words"]()
        pool = self.words
        if not refs["consecutive_words"]():
            return refs["dataset_rng"].sample(pool, n_words)
        # max(0, ...) keeps randint valid when the pool is shorter than n_words.
        start = refs["dataset_rng"].randint(0, max(0, len(pool) - n_words))
        return pool[start:start + n_words]

    @staticmethod
    def _swap_scramble(refs, chars):
        """Scramble ``chars`` in place with random pair swaps and return the joined word.

        The number of swaps is ``int(len(chars) * corruption_level)``, so short
        words combined with a low corruption level may not be changed at all.

        Args:
            refs: Reference mapping providing ``corruption_level`` and ``dataset_rng``.
            chars: Mutable list of single characters; modified in place.
        """
        num_swaps = max(0, int(len(chars) * refs["corruption_level"]()))
        for _ in range(num_swaps):
            # Two distinct positions, so every swap moves two characters
            # (unless both positions happen to hold the same letter).
            i, j = refs["dataset_rng"].sample(range(len(chars)), 2)
            chars[i], chars[j] = chars[j], chars[i]
        return "".join(chars)

    def _init_curriculum(self) -> None:
        """Initialize the letter jumble curriculum configuration"""
        # Attribute types used by the definitions below
        self._valid_types = {
            AttributeType.STATIC,
            AttributeType.UBOUND,
            AttributeType.APPEND
        }

        # Define attributes
        self._attributes = {
            "word_length": AttributeDefinition(
                levels=[7, 12, 64],
                default_level=0,
                description="Maximum word length",
                attr_type=AttributeType.UBOUND,
                min_value=1  # Lower bound; note that one-letter words exist in the pool
            ),
            "preserve_length": AttributeDefinition(
                levels=[4, 2],
                default_level=0,
                # Words of at most this many letters are emitted unscrambled
                description="Word length to preserve",
                attr_type=AttributeType.STATIC
            ),
            "num_words": AttributeDefinition(
                levels=[3, 5, 20],
                default_level=0,
                description="Number of words to scramble",
                attr_type=AttributeType.UBOUND,
                min_value=1  # Ensure at least 1 word
            ),
            "corruption_level": AttributeDefinition(
                levels=[0.1, 0.3, 0.9],
                default_level=0,
                description="Fraction of characters to swap",
                attr_type=AttributeType.UBOUND,
                min_value=0.1
            ),
            "consecutive_words": AttributeDefinition(
                levels=[True, False],
                default_level=0,
                description="Whether to select consecutive words",
                attr_type=AttributeType.APPEND
            )
        }

        # Define templates with symbolic placeholders
        self._templates = [
            Template(
                template="Unscramble these words: \"{scrambled}\"",
                parts={"scrambled": "word_list"}
            ),
            Template(
                template="What are the original words? \"{scrambled}\"",
                parts={"scrambled": "word_list"}
            ),
            Template(
                template="Rearrange the letters to find the original words: \"{scrambled}\"",
                parts={"scrambled": "word_list"}
            )
        ]

        # Define symbolic structure
        self._symbolic = {
            # Shared variables that need to be consistent across templates
            "shared_vars": {
                # Selected original words that will be scrambled
                "selected_words": lambda refs: self._pick_words(refs)
            },
            # Value generators for dynamic content
            "generators": {
                # Scramble a single word (given as a list of characters)
                "scramble_word": lambda refs: lambda lst: self._swap_scramble(refs, lst),
                # Scrambled version of all selected words; words no longer than
                # preserve_length are passed through untouched
                "scramble_all": lambda refs: lambda: [
                    refs["scramble_word"](refs)(list(word)) if len(word) > refs["preserve_length"]() else word
                    for word in refs["selected_words"](refs)
                ]
            },
            # Template composition
            "templates": {
                "word_list": lambda refs: {
                    "template": "{scrambled_words}",
                    "parts": {
                        "scrambled_words": lambda refs=refs: " ".join(refs["scramble_all"](refs)()),
                        "original_words": lambda refs=refs: refs["selected_words"](refs)
                    }
                }
            }
        }
"""Unit tests for the letter jumble exercise."""

import random
import unittest
from collections import defaultdict

from reasoning_gym.curricula.algorithmic.letter_jumble_curriculum import LetterJumbleCurriculum
from reasoning_gym.exercises.algorithmic.letter_jumble import LetterJumbleExercise


def _as_words(value):
    """Return ``value`` as a list of words.

    The curriculum's ``scrambled_words`` part is built with ``" ".join(...)``;
    zipping such a string against a word list would pair words with single
    characters, so strings are split first. Sequences are copied unchanged.
    """
    return value.split() if isinstance(value, str) else list(value)


def _current_level_value(curriculum, attr_name):
    """Return the level-table entry currently selected for ``attr_name``."""
    levels = curriculum.attributes[attr_name].levels
    return levels[curriculum.get_attr_level(attr_name)]


def _changed_fraction(original, scrambled):
    """Return the fraction of positions at which the two words differ."""
    return sum(1 for a, b in zip(original, scrambled) if a != b) / len(original)


class TestLetterJumbleParsing(unittest.TestCase):
    """Test parsing of letter jumble metadata"""

    def setUp(self):
        self.exercise = LetterJumbleExercise()

    def test_parse_expression_basic(self):
        """Test parsing of basic letter jumble metadata"""
        test_metadata = {
            "scrambled": {
                "scrambled_words": "EHLLO DLWOR",
                "original_words": ["HELLO", "WORLD"]
            }
        }
        parsed = self.exercise._parse_expression(test_metadata)
        self.assertEqual(parsed["scrambled_words"], ["EHLLO", "DLWOR"])
        self.assertEqual(parsed["original_words"], ["HELLO", "WORLD"])

    def test_parse_with_spaces(self):
        """Test parsing with spaces and punctuation"""
        test_metadata = {
            "scrambled": {
                "scrambled_words": "EHLLO DLWOR!",
                "original_words": ["HELLO", "WORLD!"]
            }
        }
        parsed = self.exercise._parse_expression(test_metadata)
        self.assertEqual(parsed["scrambled_words"], ["EHLLO", "DLWOR!"])
        self.assertEqual(parsed["original_words"], ["HELLO", "WORLD!"])

    def test_parse_mixed_case(self):
        """Test parsing with mixed case text"""
        test_metadata = {
            "scrambled": {
                "scrambled_words": "HeLlO WoRlD",
                "original_words": ["hElLo", "wOrLd"]
            }
        }
        parsed = self.exercise._parse_expression(test_metadata)
        self.assertEqual(parsed["scrambled_words"], ["HeLlO", "WoRlD"])
        self.assertEqual(parsed["original_words"], ["hElLo", "wOrLd"])


class TestLetterJumbleEvaluation(unittest.TestCase):
    """Test evaluation of letter jumble problems"""

    def setUp(self):
        self.exercise = LetterJumbleExercise()

    def _assert_answers(self, test_cases):
        """Check that each (scrambled, expected) pair evaluates to ``expected``."""
        for scrambled, expected in test_cases:
            with self.subTest(scrambled=scrambled):
                parsed = {
                    "scrambled_words": scrambled,
                    "original_words": expected.split()
                }
                self.assertEqual(self.exercise._evaluate_expression(parsed), expected)

    def test_basic_unscrambling(self):
        """Test basic unscrambling cases"""
        self._assert_answers([
            (["EHLLO"], "HELLO"),  # Single word
            (["EHLLO", "DLWOR"], "HELLO WORLD"),  # Two words
            (["AAAA"], "AAAA"),  # Same letters
            (["ZBAC"], "ABCZ"),  # Sorted order
            (["HELLO"], "HELLO"),  # Already unscrambled
        ])

    def test_mixed_case_unscrambling(self):
        """Test unscrambling with mixed case"""
        self._assert_answers([
            (["HeLlO"], "hElLo"),  # Mixed case, single word
            (["WoRlD", "HeLlO"], "wOrLd hElLo"),  # Mixed case, multiple words
            (["AbCdE"], "aBcDe"),  # Mixed case, alternating
        ])

    def test_with_spaces_and_punctuation(self):
        """Test unscrambling with spaces and punctuation"""
        self._assert_answers([
            (["EHLLO!", "DLWOR?"], "HELLO! WORLD?"),
            (["EHLLO.", "DLWOR."], "HELLO. WORLD."),
            (["EHLLO,", "DLWOR,"], "HELLO, WORLD,"),
        ])


class TestLetterJumbleGeneration(unittest.TestCase):
    """Test problem generation"""

    def setUp(self):
        self.curriculum = LetterJumbleCurriculum()
        self.exercise = LetterJumbleExercise()
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    def test_problem_structure(self):
        """Test that generated problems have the correct structure"""
        problem = self.exercise.generate(self.curriculum)

        # Check basic structure
        self.assertIn("question", problem)
        self.assertIn("answer", problem)
        self.assertIn("metadata", problem)

        # Check metadata structure
        metadata = problem["metadata"]
        self.assertEqual(metadata["type"], "direct")
        self.assertIn("executed_parts", metadata)
        executed_parts = metadata["executed_parts"]
        self.assertIn("scrambled_words", executed_parts)
        self.assertIn("original_words", executed_parts)

    def test_word_length_ranges(self):
        """Test that word lengths are within expected ranges"""
        # Read the bounds from the curriculum so they cannot drift from it.
        # NOTE(review): the curriculum's word selection does not appear to read
        # word_length, so this upper bound may not be enforced - confirm.
        max_lengths = self.curriculum.attributes["word_length"].levels

        for level, max_length in enumerate(max_lengths):
            with self.subTest(level=level):
                self.curriculum.set_attr_level("word_length", level)
                problem = self.exercise.generate(self.curriculum)
                words = problem["metadata"]["executed_parts"]["original_words"]
                for word in words:
                    self.assertLessEqual(len(word), max_length)
                    # The word pool is alphabetic tokens, which includes
                    # one-letter words, so 1 is the only safe lower bound.
                    self.assertGreaterEqual(len(word), 1)

    def test_word_count_ranges(self):
        """Test that word counts are within expected ranges"""
        max_counts = self.curriculum.attributes["num_words"].levels

        for level, max_words in enumerate(max_counts):
            with self.subTest(level=level):
                self.curriculum.set_attr_level("num_words", level)
                problem = self.exercise.generate(self.curriculum)
                words = problem["metadata"]["executed_parts"]["original_words"]
                self.assertLessEqual(len(words), max_words)
                self.assertGreaterEqual(len(words), 1)  # Min words is 1


class TestLetterJumbleComprehensive(unittest.TestCase):
    """Comprehensive tests for letter jumble"""

    def setUp(self):
        self.curriculum = LetterJumbleCurriculum()
        self.exercise = LetterJumbleExercise()
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    def test_corruption_levels(self):
        """Test different corruption levels"""
        corruption_levels = self.curriculum.attributes["corruption_level"].levels
        num_samples = 100  # Test with multiple samples

        # Test each level
        for level, expected_corruption in enumerate(corruption_levels):
            self.curriculum.set_attr_level("corruption_level", level)
            # preserve_length is not changed inside this loop, so read it once.
            preserve_len = _current_level_value(self.curriculum, "preserve_length")
            differences = []

            # Generate multiple problems to measure average corruption
            for _ in range(num_samples):
                problem = self.exercise.generate(self.curriculum)
                metadata = problem["metadata"]["executed_parts"]
                original_words = _as_words(metadata["original_words"])
                scrambled_words = _as_words(metadata["scrambled_words"])
                for orig, scrambled in zip(original_words, scrambled_words):
                    # Words no longer than preserve_len are never scrambled.
                    if len(orig) > preserve_len:
                        differences.append(_changed_fraction(orig, scrambled))

            # Check average corruption level is reasonable
            # It's okay if actual corruption is lower than target due to:
            # 1. Some swaps might cancel out previous swaps
            # 2. The same characters might be swapped multiple times
            # 3. The number of swaps is truncated to an integer, which is
            #    zero for short words at low corruption levels
            if differences:
                avg_corruption = sum(differences) / len(differences)
                # Only check that we don't exceed target by too much
                self.assertLess(avg_corruption, expected_corruption + 0.1,
                    f"Corruption level {avg_corruption:.2f} too high (target: {expected_corruption:.2f})")
                # And ensure we have some corruption
                self.assertGreater(avg_corruption, 0.02,
                    f"Corruption level {avg_corruption:.2f} too low (should be above 0.02)")

    def test_template_variation(self):
        """Test that different templates are used"""
        templates_seen = set()
        num_samples = 100

        for _ in range(num_samples):
            problem = self.exercise.generate(self.curriculum)
            # Every template puts the word list inside double quotes, so the
            # text before the first quote identifies the template. Splitting
            # on ":" would not work: one template contains no colon, which
            # would make every question look like a distinct template.
            templates_seen.add(problem["question"].split('"')[0])

        self.assertGreater(len(templates_seen), 1, "Not enough template variation")

    def test_comprehensive_random_evaluation(self):
        """Test random evaluation with various configurations and track statistics."""
        self.rng = random.Random(42)  # Fixed seed for reproducibility
        self.curriculum.rng = self.rng

        # Track statistics
        word_lengths = defaultdict(int)
        word_counts = defaultdict(int)
        corruption_levels = defaultdict(list)
        consecutive_words_count = 0
        total_samples = 1000

        # Source text as one space-padded string, built once. The padding makes
        # the phrase lookup below match whole words only.
        padded_text = " " + " ".join(self.curriculum.words) + " "

        # Generate test cases
        for _ in range(total_samples):
            # Set random attribute levels
            for attr in self.curriculum.attributes:
                max_level = len(self.curriculum.attributes[attr].levels) - 1
                self.curriculum.set_attr_level(attr, self.rng.randint(0, max_level))

            # Generate and evaluate a random problem
            problem = self.exercise.generate(self.curriculum)
            metadata = problem["metadata"]["executed_parts"]
            original_words = _as_words(metadata["original_words"])
            scrambled_words = _as_words(metadata["scrambled_words"])
            preserve_len = _current_level_value(self.curriculum, "preserve_length")

            # Track statistics
            word_counts[len(original_words)] += 1
            for word in original_words:
                word_lengths[len(word)] += 1

            # Calculate corruption levels
            for orig, scrambled in zip(original_words, scrambled_words):
                if len(orig) > preserve_len:
                    corruption_levels[len(orig)].append(_changed_fraction(orig, scrambled))

            # Check if words are consecutive in source text
            if len(original_words) > 1:
                phrase = " " + " ".join(original_words) + " "
                if phrase in padded_text:
                    consecutive_words_count += 1

            # Verify scrambling is valid
            self.assertEqual(len(original_words), len(scrambled_words))
            for orig, scrambled in zip(original_words, scrambled_words):
                # Check lengths match
                self.assertEqual(len(orig), len(scrambled))
                # Check same letters are used
                self.assertEqual(sorted(orig), sorted(scrambled))

        # Print statistics
        print("\nWord length distribution:")
        for length, count in sorted(word_lengths.items()):
            print(f"  Length {length}: {count}")

        print("\nWord count distribution:")
        for count, freq in sorted(word_counts.items()):
            print(f"  {count} words: {freq}")

        print("\nAverage corruption levels by word length:")
        for length, levels in sorted(corruption_levels.items()):
            avg = sum(levels) / len(levels) if levels else 0
            print(f"  Length {length}: {avg:.2f}")

        print(f"\nConsecutive words: {consecutive_words_count}/{total_samples}")

        # Verify statistical properties
        self.assertTrue(any(length >= 8 for length in word_lengths),
            "No long words generated")
        # Variation means more than one distinct word count was observed; the
        # frequency of any single count says nothing about variation.
        self.assertGreater(len(word_counts), 1,
            "Not enough variation in word counts")
        self.assertTrue(consecutive_words_count > 0,
            "No consecutive words generated")
        self.assertTrue(consecutive_words_count < total_samples,
            "Too many consecutive words")


if __name__ == '__main__':
    unittest.main()