Refactor LetterJumble

This commit is contained in:
EduardDurech 2025-02-09 12:36:07 +00:00
parent b8ce5a8a5d
commit 18b6e71fa9
6 changed files with 550 additions and 190 deletions

View file

@ -0,0 +1,105 @@
"""Examples of generated problems from the LetterJumble exercise.
This file demonstrates different types of letter jumble problems that can be generated
at various difficulty levels.
"""
import random
from reasoning_gym.curricula.algorithmic.letter_jumble_curriculum import LetterJumbleCurriculum
from reasoning_gym.exercises.algorithmic.letter_jumble import LetterJumbleExercise
def main():
    """Print example LetterJumble problems at several difficulty levels.

    All randomness is seeded so the printed output is reproducible. The
    original version selected curriculum levels in the "Random Examples"
    section via the unseeded module-level ``random.randint``, which made
    those examples differ between runs despite the per-example seeds.
    """
    # Initialize with fixed seed for reproducibility
    curriculum = LetterJumbleCurriculum()
    exercise = LetterJumbleExercise()
    curriculum.rng = random.Random(42)

    print("\n========================================\n")

    # Level 0: Basic word scrambling
    curriculum.set_attr_level("word_length", 0)        # Short words (up to 7 chars)
    curriculum.set_attr_level("num_words", 0)          # Few words (up to 3)
    curriculum.set_attr_level("corruption_level", 0)   # Light scrambling (0.1)
    curriculum.set_attr_level("consecutive_words", 0)  # Consecutive words
    curriculum.set_attr_level("preserve_length", 0)    # Preserve words up to 4 chars
    problem = exercise.generate(curriculum)
    print("Level 0 (Basic Word Scrambling):")
    print(problem)

    print("\n========================================\n")

    # Level 1: Medium difficulty
    curriculum.set_attr_level("word_length", 1)        # Medium words (up to 12 chars)
    curriculum.set_attr_level("num_words", 1)          # More words (up to 5)
    curriculum.set_attr_level("corruption_level", 1)   # Medium scrambling (0.3)
    curriculum.set_attr_level("consecutive_words", 0)  # Consecutive words
    curriculum.set_attr_level("preserve_length", 0)    # Preserve words up to 4 chars
    problem = exercise.generate(curriculum)
    print("Level 1 (Medium Difficulty):")
    print(problem)

    print("\n========================================\n")

    # Level 2: Advanced scrambling
    curriculum.set_attr_level("word_length", 2)        # Long words (up to 64 chars)
    curriculum.set_attr_level("num_words", 2)          # Many words (up to 20)
    curriculum.set_attr_level("corruption_level", 2)   # Heavy scrambling (0.9)
    curriculum.set_attr_level("consecutive_words", 1)  # Non-consecutive words
    curriculum.set_attr_level("preserve_length", 1)    # Preserve words up to 2 chars
    problem = exercise.generate(curriculum)
    print("Level 2 (Advanced Scrambling):")
    print(problem)

    print("\n========================================\n")

    # Random Examples with Different Seeds
    print("Random Examples (Different Seeds):")
    for seed in range(10, 15):
        curriculum.rng = random.Random(seed)
        # Fix: level selection must also be seeded. The original called the
        # unseeded module-level random.randint here, so these "seeded"
        # examples were not actually reproducible.
        level_rng = random.Random(seed)
        curriculum.set_attr_level("word_length", level_rng.randint(0, 2))
        curriculum.set_attr_level("num_words", level_rng.randint(0, 2))
        curriculum.set_attr_level("corruption_level", level_rng.randint(0, 2))
        curriculum.set_attr_level("consecutive_words", level_rng.randint(0, 1))
        curriculum.set_attr_level("preserve_length", level_rng.randint(0, 1))
        problem = exercise.generate(curriculum)
        print(f"\nRandom Example (Seed {seed}):")
        print(problem)

    print("\n========================================\n")

    # Special Cases
    print("Special Cases:")

    # Case 1: Maximum length single word with minimal preservation
    curriculum.set_attr_level("word_length", 2)        # Long words
    curriculum.set_attr_level("num_words", 0)          # Single/few words
    curriculum.set_attr_level("corruption_level", 2)   # Heavy scrambling
    curriculum.set_attr_level("consecutive_words", 0)  # Consecutive (irrelevant for one word)
    curriculum.set_attr_level("preserve_length", 1)    # Preserve words up to 2 chars
    problem = exercise.generate(curriculum)
    print("\nLong Single Word (Minimal Preservation):")
    print(problem)

    # Case 2: Many short words with maximum preservation
    curriculum.set_attr_level("word_length", 0)        # Short words
    curriculum.set_attr_level("num_words", 2)          # Many words
    curriculum.set_attr_level("corruption_level", 1)   # Medium scrambling
    curriculum.set_attr_level("consecutive_words", 1)  # Non-consecutive
    curriculum.set_attr_level("preserve_length", 0)    # Preserve words up to 4 chars
    problem = exercise.generate(curriculum)
    print("\nMany Short Words (Maximum Preservation):")
    print(problem)

    # Case 3: Medium words with balanced preservation
    curriculum.set_attr_level("word_length", 1)        # Medium words
    curriculum.set_attr_level("num_words", 1)          # Medium number of words
    curriculum.set_attr_level("corruption_level", 0)   # Light scrambling
    curriculum.set_attr_level("consecutive_words", 0)  # Consecutive
    curriculum.set_attr_level("preserve_length", 1)    # Preserve words up to 2 chars
    problem = exercise.generate(curriculum)
    print("\nMedium Words (Balanced Preservation):")
    print(problem)


if __name__ == "__main__":
    main()

View file

@ -1,103 +1,66 @@
"""Word letter jumbling task generator""" """Exercise definition for letter jumble exercises."""
import re from typing import Dict, Any
from dataclasses import dataclass from reasoning_gym.core.template import Template
from random import Random
from typing import List, Optional
from reasoning_gym.data import read_data_file class LetterJumbleExercise:
"""Exercise generator for word jumbling tasks."""
from ..factory import ProceduralDataset, register_dataset def __init__(self):
self.curriculum = None
def generate(self, curriculum: Any) -> Dict[str, Any]:
"""
Generate a word jumbling problem using the curriculum.
@dataclass Returns:
class LetterJumbleConfig: Dict containing:
"""Configuration for letter jumbling task generation""" - question: str (e.g. "Unscramble these words: OLHEL DLWOR")
- answer: str (the original words)
- metadata: dict with details (scrambled_words, original_words, etc.)
"""
self.curriculum = curriculum
template = curriculum.get_template(curriculum.rng)
return template.eval(self, curriculum.rng)
min_word_len: int = 1 # Minimum word length def _parse_expression(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
max_word_len: int = 64 # Maximum word length """Parse the expression from the metadata.
min_words: int = 3 # Minimum words per task
max_words: int = 20 # Maximum words per task
min_corruption_level: float = 0.1 # Minimum fraction of characters to swap
max_corruption_level: float = 0.9 # Maximum fraction of characters to swap
consecutive_words: bool = True # Whether to select consecutive words from text
seed: Optional[int] = None
size: int = 500 # Virtual dataset size
def validate(self) -> None: The metadata structure from the template system:
"""Validate configuration parameters""" {
assert self.min_word_len > 0, "min_word_len must be positive" "scrambled": {
assert self.max_word_len >= self.min_word_len, "max_word_len must be >= min_word_len" "scrambled_words": str, # Space-separated scrambled words
assert self.min_words > 0, "min_words must be positive" "original_words": List[str] # List of original words
assert self.max_words >= self.min_words, "max_words must be >= min_words" }
assert 0 <= self.min_corruption_level <= 1, "min_corruption_level must be in [0,1]"
assert 0 <= self.max_corruption_level <= 1, "max_corruption_level must be in [0,1]"
assert (
self.max_corruption_level >= self.min_corruption_level
), "max_corruption_level must be >= min_corruption_level"
class LetterJumbleDataset(ProceduralDataset):
"""Generates word letter jumbling tasks"""
def __init__(self, config: LetterJumbleConfig):
super().__init__(config=config, seed=config.seed, size=config.size)
# Load and preprocess text
text = read_data_file("in_the_year_2889.txt")
# Extract words and filter by length
self.words = [
word
for word in re.findall(r"\b\w+\b", text)
if self.config.min_word_len <= len(word) <= self.config.max_word_len and word.isalpha()
]
def _scramble_word(self, word: str, corruption_level: float, rng: Random) -> str:
"""Scramble a word by swapping random pairs of characters"""
if len(word) < 2: # Can't scramble 1-character words
return word
word = list(word)
num_swaps = max(1, int(len(word) * corruption_level)) # Ensure at least one swap
for _ in range(num_swaps):
# Pick two different random positions
pos1, pos2 = rng.sample(range(len(word)), 2)
# Swap characters
word[pos1], word[pos2] = word[pos2], word[pos1]
return "".join(word)
def __getitem__(self, idx: int) -> dict:
"""Generate a single word jumbling task"""
rng = Random(self.seed + idx)
# Select number of words and corruption level
num_words = rng.randint(self.config.min_words, self.config.max_words)
corruption_level = rng.uniform(self.config.min_corruption_level, self.config.max_corruption_level)
# Select words based on configuration
if self.config.consecutive_words:
# Select consecutive words from a random starting position
start_idx = rng.randint(0, len(self.words) - num_words)
selected_words = self.words[start_idx : start_idx + num_words]
else:
# Select random words
selected_words = rng.sample(self.words, num_words)
# Scramble each word
scrambled_words = [self._scramble_word(word, corruption_level, rng) for word in selected_words]
return {
"question": f"Unscramble these words: {' '.join(scrambled_words)}",
"answer": " ".join(selected_words),
"metadata": {
"num_words": num_words,
"corruption_level": corruption_level,
"scrambled_words": scrambled_words,
"original_words": selected_words,
},
} }
Args:
metadata: The metadata containing the expression information.
register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig) Returns:
A dictionary containing:
- scrambled_words: List[str] of scrambled words
- original_words: List[str] of original words
"""
# Extract the scrambled and original words from metadata
template_data = metadata["scrambled"]
scrambled_words = template_data["scrambled_words"].split()
original_words = template_data["original_words"]
return {
"scrambled_words": scrambled_words,
"original_words": original_words
}
def _evaluate_expression(self, parsed_data: Dict[str, Any]) -> str:
"""Evaluate the expression using the parsed data.
Args:
parsed_data: Dictionary containing:
- scrambled_words: List[str] of scrambled words
- original_words: List[str] of original words
Returns:
The answer string (space-separated original words).
"""
return " ".join(parsed_data["original_words"])

View file

@ -1,8 +1,10 @@
from .base_conversion_curriculum import BaseConversionCurriculum from .base_conversion_curriculum import BaseConversionCurriculum
from .caesar_cipher_curriculum import CaesarCipherCurriculum from .caesar_cipher_curriculum import CaesarCipherCurriculum
from .letter_counting_curriculum import LetterCountingCurriculum from .letter_counting_curriculum import LetterCountingCurriculum
from .letter_jumble_curriculum import LetterJumbleCurriculum
__all__ = [ __all__ = [
"BaseConversionCurriculum", "BaseConversionCurriculum",
"CaesarCipherCurriculum", "CaesarCipherCurriculum",
"LetterCountingCurriculum" "LetterCountingCurriculum",
"LetterJumbleCurriculum"
] ]

View file

@ -0,0 +1,122 @@
"""
Curriculum definition for letter jumble exercises.
"""
from typing import Dict, Any
from reasoning_gym.core.base_curriculum import BaseCurriculum
from reasoning_gym.core.attributes import AttributeDefinition, AttributeType
from reasoning_gym.core.template import Template
from reasoning_gym.data import read_data_file
class LetterJumbleCurriculum(BaseCurriculum):
    """Curriculum for letter-jumble exercises.

    Builds a word pool from the bundled text and declares the levelled
    difficulty attributes, question templates, and symbolic generators
    that the template system uses to produce scrambled-word problems.
    """

    def __init__(self):
        super().__init__("LetterJumbleCurriculum")
        # NOTE(review): function-local import; conventionally `re` belongs at
        # module level.
        import re
        # Word pool: purely alphabetic tokens extracted from the source text.
        self.words = [word for word in re.findall(r"\b\w+\b", read_data_file("in_the_year_2889.txt")) if word.isalpha()]

    def _init_curriculum(self) -> None:
        """Initialize the letter jumble curriculum configuration."""
        # Attribute types this curriculum is allowed to use.
        self._valid_types = {
            AttributeType.STATIC,  # fixed per-level values (preserve_length)
            AttributeType.UBOUND,  # upper-bounded ranges (word length, num words)
            AttributeType.APPEND   # accumulating option sets (consecutive_words)
        }
        # Difficulty attributes; each level index selects one entry of `levels`.
        self._attributes = {
            "word_length": AttributeDefinition(
                levels=[7, 12, 64],  # maximum word length per difficulty level
                default_level=0,
                description="Maximum word length",
                attr_type=AttributeType.UBOUND,
                min_value=1  # NOTE(review): earlier comment claimed "at least 2
                             # chars for scrambling" but the floor here is 1 —
                             # confirm the intended minimum
            ),
            "preserve_length": AttributeDefinition(
                levels=[4, 2],  # words of this length or shorter stay unscrambled
                default_level=0,
                description="Word length to preserve",
                attr_type=AttributeType.STATIC
            ),
            "num_words": AttributeDefinition(
                levels=[3, 5, 20],  # maximum words per problem, per level
                default_level=0,
                description="Number of words to scramble",
                attr_type=AttributeType.UBOUND,
                min_value=1  # at least one word per problem
            ),
            "corruption_level": AttributeDefinition(
                levels=[0.1, 0.3, 0.9],  # fraction of characters to swap
                default_level=0,
                description="Fraction of characters to swap",
                attr_type=AttributeType.UBOUND,
                min_value=0.1
            ),
            "consecutive_words": AttributeDefinition(
                levels=[True, False],  # True: consecutive run; False: random sample
                default_level=0,
                description="Whether to select consecutive words",
                attr_type=AttributeType.APPEND
            )
        }
        # Question templates; each exposes a {scrambled} placeholder filled by
        # the "word_list" symbolic template below.
        self._templates = [
            Template(
                template="Unscramble these words: \"{scrambled}\"",
                parts={"scrambled": "word_list"}
            ),
            Template(
                template="What are the original words? \"{scrambled}\"",
                parts={"scrambled": "word_list"}
            ),
            Template(
                template="Rearrange the letters to find the original words: \"{scrambled}\"",
                parts={"scrambled": "word_list"}
            )
        ]
        # Symbolic structure consumed by the template system.
        self._symbolic = {
            # Shared variables that need to be consistent across templates
            "shared_vars": {
                # Pick the original words: either a random sample or a
                # consecutive run from the pool, per "consecutive_words".
                # The trailing [-1] returns the last element of the tuple,
                # after the walrus bindings have been evaluated.
                "selected_words": lambda refs: (
                    n_words := refs["num_words"](),
                    pool := self.words,
                    refs["dataset_rng"].sample(pool, n_words) if not refs["consecutive_words"]() else
                    (
                        start := refs["dataset_rng"].randint(0, max(0, len(pool)-n_words)),
                        pool[start:start + n_words]
                    )[-1]
                )[-1]
            },
            # Value generators for dynamic content
            "generators": {
                # Scramble one word (given as a char list) by performing
                # int(len * corruption_level) random pair swaps in place. The
                # comprehension runs purely for its __setitem__ side effects;
                # `temp` captures lst[i] before the first assignment so the
                # swap is correct.
                # NOTE(review): max(0, ...) allows zero swaps for short words,
                # unlike the old dataset generator's max(1, ...) — confirm
                # that unscrambled output is acceptable here.
                "scramble_word": lambda refs: lambda lst: (
                    [
                        (i, j, lst.__setitem__(i, lst[j]), lst.__setitem__(j, temp))
                        for _ in range(max(0, int(len(lst) * refs["corruption_level"]())))
                        for i, j in [refs["dataset_rng"].sample(range(len(lst)), 2)]
                        for temp in [lst[i]]  # capture lst[i] before the swap
                    ],
                    "".join(lst)
                )[-1],
                # Scramble every selected word; words no longer than
                # preserve_length pass through unchanged.
                "scramble_all": lambda refs: lambda: [
                    refs["scramble_word"](refs)(list(word)) if len(word) > refs["preserve_length"]() else word
                    for word in refs["selected_words"](refs)
                ]
            },
            # Template composition
            "templates": {
                "word_list": lambda refs: {
                    "template": "{scrambled_words}",
                    "parts": {
                        # refs=refs default-arg binding freezes `refs` at
                        # definition time (avoids late-binding closure bugs).
                        "scrambled_words": lambda refs=refs: " ".join(refs["scramble_all"](refs)()),
                        "original_words": lambda refs=refs: refs["selected_words"](refs)
                    }
                }
            }
        }

View file

@ -9,7 +9,7 @@ Algorithmic tasks for training reasoning capabilities:
from .base_conversion import BaseConversionExercise from .base_conversion import BaseConversionExercise
from .caesar_cipher import CaesarCipherExercise from .caesar_cipher import CaesarCipherExercise
from .letter_counting import LetterCountingExercise from .letter_counting import LetterCountingExercise
# from .letter_jumble import LetterJumbleExercise from .letter_jumble import LetterJumbleExercise
# from .number_filtering import NumberFilteringExercise # from .number_filtering import NumberFilteringExercise
# from .number_sorting import NumberSortingExercise # from .number_sorting import NumberSortingExercise
# from .sentence_reordering import SentenceReorderingExercise # from .sentence_reordering import SentenceReorderingExercise
@ -23,7 +23,7 @@ __all__ = [
"BaseConversionExercise", "BaseConversionExercise",
"CaesarCipherExercise", "CaesarCipherExercise",
"LetterCountingExercise", "LetterCountingExercise",
# "LetterJumbleDataset", "LetterJumbleExercise",
# "NumberFilteringDataset", # "NumberFilteringDataset",
# "NumberSortingDataset", # "NumberSortingDataset",
# "SentenceReorderingDataset", # "SentenceReorderingDataset",

View file

@ -1,121 +1,289 @@
"""Tests for letter jumbling task generation""" """Unit tests for the letter jumble exercise."""
from random import Random from reasoning_gym.curricula.algorithmic.letter_jumble_curriculum import LetterJumbleCurriculum
from reasoning_gym.exercises.algorithmic.letter_jumble import LetterJumbleExercise
import unittest
import random
from collections import defaultdict
import pytest class TestLetterJumbleParsing(unittest.TestCase):
"""Test parsing of letter jumble metadata"""
from reasoning_gym.algorithmic.letter_jumble import LetterJumbleConfig, LetterJumbleDataset def setUp(self):
self.exercise = LetterJumbleExercise()
def test_parse_expression_basic(self):
"""Test parsing of basic letter jumble metadata"""
test_metadata = {
"scrambled": {
"scrambled_words": "EHLLO DLWOR",
"original_words": ["HELLO", "WORLD"]
}
}
parsed = self.exercise._parse_expression(test_metadata)
self.assertEqual(parsed["scrambled_words"], ["EHLLO", "DLWOR"])
self.assertEqual(parsed["original_words"], ["HELLO", "WORLD"])
def test_letter_jumble_config_validation(): def test_parse_with_spaces(self):
"""Test that invalid configs raise appropriate errors""" """Test parsing with spaces and punctuation"""
with pytest.raises(AssertionError): test_metadata = {
config = LetterJumbleConfig(min_word_len=0) "scrambled": {
config.validate() "scrambled_words": "EHLLO DLWOR!",
"original_words": ["HELLO", "WORLD!"]
}
}
parsed = self.exercise._parse_expression(test_metadata)
self.assertEqual(parsed["scrambled_words"], ["EHLLO", "DLWOR!"])
self.assertEqual(parsed["original_words"], ["HELLO", "WORLD!"])
with pytest.raises(AssertionError): def test_parse_mixed_case(self):
config = LetterJumbleConfig(min_words=10, max_words=5) """Test parsing with mixed case text"""
config.validate() test_metadata = {
"scrambled": {
"scrambled_words": "HeLlO WoRlD",
"original_words": ["hElLo", "wOrLd"]
}
}
parsed = self.exercise._parse_expression(test_metadata)
self.assertEqual(parsed["scrambled_words"], ["HeLlO", "WoRlD"])
self.assertEqual(parsed["original_words"], ["hElLo", "wOrLd"])
with pytest.raises(AssertionError): class TestLetterJumbleEvaluation(unittest.TestCase):
config = LetterJumbleConfig(min_corruption_level=-0.1) """Test evaluation of letter jumble problems"""
config.validate()
with pytest.raises(AssertionError): def setUp(self):
config = LetterJumbleConfig(max_corruption_level=1.1) self.exercise = LetterJumbleExercise()
config.validate()
def test_basic_unscrambling(self):
"""Test basic unscrambling cases"""
test_cases = [
(["EHLLO"], "HELLO"), # Single word
(["EHLLO", "DLWOR"], "HELLO WORLD"), # Two words
(["AAAA"], "AAAA"), # Same letters
(["ZBAC"], "ABCZ"), # Sorted order
(["HELLO"], "HELLO") # Already unscrambled
]
for scrambled, expected in test_cases:
parsed = {
"scrambled_words": scrambled,
"original_words": expected.split()
}
result = self.exercise._evaluate_expression(parsed)
self.assertEqual(result, expected)
def test_letter_jumble_deterministic(): def test_mixed_case_unscrambling(self):
"""Test that dataset generates same items with same seed""" """Test unscrambling with mixed case"""
config = LetterJumbleConfig(seed=42, size=10) test_cases = [
dataset1 = LetterJumbleDataset(config) (["HeLlO"], "hElLo"), # Mixed case, single word
dataset2 = LetterJumbleDataset(config) (["WoRlD", "HeLlO"], "wOrLd hElLo"), # Mixed case, multiple words
(["AbCdE"], "aBcDe") # Mixed case, alternating
]
for scrambled, expected in test_cases:
parsed = {
"scrambled_words": scrambled,
"original_words": expected.split()
}
result = self.exercise._evaluate_expression(parsed)
self.assertEqual(result, expected)
for i in range(len(dataset1)): def test_with_spaces_and_punctuation(self):
assert dataset1[i] == dataset2[i] """Test unscrambling with spaces and punctuation"""
test_cases = [
(["EHLLO!", "DLWOR?"], "HELLO! WORLD?"),
(["EHLLO.", "DLWOR."], "HELLO. WORLD."),
(["EHLLO,", "DLWOR,"], "HELLO, WORLD,")
]
for scrambled, expected in test_cases:
parsed = {
"scrambled_words": scrambled,
"original_words": expected.split()
}
result = self.exercise._evaluate_expression(parsed)
self.assertEqual(result, expected)
class TestLetterJumbleGeneration(unittest.TestCase):
"""Test problem generation"""
def test_letter_jumble_scrambling(): def setUp(self):
"""Test the word scrambling logic""" self.curriculum = LetterJumbleCurriculum()
config = LetterJumbleConfig( self.exercise = LetterJumbleExercise()
min_word_len=4, self.rng = random.Random(42)
max_word_len=8, self.curriculum.rng = self.rng
min_words=1,
max_words=1,
min_corruption_level=0.5,
max_corruption_level=0.5,
size=1,
seed=42,
)
dataset = LetterJumbleDataset(config)
# Test with known word def test_problem_structure(self):
word = "testing" """Test that generated problems have the correct structure"""
rng = Random(42) problem = self.exercise.generate(self.curriculum)
scrambled = dataset._scramble_word(word, 0.5, rng)
# Verify scrambled word: # Check basic structure
# - Has same length as original self.assertIn("question", problem)
assert len(scrambled) == len(word) self.assertIn("answer", problem)
# - Contains same characters self.assertIn("metadata", problem)
assert sorted(scrambled) == sorted(word)
# - Is different from original (with high probability given 0.5 corruption)
assert scrambled != word
# Check metadata structure
metadata = problem["metadata"]
self.assertEqual(metadata["type"], "direct")
self.assertIn("executed_parts", metadata)
executed_parts = metadata["executed_parts"]
self.assertIn("scrambled_words", executed_parts)
self.assertIn("original_words", executed_parts)
def test_letter_jumble_dataset_items(): def test_word_length_ranges(self):
"""Test basic properties of generated items""" """Test that word lengths are within expected ranges"""
config = LetterJumbleConfig( # Test all word length levels
min_word_len=4, level_max_lengths = {0: 5, 1: 8, 2: 64}
max_word_len=8,
min_words=3,
max_words=5,
min_corruption_level=0.1,
max_corruption_level=0.3,
size=50,
seed=42,
)
dataset = LetterJumbleDataset(config)
for i in range(len(dataset)): for level, max_length in level_max_lengths.items():
item = dataset[i] self.curriculum.set_attr_level("word_length", level)
problem = self.exercise.generate(self.curriculum)
words = problem["metadata"]["executed_parts"]["original_words"]
for word in words:
self.assertLessEqual(len(word), max_length)
self.assertGreaterEqual(len(word), 2) # Min length is 2
# Check item structure def test_word_count_ranges(self):
assert isinstance(item, dict) """Test that word counts are within expected ranges"""
assert "question" in item # Test all word count levels
assert "answer" in item level_word_counts = {0: 3, 1: 5, 2: 20}
assert "metadata" in item
# Check metadata for level, max_words in level_word_counts.items():
metadata = item["metadata"] self.curriculum.set_attr_level("num_words", level)
assert "num_words" in metadata problem = self.exercise.generate(self.curriculum)
assert "corruption_level" in metadata words = problem["metadata"]["executed_parts"]["original_words"]
assert "scrambled_words" in metadata self.assertLessEqual(len(words), max_words)
assert "original_words" in metadata self.assertGreaterEqual(len(words), 1) # Min words is 1
# Verify word counts class TestLetterJumbleComprehensive(unittest.TestCase):
num_words = metadata["num_words"] """Comprehensive tests for letter jumble"""
assert config.min_words <= num_words <= config.max_words
assert len(metadata["scrambled_words"]) == num_words
assert len(metadata["original_words"]) == num_words
# Verify corruption level def setUp(self):
assert config.min_corruption_level <= metadata["corruption_level"] <= config.max_corruption_level self.curriculum = LetterJumbleCurriculum()
self.exercise = LetterJumbleExercise()
self.rng = random.Random(42)
self.curriculum.rng = self.rng
# Verify word properties def test_corruption_levels(self):
for word in metadata["original_words"]: """Test different corruption levels"""
assert config.min_word_len <= len(word) <= config.max_word_len corruption_levels = [0.1, 0.3, 0.9]
assert word.isalpha() num_samples = 100 # Test with multiple samples
# Test each level
for level, expected_corruption in enumerate(corruption_levels):
self.curriculum.set_attr_level("corruption_level", level)
differences = []
def test_letter_jumble_iteration(): # Generate multiple problems to measure average corruption
"""Test that iteration respects dataset size""" for _ in range(num_samples):
config = LetterJumbleConfig(size=5, seed=42) problem = self.exercise.generate(self.curriculum)
dataset = LetterJumbleDataset(config) metadata = problem["metadata"]["executed_parts"]
# Calculate character differences
preserve_len = self.curriculum.attributes["preserve_length"].levels[self.curriculum.get_attr_level("preserve_length")]
for orig, scrambled in zip(metadata["original_words"], metadata["scrambled_words"]):
if len(orig) > preserve_len:
diff_count = sum(1 for a, b in zip(orig, scrambled) if a != b)
differences.append(diff_count / len(orig))
items = list(dataset) # Check average corruption level is reasonable
assert len(items) == config.size # It's okay if actual corruption is lower than target due to:
# 1. Some swaps might cancel out previous swaps
# 2. The same characters might be swapped multiple times
# 3. The preserve_length attribute prevents some characters from being swapped
# 4. For short words, even a few swaps can make them readable
if differences:
avg_corruption = sum(differences) / len(differences)
# Only check that we don't exceed target by too much
self.assertLess(avg_corruption, expected_corruption + 0.1,
f"Corruption level {avg_corruption:.2f} too high (target: {expected_corruption:.2f})")
# And ensure we have some corruption
self.assertGreater(avg_corruption, 0.02,
f"Corruption level {avg_corruption:.2f} too low (should be above 0.02)")
# Test multiple iterations yield same items def test_template_variation(self):
assert items == list(dataset) """Test that different templates are used"""
templates_seen = set()
num_samples = 100
for _ in range(num_samples):
problem = self.exercise.generate(self.curriculum)
templates_seen.add(problem["question"].split(":")[0])
self.assertGreater(len(templates_seen), 1, "Not enough template variation")
def test_comprehensive_random_evaluation(self):
"""Test random evaluation with various configurations and track statistics."""
self.rng = random.Random(42) # Fixed seed for reproducibility
self.curriculum.rng = self.rng
# Track statistics
word_lengths = defaultdict(int)
word_counts = defaultdict(int)
corruption_levels = defaultdict(list)
consecutive_words_count = 0
total_samples = 1000
# Generate test cases
for _ in range(total_samples):
# Set random attribute levels
for attr in self.curriculum.attributes:
max_level = len(self.curriculum.attributes[attr].levels) - 1
self.curriculum.set_attr_level(attr, self.rng.randint(0, max_level))
# Generate and evaluate a random problem
problem = self.exercise.generate(self.curriculum)
metadata = problem["metadata"]["executed_parts"]
original_words = metadata["original_words"]
scrambled_words = metadata["scrambled_words"]
# Track statistics
word_counts[len(original_words)] += 1
for word in original_words:
word_lengths[len(word)] += 1
# Calculate corruption levels
for orig, scrambled in zip(original_words, scrambled_words):
preserve_len = self.curriculum.attributes["preserve_length"].levels[self.curriculum.get_attr_level("preserve_length")]
if len(orig) > preserve_len:
diff_count = sum(1 for a, b in zip(orig, scrambled) if a != b)
corruption_levels[len(orig)].append(diff_count / len(orig))
# Check if words are consecutive in source text
if len(original_words) > 1:
text = " ".join(self.curriculum.words)
phrase = " ".join(original_words)
if phrase in text:
consecutive_words_count += 1
# Verify scrambling is valid
for orig, scrambled in zip(original_words, scrambled_words):
# Check lengths match
self.assertEqual(len(orig), len(scrambled))
# Check same letters are used
self.assertEqual(sorted(orig), sorted(scrambled))
# Print statistics
print("\nWord length distribution:")
for length, count in sorted(word_lengths.items()):
print(f" Length {length}: {count}")
print("\nWord count distribution:")
for count, freq in sorted(word_counts.items()):
print(f" {count} words: {freq}")
print("\nAverage corruption levels by word length:")
for length, levels in sorted(corruption_levels.items()):
avg = sum(levels) / len(levels) if levels else 0
print(f" Length {length}: {avg:.2f}")
print(f"\nConsecutive words: {consecutive_words_count}/{total_samples}")
# Verify statistical properties
self.assertTrue(any(length >= 8 for length in word_lengths),
"No long words generated")
self.assertTrue(any(count >= 3 for count in word_counts.values()),
"Not enough variation in word counts")
self.assertTrue(consecutive_words_count > 0,
"No consecutive words generated")
self.assertTrue(consecutive_words_count < total_samples,
"Too many consecutive words")
if __name__ == '__main__':
unittest.main()