# examples/exercises/algorithmic/caesar_cipher_examples.py
"""Examples of generated problems from the CaesarCipher exercise.

This script demonstrates different types of Caesar cipher problems that can be
generated at various difficulty levels.
"""

import random

_SEPARATOR = "\n========================================\n"


def _generate(exercise, curriculum, *, num_words, rotation, text_case, reset=False):
    """Configure the curriculum levels and return one generated problem.

    Args:
        exercise: Object exposing ``generate(curriculum)``.
        curriculum: Object exposing ``set_attr_level(name, level)`` and, when
            ``reset`` is true, an iterable ``attributes`` of attribute names.
        num_words: Level index for the ``"num_words"`` attribute.
        rotation: Level index for the ``"rotation"`` attribute.
        text_case: Level index for the ``"text_case"`` attribute.
        reset: When true, first put every curriculum attribute back to level 0.

    Returns:
        Whatever ``exercise.generate(curriculum)`` returns.
    """
    if reset:
        for attr in curriculum.attributes:
            curriculum.set_attr_level(attr, 0)
    curriculum.set_attr_level("num_words", num_words)
    curriculum.set_attr_level("rotation", rotation)
    curriculum.set_attr_level("text_case", text_case)
    return exercise.generate(curriculum)


def main(curriculum=None, exercise=None):
    """Print sample Caesar cipher problems at several difficulty settings.

    Args:
        curriculum: Optional curriculum to drive. Defaults to a fresh
            ``CaesarCipherCurriculum``.
        exercise: Optional exercise used to generate problems. Defaults to a
            fresh ``CaesarCipherExercise``.

    Level indices refer to the curriculum's level tables (num_words 5/10/20,
    rotation 1/3/10/15/25, text_case UPPER/lower/Mixed).
    NOTE(review): the tables appear to be upper bounds / accumulating option
    sets rather than exact values -- confirm against the curriculum framework.
    """
    # Imported lazily so callers that inject their own objects do not need the
    # project packages to be importable.
    if curriculum is None:
        from reasoning_gym.curricula.algorithmic.caesar_cipher_curriculum import CaesarCipherCurriculum

        curriculum = CaesarCipherCurriculum()
    if exercise is None:
        from reasoning_gym.exercises.algorithmic.caesar_cipher import CaesarCipherExercise

        exercise = CaesarCipherExercise()

    # Fixed seed so the tiered examples are reproducible.
    curriculum.rng = random.Random(42)

    tiers = [
        # Shortest text tier, rotation level 0 (value 1), UPPER case only.
        ("Level 0 (Basic Decryption):", dict(num_words=0, rotation=0, text_case=0)),
        # Up to 10 words, rotation level 2 (up to 10), UPPER or lower case.
        ("Level 1 (Medium Length Text):", dict(num_words=1, rotation=2, text_case=1)),
        # Up to 20 words, rotation level 4 (up to 25), all case styles enabled.
        ("Level 2 (Complex Text):", dict(num_words=2, rotation=4, text_case=2)),
    ]
    for title, levels in tiers:
        print(_SEPARATOR)
        problem = _generate(exercise, curriculum, **levels)
        print(title)
        print(problem)

    print(_SEPARATOR)

    print("Random Examples (Different Seeds):")
    for seed in range(10, 15):
        curriculum.rng = random.Random(seed)
        # Draw the levels from the seeded generator (not the global ``random``
        # module) so that each "Seed N" example is actually reproducible.
        level_rng = curriculum.rng
        problem = _generate(
            exercise,
            curriculum,
            num_words=level_rng.randint(0, 2),
            rotation=level_rng.randint(0, 4),
            text_case=level_rng.randint(0, 2),
        )
        print(f"\nRandom Example (Seed {seed}):")
        print(problem)

    print(_SEPARATOR)

    print("Special Cases:")
    special_cases = [
        # Longest text tier with the smallest rotation, UPPER case.
        ("\nLong Text with Small Rotation:", dict(num_words=2, rotation=0, text_case=0)),
        # Shortest text tier with the largest rotation tier, all case styles.
        ("\nShort Text with Large Rotation:", dict(num_words=0, rotation=4, text_case=2)),
        # Middle text tier and middle rotation tier, all case styles.
        ("\nMedium Text with Mixed Case:", dict(num_words=1, rotation=2, text_case=2)),
    ]
    for title, levels in special_cases:
        problem = _generate(exercise, curriculum, reset=True, **levels)
        print(title)
        print(problem)


if __name__ == "__main__":
    main()
# reasoning_gym/exercises/algorithmic/caesar_cipher.py
"""Caesar cipher exercise that generates encryption/decryption tasks."""

from string import ascii_lowercase, ascii_uppercase
from typing import Any, Dict


class CaesarCipherExercise:
    """Exercise generator for Caesar cipher encryption/decryption tasks."""

    def __init__(self):
        # Last curriculum passed to generate(); None until the first call.
        self.curriculum = None

    def generate(self, curriculum: Any) -> Dict[str, Any]:
        """Generate a Caesar cipher problem using the curriculum.

        Picks a template with ``curriculum.get_template(curriculum.rng)`` and
        evaluates it with ``template.eval(self, curriculum.rng)``.

        Args:
            curriculum: Object exposing ``rng`` and ``get_template(rng)``.

        Returns:
            The value produced by ``template.eval``. The accompanying unit
            tests expect a dict with ``"question"``, ``"answer"`` and
            ``"metadata"``, where ``metadata["executed_parts"]`` holds
            ``cipher_text``, ``clear_text`` and ``rotation``.
        """
        self.curriculum = curriculum
        template = curriculum.get_template(curriculum.rng)
        return template.eval(self, curriculum.rng)

    def _parse_expression(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten the template metadata into the fields used for evaluation.

        Args:
            metadata: Mapping shaped like::

                {"cipher_text": {"encrypted_text": str,
                                 "clear_text": str,
                                 "rotation": int}}

        Returns:
            Dict with keys ``cipher_text`` (the encrypted text), ``clear_text``
            (the original text) and ``rotation``.

        Raises:
            KeyError: If any of the fields above is missing.
        """
        entry = metadata["cipher_text"]
        return {
            "cipher_text": entry["encrypted_text"],
            "clear_text": entry["clear_text"],
            "rotation": entry["rotation"],
        }

    def _evaluate_expression(self, parsed: Dict[str, Any]) -> str:
        """Return the clear text for a decryption problem.

        Args:
            parsed: Dict with ``cipher_text`` (str), ``rotation`` (int) and,
                usually, ``clear_text`` (str).

        Returns:
            ``parsed["clear_text"]`` when it is provided (the curriculum
            already knows the original text). Otherwise the clear text is
            recovered by shifting ``cipher_text`` back by ``rotation``.

        Raises:
            KeyError: If ``clear_text`` is absent and ``cipher_text`` or
                ``rotation`` is missing as well.
        """
        clear_text = parsed.get("clear_text")
        if clear_text is not None:
            return clear_text
        return self._shift_text(parsed["cipher_text"], -parsed["rotation"])

    @staticmethod
    def _shift_text(text: str, rotation: int) -> str:
        """Shift ASCII letters in *text* by *rotation*, preserving case.

        Non-letters are returned unchanged; negative rotations shift backwards.
        """
        shift = rotation % 26
        table = str.maketrans(
            ascii_uppercase + ascii_lowercase,
            ascii_uppercase[shift:] + ascii_uppercase[:shift]
            + ascii_lowercase[shift:] + ascii_lowercase[:shift],
        )
        return text.translate(table)
# reasoning_gym/curricula/algorithmic/caesar_cipher_curriculum.py
"""Curriculum definition for Caesar cipher exercises."""

import re
from string import ascii_lowercase, ascii_uppercase
from typing import Dict, Any, List

from reasoning_gym.core.base_curriculum import BaseCurriculum
from reasoning_gym.core.attributes import AttributeDefinition, AttributeType
from reasoning_gym.core.template import Template
from reasoning_gym.data import read_data_file

# Runs of ASCII letters. (The range "A-z" would also match the punctuation
# characters that sit between "Z" and "a" in ASCII: [ \ ] ^ _ `.)
_WORD_PATTERN = re.compile(r"[A-Za-z]+")


def _apply_case(case: str, text: str, rng: Any) -> str:
    """Return *text* in the requested case style.

    "UPPER" and "lower" convert the whole text; any other value (i.e. "Mixed")
    picks upper or lower case per character with probability 0.5 each, drawing
    one ``rng.random()`` per character.
    """
    if case == "UPPER":
        return text.upper()
    if case == "lower":
        return text.lower()
    return "".join(c.lower() if rng.random() < 0.5 else c.upper() for c in text)


def _caesar_shift(rotation: int, text: str) -> str:
    """Shift ASCII letters of *text* forward by *rotation*, preserving case.

    Characters that are not ASCII letters (e.g. spaces) are left unchanged.
    """
    shift = rotation % 26
    table = str.maketrans(
        ascii_uppercase + ascii_lowercase,
        ascii_uppercase[shift:] + ascii_uppercase[:shift]
        + ascii_lowercase[shift:] + ascii_lowercase[:shift],
    )
    return text.translate(table)


def _pick_words(words: List[str], count: int, rng: Any) -> str:
    """Return *count* consecutive words from *words*, joined by spaces.

    The start index is limited so the window never runs off the end of the
    corpus; fewer words are returned only if the corpus itself is shorter
    than *count*.
    """
    last_start = max(0, len(words) - count)
    start = rng.randint(0, last_start)
    return " ".join(words[start:start + count])


class CaesarCipherCurriculum(BaseCurriculum):
    """Curriculum describing attributes, templates and generators for Caesar cipher problems."""

    def __init__(self):
        super().__init__("CaesarCipherCurriculum")
        # Word list the clear texts are sampled from. It is only read lazily
        # by the "read_text" generator, i.e. after construction has finished.
        self.text_data = _WORD_PATTERN.findall(read_data_file("in_the_year_2889.txt"))

    def _init_curriculum(self) -> None:
        """Initialize the Caesar cipher curriculum configuration."""
        # Attribute types this curriculum declares.
        # NOTE(review): exact semantics of each AttributeType live in
        # reasoning_gym.core.attributes -- confirm there.
        self._valid_types = {
            AttributeType.STATIC,  # For fixed values
            AttributeType.UBOUND,  # For ranges like words, rotation
            AttributeType.APPEND,  # For accumulating options
        }

        # Define attributes
        self._attributes = {
            "num_words": AttributeDefinition(
                levels=[5, 10, 20],
                default_level=0,
                description="Number of words in the sentence",
                attr_type=AttributeType.UBOUND,
                min_value=3,  # Ensure at least 3 words
            ),
            "rotation": AttributeDefinition(
                levels=[1, 3, 10, 15, 25],
                default_level=0,
                description="Caesar cipher rotation value",
                attr_type=AttributeType.UBOUND,
                min_value=1,  # Ensure at least rotation of 1
            ),
            "text_case": AttributeDefinition(
                levels=["UPPER", "lower", "Mixed"],
                default_level=0,
                description="Text case style",
                attr_type=AttributeType.APPEND,
            ),
        }

        # Question templates; placeholders are resolved through the symbolic
        # "templates" entries named in ``parts``.
        self._templates = [
            Template(
                template="Decrypt this Caesar cipher text: {cipher_text}",
                parts={"cipher_text": "cipher_text"},
            ),
            Template(
                template="What is the original text for this Caesar cipher: {cipher_text}",
                parts={"cipher_text": "cipher_text"},
            ),
            Template(
                template="This text was encrypted using a Caesar cipher with rotation {rotation}:\n{cipher_text}\nWhat was the original text?",
                parts={"cipher_text": "cipher_text", "rotation": "rotation_value"},
            ),
        ]

        # Define symbolic structure
        self._symbolic = {
            # Composition templates
            "templates": {
                "cipher_text": lambda refs: {
                    "template": "{encrypted_text}",
                    "parts": {
                        "encrypted_text": lambda refs=refs: refs["encrypt"](refs),
                        "clear_text": lambda refs=refs: refs["clear_text"](refs),
                        "rotation": lambda refs=refs: refs["rot"](refs),
                    },
                },
                # Rotation value template
                "rotation_value": lambda refs: {
                    "template": "{value}",
                    "parts": {
                        "value": lambda refs=refs: refs["rot"](refs),
                    },
                },
            },
            # Shared variables that need to be consistent within one problem
            "shared_vars": {
                # Keyword arguments are evaluated left to right, so the case
                # style is resolved before the text is read.
                "clear_text": lambda refs: _apply_case(
                    case=refs["txt_case"](refs),
                    text=refs["read_text"](refs),
                    rng=refs["dataset_rng"],
                ),
                "txt_case": lambda refs: refs["text_case"](),
                "rot": lambda refs: refs["rotation"](),
            },
            # Value generators
            "generators": {
                # The cipher text keeps the capitalization of the clear text,
                # letter for letter.
                "encrypt": lambda refs: _caesar_shift(
                    rotation=refs["rot"](refs),
                    text=refs["clear_text"](refs),
                ),
                "read_text": lambda refs: _pick_words(
                    self.text_data,
                    refs["num_words"](),
                    refs["dataset_rng"],
                ),
            },
        }
# tests/test_caesar_cipher.py
"""Unit tests for the Caesar cipher exercise."""

import random
import unittest
from collections import defaultdict

from reasoning_gym.curricula.algorithmic.caesar_cipher_curriculum import CaesarCipherCurriculum
from reasoning_gym.exercises.algorithmic.caesar_cipher import CaesarCipherExercise

CASE_STYLES = ["UPPER", "lower", "Mixed"]


def _case_style(text):
    """Classify *text* as "UPPER", "lower" or "Mixed" via str.isupper/islower."""
    if text.isupper():
        return "UPPER"
    if text.islower():
        return "lower"
    return "Mixed"


class TestCaesarCipherParsing(unittest.TestCase):
    """Test parsing of Caesar cipher metadata"""

    def setUp(self):
        self.exercise = CaesarCipherExercise()

    def _assert_parsed(self, encrypted, clear, rotation):
        """Parse metadata built from the arguments and check every field."""
        test_metadata = {
            "cipher_text": {
                "encrypted_text": encrypted,
                "clear_text": clear,
                "rotation": rotation,
            }
        }
        parsed = self.exercise._parse_expression(test_metadata)
        self.assertEqual(parsed["cipher_text"], encrypted)
        self.assertEqual(parsed["clear_text"], clear)
        self.assertEqual(parsed["rotation"], rotation)

    def test_parse_expression_basic(self):
        """Test parsing of basic Caesar cipher metadata"""
        self._assert_parsed("KHOOR", "HELLO", 3)

    def test_parse_with_spaces(self):
        """Test parsing with spaces and punctuation"""
        self._assert_parsed("KHOOR ZRUOG!", "HELLO WORLD!", 3)

    def test_parse_mixed_case(self):
        """Test parsing with mixed case text"""
        self._assert_parsed("KhOoR", "HeLlO", 3)


class TestCaesarCipherEvaluation(unittest.TestCase):
    """Test evaluation of Caesar cipher problems"""

    def setUp(self):
        self.exercise = CaesarCipherExercise()

    def _assert_cases(self, test_cases):
        """Evaluate each (cipher_text, clear_text, rotation) case as its own subtest."""
        for cipher_text, clear_text, rotation in test_cases:
            with self.subTest(cipher_text=cipher_text, rotation=rotation):
                parsed = {
                    "cipher_text": cipher_text,
                    "clear_text": clear_text,
                    "rotation": rotation,
                }
                result = self.exercise._evaluate_expression(parsed)
                self.assertEqual(result, clear_text)

    def test_basic_decryption(self):
        """Test basic decryption cases"""
        self._assert_cases([
            ("KHOOR", "HELLO", 3),  # Basic uppercase
            ("khoor", "hello", 3),  # Basic lowercase
            ("WORLD", "WORLD", 0),  # No rotation
            ("ABCDE", "ZABCD", 1),  # Wrap around
            ("hello", "hello", 26),  # Full rotation
        ])

    def test_mixed_case_decryption(self):
        """Test decryption with mixed case"""
        self._assert_cases([
            ("HeLlO", "HeLlO", 26),  # Mixed case, full rotation
            ("WoRlD", "WoRlD", 0),  # Mixed case, no rotation
            ("AbCdE", "ZaBcD", 1),  # Mixed case, wrap around
        ])

    def test_with_spaces_and_punctuation(self):
        """Test decryption with spaces and punctuation"""
        self._assert_cases([
            ("KHOOR ZRUOG!", "HELLO WORLD!", 3),
            ("Pb Pbvwhub!", "My Mystery!", 3),
            ("ABCDE. FGHIJ?", "ZABCD. EFGHI?", 1),
        ])


class TestCaesarCipherGeneration(unittest.TestCase):
    """Test problem generation"""

    def setUp(self):
        self.curriculum = CaesarCipherCurriculum()
        self.exercise = CaesarCipherExercise()
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    def test_problem_structure(self):
        """Test that generated problems have the correct structure"""
        problem = self.exercise.generate(self.curriculum)

        # Check basic structure
        for key in ("question", "answer", "metadata"):
            self.assertIn(key, problem)

        # Check metadata structure
        metadata = problem["metadata"]
        self.assertEqual(metadata["type"], "direct")
        self.assertIn("executed_parts", metadata)
        executed_parts = metadata["executed_parts"]
        for key in ("cipher_text", "clear_text", "rotation"):
            self.assertIn(key, executed_parts)

    def test_rotation_ranges(self):
        """Test that rotation values are within expected ranges"""
        level_max_rotations = {0: 1, 1: 3, 2: 10, 3: 15, 4: 25}

        for level, max_rotation in level_max_rotations.items():
            with self.subTest(level=level):
                self.curriculum.set_attr_level("rotation", level)
                problem = self.exercise.generate(self.curriculum)
                rotation = problem["metadata"]["executed_parts"]["rotation"]
                self.assertLessEqual(rotation, max_rotation)
                self.assertGreaterEqual(rotation, 1)  # Min rotation is 1

    def test_word_count_ranges(self):
        """Test that word counts are within expected ranges"""
        level_word_counts = {0: 5, 1: 10, 2: 20}

        for level, max_words in level_word_counts.items():
            with self.subTest(level=level):
                self.curriculum.set_attr_level("num_words", level)
                problem = self.exercise.generate(self.curriculum)
                clear_text = problem["metadata"]["executed_parts"]["clear_text"]
                word_count = len(clear_text.split())
                self.assertLessEqual(word_count, max_words)
                self.assertGreaterEqual(word_count, 3)  # Min words is 3


class TestCaesarCipherComprehensive(unittest.TestCase):
    """Comprehensive tests for Caesar cipher"""

    def setUp(self):
        self.curriculum = CaesarCipherCurriculum()
        self.exercise = CaesarCipherExercise()
        self.rng = random.Random(42)
        self.curriculum.rng = self.rng

    def test_text_case_styles(self):
        """Test different text case styles"""
        num_samples = 100  # Multiple samples so every enabled style shows up

        for level in range(len(CASE_STYLES)):
            self.curriculum.set_attr_level("text_case", level)

            # Generate multiple problems to catch all possible styles
            styles_seen = set()
            for _ in range(num_samples):
                problem = self.exercise.generate(self.curriculum)
                text = problem["metadata"]["executed_parts"]["clear_text"]
                styles_seen.add(_case_style(text))

            # At each level, we should see all styles up to that level
            expected_styles_set = set(CASE_STYLES[:level + 1])
            self.assertEqual(styles_seen, expected_styles_set,
                f"At level {level}, expected to see styles {expected_styles_set} but saw {styles_seen}")

    def test_template_variation(self):
        """Test that different templates are used"""
        num_samples = 100
        templates_seen = {
            self.exercise.generate(self.curriculum)["question"].split(":")[0]
            for _ in range(num_samples)
        }

        self.assertGreater(len(templates_seen), 1, "Not enough template variation")

    def test_comprehensive_random_evaluation(self):
        """Test random evaluation with various configurations and track statistics."""
        self.rng = random.Random(42)  # Fixed seed for reproducibility
        self.curriculum.rng = self.rng

        # Track statistics
        rotations_used = defaultdict(int)
        word_counts = defaultdict(int)
        case_styles = defaultdict(int)
        total_samples = 1000

        for _ in range(total_samples):
            # Set random attribute levels
            self.curriculum.set_attr_level("rotation", self.rng.randint(0, 4))
            self.curriculum.set_attr_level("num_words", self.rng.randint(0, 2))
            self.curriculum.set_attr_level("text_case", self.rng.randint(0, 2))

            # Generate and evaluate a random problem
            problem = self.exercise.generate(self.curriculum)
            metadata = problem["metadata"]["executed_parts"]
            cipher_text = metadata["cipher_text"]
            clear_text = metadata["clear_text"]
            rotation = metadata["rotation"]

            # Track statistics
            rotations_used[rotation] += 1
            word_counts[len(clear_text.split())] += 1
            case_styles[_case_style(clear_text)] += 1

            # zip() stops at the shorter input, so a truncated cipher text
            # would otherwise pass unnoticed.
            self.assertEqual(len(cipher_text), len(clear_text))

            # Verify each character is correctly encrypted
            for c1, c2 in zip(cipher_text, clear_text):
                if c1.isalpha():
                    expected = chr((ord(c2.upper()) - ord('A') + rotation) % 26 + ord('A'))
                    self.assertEqual(c1.upper(), expected)
                else:
                    self.assertEqual(c1, c2)

        # Print statistics
        print("\nRotations used:")
        for rotation, count in sorted(rotations_used.items()):
            print(f"  Rotation {rotation}: {count}")

        print("\nWord counts:")
        for words, count in sorted(word_counts.items()):
            print(f"  {words} words: {count}")

        print("\nCase styles:")
        for style, count in case_styles.items():
            print(f"  {style}: {count}")