diff --git a/examples/exercises/algorithmic/letter_counting_examples.py b/examples/exercises/algorithmic/letter_counting_examples.py
new file mode 100644
index 00000000..710be5e8
--- /dev/null
+++ b/examples/exercises/algorithmic/letter_counting_examples.py
@@ -0,0 +1,97 @@
+"""Examples of generated problems from the LetterCounting exercise.
+
+This file demonstrates different types of letter counting problems that can be generated
+at various difficulty levels.
+"""
+
+from reasoning_gym.curricula.algorithmic.letter_counting_curriculum import LetterCountingCurriculum
+from reasoning_gym.exercises.algorithmic.letter_counting import LetterCountingExercise
+import random
+
+def main():
+    # Initialize with a fixed seed for reproducibility
+    curriculum = LetterCountingCurriculum()
+    exercise = LetterCountingExercise()
+    curriculum.rng = random.Random(42)
+
+    print("\n========================================\n")
+
+    # Level 0: Basic counting with short text and case-insensitive matching
+    curriculum.set_attr_level("num_words", 0)  # Short text (5 words)
+    curriculum.set_attr_level("case_sensitivity", 0)  # Case insensitive
+    curriculum.set_attr_level("letter_selection", 0)  # Common letters
+    problem = exercise.generate(curriculum)
+    print("Level 0 (Basic Counting):")
+    print(problem)
+
+    print("\n========================================\n")
+
+    # Level 1: Medium length text with case sensitivity
+    curriculum.set_attr_level("num_words", 1)  # Medium text (10 words)
+    curriculum.set_attr_level("case_sensitivity", 1)  # Case sensitive
+    curriculum.set_attr_level("letter_selection", 1)  # All letters
+    problem = exercise.generate(curriculum)
+    print("Level 1 (Case Sensitive Counting):")
+    print(problem)
+
+    print("\n========================================\n")
+
+    # Level 2: Long text with rare letters
+    curriculum.set_attr_level("num_words", 2)  # Long text (15 words)
+    curriculum.set_attr_level("case_sensitivity", 1)  # Case sensitive
+    curriculum.set_attr_level("letter_selection", 2)  # Rare letters
+    problem = exercise.generate(curriculum)
+    print("Level 2 (Rare Letters):")
+    print(problem)
+
+    print("\n========================================\n")
+
+    # Random Examples with Different Seeds
+    print("Random Examples (Different Seeds):")
+    for seed in range(10, 15):
+        curriculum.rng = random.Random(seed)
+        # Randomly set curriculum levels, drawing from the seeded rng so runs stay reproducible
+        curriculum.set_attr_level("num_words", curriculum.rng.randint(0, 2))
+        curriculum.set_attr_level("case_sensitivity", curriculum.rng.randint(0, 1))
+        curriculum.set_attr_level("letter_selection", curriculum.rng.randint(0, 2))
+        problem = exercise.generate(curriculum)
+        print(f"\nRandom Example (Seed {seed}):")
+        print(problem)
+
+    print("\n========================================\n")
+
+    # Special Cases
+    print("Special Cases:")
+
+    # Case 1: Maximum length with case insensitive common letters
+    for attr in curriculum.attributes:  # Reset all attributes
+        curriculum.set_attr_level(attr, 0)
+    curriculum.set_attr_level("num_words", 2)  # Maximum words (15)
+    curriculum.set_attr_level("case_sensitivity", 0)  # Case insensitive
+    curriculum.set_attr_level("letter_selection", 0)  # Common letters
+    problem = exercise.generate(curriculum)
+    print("\nLong Text with Common Letters:")
+    print(problem)
+
+    # Case 2: Short text with case sensitive rare letters
+    for attr in curriculum.attributes:  # Reset all attributes
+        curriculum.set_attr_level(attr, 0)
+    curriculum.set_attr_level("num_words", 0)  # Minimum words (5)
+    curriculum.set_attr_level("case_sensitivity", 1)  # Case sensitive
curriculum.set_attr_level("letter_selection", 2) # Rare letters + problem = exercise.generate(curriculum) + print("\nShort Text with Rare Letters:") + print(problem) + + # Case 3: Medium text with all letters + for attr in curriculum.attributes: # Reset all attributes + curriculum.set_attr_level(attr, 0) + curriculum.set_attr_level("num_words", 1) # Medium length (10 words) + curriculum.set_attr_level("case_sensitivity", 1) # Case sensitive + curriculum.set_attr_level("letter_selection", 1) # All letters + problem = exercise.generate(curriculum) + print("\nMedium Text with All Letters:") + print(problem) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/reasoning_gym/algorithmic/letter_counting.py b/reasoning_gym/algorithmic/letter_counting.py index 8f2590dd..9304e9cd 100644 --- a/reasoning_gym/algorithmic/letter_counting.py +++ b/reasoning_gym/algorithmic/letter_counting.py @@ -1,66 +1,63 @@ -"""Letter counting task generator""" +"""Letter counting exercise that generates tasks to count letter occurrences in text.""" -import re -from dataclasses import dataclass -from random import Random -from typing import List, Optional +from typing import Dict, Any -from reasoning_gym.data import read_data_file +class LetterCountingExercise: + """Exercise generator for letter counting tasks.""" -from ..factory import ProceduralDataset, register_dataset + def __init__(self): + self.curriculum = None + def generate(self, curriculum: Any) -> Dict[str, Any]: + """ + Generate a letter counting problem using the curriculum. -@dataclass -class LetterCountingConfig: - """Configuration for letter counting task generation""" + Returns: + Dict containing: + - question: str (e.g. "How many times does 'a' appear in 'banana'?") + - answer: str (the count as a string) + - metadata: dict with details (text, target_letter, etc.) + """ + self.curriculum = curriculum + template = curriculum.get_template(curriculum.rng) + return template.eval(self, curriculum.rng) - min_words: int = 5 # Minimum words in span - max_words: int = 15 # Maximum words in span - seed: Optional[int] = None - size: int = 500 # Virtual dataset size + def _parse_expression(self, metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Parse the template metadata into structured data. 
-    def validate(self) -> None:
-        """Validate configuration parameters"""
-        assert self.min_words > 0, "min_words must be positive"
-        assert self.max_words >= self.min_words, "max_words must be >= min_words"
-
-
-class LetterCountingDataset(ProceduralDataset):
-    """Generates letter counting tasks from text spans"""
-
-    def __init__(self, config: LetterCountingConfig):
-        super().__init__(config=config, seed=config.seed, size=config.size)
-
-        # Load and preprocess text
-        text = read_data_file("in_the_year_2889.txt")
-        # Extract words and clean them to contain only alphanumeric characters
-        self.words = [word for word in re.findall(r"\b\w+\b", text) if word.isalnum()]
-
-    def __getitem__(self, idx: int) -> dict:
-        """Generate a single letter counting task"""
-        rng = Random(self.seed + idx)
-
-        # Select random span of words
-        span_length = rng.randint(self.config.min_words, self.config.max_words)
-        start_idx = rng.randint(0, len(self.words) - span_length)
-        span = self.words[start_idx : start_idx + span_length]
-
-        # Get all unique letters from span
-        letters = set("".join(span).lower())
-        if not letters:
-            letters = {"a"}  # Fallback if span has no letters
-
-        # Select random letter that appears in the span
-        target_letter = rng.choice(sorted(letters))
-
-        # Count occurrences
-        count = sum(word.lower().count(target_letter) for word in span)
-
-        return {
-            "question": f'How many times does the letter "{target_letter}" appear in the text: "{" ".join(span)}"?',
-            "answer": str(count),
-            "metadata": {"span_length": span_length, "target_letter": target_letter, "span": span},
+        The metadata structure from the template system:
+        {
+            "text": {"text": str},  # The text span to analyze
+            "letter": {"letter": str},  # The letter to count
+            "case_sensitivity": {"sensitivity": str}  # "sensitive" or "insensitive"
         }
+        Returns:
+            Dictionary containing:
+            - text: str (the text to analyze)
+            - target_letter: str (the letter to count)
+            - case_sensitive: bool (whether to count case sensitively)
+        """
+        return {
+            "text": metadata["text"]["text"],
+            "target_letter": metadata["letter"]["letter"],
+            "case_sensitive": metadata["case_sensitivity"]["sensitivity"] == "sensitive"
+        }
 
-register_dataset("letter_counting", LetterCountingDataset, LetterCountingConfig)
+    def _evaluate_expression(self, parsed: Dict[str, Any]) -> str:
+        """
+        Count occurrences of the target letter in the text.
+
+        Args:
+            parsed: Dictionary containing:
+            - text: str (the text to analyze)
+            - target_letter: str (the letter to count)
+            - case_sensitive: bool (whether to count case sensitively)
+        Returns:
+            String representation of the count
+        """
+        if parsed["case_sensitive"]:
+            return str(parsed["text"].count(parsed["target_letter"]))
+        else:
+            return str(parsed["text"].lower().count(parsed["target_letter"].lower()))
diff --git a/reasoning_gym/curricula/algorithmic/__init__.py b/reasoning_gym/curricula/algorithmic/__init__.py
index f1f9c779..02c14233 100644
--- a/reasoning_gym/curricula/algorithmic/__init__.py
+++ b/reasoning_gym/curricula/algorithmic/__init__.py
@@ -1,6 +1,8 @@
 from .base_conversion_curriculum import BaseConversionCurriculum
 from .caesar_cipher_curriculum import CaesarCipherCurriculum
+from .letter_counting_curriculum import LetterCountingCurriculum
 __all__ = [
     "BaseConversionCurriculum",
     "CaesarCipherCurriculum",
+    "LetterCountingCurriculum",
 ]
diff --git a/reasoning_gym/curricula/algorithmic/letter_counting_curriculum.py b/reasoning_gym/curricula/algorithmic/letter_counting_curriculum.py
new file mode 100644
index 00000000..dca39838
--- /dev/null
+++ b/reasoning_gym/curricula/algorithmic/letter_counting_curriculum.py
@@ -0,0 +1,110 @@
+"""Curriculum definition for letter counting exercises."""
+
+import re
+from typing import Dict, Any
+from reasoning_gym.core.base_curriculum import BaseCurriculum
+from reasoning_gym.core.attributes import AttributeDefinition, AttributeType
+from reasoning_gym.core.template import Template
+from reasoning_gym.data import read_data_file
+
+
+class LetterCountingCurriculum(BaseCurriculum):
+    def __init__(self):
+        super().__init__("LetterCountingCurriculum")
+        self.words = [word for word in re.findall(r"\b\w+\b", read_data_file("in_the_year_2889.txt"))
+                      if word.isalnum()]
+
+    def _init_curriculum(self) -> None:
+        """Initialize the letter counting curriculum configuration"""
+        # Define valid attribute types
+        self._valid_types = {
+            AttributeType.STATIC,  # For fixed values
+            AttributeType.UBOUND,  # For ranges like span length
+            AttributeType.APPEND  # For accumulating options
+        }
+
+        # Define attributes
+        self._attributes = {
+            "num_words": AttributeDefinition(
+                levels=[5, 10, 15],  # From min_words/max_words
+                default_level=0,
+                description="Number of words in the text span",
+                attr_type=AttributeType.UBOUND,
+                min_value=1  # Ensure at least 1 word
+            ),
+            "case_sensitivity": AttributeDefinition(
+                levels=[False, True],
+                default_level=0,
+                description="Whether letter counting is case sensitive",
+                attr_type=AttributeType.STATIC
+            ),
+            "letter_selection": AttributeDefinition(
+                levels=["common", "all", "rare"],
+                default_level=0,
+                description="Strategy for selecting target letter",
+                attr_type=AttributeType.APPEND
+            )
+        }
+
+        # Define templates with symbolic placeholders
+        self._templates = [
+            Template(
+                template='How many times {case_sensitivity} does the letter "{letter}" appear in the text: "{text}"?',
+                parts={"text": "text_span", "letter": "target_letter", "case_sensitivity": "case_sensitivity"}
+            ),
+            Template(
+                template='Count the occurrences of "{letter}" in: "{text}" {case_sensitivity}',
+                parts={"text": "text_span", "letter": "target_letter", "case_sensitivity": "case_sensitivity"}
+            ),
+            Template(
+                template='In the text "{text}", how many times {case_sensitivity} does the letter "{letter}" appear?',
+                parts={"text": "text_span", "letter": "target_letter", "case_sensitivity": "case_sensitivity"}
+            )
+        ]
+
+        # Define symbolic structure
+        self._symbolic = {
+            # Define shared variables that need to be consistent
+            "shared_vars": {
+                "selected_span": lambda refs: (
+                    n_words := refs["num_words"](),
+                    idx := refs["dataset_rng"].randint(0, len(self.words) - n_words),
+                    span := self.words[idx:idx + n_words],
+                    " ".join(span)
+                )[-1],
+                "is_case_sensitive": lambda refs: refs["case_sensitivity"](),
+            },
+            # Define value generators
+            "generators": {
+                "get_letter": lambda refs: (
+                    text := refs["selected_span"](refs),
+                    text := text.lower() if not refs["is_case_sensitive"](refs) else text,
+                    strategy := refs["letter_selection"](),
+                    letters := set(c for c in text if c.isalpha()),
+                    freqs := {c: text.count(c) for c in letters},
+                    sorted_letters := sorted(letters, key=lambda c: (-freqs[c] if strategy == "common" else freqs[c])),
+                    refs["dataset_rng"].choice(sorted_letters if strategy == "all" else sorted_letters[:2])
+                )[-1]
+            },
+            # Define composition templates
+            "templates": {
+                "text_span": lambda refs: {
+                    "template": "{text}",
+                    "parts": {
+                        "text": lambda refs=refs: refs["selected_span"](refs)
+                    }
+                },
+                "target_letter": lambda refs: {
+                    "template": "{letter}",
+                    "parts": {
+                        "letter": lambda refs=refs: refs["get_letter"](refs)
+                    }
+                },
+                "case_sensitivity": lambda refs: {
+                    "template": "(case {sensitivity})",
+                    "parts": {
+                        "sensitivity": lambda refs=refs: "sensitive" if refs["is_case_sensitive"](refs) else "insensitive"
+                    }
+                }
+            }
+        }
\ No newline at end of file
diff --git a/reasoning_gym/exercises/algorithmic/__init__.py b/reasoning_gym/exercises/algorithmic/__init__.py
index e52035d1..94a2ec4e 100644
--- a/reasoning_gym/exercises/algorithmic/__init__.py
+++ b/reasoning_gym/exercises/algorithmic/__init__.py
@@ -8,7 +8,7 @@ Algorithmic tasks for training reasoning capabilities:
 
 from .base_conversion import BaseConversionExercise
 from .caesar_cipher import CaesarCipherExercise
-# from .letter_counting import LetterCountingExercise
+from .letter_counting import LetterCountingExercise
 # from .letter_jumble import LetterJumbleExercise
 # from .number_filtering import NumberFilteringExercise
 # from .number_sorting import NumberSortingExercise
@@ -22,7 +22,7 @@ __all__ = [
     # "SpellBackwardDataset",
     "BaseConversionExercise",
     "CaesarCipherExercise",
-    # "LetterCountingDataset",
+    "LetterCountingExercise",
     # "LetterJumbleDataset",
     # "NumberFilteringDataset",
     # "NumberSortingDataset",
diff --git a/tests/test_letter_counting.py b/tests/test_letter_counting.py
index 7c6e9bd1..e17aef35 100644
--- a/tests/test_letter_counting.py
+++ b/tests/test_letter_counting.py
@@ -1,78 +1,321 @@
-"""Tests for letter counting task generation"""
+"""Tests for the letter counting exercise."""
 
-import pytest
-
-from reasoning_gym.algorithmic.letter_counting import LetterCountingConfig, LetterCountingDataset
+from reasoning_gym.curricula.algorithmic.letter_counting_curriculum import LetterCountingCurriculum
+from reasoning_gym.exercises.algorithmic.letter_counting import LetterCountingExercise
+import unittest
+import random
 
 
-def test_letter_counting_config_validation():
-    """Test that invalid configs raise appropriate errors"""
-    with pytest.raises(AssertionError):
-        config = LetterCountingConfig(min_words=0)
-        config.validate()
+class TestLetterCountingParsing(unittest.TestCase):
+    """Test parsing of expressions and metadata."""
 
-    with pytest.raises(AssertionError):
-        config = LetterCountingConfig(min_words=10, max_words=5)
-        config.validate()
+    def setUp(self):
+        self.exercise = LetterCountingExercise()
+
+    def test_parse_expression(self):
"""Test parsing of metadata into structured data.""" + test_metadata = { + "text": {"text": "hello world"}, + "letter": {"letter": "l"}, + "case_sensitivity": {"sensitivity": "sensitive"} + } + + parsed = self.exercise._parse_expression(test_metadata) + self.assertEqual(parsed["text"], "hello world") + self.assertEqual(parsed["target_letter"], "l") + self.assertTrue(parsed["case_sensitive"]) + + def test_parse_case_insensitive(self): + """Test parsing with case insensitive setting.""" + test_metadata = { + "text": {"text": "Hello World"}, + "letter": {"letter": "L"}, + "case_sensitivity": {"sensitivity": "insensitive"} + } + + parsed = self.exercise._parse_expression(test_metadata) + self.assertEqual(parsed["text"], "Hello World") + self.assertEqual(parsed["target_letter"], "L") + self.assertFalse(parsed["case_sensitive"]) -def test_letter_counting_dataset_deterministic(): - """Test that dataset generates same items with same seed""" - config = LetterCountingConfig(seed=42, size=10) - dataset1 = LetterCountingDataset(config) - dataset2 = LetterCountingDataset(config) +class TestLetterCountingEvaluation(unittest.TestCase): + """Test evaluation of letter counting expressions.""" - for i in range(len(dataset1)): - assert dataset1[i] == dataset2[i] + def setUp(self): + self.exercise = LetterCountingExercise() + + def test_case_sensitive_counting(self): + """Test counting letters with case sensitivity.""" + test_cases = [ + { + "text": "hello", + "target_letter": "l", + "case_sensitive": True, + "expected": "2" + }, + { + "text": "Hello", + "target_letter": "l", + "case_sensitive": True, + "expected": "2" + }, + { + "text": "HELLO", + "target_letter": "l", + "case_sensitive": True, + "expected": "0" + } + ] + + for case in test_cases: + result = self.exercise._evaluate_expression(case) + self.assertEqual(result, case["expected"], + f"Failed to count '{case['target_letter']}' in '{case['text']}' case sensitively") + + def test_case_insensitive_counting(self): + """Test counting letters without case sensitivity.""" + test_cases = [ + { + "text": "hello", + "target_letter": "l", + "case_sensitive": False, + "expected": "2" + }, + { + "text": "Hello", + "target_letter": "L", + "case_sensitive": False, + "expected": "2" + }, + { + "text": "HELLO", + "target_letter": "l", + "case_sensitive": False, + "expected": "2" + } + ] + + for case in test_cases: + result = self.exercise._evaluate_expression(case) + self.assertEqual(result, case["expected"], + f"Failed to count '{case['target_letter']}' in '{case['text']}' case insensitively") + + def test_empty_string(self): + """Test counting in empty string.""" + parsed = { + "text": "", + "target_letter": "a", + "case_sensitive": True + } + result = self.exercise._evaluate_expression(parsed) + self.assertEqual(result, "0") -def test_letter_counting_dataset_items(): - """Test basic properties of generated items""" - config = LetterCountingConfig(min_words=3, max_words=6, size=10, seed=42) - dataset = LetterCountingDataset(config) +class TestLetterCountingGeneration(unittest.TestCase): + """Test problem generation.""" - for i in range(len(dataset)): - item = dataset[i] - # Check item structure - assert isinstance(item, dict) - assert "question" in item - assert "answer" in item - assert "metadata" in item + def setUp(self): + self.curriculum = LetterCountingCurriculum() + self.exercise = LetterCountingExercise() + self.rng = random.Random(42) + self.curriculum.rng = self.rng - # Check metadata - assert "span_length" in item["metadata"] - assert "target_letter" 
in item["metadata"] - assert "span" in item["metadata"] + # Add some test words to ensure we have content + self.curriculum.words = ["hello", "world", "test", "example", "python", + "programming", "language", "code", "algorithm", "data"] - # Verify span length constraints - span = item["metadata"]["span"] - assert len(span) >= config.min_words - assert len(span) <= config.max_words + def test_problem_structure(self): + """Test that generated problems have the correct structure.""" + problem = self.exercise.generate(self.curriculum) - # Verify letter counting - target_letter = item["metadata"]["target_letter"] - count = sum(word.lower().count(target_letter) for word in span) - assert str(count) == item["answer"] + # Check basic structure + self.assertIn("question", problem) + self.assertIn("answer", problem) + self.assertIn("metadata", problem) + + # Check metadata structure + metadata = problem["metadata"] + self.assertEqual(metadata["type"], "direct") + self.assertIn("executed_parts", metadata) + executed_parts = metadata["executed_parts"] + self.assertIn("text", executed_parts) + self.assertIn("target_letter", executed_parts) + self.assertIn("case_sensitive", executed_parts) + + def test_text_generation(self): + """Test generation of text spans.""" + num_samples = 50 + texts_seen = set() + + for _ in range(num_samples): + problem = self.exercise.generate(self.curriculum) + text = problem["metadata"]["executed_parts"]["text"] + texts_seen.add(text) + + # Verify text is not empty + self.assertTrue(len(text) > 0, "Empty text generated") + # Verify text contains only valid characters + self.assertTrue(all(c.isalnum() or c.isspace() for c in text), + f"Invalid characters in text: {text}") + + # Verify we get different texts + self.assertTrue(len(texts_seen) > 1, "Only one text pattern generated") + + def test_letter_selection(self): + """Test selection of target letters.""" + num_samples = 50 + letters_seen = set() + + for _ in range(num_samples): + problem = self.exercise.generate(self.curriculum) + letter = problem["metadata"]["executed_parts"]["target_letter"] + letters_seen.add(letter) + + # Verify letter is a single character + self.assertEqual(len(letter), 1, f"Invalid letter length: {letter}") + # Verify letter is alphabetic + self.assertTrue(letter.isalpha(), f"Non-alphabetic letter: {letter}") + + # Verify we get different letters + self.assertTrue(len(letters_seen) > 1, "Only one letter generated") -def test_letter_counting_dataset_iteration(): - """Test that iteration respects dataset size""" - config = LetterCountingConfig(size=5, seed=42) - dataset = LetterCountingDataset(config) +class TestLetterCountingComprehensive(unittest.TestCase): + """Comprehensive tests for letter counting.""" - items = list(dataset) - assert len(items) == config.size + def setUp(self): + self.curriculum = LetterCountingCurriculum() + self.exercise = LetterCountingExercise() + self.rng = random.Random(42) + self.curriculum.rng = self.rng - # Test multiple iterations yield same items - assert items == list(dataset) + # Add more test words to ensure we have enough content + self.curriculum.words = [ + "hello", "world", "test", "example", "python", "programming", "language", + "code", "algorithm", "data", "computer", "science", "software", "development", + "testing", "debugging", "function", "variable", "constant", "loop", "condition", + "string", "integer", "float", "boolean", "array", "list", "dictionary", "set", + "class", "object", "method", "inheritance", "polymorphism", "encapsulation", + 
"abstraction", "interface", "implementation", "module", "package", "library", + "framework", "application", "system", "network", "database", "security", + "authentication", "authorization", "validation", "verification" + ] + + def test_case_sensitivity_levels(self): + """Test that both case sensitivity levels are used.""" + num_samples = 100 + sensitivities_seen = set() + + # Set other attributes to stable values + self.curriculum.set_attr_level("num_words", 0) # 5 words + self.curriculum.set_attr_level("letter_selection", 0) # common letters + + # Try both sensitivity levels + for level in [0, 1]: # False, True + self.curriculum.set_attr_level("case_sensitivity", level) + + for _ in range(num_samples // 2): + problem = self.exercise.generate(self.curriculum) + case_sensitive = problem["metadata"]["executed_parts"]["case_sensitive"] + sensitivities_seen.add(case_sensitive) + + # Verify we see both sensitivity settings + self.assertEqual(len(sensitivities_seen), 2, + f"Only saw case sensitivities: {sensitivities_seen}") + + def test_comprehensive_random_evaluation(self): + """Test 1000 problems with varying attribute levels.""" + num_samples = 1000 + + # Statistics tracking + stats = { + 'text_lengths': {}, # Distribution of text lengths + 'letter_frequencies': {}, # Frequency of target letters + 'case_sensitivity': { # Count of case sensitive vs insensitive + 'sensitive': 0, + 'insensitive': 0 + }, + 'answer_distribution': {}, # Distribution of letter counts + 'word_counts': {}, # Distribution of word counts + 'attribute_levels': { # Track curriculum levels used + 'num_words': set(), + 'case_sensitivity': set(), + 'letter_selection': set() + } + } + + for _ in range(num_samples): + # Randomly vary all attribute levels + self.curriculum.set_attr_level("num_words", self.rng.randint(0, 2)) # 5, 10, or 15 words + self.curriculum.set_attr_level("case_sensitivity", self.rng.randint(0, 1)) # False or True + self.curriculum.set_attr_level("letter_selection", self.rng.randint(0, 2)) # common, all, or rare + + problem = self.exercise.generate(self.curriculum) + metadata = problem["metadata"]["executed_parts"] + text = metadata["text"] + letter = metadata["target_letter"] + case_sensitive = metadata["case_sensitive"] + + # Update text length statistics + text_len = len(text) + stats['text_lengths'][text_len] = stats['text_lengths'].get(text_len, 0) + 1 + + # Update letter frequency statistics + stats['letter_frequencies'][letter] = stats['letter_frequencies'].get(letter, 0) + 1 + + # Update case sensitivity statistics + if case_sensitive: + stats['case_sensitivity']['sensitive'] += 1 + else: + stats['case_sensitivity']['insensitive'] += 1 + + # Update answer distribution + answer = int(problem["answer"]) + stats['answer_distribution'][answer] = stats['answer_distribution'].get(answer, 0) + 1 + + # Update word count statistics + word_count = len(text.split()) + stats['word_counts'][word_count] = stats['word_counts'].get(word_count, 0) + 1 + + # Verify answer correctness + parsed = { + "text": {"text": text}, + "letter": {"letter": letter}, + "case_sensitivity": {"sensitivity": "sensitive" if case_sensitive else "insensitive"} + } + expected = self.exercise._evaluate_expression(self.exercise._parse_expression(parsed)) + self.assertEqual(problem["answer"], expected, + f"Wrong answer for counting '{letter}' in '{text}' (case_sensitive={case_sensitive})") + + # Print statistics + print("\nComprehensive Random Evaluation Statistics:") + print("-" * 50) + + print("\nText Length Distribution:") + for length, 
+        for length, count in sorted(stats['text_lengths'].items()):
+            print(f"  Length {length}: {count} ({count/num_samples*100:.1f}%)")
+
+        print("\nLetter Frequency Distribution:")
+        total_letters = sum(stats['letter_frequencies'].values())
+        for letter, count in sorted(stats['letter_frequencies'].items()):
+            print(f"  '{letter}': {count} ({count/total_letters*100:.1f}%)")
+
+        print("\nCase Sensitivity Distribution:")
+        for sensitivity, count in stats['case_sensitivity'].items():
+            print(f"  {sensitivity}: {count} ({count/num_samples*100:.1f}%)")
+            # Verify we see case sensitive problems
+            if sensitivity == 'sensitive':
+                self.assertGreater(count, 0, "No case sensitive problems generated")
+
+        print("\nAnswer Distribution:")
+        for count, freq in sorted(stats['answer_distribution'].items()):
+            print(f"  Count {count}: {freq} ({freq/num_samples*100:.1f}%)")
+
+        print("\nWord Count Distribution:")
+        for words, count in sorted(stats['word_counts'].items()):
+            print(f"  {words} words: {count} ({count/num_samples*100:.1f}%)")
 
-def test_letter_counting_text_preprocessing():
-    """Test that text preprocessing handles edge cases"""
-    config = LetterCountingConfig(size=1, seed=42)
-    dataset = LetterCountingDataset(config)
-
-    # Verify words were extracted from text
-    assert len(dataset.words) > 0
-    # Verify words contain only word characters
-    assert all(word.isalnum() for word in dataset.words)
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file