diff --git a/GALLERY.md b/GALLERY.md index ad0248ce..ff56e124 100644 --- a/GALLERY.md +++ b/GALLERY.md @@ -20,6 +20,7 @@ This gallery shows examples from all available datasets using their default conf - [game_of_life](#game_of_life) - [gcd](#gcd) - [group_anagrams](#group_anagrams) +- [ransom_note](#ransom_note) - [gsm_symbolic](#gsm_symbolic) - [intermediate_integration](#intermediate_integration) - [largest_island](#largest_island) @@ -967,6 +968,91 @@ Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'strip ```` + +### ransom_note + +Check if you can construct a ransom note from letters in a magazine + +Default configuration: +```python +max_note_length: int = 10 # Maximum length of the ransom note +max_magazine_length: int = 30 # Maximum length of the magazine +p_solvable: float = 0.5 # Probability that the ransom note can be constructed +``` + +Example tasks: +```` +Sample 1: +Question: Given two strings representing a ransom note and a magazine, return True if you can construct the ransom note using the letters in the magazine, and False otherwise. + +Each letter in the magazine string can only be used once in your ransom note. + +Ransom note: w +Magazine: addyplkvt + +Answer: False + +Metadata: {'ransom_note': 'w', 'magazine': 'addyplkvt', 'solution': False, 'solvable': False} + +-------------------------------------------------- + +Sample 2: +Question: Given two strings representing a ransom note and a magazine, return True if you can construct the ransom note using the letters in the magazine, and False otherwise. + +Each letter in the magazine string can only be used once in your ransom note.
+ +Ransom note: ztx +Magazine: zizohkpdrhvdoaxtrxosmerfb + +Answer: True + +Metadata: {'ransom_note': 'ztx', 'magazine': 'zizohkpdrhvdoaxtrxosmerfb', 'solution': True, 'solvable': True} + +-------------------------------------------------- + +Sample 3: +Question: Given two strings representing a ransom note and a magazine, return True if you can construct the ransom note using the letters in the magazine, and False otherwise. + +Each letter in the magazine string can only be used once in your ransom note. + +Ransom note: rdmalrnnc +Magazine: nzbepjclllltnmdaucybmraddrr + +Answer: True + +Metadata: {'ransom_note': 'rdmalrnnc', 'magazine': 'nzbepjclllltnmdaucybmraddrr', 'solution': True, 'solvable': True} + +-------------------------------------------------- + +Sample 4: +Question: Given two strings representing a ransom note and a magazine, return True if you can construct the ransom note using the letters in the magazine, and False otherwise. + +Each letter in the magazine string can only be used once in your ransom note. + +Ransom note: cjyaqjka +Magazine: crjqaiajkyc + +Answer: True + +Metadata: {'ransom_note': 'cjyaqjka', 'magazine': 'crjqaiajkyc', 'solution': True, 'solvable': True} + +-------------------------------------------------- + +Sample 5: +Question: Given two strings representing a ransom note and a magazine, return True if you can construct the ransom note using the letters in the magazine, and False otherwise. + +Each letter in the magazine string can only be used once in your ransom note. 
+ +Ransom note: eusvpne +Magazine: irzhvekszobxnsrexnpkltchp + +Answer: False + +Metadata: {'ransom_note': 'eusvpne', 'magazine': 'irzhvekszobxnsrexnpkltchp', 'solution': False, 'solvable': False} + +-------------------------------------------------- +```` + ### gsm_symbolic Default configuration: ```python diff --git a/README.md b/README.md index b623eebc..0dd159b9 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets - `WordSequenceReversalDataset`: Reverse word order in text spans - `WordLadderDataset`: Generate word ladder puzzles where one word is transformed into another by changing one letter at a time - `GroupAnagramsDataset`: Group anagrams together in a list of words +- `RansomNoteDataset`: Check if a ransom note can be created from a given set of letters in a magazine ### Code Tasks diff --git a/reasoning_gym/algorithmic/__init__.py b/reasoning_gym/algorithmic/__init__.py index 4e4688bf..e5326af5 100644 --- a/reasoning_gym/algorithmic/__init__.py +++ b/reasoning_gym/algorithmic/__init__.py @@ -14,6 +14,7 @@ from .letter_jumble import LetterJumbleConfig, LetterJumbleDataset from .number_filtering import NumberFilteringConfig, NumberFilteringDataset from .number_sorting import NumberSortingConfig, NumberSortingDataset from .palindrome_generation import PalindromeConfig, PalindromeDataset +from .ransom_note import RansomNoteConfig, RansomNoteDataset from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset from .spell_backward import SpellBackwardConfig, SpellBackwardDataset from .word_ladder import WordLadderConfig, WordLadderDataset @@ -48,4 +49,6 @@ __all__ = [ "PalindromeDataset", "GroupAnagramsConfig", "GroupAnagramsDataset", + "RansomNoteConfig", + "RansomNoteDataset", ] diff --git a/reasoning_gym/algorithmic/ransom_note.py b/reasoning_gym/algorithmic/ransom_note.py new file mode 100644 index 00000000..d3367bfc --- /dev/null +++ 
b/reasoning_gym/algorithmic/ransom_note.py @@ -0,0 +1,99 @@ +"""Check if you can construct a ransom note from letters in a magazine. + +A popular Leetcode problem: +https://leetcode.com/problems/ransom-note/description/ +""" + +from collections import defaultdict +from dataclasses import dataclass +from random import Random +from typing import Optional + +from ..factory import ProceduralDataset, register_dataset + +MAX_NOTE_LENGTH = 100_000 +MAX_MAGAZINE_LENGTH = 100_001 + +QUESTION_TEMPLATE = """Given two strings representing a ransom note and a magazine, return True if you can construct the ransom note using the letters in the magazine, and False otherwise. + +Each letter in the magazine string can only be used once in your ransom note. + +Ransom note: {ransom_note} +Magazine: {magazine} +""" + + +@dataclass +class RansomNoteConfig: + """Configuration for Ransom Note dataset generation""" + + max_note_length: int = 10 # Maximum length of the ransom note + max_magazine_length: int = 30 # Maximum length of the magazine + p_solvable: float = 0.5 # Probability that the ransom note can be constructed + + size: int = 500 # Virtual dataset size + seed: Optional[int] = None + + def validate(self): + """Validate configuration parameters""" + assert 1 <= self.max_note_length <= MAX_NOTE_LENGTH, "max_note_length must be between 1 and MAX_NOTE_LENGTH" + assert ( + 2 <= self.max_magazine_length <= MAX_MAGAZINE_LENGTH + ), "max_magazine_length must be between 2 and MAX_MAGAZINE_LENGTH" + assert self.max_note_length < self.max_magazine_length, "max_note_length must be less than max_magazine_length" + assert 0 <= self.p_solvable <= 1, "p_solvable must be between 0 and 1" + + +class RansomNoteDataset(ProceduralDataset): + """Generates Ransom Note exercises with configurable difficulty""" + + def __init__(self, config: RansomNoteConfig): + super().__init__(config=config, seed=config.seed, size=config.size) + self.letters = {chr(i) for i in range(ord("a"), ord("z") + 1)} + + def 
_get_inputs(self, rng: Random, solvable: bool) -> tuple[str, str]: + """Generate random ransom note and magazine; letter pools are sorted so output depends only on rng""" + ransom_note_len = rng.randint(1, self.config.max_note_length) + ransom_note = [rng.choice(sorted(self.letters)) for _ in range(ransom_note_len)] + + magazine_len = rng.randint(ransom_note_len, self.config.max_magazine_length) + magazine = ransom_note.copy() + if solvable: + magazine.extend([rng.choice(sorted(self.letters)) for _ in range(magazine_len - ransom_note_len)]) + else: + remove_letter = rng.choice(magazine) + magazine.remove(remove_letter) + magazine.extend( + [rng.choice(sorted(self.letters - {remove_letter})) for _ in range(magazine_len - ransom_note_len + 1)] + ) + + rng.shuffle(ransom_note) + rng.shuffle(magazine) + return "".join(ransom_note), "".join(magazine) + + def _can_construct(self, ransom_note: str, magazine: str) -> bool: + """Check if ransom note can be constructed from magazine""" + count = defaultdict(int) + for c in magazine: + count[c] += 1 + for c in ransom_note: + if count[c] <= 0: + return False + count[c] -= 1 + return True + + def __getitem__(self, idx: int) -> dict: + """Generate a single Ransom Note question""" + rng = Random(self.seed + idx) + solvable = rng.random() < self.config.p_solvable + ransom_note, magazine = self._get_inputs(rng, solvable) + answer = self._can_construct(ransom_note, magazine) + + return { + "question": QUESTION_TEMPLATE.format(ransom_note=ransom_note, magazine=magazine), + "answer": str(answer), + "metadata": {"ransom_note": ransom_note, "magazine": magazine, "solution": answer, "solvable": solvable}, + } + + +register_dataset("ransom_note", RansomNoteDataset, RansomNoteConfig) diff --git a/tests/test_ransom_note.py b/tests/test_ransom_note.py new file mode 100644 index 00000000..9615e8b6 --- /dev/null +++ b/tests/test_ransom_note.py @@ -0,0 +1,111 @@ +"""Tests for Ransom Note questions generation""" + +import json + +import pytest + +from reasoning_gym.algorithmic.ransom_note import
RansomNoteConfig, RansomNoteDataset + + +def test_ransom_note_config_validation(): + """Test that invalid configs raise appropriate errors""" + with pytest.raises(AssertionError): + config = RansomNoteConfig(max_note_length=-1) # Negative not allowed + config.validate() + + with pytest.raises(AssertionError): + config = RansomNoteConfig(max_note_length=0) # Zero not allowed + config.validate() + + with pytest.raises(AssertionError): + config = RansomNoteConfig(max_magazine_length=-1) # Negative not allowed + config.validate() + + with pytest.raises(AssertionError): + config = RansomNoteConfig(max_magazine_length=0) # Zero not allowed + config.validate() + + with pytest.raises(AssertionError): + config = RansomNoteConfig(max_magazine_length=1) # One not allowed + config.validate() + + with pytest.raises(AssertionError): + config = RansomNoteConfig( + max_note_length=3, max_magazine_length=2 + ) # max_note_length must be less than max_magazine_length + config.validate() + + with pytest.raises(AssertionError): + config = RansomNoteConfig(p_solvable=-0.01) # p_solvable must be between 0 and 1 + config.validate() + + with pytest.raises(AssertionError): + config = RansomNoteConfig(p_solvable=1.01) # p_solvable must be between 0 and 1 + config.validate() + + +def test_ransom_note_dataset_deterministic(): + """Test that dataset generates same items with same seed""" + config = RansomNoteConfig(seed=42, size=10) + dataset1 = RansomNoteDataset(config) + dataset2 = RansomNoteDataset(config) + + for i in range(len(dataset1)): + assert dataset1[i] == dataset2[i] + + +def test_group_anagrams_dataset_items(): + """Test basic properties of generated items""" + config = RansomNoteConfig(max_note_length=10, max_magazine_length=30, size=10, seed=42) + dataset = RansomNoteDataset(config) + + for i in range(len(dataset)): + item = dataset[i] + # Check item structure + assert isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + # 
Check metadata + assert "ransom_note" in item["metadata"] + assert "magazine" in item["metadata"] + assert "solution" in item["metadata"] + assert "solvable" in item["metadata"] + + ransom_note = item["metadata"]["ransom_note"] + magazine = item["metadata"]["magazine"] + solution = item["metadata"]["solution"] + solvable = item["metadata"]["solvable"] + + # Verify dimensions + assert len(ransom_note) <= config.max_note_length + assert len(ransom_note) <= len(magazine) + assert len(magazine) <= config.max_magazine_length + assert solution == solvable + + +def test_ransom_note_dataset_iteration(): + """Test that iteration respects dataset size""" + config = RansomNoteConfig(size=5, seed=42) + dataset = RansomNoteDataset(config) + + items = list(dataset) + assert len(items) == config.size + + # Test multiple iterations yield same items + assert items == list(dataset) + + +def test_ransom_note_answer(): + """Test the _can_construct method""" + config = RansomNoteConfig(seed=42) + dataset = RansomNoteDataset(config) + + # Correct solution + ransom_note, magazine = "ab", "badhergh" + assert dataset._can_construct(ransom_note, magazine) == True + + # Incorrect solution + ransom_note, magazine = "az", "badhergh" + assert dataset._can_construct(ransom_note, magazine) == False