diff --git a/GALLERY.md b/GALLERY.md index 1d09a54f..a731b732 100644 --- a/GALLERY.md +++ b/GALLERY.md @@ -49,6 +49,7 @@ This gallery shows examples from all available datasets using their default conf - [tower_of_hanoi](#tower_of_hanoi) - [word_ladder](#word_ladder) - [group_anagrams](#group_anagrams) +- [palindrome_partitioning](#palindrome_partitioning) - [word_sequence_reversal](#word_sequence_reversal) - [word_sorting](#word_sorting) - [zebra_puzzles](#zebra_puzzles) @@ -2295,6 +2296,118 @@ Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'strip ``` +### palindrome_partitioning + +Partition a string into palindromic substrings + +Default configuration: +```python +size = 500 +``` + +Example tasks: +```` +Example 1: +Question: Given a string, partition it such that every substring is a palindrome. + +A palindrome is a word that reads the same backward as forward. + +You may return all possible palindrome partitioning in any order. + +Example: +Input: "aab" +Output: [["a","a","b"],["aa","b"]] + +Partition the following string into palindromes: +begun + +Answer: [["b", "e", "g", "u", "n"]] + +Metadata: {'string': 'begun', 'solution': [['b', 'e', 'g', 'u', 'n']]} + +-------------------------------------------------- + +Example 2: +Question: Given a string, partition it such that every substring is a palindrome. + +A palindrome is a word that reads the same backward as forward. + +You may return all possible palindrome partitioning in any order. + +Example: +Input: "aab" +Output: [["a","a","b"],["aa","b"]] + +Partition the following string into palindromes: +condense + +Answer: [["c", "o", "n", "d", "e", "n", "s", "e"]] + +Metadata: {'string': 'condense', 'solution': [['c', 'o', 'n', 'd', 'e', 'n', 's', 'e']]} + +-------------------------------------------------- + +Example 3: +Question: Given a string, partition it such that every substring is a palindrome. + +A palindrome is a word that reads the same backward as forward. 
+ +You may return all possible palindrome partitioning in any order. + +Example: +Input: "aab" +Output: [["a","a","b"],["aa","b"]] + +Partition the following string into palindromes: +located + +Answer: [["l", "o", "c", "a", "t", "e", "d"]] + +Metadata: {'string': 'located', 'solution': [['l', 'o', 'c', 'a', 't', 'e', 'd']]} + +-------------------------------------------------- + +Example 4: +Question: Given a string, partition it such that every substring is a palindrome. + +A palindrome is a word that reads the same backward as forward. + +You may return all possible palindrome partitioning in any order. + +Example: +Input: "aab" +Output: [["a","a","b"],["aa","b"]] + +Partition the following string into palindromes: +shall + +Answer: [["s", "h", "a", "l", "l"], ["s", "h", "a", "ll"]] + +Metadata: {'string': 'shall', 'solution': [['s', 'h', 'a', 'l', 'l'], ['s', 'h', 'a', 'll']]} + +-------------------------------------------------- + +Example 5: +Question: Given a string, partition it such that every substring is a palindrome. + +A palindrome is a word that reads the same backward as forward. + +You may return all possible palindrome partitioning in any order. 
+ +Example: +Input: "aab" +Output: [["a","a","b"],["aa","b"]] + +Partition the following string into palindromes: +if + +Answer: [["i", "f"]] + +Metadata: {'string': 'if', 'solution': [['i', 'f']]} + +-------------------------------------------------- +```` + ### word_sequence_reversal Generates word sequence reversal tasks from text spans diff --git a/README.md b/README.md index 9335a1d2..018a2b1d 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets - `WordSequenceReversalDataset`: Reverse word order in text spans - `WordLadderDataset`: Generate word ladder puzzles where one word is transformed into another by changing one letter at a time - `GroupAnagramsDataset`: Group anagrams together in a list of words +- `PalindromePartitioningDataset`: Partition a string into palindromic substrings ### Code Tasks diff --git a/reasoning_gym/algorithmic/__init__.py b/reasoning_gym/algorithmic/__init__.py index 4e4688bf..9fdcf01c 100644 --- a/reasoning_gym/algorithmic/__init__.py +++ b/reasoning_gym/algorithmic/__init__.py @@ -14,6 +14,7 @@ from .letter_jumble import LetterJumbleConfig, LetterJumbleDataset from .number_filtering import NumberFilteringConfig, NumberFilteringDataset from .number_sorting import NumberSortingConfig, NumberSortingDataset from .palindrome_generation import PalindromeConfig, PalindromeDataset +from .palindrome_partitioning import PalindromePartitioningConfig, PalindromePartitioningDataset from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset from .spell_backward import SpellBackwardConfig, SpellBackwardDataset from .word_ladder import WordLadderConfig, WordLadderDataset @@ -48,4 +49,6 @@ __all__ = [ "PalindromeDataset", "GroupAnagramsConfig", "GroupAnagramsDataset", + "PalindromePartitioningConfig", + "PalindromePartitioningDataset", ] diff --git a/reasoning_gym/algorithmic/palindrome_partitioning.py 
b/reasoning_gym/algorithmic/palindrome_partitioning.py
new file mode 100644
index 00000000..db4d024e
--- /dev/null
+++ b/reasoning_gym/algorithmic/palindrome_partitioning.py
@@ -0,0 +1,145 @@
+"""Given a string, return all possible partitions of the string such that each substring is a palindrome.
+
+A popular Leetcode problem:
+https://leetcode.com/problems/palindrome-partitioning/description/
+"""
+
+import json
+import re
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Dict, Optional
+
+from ..data import read_data_file
+from ..factory import ProceduralDataset, register_dataset
+
+QUESTION_TEMPLATE = """Given a string, partition it such that every substring is a palindrome.
+
+A palindrome is a word that reads the same backward as forward.
+
+You may return all possible palindrome partitioning in any order.
+
+Example:
+Input: "aab"
+Output: [["a","a","b"],["aa","b"]]
+
+Partition the following string into palindromes:
+{string}
+"""
+
+
+@dataclass
+class PalindromePartitioningConfig:
+    """Configuration for Palindrome Partitioning dataset generation"""
+
+    size: int = 500  # Virtual dataset size
+    seed: Optional[int] = None
+
+    def validate(self):
+        """Validate configuration parameters"""
+        pass
+
+
+class PalindromePartitioningDataset(ProceduralDataset):
+    """Generates Palindrome Partitioning exercises from the words of a bundled text file"""
+
+    def __init__(self, config: PalindromePartitioningConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+        # Candidate strings: whitespace-separated tokens with non-word characters removed.
+        # NOTE(review): a token made only of punctuation becomes "" here and would yield
+        # an empty question string with solution []; confirm the text has no such tokens.
+        self.words = [
+            re.sub(r"\W+", "", word.strip()) for word in read_data_file("in_the_year_2889.txt").split() if word.strip()
+        ]
+
+    def __len__(self) -> int:
+        return self.config.size
+
+    def __iter__(self):
+        self._current_idx = 0
+        return self
+
+    def __next__(self):
+        if self._current_idx >= self.config.size:
+            raise StopIteration
+        item = self[self._current_idx]
+        self._current_idx += 1
+        return item
+
+    def _sort_list(self, lst: list[list[str]]) -> list[list[str]]:
+        """Return the partitions in a canonical (fully lexicographic) order.
+
+        Sorting whole partitions, not just their first element, makes the result
+        independent of the input order, which score_answer relies on.
+        """
+        return sorted(lst)
+
+    def _palindrome_partitioning(self, string: str) -> list[list[str]]:
+        """Return all possible palindrome partitions of a string"""
+        if not string:
+            return []
+        dp = {}
+
+        def is_palindrome(i, j) -> bool:
+            # Memoised check that string[i..j] (inclusive) reads the same both ways
+            if i >= j:
+                return True
+            if (i, j) in dp:
+                return dp[(i, j)]
+            dp[(i, j)] = string[i] == string[j] and is_palindrome(i + 1, j - 1)
+            return dp[(i, j)]
+
+        res, temp = [], []
+
+        def _partition(idx) -> None:
+            # Backtracking: temp holds the palindromic pieces covering string[:idx]
+            if idx >= len(string):
+                res.append(temp[:])
+                return
+            for i in range(idx, len(string)):
+                if is_palindrome(idx, i):
+                    temp.append(string[idx : i + 1])
+                    _partition(i + 1)
+                    temp.pop()
+
+        _partition(0)
+        return self._sort_list(res)
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+        """Score a single Palindrome Partitioning question
+
+        Returns 1 when the answer holds exactly the oracle partitions (in any order),
+        0.01 when it parses and can be sorted but differs, and 0 when it is None or
+        raises while being parsed or compared.
+        """
+        reward = 0
+        if answer is not None:
+            try:
+                parsed = json.loads(answer)
+                oracle = entry["metadata"]["solution"]
+                answer_str = json.dumps(self._sort_list(parsed))
+                oracle_str = json.dumps(self._sort_list(oracle))
+                if answer_str == oracle_str:
+                    reward = 1
+                else:
+                    reward = 0.01
+            except Exception:
+                # Deliberately best-effort: any unparsable or oddly shaped answer scores 0
+                reward = 0
+        return reward
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Palindrome Partitioning question"""
+        rng = Random(self.seed + idx)
+        string = rng.choice(self.words)
+        answer = self._palindrome_partitioning(string)
+        answer_str = json.dumps(answer)
+
+        return {
+            "question": QUESTION_TEMPLATE.format(string=string),
+            "answer": answer_str,
+            "metadata": {"string": string, "solution": answer},
+        }
+
+
+register_dataset("palindrome_partitioning", PalindromePartitioningDataset, PalindromePartitioningConfig)
diff --git a/tests/test_palindrome_partitioning.py b/tests/test_palindrome_partitioning.py
new file mode 100644
index 00000000..a81c44bf
--- /dev/null
+++ b/tests/test_palindrome_partitioning.py
@@ -0,0 +1,116 @@
+"""Tests for Palindrome Partitioning questions generation"""
+
+import json
+
+from reasoning_gym.algorithmic.palindrome_partitioning import (
+    PalindromePartitioningConfig,
+    PalindromePartitioningDataset,
+)
+
+
+def test_palindrome_partitioning_dataset_deterministic():
+    """Test that dataset generates same items with same seed"""
+    config = PalindromePartitioningConfig(seed=42, size=10)
+    dataset1 = PalindromePartitioningDataset(config)
+    dataset2 = PalindromePartitioningDataset(config)
+
+    for i in range(len(dataset1)):
+        assert dataset1[i] == dataset2[i]
+
+
+def test_palindrome_partitioning_dataset_items():
+    """Test basic properties of generated items"""
+    config = PalindromePartitioningConfig(size=10, seed=42)
+    dataset = PalindromePartitioningDataset(config)
+
+    for i in range(len(dataset)):
+        item = dataset[i]
+        # Check item structure
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Check metadata
+        assert "string" in item["metadata"]
+        assert "solution" in item["metadata"]
+        string = item["metadata"]["string"]
+        solution = item["metadata"]["solution"]
+
+        # Verify string is not empty
+        assert len(string) > 0
+
+        # At least one partitioning exists (each letter is a palindrome)
+        assert len(solution) >= 1
+
+        # Verify each partitioning reconstructs the original string
+        assert all(len(partitioning) > 0 for partitioning in solution)
+        assert all("".join(partitioning) == string for partitioning in solution)
+
+
+def test_palindrome_partitioning_dataset_iteration():
+    """Test that iteration respects dataset size"""
+    config = PalindromePartitioningConfig(size=5, seed=42)
+    dataset = PalindromePartitioningDataset(config)
+
+    items = list(dataset)
+    assert len(items) == config.size
+
+    # Test multiple iterations yield same items
+    assert items == list(dataset)
+
+
+def test_palindrome_partitioning_answer():
+    """Test the _palindrome_partitioning method"""
+    config = PalindromePartitioningConfig(seed=42)
+    dataset = PalindromePartitioningDataset(config)
+
+    # General use case
+    word = "afternoon"
+    correct = [
+        ["a", "f", "t", "e", "r", "n", "o", "o", "n"],
+        ["a", "f", "t", "e", "r", "n", "oo", "n"],
+        ["a", "f", "t", "e", "r", "noon"],
+    ]
+    assert json.dumps(dataset._palindrome_partitioning(word)) == json.dumps(correct)
+
+    # Single letter word
+    word = "a"
+    correct = [["a"]]
+    assert json.dumps(dataset._palindrome_partitioning(word)) == json.dumps(correct)
+
+    # Empty string
+    word = ""
+    correct = []
+    assert json.dumps(dataset._palindrome_partitioning(word)) == json.dumps(correct)
+
+
+def test_palindrome_partitioning_score_answer():
+    """Test the score_answer method"""
+    config = PalindromePartitioningConfig(seed=42)
+    dataset = PalindromePartitioningDataset(config)
+
+    # Verify the scoring function is permutation invariant
+    answer = json.dumps([["n", "o", "o", "n"], ["no", "on"], ["noon"]])
+    item = {"metadata": {"solution": [["no", "on"], ["noon"], ["n", "o", "o", "n"]]}}
+    assert dataset.score_answer(answer, item) == 1
+
+    # Verify permutation invariance when partitions share their first element
+    answer = json.dumps([["s", "h", "a", "ll"], ["s", "h", "a", "l", "l"]])
+    item = {"metadata": {"solution": [["s", "h", "a", "l", "l"], ["s", "h", "a", "ll"]]}}
+    assert dataset.score_answer(answer, item) == 1
+
+    # Verify the score is 0.01 when incorrect
+    answer = json.dumps([["n", "o", "o", "n"], ["no", "on"]])
+    item = {"metadata": {"solution": [["no", "on"], ["noon"], ["n", "o", "o", "n"]]}}
+    assert dataset.score_answer(answer, item) == 0.01
+
+    # Verify the score is 0 when answer is None
+    answer = None
+    item = {"metadata": {"solution": [["no", "on"], ["noon"], ["n", "o", "o", "n"]]}}
+    assert dataset.score_answer(answer, item) == 0
+
+    # Verify the score is 0 when answer is malformed JSON
+    answer = '["n", "o", "o", "n"], ["no", "on"], ["noon"]'
+    item = {"metadata": {"solution": [["no", "on"], ["noon"], ["n", "o", "o", "n"]]}}
+    assert dataset.score_answer(answer, item) == 0