diff --git a/GALLERY.md b/GALLERY.md index 9914ca2b..035b3f9d 100644 --- a/GALLERY.md +++ b/GALLERY.md @@ -58,7 +58,8 @@ This gallery shows examples from all available datasets using their default conf - [tower_of_hanoi](#tower_of_hanoi) - [tsumego](#tsumego) - [word_ladder](#word_ladder) -- [word_sequence_reversal](#word_sequence_reversal) +- [group_anagrams](#group_anagrams) +- [word_sequence_reversal](#word_sequence_reversal) - [word_sorting](#word_sorting) - [zebra_puzzles](#zebra_puzzles) @@ -3102,6 +3103,84 @@ Metadata: {'start_word': 'SNOG', 'end_word': 'SUQS', 'word_length': 4, 'chain_le ```` + +### group_anagrams + +Group anagrams together in a list of words. + +Default configuration +```python +anagram_groups: int = 10 # Groups of anagrams present in the input +max_words_per_group: int = 5 # Maximum number of words in a single anagram group +``` + +Example tasks: +``` +Example 1: +Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. + +Your job is to group the anagrams together. You can return the answer in any order. + +Example: +Input: ["eat", "tea", "tan", "ate", "nat", "bat"] +Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] +Explanation: + - There is no string in the input that can be rearranged to form "bat". + - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. 
+ +Group the following list of words into anagrams: +["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"] + +Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]] + +Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]} + +-------------------------------------------------- + +Example 2: +Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. + +Your job is to group the anagrams together. You can return the answer in any order. + +Example: +Input: ["eat", "tea", "tan", "ate", "nat", "bat"] +Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] +Explanation: + - There is no string in the input that can be rearranged to form "bat". + - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. 
+ +Group the following list of words into anagrams: +["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"] + +Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]] + +Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]} + +-------------------------------------------------- + +Example 3: +Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once. + +Your job is to group the anagrams together. You can return the answer in any order. + +Example: +Input: ["eat", "tea", "tan", "ate", "nat", "bat"] +Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]] +Explanation: + - There is no string in the input that can be rearranged to form "bat". + - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other. 
+ +Group the following list of words into anagrams: +["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"] + +Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]] + +Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]} + +-------------------------------------------------- +``` + + ### word_sequence_reversal Generates word sequence reversal tasks from text spans diff --git a/reasoning_gym/algorithmic/__init__.py b/reasoning_gym/algorithmic/__init__.py index 1582718d..9acc5007 100644 --- a/reasoning_gym/algorithmic/__init__.py +++ b/reasoning_gym/algorithmic/__init__.py @@ -19,6 +19,7 @@ from .ransom_note import RansomNoteConfig, RansomNoteDataset from .rotate_matrix import RotateMatrixConfig, RotateMatrixDataset from .sentence_reordering import SentenceReorderingConfig, SentenceReorderingDataset from .spell_backward import SpellBackwardConfig, SpellBackwardDataset +from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset from .word_ladder import WordLadderConfig, WordLadderDataset from .word_sequence_reversal import WordSequenceReversalConfig, WordSequenceReversalDataset from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset @@ -51,6 +52,8 @@ __all__ = [ "PalindromeDataset", "GroupAnagramsConfig", "GroupAnagramsDataset", + "SpiralMatrixConfig", + "SpiralMatrixDataset", "RansomNoteConfig", "RansomNoteDataset", "IsomorphicStringsConfig", diff --git a/reasoning_gym/algorithmic/spiral_matrix.py 
b/reasoning_gym/algorithmic/spiral_matrix.py new file mode 100644 index 00000000..2fc99666 --- /dev/null +++ b/reasoning_gym/algorithmic/spiral_matrix.py @@ -0,0 +1,115 @@ +"""Print elements of a matrix in spiral order. + +A popular Leetcode problem: +https://leetcode.com/problems/spiral-matrix/description/ +""" + +from dataclasses import dataclass +from random import Random +from typing import Optional + +from ..factory import ProceduralDataset, register_dataset + +QUESTION_TEMPLATE = """Given a matrix, your job is to generate a list of elements in spiral order, starting from the top-left element. + +Example: + +Input: +1 2 3 +4 5 6 +7 8 9 + +Output: 1 2 3 6 9 8 7 4 5 + +For the matrix below, what is the list of elements in spiral order? +{matrix} +""" + + +@dataclass +class SpiralMatrixConfig: + """Configuration for Spiral Matrix dataset generation""" + + max_n: int = 10 # Maximum number of rows/cols in the matrix + + size: int = 500 # Virtual dataset size + seed: Optional[int] = None + + def validate(self): + """Validate configuration parameters""" + assert 1 <= self.max_n, "max_n must be at least 1" + + +class SpiralMatrixDataset(ProceduralDataset): + """Generates Spiral Matrix exercises with configurable difficulty""" + + def __init__(self, config: SpiralMatrixConfig): + super().__init__(config=config, seed=config.seed, size=config.size) + + def _get_matrix(self, rng: Random) -> list[list[int]]: + """Generate a random matrix""" + n = rng.randint(1, self.config.max_n) + numbers = [rng.randint(0, 9) for _ in range(n**2)] + rng.shuffle(numbers) + matrix = [numbers[i * n : (i + 1) * n] for i in range(n)] + return matrix + + def _get_spiral(self, matrix: list[list[int]]) -> list[int]: + """Return the elements of the matrix in spiral order""" + t, b = 0, len(matrix) + l, r = 0, len(matrix[0]) + + out = [] + + while True: + for i in range(l, r): + out.append(matrix[t][i]) + t += 1 + if t == b: + break + + for i in range(t, b): + out.append(matrix[i][r - 1]) + r -= 1 
+ if l == r: + break + + for i in range(r - 1, l - 1, -1): + out.append(matrix[b - 1][i]) + b -= 1 + if t == b: + break + + for i in range(b - 1, t - 1, -1): + out.append(matrix[i][l]) + l += 1 + if l == r: + break + + return out + + def _matrix_to_str(self, matrix: list[list[int]]) -> str: + """Get a string representation of the matrix""" + return "\n".join(" ".join(str(x) for x in row) for row in matrix) + + def _list_to_str(self, array: list[int]) -> str: + """Get a string representation of the array""" + return " ".join(str(x) for x in array) + + def __getitem__(self, idx: int) -> dict: + """Generate a single Spiral Matrix question""" + rng = Random(self.seed + idx) + + matrix = self._get_matrix(rng) + matrix_str = self._matrix_to_str(matrix) + answer = self._get_spiral(matrix) + answer_str = self._list_to_str(answer) + + return { + "question": QUESTION_TEMPLATE.format(matrix=matrix_str), + "answer": answer_str, + "metadata": {"matrix": matrix, "solution": answer}, + } + + +register_dataset("spiral_matrix", SpiralMatrixDataset, SpiralMatrixConfig) diff --git a/tests/test_spiral_matrix.py b/tests/test_spiral_matrix.py new file mode 100644 index 00000000..fc707310 --- /dev/null +++ b/tests/test_spiral_matrix.py @@ -0,0 +1,86 @@ +"""Tests for Spiral Matrix questions generation""" + +import pytest + +from reasoning_gym.algorithmic.spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset + + +def test_spiral_matrix_config_validation(): + """Test that invalid configs raise appropriate errors""" + with pytest.raises(AssertionError): + config = SpiralMatrixConfig(max_n=-1) # Negative not allowed + config.validate() + + with pytest.raises(AssertionError): + config = SpiralMatrixConfig(max_n=0) # Zero not allowed + config.validate() + + +def test_spiral_matrix_dataset_deterministic(): + """Test that dataset generates same items with same seed""" + config = SpiralMatrixConfig(seed=42, size=10) + dataset1 = SpiralMatrixDataset(config) + dataset2 = 
SpiralMatrixDataset(config) + + for i in range(len(dataset1)): + assert dataset1[i] == dataset2[i] + + +def test_spiral_matrix_dataset_items(): + """Test basic properties of generated items""" + config = SpiralMatrixConfig(max_n=5, size=10, seed=42) + dataset = SpiralMatrixDataset(config) + + for i in range(len(dataset)): + item = dataset[i] + # Check item structure + assert isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + # Check metadata + assert "matrix" in item["metadata"] + assert "solution" in item["metadata"] + + matrix = item["metadata"]["matrix"] + solution = item["metadata"]["solution"] + + # Verify list dimensions + assert len(matrix) <= config.max_n + assert all(len(row) <= config.max_n for row in matrix) + assert sum(len(row) for row in matrix) == len(solution) + + +def test_spiral_matrix_dataset_iteration(): + """Test that iteration respects dataset size""" + config = SpiralMatrixConfig(size=5, seed=42) + dataset = SpiralMatrixDataset(config) + + items = list(dataset) + assert len(items) == config.size + + # Test multiple iterations yield same items + assert items == list(dataset) + + +def test_spiral_matrix_answer(): + """Test the _get_spiral method""" + config = SpiralMatrixConfig(seed=42) + dataset = SpiralMatrixDataset(config) + + # One element + matrix = [[0]] + assert dataset._get_spiral(matrix) == [0] + + # One row + matrix = [[0, 1, 2]] + assert dataset._get_spiral(matrix) == [0, 1, 2] + + # One column + matrix = [[0], [1], [2]] + assert dataset._get_spiral(matrix) == [0, 1, 2] + + # 2D grid + matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + assert dataset._get_spiral(matrix) == [1, 2, 3, 6, 9, 8, 7, 4, 5]