diff --git a/GALLERY.md b/GALLERY.md
index 6180ca5c..0022ed7e 100644
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -46,6 +46,7 @@ This gallery shows examples from all available datasets using their default conf
 - [time_intervals](#time_intervals)
 - [tower_of_hanoi](#tower_of_hanoi)
 - [word_ladder](#word_ladder)
+- [group_anagrams](#group_anagrams)
 - [word_sequence_reversal](#word_sequence_reversal)
 - [word_sorting](#word_sorting)
 - [zebra_puzzles](#zebra_puzzles)
@@ -2136,6 +2137,85 @@ Metadata: {'start_word': 'SNOG', 'end_word': 'SUQS', 'word_length': 4, 'chain_le
 
 ````
 
+
+### group_anagrams
+
+Group anagrams together in a list of words. 
+
+Default configuration
+```python
+anagram_groups: int = 10  # Groups of anagrams present in the input
+max_words_per_group: int = 5  # Maximum number of words in a single anagram group
+```
+
+Example tasks:
+```
+Example 1:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+    - There is no string in the input that can be rearranged to form "bat".
+    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"]
+
+Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]]
+
+Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]}
+
+--------------------------------------------------
+
+Example 2:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+    - There is no string in the input that can be rearranged to form "bat".
+    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"]
+
+Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]]
+
+Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]}
+
+--------------------------------------------------
+
+Example 3:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+    - There is no string in the input that can be rearranged to form "bat".
+    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"]
+
+Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]]
+
+Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]}
+
+--------------------------------------------------
+```
+
+
+
 ### word_sequence_reversal
 Generates word sequence reversal tasks from text spans
 
diff --git a/README.md b/README.md
index 0fedd335..ba11d308 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
 - `SpellBackwardDataset`: Spell individual words backward (e.g. "sun" -> "nus")
 - `WordSequenceReversalDataset`: Reverse word order in text spans
 - `WordLadderDataset`: Generate word ladder puzzles where one word is transformed into another by changing one letter at a time
+- `GroupAnagramsDataset`: Group anagrams together in a list of words
 
 ### <small>Code Tasks</small>
 
diff --git a/reasoning_gym/algorithmic/__init__.py b/reasoning_gym/algorithmic/__init__.py
index 8224a019..4e4688bf 100644
--- a/reasoning_gym/algorithmic/__init__.py
+++ b/reasoning_gym/algorithmic/__init__.py
@@ -8,6 +8,7 @@ Algorithmic tasks for training reasoning capabilities:
 
 from .base_conversion import BaseConversionConfig, BaseConversionDataset
 from .caesar_cipher import CaesarCipherConfig, CaesarCipherDataset
+from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset
 from .letter_counting import LetterCountingConfig, LetterCountingDataset
 from .letter_jumble import LetterJumbleConfig, LetterJumbleDataset
 from .number_filtering import NumberFilteringConfig, NumberFilteringDataset
@@ -45,4 +46,6 @@ __all__ = [
     "WordLadderDataset",
     "PalindromeConfig",
     "PalindromeDataset",
+    "GroupAnagramsConfig",
+    "GroupAnagramsDataset",
 ]
diff --git a/reasoning_gym/algorithmic/group_anagrams.py b/reasoning_gym/algorithmic/group_anagrams.py
new file mode 100644
index 00000000..bbfac766
--- /dev/null
+++ b/reasoning_gym/algorithmic/group_anagrams.py
@@ -0,0 +1,128 @@
+"""Group all anagrams together in a list.
+
+An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+A popular Leetcode problem:
+https://leetcode.com/problems/group-anagrams/description/
+"""
+
+import json
+from collections import defaultdict
+from dataclasses import dataclass
+from random import Random
+from typing import Dict, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+MAX_ANAGRAM_GROUPS = 500  # Largest anagram_groups value accepted by GroupAnagramsConfig.validate()
+
+QUESTION_TEMPLATE = """An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+    - There is no string in the input that can be rearranged to form "bat".
+    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+{words}
+"""
+
+
+@dataclass
+class GroupAnagramsConfig:
+    """Settings that control how Group Anagrams exercises are generated"""
+
+    anagram_groups: int = 10  # Groups of anagrams present in the input
+    max_words_per_group: int = 5  # Maximum number of words in a single anagram group
+
+    size: int = 500  # Virtual dataset size
+    seed: Optional[int] = None
+
+    def validate(self):
+        """Raise AssertionError when a parameter lies outside its allowed range"""
+        groups_in_range = 1 <= self.anagram_groups <= MAX_ANAGRAM_GROUPS
+        assert groups_in_range, f"anagram_groups must be between 1 and {MAX_ANAGRAM_GROUPS}"
+        has_words = self.max_words_per_group >= 1
+        assert has_words, "max_words_per_group must be at least 1"
+
+
+class GroupAnagramsDataset(ProceduralDataset):
+    """Generates Group Anagrams exercises with configurable difficulty"""
+
+    def __init__(self, config: GroupAnagramsConfig):
+        """Initialise the base dataset and load the bundled anagram groups (one JSON object per line)."""
+        from pathlib import Path  # local import: keeps the module's import block untouched
+
+        super().__init__(config=config, seed=config.seed, size=config.size)
+        # Resolve the data file relative to this module so loading works from any working directory.
+        data_path = Path(__file__).resolve().parent.parent / "data" / "anagrams.jsonl"
+        with open(data_path, encoding="utf-8") as f:
+            self.anagrams = [json.loads(line)["words"] for line in f]
+
+    def __len__(self) -> int:
+        return self.config.size
+
+    def __iter__(self):
+        self._current_idx = 0
+        return self
+
+    def __next__(self):
+        if self._current_idx >= self.config.size:
+            raise StopIteration
+        item = self[self._current_idx]
+        self._current_idx += 1
+        return item
+
+    def _get_anagram_words(self, rng: Random) -> list[str]:
+        """Pick `anagram_groups` groups, then 1..`max_words_per_group` random words from each"""
+        words = []
+        for sample in rng.sample(self.anagrams, self.config.anagram_groups):
+            count = rng.randint(1, min(len(sample), self.config.max_words_per_group))
+            words.extend(rng.sample(sample, count))
+        return words
+
+    def _sort_nested_list(self, lst: list[list[str]]) -> list[list[str]]:
+        """Sort the words inside each group, then order the groups by their first word"""
+        return sorted([sorted(sublist) for sublist in lst], key=lambda x: x[0] if x else "")
+
+    def _group_anagrams(self, words: list[str]) -> list[list[str]]:
+        """Group words that share the same letters; returns the groups in sorted order"""
+        res = defaultdict(list)
+        for word in words:
+            # Sorted letters are a key shared by all anagrams, for any characters (not only a-z).
+            res["".join(sorted(word))].append(word)
+
+        return self._sort_nested_list(list(res.values()))
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+        """Return 1 for a correct grouping (in any order), 0.01 for a wrong one, 0 for None"""
+        if answer is None:
+            return 0
+        try:
+            parsed = json.loads(answer)
+            answer_str = json.dumps(self._sort_nested_list(parsed))
+        except (ValueError, TypeError):
+            # Malformed JSON or a non list-of-lists shape is a wrong answer, not a crash.
+            return 0.01
+        oracle_str = json.dumps(self._sort_nested_list(entry["metadata"]["solution"]))
+        return 1 if answer_str == oracle_str else 0.01
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Group Anagrams question"""
+        rng = Random(self.seed + idx)  # seeding per index makes each item reproducible
+        words = self._get_anagram_words(rng)
+        answer = self._group_anagrams(words)
+        answer_str = json.dumps(answer)
+
+        return {
+            "question": QUESTION_TEMPLATE.format(words=json.dumps(words)),
+            "answer": answer_str,
+            "metadata": {"words": words, "solution": answer},
+        }
+
+
+register_dataset("group_anagrams", GroupAnagramsDataset, GroupAnagramsConfig)
diff --git a/tests/test_group_anagrams.py b/tests/test_group_anagrams.py
new file mode 100644
index 00000000..288b4ec4
--- /dev/null
+++ b/tests/test_group_anagrams.py
@@ -0,0 +1,116 @@
+"""Tests for Group Anagrams questions generation"""
+
+import json
+
+import pytest
+
+from reasoning_gym.algorithmic.group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset
+
+
+@pytest.mark.parametrize(
+    "kwargs",
+    [
+        {"anagram_groups": -1},  # Negative not allowed
+        {"anagram_groups": 0},  # Zero not allowed
+        {"max_words_per_group": -1},  # Negative not allowed
+        {"max_words_per_group": 0},  # Zero not allowed
+    ],
+)
+def test_group_anagrams_config_validation(kwargs):
+    """Test that invalid configs raise appropriate errors"""
+    # Build the config outside the raises block: only validate() is expected to fail,
+    # so an unexpected AssertionError from the constructor cannot make the test pass.
+    config = GroupAnagramsConfig(**kwargs)
+
+    with pytest.raises(AssertionError):
+        config.validate()
+
+
+def test_group_anagrams_dataset_deterministic():
+    """Two datasets built from the same seeded config must yield identical items"""
+    config = GroupAnagramsConfig(seed=42, size=10)
+    first = GroupAnagramsDataset(config)
+    second = GroupAnagramsDataset(config)
+
+    for idx in range(len(first)):
+        assert first[idx] == second[idx]
+
+
+def test_group_anagrams_dataset_items():
+    """Test basic properties of generated items"""
+    config = GroupAnagramsConfig(anagram_groups=5, max_words_per_group=3, size=10, seed=42)
+    dataset = GroupAnagramsDataset(config)
+
+    for i in range(len(dataset)):
+        item = dataset[i]
+        # Check item structure
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Check metadata
+        assert "words" in item["metadata"]
+        assert "solution" in item["metadata"]
+
+        words = item["metadata"]["words"]
+        solution = item["metadata"]["solution"]
+
+        # Each of the 5 groups contributes between 1 and 3 words, so exactly 5 words is valid
+        assert 5 <= len(words) <= 15
+        assert len(solution) == 5
+        assert all(len(group) <= 3 for group in solution)
+
+
+def test_group_anagrams_dataset_iteration():
+    """Iterating yields exactly `size` items, and a second pass repeats them"""
+    config = GroupAnagramsConfig(size=5, seed=42)
+    dataset = GroupAnagramsDataset(config)
+
+    first_pass = list(dataset)
+    assert len(first_pass) == config.size
+
+    # Iterating again restarts from the beginning and reproduces the same items
+    assert first_pass == list(dataset)
+
+
+def test_group_anagrams_answer():
+    """Check _group_anagrams on a typical list, a single word and an empty list"""
+    config = GroupAnagramsConfig(seed=42)
+    dataset = GroupAnagramsDataset(config)
+
+    cases = [
+        # General use case
+        (
+            ["eat", "tea", "tan", "ate", "nat", "bat"],
+            [["ate", "eat", "tea"], ["bat"], ["nat", "tan"]],
+        ),
+        # Single word
+        (["a"], [["a"]]),
+        # Empty list
+        ([], []),
+    ]
+
+    for words, correct in cases:
+        assert json.dumps(dataset._group_anagrams(words)) == json.dumps(correct)
+
+
+def test_group_anagrams_score_answer():
+    """Check score_answer for a correct, an incorrect and a missing answer"""
+    config = GroupAnagramsConfig(seed=42)
+    dataset = GroupAnagramsDataset(config)
+    # Every case is scored against the same reference solution
+    solution = [["ate", "eat", "tea"], ["bat"], ["nat", "tan"]]
+    item = {"metadata": {"solution": solution}}
+
+    # A correct grouping scores 1 regardless of the order of the groups
+    reordered = json.dumps([["bat"], ["nat", "tan"], ["ate", "eat", "tea"]])
+    assert dataset.score_answer(reordered, item) == 1
+
+    # A wrong grouping of the same words scores 0.01
+    wrong = json.dumps([["ate", "eat"], ["bat", "tea"], ["nat", "tan"]])
+    assert dataset.score_answer(wrong, item) == 0.01
+
+    # A missing answer scores 0
+    missing = None
+    assert dataset.score_answer(missing, item) == 0