Merge branch 'main' of https://github.com/open-thought/reasoning-gym into env/string-splitting

2026-04-25 17:10:51 +00:00 · 2025-02-14 17:51:18 +01:00 · 2025-02-14 17:51:18 +01:00 · ba9b81ab45
commit ba9b81ab45
parent 10481312f7 0169c0fba4
26 changed files with 1400 additions and 169 deletions
--- a/reasoning_gym/algorithmic/init.py
+++ b/reasoning_gym/algorithmic/init.py
@ -11,6 +11,8 @@ from .base_conversion import BaseConversionConfig, BaseConversionDataset
 from .binary_matrix import BinaryMatrixConfig, BinaryMatrixDataset
 from .caesar_cipher import CaesarCipherConfig, CaesarCipherDataset
 from .count_primes import CountPrimesConfig, CountPrimesDataset
+from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
+from .graph_color import GraphColorConfig, GraphColorDataset
 from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset
 from .isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset
 from .letter_counting import LetterCountingConfig, LetterCountingDataset
@ -28,6 +30,7 @@ from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset
 from .string_insertion import StringInsertionConfig, StringInsertionDataset
 from .string_manipulation import StringManipulationConfig, StringManipulationDataset
 from .string_splitting import StringSplittingConfig, StringSplittingDataset
+from .string_synthesis import StringSynthesisConfig, StringSynthesisDataset
 from .word_ladder import WordLadderConfig, WordLadderDataset
 from .word_sequence_reversal import WordSequenceReversalConfig, WordSequenceReversalDataset
 from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset
@ -39,6 +42,8 @@ __all__ = [
    "BaseConversionDataset",
    "CaesarCipherConfig",
    "CaesarCipherDataset",
+    "GameOfLifeConfig",
+    "GameOfLifeDataset",
    "LetterCountingConfig",
    "LetterCountingDataset",
    "LetterJumbleConfig",
@ -78,10 +83,14 @@ __all__ = [
    "ABDataset",
    "CountPrimesConfig",
    "CountPrimesDataset",
+    "GraphColorConfig",
+    "GraphColorDataset",
    "StringInsertionConfig",
    "StringInsertionDataset",
    "StringManipulationConfig",
    "StringManipulationDataset",
    "StringSplittingConfig",
    "StringSplittingDataset",
+    "StringSynthesisConfig",
+    "StringSynthesisDataset",
 ]
--- a/reasoning_gym/algorithmic/game_of_life.py
+++ b/reasoning_gym/algorithmic/game_of_life.py
@ -0,0 +1,117 @@
+import json
+from dataclasses import dataclass
+from random import Random
+from typing import Dict, Optional
+
+import cellpylib as cpl
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class GameOfLifeConfig:
+    """Configuration for Game of Life task generation.
+
+    Call ``validate()`` to check that the parameters are within the supported ranges.
+    """
+
+    grid_size_x: int = 10  # first board dimension, 3..999
+    grid_size_y: int = 10  # second board dimension, 3..999
+    filled_cells: int = 100  # number of random cell placements; actually a max, since positions may repeat
+    simulation_steps: int = 1  # number of generations to simulate
+    seed: Optional[int] = None
+    size: int = 500
+
+    def validate(self):
+        """Validate configuration parameters.
+
+        Raises:
+            AssertionError: If any parameter is outside its supported range.
+        """
+        assert 3 <= self.grid_size_x <= 999, "grid_size_x must be between 3 and 999"
+        assert 3 <= self.grid_size_y <= 999, "grid_size_y must be between 3 and 999"
+        assert self.simulation_steps >= 0, "simulation_steps must be gte 0"
+        assert self.filled_cells >= 0, "filled_cells must be gte 0"
+        assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
+
+
+class GameOfLifeDataset(ProceduralDataset):
+    """Generates Game of Life games with configurable parameters"""
+
+    def __init__(self, config: GameOfLifeConfig):
+        self._prompt_templates = [
+            "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
+        ]
+
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single GameOfLife task
+
+        The item is derived from ``Random(self.seed + idx)``, so the same index
+        always yields the same board.
+
+        Returns:
+            dict with keys:
+                - question: str, the task description
+                - answer: str, the final board as compact JSON
+                - metadata: dict with generation parameters
+        """
+        rng = Random(self.seed + idx)
+
+        # Make the board and reset every cell to 0 so that only the random
+        # placements below are alive (assumes init_simple2d may pre-fill a cell).
+        board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
+        board[:, :, :] = 0
+
+        # Add the cells; positions may repeat, so filled_cells is an upper bound.
+        for _ in range(self.config.filled_cells):
+            rx = rng.randint(0, self.config.grid_size_x - 1)
+            ry = rng.randint(0, self.config.grid_size_y - 1)
+            board[:, rx, ry] = 1
+
+        # Simulate the result to get the answer. The "+ 1" presumably accounts for
+        # the initial state counting as a timestep in cellpylib -- confirm against its docs.
+        evolved = cpl.evolve2d(
+            board,
+            timesteps=self.config.simulation_steps + 1,
+            apply_rule=cpl.game_of_life_rule,
+            memoize="recursive",
+        )
+
+        # One JSON row per line keeps the grid readable in the prompt.
+        rows = [json.dumps(board[0, i].tolist(), separators=(",", ":")) for i in range(board.shape[1])]
+        board_str = "[" + ",\n ".join(rows) + "]"
+
+        result_str = json.dumps(evolved[-1].tolist(), separators=(",", ":"))
+
+        return {
+            "question": rng.choice(self._prompt_templates).format(
+                simulation_steps=self.config.simulation_steps, board=board_str
+            ),
+            "answer": result_str,
+            "metadata": {
+                "grid_size_x": self.config.grid_size_x,
+                "grid_size_y": self.config.grid_size_y,
+                "filled_cells": self.config.filled_cells,
+                "simulation_steps": self.config.simulation_steps,
+            },
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+        """Determine if the solution provided solves the GoL task.
+
+        Both the answer and the reference are parsed as JSON and compared
+        structurally, so whitespace differences do not matter.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+
+        Returns:
+            float: 1.0 for a matching board, 0.01 for a wrong or unparsable
+            answer, 0.0 when no answer was given.
+        """
+        if answer is None:
+            return 0.0
+
+        try:
+            ans_arr = json.loads(answer)
+            correct_arr = json.loads(entry["answer"])
+        except Exception:
+            # Deliberately broad: anything that cannot be parsed is scored as a
+            # wrong answer rather than aborting the scoring run.
+            return 0.01
+
+        return 1.0 if ans_arr == correct_arr else 0.01  # Yay
+
+
+register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)
--- a/reasoning_gym/algorithmic/graph_color.py
+++ b/reasoning_gym/algorithmic/graph_color.py
@ -0,0 +1,236 @@
+import json
+from dataclasses import dataclass
+from random import Random
+from typing import Dict, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+def generate_random_graph(rng, num_vertices, edge_probability=0.3):
+    """
+    Build a random undirected graph by sampling every vertex pair independently.
+
+    Args:
+        rng: Random source; ``rng.random()`` is drawn once per pair (i, j) with i < j.
+        num_vertices (int): How many vertices the graph has.
+        edge_probability (float): Chance that any given pair is joined by an edge.
+
+    Returns:
+        tuple: (vertices, edges)
+            - vertices: The identifiers 0 .. num_vertices-1 as a list.
+            - edges: Tuples (u, v) with u < v, one per sampled undirected edge.
+    """
+    nodes = list(range(num_vertices))
+    # Pairs are visited in lexicographic order so the rng stream is consumed deterministically.
+    links = [
+        (low, high)
+        for low in range(num_vertices)
+        for high in range(low + 1, num_vertices)
+        if rng.random() < edge_probability
+    ]
+    return nodes, links
+
+
+def generate_graph_coloring_puzzle(rng, num_vertices=10, edge_probability=0.3, num_colors=3):
+    """
+    Create a graph coloring puzzle on a freshly sampled random graph.
+
+    Args:
+        rng: Random source forwarded to the graph generator.
+        num_vertices (int): Number of vertices in the graph.
+        edge_probability (float): Probability that an edge exists between any two vertices.
+        num_colors (int): Number of allowed colors.
+
+    Returns:
+        dict: The puzzle, with keys
+            - "vertices": List of vertices.
+            - "edges": List of edges (tuples).
+            - "num_colors": The number of allowed colors.
+            - "color_options": The allowed colors, 1 .. num_colors.
+    """
+    nodes, links = generate_random_graph(rng, num_vertices, edge_probability)
+    palette = list(range(1, num_colors + 1))
+    return {
+        "vertices": nodes,
+        "edges": links,
+        "num_colors": num_colors,
+        "color_options": palette,
+    }
+
+
+def verify_graph_coloring_solution(puzzle, coloring):
+    """
+    Verifies that a candidate coloring is a valid solution to the graph coloring puzzle.
+
+    Malformed input (a non-dict coloring, or a color value that cannot be hashed)
+    is reported as an invalid solution instead of raising.
+
+    Args:
+        puzzle (dict): The puzzle specification containing 'vertices', 'edges', and 'color_options'.
+        coloring (dict): A dictionary mapping each vertex to a color. The keys can be integers or
+            strings (JSON object keys are always strings).
+
+    Returns:
+        tuple: (is_valid, message) where is_valid is a boolean and message is a string explanation.
+    """
+    if not isinstance(coloring, dict):
+        return False, "The coloring must be a mapping of vertices to colors."
+
+    vertices = puzzle["vertices"]
+    edges = puzzle["edges"]
+    allowed_colors = set(puzzle["color_options"])
+
+    # Helper function to get a vertex's color regardless of key type.
+    def get_color(vertex):
+        # If the key matches as-is, return it.
+        if vertex in coloring:
+            return coloring[vertex]
+        # If the vertex is an integer and its string form is a key, return that.
+        if isinstance(vertex, int) and str(vertex) in coloring:
+            return coloring[str(vertex)]
+        # If the vertex is a string, try to convert it to int and look it up.
+        if isinstance(vertex, str):
+            try:
+                vertex_int = int(vertex)
+            except ValueError:
+                pass
+            else:
+                if vertex_int in coloring:
+                    return coloring[vertex_int]
+        # If no matching key is found, signal an error.
+        raise KeyError(f"Vertex {vertex} has not been assigned a color.")
+
+    # Check that every vertex has been assigned a color, remembering each lookup.
+    assigned = []
+    for vertex in vertices:
+        try:
+            assigned.append((vertex, get_color(vertex)))
+        except KeyError:
+            return False, f"Not all vertices have been assigned a color (missing vertex {vertex})."
+
+    # Check that only allowed colors are used.
+    for vertex, color in assigned:
+        try:
+            is_allowed = color in allowed_colors
+        except TypeError:
+            # Unhashable values (e.g. a JSON list) can never be an allowed color.
+            is_allowed = False
+        if not is_allowed:
+            return False, f"Vertex {vertex} uses an invalid color: {color}."
+
+    # Ensure that adjacent vertices do not share the same color.
+    for u, v in edges:
+        try:
+            color_u = get_color(u)
+            color_v = get_color(v)
+        except KeyError as e:
+            # Only reachable when an edge names a vertex missing from puzzle["vertices"].
+            return False, str(e)
+        if color_u == color_v:
+            return False, f"Adjacent vertices {u} and {v} both have color {color_u}."
+
+    return True, "The coloring is valid."
+
+
+def greedy_graph_coloring(puzzle):
+    """
+    Colors the graph greedily, visiting vertices in their listed order.
+    (Note: a greedy pass can fail even when a valid coloring exists, and always
+    fails when the graph needs more colors than are offered.)
+
+    Args:
+        puzzle (dict): The puzzle specification ('vertices', 'edges', 'color_options').
+
+    Returns:
+        dict or None: Vertex -> color mapping on success; None when some vertex
+        has no color left.
+    """
+    palette = puzzle["color_options"]
+
+    # Neighbor sets, keyed by vertex.
+    neighbors = {node: set() for node in puzzle["vertices"]}
+    for left, right in puzzle["edges"]:
+        neighbors[left].add(right)
+        neighbors[right].add(left)
+
+    assignment = {}
+    for node in puzzle["vertices"]:
+        # Colors already claimed by neighbors that were visited earlier.
+        taken = {assignment[other] for other in neighbors[node] if other in assignment}
+        for candidate in palette:
+            if candidate not in taken:
+                # First free color in palette order wins.
+                assignment[node] = candidate
+                break
+        else:
+            # Every color is blocked by a neighbor.
+            return None
+    return assignment
+
+
+@dataclass
+class GraphColorConfig:
+    """Configuration for GraphColor puzzle generation"""
+
+    num_colors: int = 4  # size of the allowed color palette
+    num_vertices: int = 10  # vertices per generated graph
+    edge_probability: float = 0.4  # chance that any vertex pair is connected
+    seed: Optional[int] = None
+    size: int = 500
+
+    def validate(self):
+        """Validate configuration parameters.
+
+        Raises:
+            AssertionError: If a parameter is outside its supported range.
+        """
+        # Without at least one color no graph with a vertex can ever be colored,
+        # which would make puzzle generation retry forever.
+        assert self.num_colors >= 1, "num_colors must be at least 1"
+        assert self.num_vertices >= 1, "num_vertices must be at least 1"
+        assert self.edge_probability >= 0, "edge_probability must be non-negative"
+        assert self.edge_probability < 1, "edge_probability must be less than 1"
+
+
+class GraphColorDataset(ProceduralDataset):
+    """Generates graph coloring problems with configurable parameters"""
+
+    def __init__(self, config: GraphColorConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single GraphColor task
+
+        Graphs are re-sampled until the greedy solver finds a coloring, so every
+        emitted puzzle is known to be solvable. NOTE: the loop has no retry cap;
+        a configuration that can never be colored (e.g. zero colors) will not
+        terminate.
+
+        Returns:
+            dict with keys:
+                - question: str, the task description
+                - answer: None (any valid coloring is accepted, see score_answer)
+                - metadata: dict with the puzzle and one possible solution
+        """
+        rng = Random(self.seed + idx)
+
+        puzzle = None
+        solution = None
+        while solution is None:
+            puzzle = generate_graph_coloring_puzzle(
+                rng=rng,
+                num_vertices=self.config.num_vertices,
+                edge_probability=self.config.edge_probability,
+                num_colors=self.config.num_colors,
+            )
+            solution = greedy_graph_coloring(puzzle)
+
+        edges = str(puzzle["edges"])
+        # The example uses string keys because JSON object keys must be strings.
+        question = f"""Please provide a coloring for this graph such that every vertex is not connected to a vertex of the same color. The graph has these properties:
+
+Vertices: {puzzle["vertices"]}
+Edges: {edges}
+Possible colors: {puzzle["color_options"]}
+
+Return your solution as a JSON map of vertices to colors. (For example: {{"0": 1, "1": 2, "2": 3}})
+"""
+
+        return {
+            "question": question,
+            "answer": None,
+            "metadata": {"possible_answer": solution, "puzzle": puzzle},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+        """Determine if the solution provided solves the GraphColor task.
+
+        The answer is parsed as JSON and checked against the puzzle stored in
+        the entry's metadata; any valid coloring earns full marks.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, any]): The original dataset entry containing the puzzle.
+
+        Returns:
+            float: 1.0 for a valid coloring, 0.01 for an invalid or unparsable
+            answer, 0.0 when no answer was given.
+        """
+        if answer is None:
+            return 0.0
+
+        try:
+            coloring = json.loads(answer)
+        except (ValueError, TypeError, RecursionError):
+            # Not JSON at all: score as a wrong answer instead of crashing.
+            return 0.01
+        if not isinstance(coloring, dict):
+            return 0.01
+
+        try:
+            solved, _ = verify_graph_coloring_solution(entry["metadata"]["puzzle"], coloring)
+        except TypeError:
+            # e.g. an unhashable color value such as a list.
+            return 0.01
+        return 1.0 if solved else 0.01  # Yay
+
+
+register_dataset("graph_color", GraphColorDataset, GraphColorConfig)
--- a/reasoning_gym/algorithmic/letter_jumble.py
+++ b/reasoning_gym/algorithmic/letter_jumble.py
@ -3,7 +3,7 @@
 import re
 from dataclasses import dataclass
 from random import Random
-from typing import Optional
+from typing import Dict, Optional

 from reasoning_gym.data import read_data_file

@ -99,5 +99,27 @@ class LetterJumbleDataset(ProceduralDataset):
            },
        }

+    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+        """Determine if the solution provided solves this task.
+
+        The comparison ignores letter case and surrounding whitespace.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+
+        Returns:
+            float: 1.0 for a match, 0.01 for a mismatch, 0.0 when no answer was given.
+        """
+        if answer is None:
+            return 0.0
+
+        expected = entry["answer"].strip().lower()
+        return 1.0 if answer.strip().lower() == expected else 0.01
+

 register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)
--- a/reasoning_gym/algorithmic/number_sorting.py
+++ b/reasoning_gym/algorithmic/number_sorting.py
@ -34,6 +34,11 @@ class NumberSortingDataset(ProceduralDataset):

    def __init__(self, config: NumberSortingConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)
+        # Output-format rules appended verbatim to every generated question;
+        # they ask for the sorted numbers as a bracketed list of quoted strings.
+        self.added_instruction = """
+Please follow the instruction below:
+## 1. Let all your answers be a list of numbers. Instead of reporting your answer as -69, -13, 1, 7, 11, 43, 59, 61, use ['-69', '-13', '1', '7', '11', '43', '59', '61'] instead
+## 2. Convert all numbers in the square brackets as strings. For example, ['-69', '-13', '1', '7', '11', '43', '59', '61']
+"""

    def _format_number(self, num: float, decimals: int) -> str:
        """Format number with specified decimal places"""
@ -78,9 +83,10 @@ class NumberSortingDataset(ProceduralDataset):
        is_ascending = rng.choice([True, False])
        direction = "ascending" if is_ascending else "descending"
        answer = asc_answer if is_ascending else desc_answer
+        question = f"Sort these numbers in {direction} order: {', '.join(number_strs)}" + self.added_instruction

        return {
-            "question": f"Sort these numbers in {direction} order: {', '.join(number_strs)}",
+            "question": question,
            "answer": str(answer),
            "metadata": {"original_numbers": number_strs, "direction": direction, "sorted_numbers": answer},
        }
--- a/reasoning_gym/algorithmic/palindrome_generation.py
+++ b/reasoning_gym/algorithmic/palindrome_generation.py
@ -53,7 +53,7 @@ class PalindromeDataset(ProceduralDataset):
        palindrome = self._assemble_palindrome(letters)

        question_str = (
-            "Rearrange these letters to form a palindrome. A palindrome is a word, phrase, or sequence that reads the same forward and backward.\n\n"
+            "Rearrange these letters to form a palindrome. A palindrome is a word, phrase, or sequence that reads the same forward and backward. If there are multiple answers, only respond with one of them.\n\n"
            "For example, if the letters are: a, a, b — a valid palindrome is: aba.\n\n"
            f"Your letters: {', '.join(scrambled_letters)}\n\n"
            "What palindrome can you form from these letters?"
--- a/reasoning_gym/algorithmic/ransom_note.py
+++ b/reasoning_gym/algorithmic/ransom_note.py
@ -7,7 +7,7 @@ https://leetcode.com/problems/ransom-note/description/
 from collections import defaultdict
 from dataclasses import dataclass
 from random import Random
-from typing import Optional
+from typing import Dict, Optional

 from ..factory import ProceduralDataset, register_dataset

@ -95,5 +95,27 @@ class RansomNoteDataset(ProceduralDataset):
            "metadata": {"ransom_note": ransom_note, "magazine": magazine, "solution": answer, "solvable": solvable},
        }

+    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+        """Determine if the solution provided solves this task.
+
+        The answer is stripped of surrounding whitespace and compared, case
+        sensitively, with the string form of the expected answer.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+
+        Returns:
+            float: 1.0 for a match, 0.01 for a mismatch, 0.0 when no answer was given.
+        """
+        if answer is None:
+            return 0.0
+
+        return 1.0 if answer.strip() == str(entry["answer"]) else 0.01
+

 register_dataset("ransom_note", RansomNoteDataset, RansomNoteConfig)
--- a/reasoning_gym/algorithmic/string_synthesis.py
+++ b/reasoning_gym/algorithmic/string_synthesis.py
@ -0,0 +1,139 @@
+"""Iteratively synthesizes a string by inserting characters according to a pattern.
+
+https://github.com/yongchao98/CodeSteer-v1.0/blob/main/create_dataset/create_dataset_string_synthesis.py
+"""
+
+from dataclasses import dataclass
+from random import Random
+from typing import Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+# Prompt shown to the solver. It is rendered with str.format, so literal curly-brace
+# block names are written with doubled braces ({{A}} renders as {A}); the only
+# substituted fields are A_square, B_square and C_square (the initial block counts).
+QUESTION_TEMPLATE = """There are nine different blocks [A] [B] [C] {{A}} {{B}} {{C}} (A) (B) (C)
+1. One [A], one [B], and one [C] can be combined to form one {{A}}.
+2. One [A] and one [B] can be combined to form one {{C}}.
+3. One [B] and one [C] can be combined to form one {{B}}.
+4. Two [C] can be combined to form one {{C}}.
+5. One {{A}} and one {{C}} can be combined to form one (A) and one (B).
+6. Two {{B}} can be combined to form one (C).
+
+Given a certain number of initial blocks, your job is to cycle through the rules 1-6 above, synthesizing new blocks until no more rules can be applied, or until a state (counts of each block type) is repeated.
+In the case a state is repeated the answer is the state before the repetition!
+
+The output should be the count of each block type after the rules have been applied in the order they are listed above.
+For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {{A}} 2 {{B}} 0 {{C}} 0 (A) 0 (B) 1 (C).
+
+Example:
+- Input: You have 2 [A], 3 [B], and 3 [C].
+- Output: 0 0 0 2 1 0 0 0 0
+- Explanation:
+    0. Initial state: 2 3 3 0 0 0 0 0 0
+    1. We can apply Rule 1 and obtain 1 {{A}}. New state: 1 2 2 1 0 0 0 0 0
+    2. We can apply Rule 1 again and obtain 1 {{A}}. New state 0 1 1 2 0 0 0 0 0
+    3. We can apply Rule 3 and obtain 1 {{B}}. New state 0 0 0 2 1 0 0 0 0
+    4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0
+
+Now, you have {A_square} [A], {B_square} [B], and {C_square} [C] blocks. Provide the count of each block type after applying the above rules.
+"""
+
+
+@dataclass
+class StringSynthesisConfig:
+    """Settings that control String Synthesis dataset generation"""
+
+    min_initial_blocks: int = 0  # Lower bound for each initial block count
+    max_initial_blocks: int = 5  # Upper bound for each initial block count
+    max_iterations: int = 1_000  # Cap on rule applications (guards against endless loops)
+
+    size: int = 500  # Virtual dataset size
+    seed: Optional[int] = None
+
+    def validate(self):
+        """Check that the configured bounds are consistent"""
+        lower = self.min_initial_blocks
+        upper = self.max_initial_blocks
+        assert lower >= 0, "min_initial_blocks must be non-negative"
+        assert upper >= lower, "min_initial_blocks must be less than or equal to max_initial_blocks"
+        assert self.max_iterations > 0, "max_iterations must be positive"
+
+
+class StringSynthesisDataset(ProceduralDataset):
+    """Produces String Synthesis exercises whose difficulty is set by the config"""
+
+    def __init__(self, config: StringSynthesisConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def _apply_rule(self, counts: list[int]) -> list[int]:
+        """
+        Fire the highest-priority rule whose inputs are available.
+        ``counts`` is updated in place and returned; it comes back untouched
+        when no rule can fire.
+        """
+        # Slot layout: 0-2 = [A] [B] [C], 3-5 = {A} {B} {C}, 6-8 = (A) (B) (C).
+        # Each rule is (blocks consumed as slot -> amount, slots that gain one block),
+        # listed in priority order (Rule 1 first).
+        rules = (
+            ({0: 1, 1: 1, 2: 1}, (3,)),  # Rule 1: [A] + [B] + [C] -> {A}
+            ({0: 1, 1: 1}, (5,)),  # Rule 2: [A] + [B] -> {C}
+            ({1: 1, 2: 1}, (4,)),  # Rule 3: [B] + [C] -> {B}
+            ({2: 2}, (5,)),  # Rule 4: 2 [C] -> {C}
+            ({3: 1, 5: 1}, (6, 7)),  # Rule 5: {A} + {C} -> (A) + (B)
+            ({4: 2}, (8,)),  # Rule 6: 2 {B} -> (C)
+        )
+        for consumed, gained in rules:
+            if all(counts[slot] >= amount for slot, amount in consumed.items()):
+                for slot, amount in consumed.items():
+                    counts[slot] -= amount
+                for slot in gained:
+                    counts[slot] += 1
+                break
+        return counts
+
+    def _get_answer(self, A_square: int, B_square: int, C_square: int) -> list[list[int]]:
+        """Return every state visited, from the initial counts to the final one"""
+        # [A] [B] [C] {A} {B} {C} (A) (B) (C)
+        current = [A_square, B_square, C_square, 0, 0, 0, 0, 0, 0]
+        history = [current]
+
+        for _ in range(self.config.max_iterations):
+            # Work on a copy so states already recorded are never mutated.
+            following = self._apply_rule(list(current))
+            if following in history:
+                # Either nothing changed or a state repeated: stop here.
+                break
+            history.append(following)
+            current = following
+
+        return history
+
+    def __getitem__(self, idx: int) -> dict:
+        """Build the String Synthesis question for index ``idx``"""
+        rng = Random(self.seed + idx)
+
+        low = self.config.min_initial_blocks
+        high = self.config.max_initial_blocks
+        # Drawn in the order [A], [B], [C].
+        A_square, B_square, C_square = (rng.randint(low, high) for _ in range(3))
+
+        states = self._get_answer(A_square, B_square, C_square)
+        final_state = states[-1]
+
+        return {
+            "question": QUESTION_TEMPLATE.format(A_square=A_square, B_square=B_square, C_square=C_square),
+            "answer": " ".join(map(str, final_state)),
+            "metadata": {"states": states, "solution": final_state},
+        }
+
+
+register_dataset("string_synthesis", StringSynthesisDataset, StringSynthesisConfig)
--- a/reasoning_gym/algorithmic/word_sorting.py
+++ b/reasoning_gym/algorithmic/word_sorting.py
@ -4,7 +4,7 @@ import re
 from dataclasses import dataclass
 from enum import StrEnum
 from random import Random
-from typing import List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple

 from ..data import read_data_file
 from ..factory import ProceduralDataset, register_dataset
@ -105,5 +105,27 @@ class WordSortingDataset(ProceduralDataset):
            },
        }

+    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+        """Determine if the solution provided solves this task.
+
+        Space characters are removed from both sides before comparing, so the
+        spacing around separators does not affect the score.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, any]): The original dataset entry containing the correct answer.
+
+        Returns:
+            float: 1.0 for a match, 0.01 for a mismatch, 0.0 when no answer was given.
+        """
+        if answer is None:
+            return 0.0
+
+        expected = entry["answer"].strip().replace(" ", "")
+        return 1.0 if answer.strip().replace(" ", "") == expected else 0.01
+

 register_dataset("word_sorting", WordSortingDataset, WordSortingConfig)