Merge branch 'main' of https://github.com/open-thought/reasoning-gym into env/string-splitting

This commit is contained in:
Zafir Stojanovski 2025-02-14 17:51:18 +01:00
commit ba9b81ab45
26 changed files with 1400 additions and 169 deletions

View file

@ -11,6 +11,8 @@ from .base_conversion import BaseConversionConfig, BaseConversionDataset
from .binary_matrix import BinaryMatrixConfig, BinaryMatrixDataset
from .caesar_cipher import CaesarCipherConfig, CaesarCipherDataset
from .count_primes import CountPrimesConfig, CountPrimesDataset
from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
from .graph_color import GraphColorConfig, GraphColorDataset
from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset
from .isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset
from .letter_counting import LetterCountingConfig, LetterCountingDataset
@ -28,6 +30,7 @@ from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset
from .string_insertion import StringInsertionConfig, StringInsertionDataset
from .string_manipulation import StringManipulationConfig, StringManipulationDataset
from .string_splitting import StringSplittingConfig, StringSplittingDataset
from .string_synthesis import StringSynthesisConfig, StringSynthesisDataset
from .word_ladder import WordLadderConfig, WordLadderDataset
from .word_sequence_reversal import WordSequenceReversalConfig, WordSequenceReversalDataset
from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset
@ -39,6 +42,8 @@ __all__ = [
"BaseConversionDataset",
"CaesarCipherConfig",
"CaesarCipherDataset",
"GameOfLifeConfig",
"GameOfLifeDataset",
"LetterCountingConfig",
"LetterCountingDataset",
"LetterJumbleConfig",
@ -78,10 +83,14 @@ __all__ = [
"ABDataset",
"CountPrimesConfig",
"CountPrimesDataset",
"GraphColorConfig",
"GraphColorDataset",
"StringInsertionConfig",
"StringInsertionDataset",
"StringManipulationConfig",
"StringManipulationDataset",
"StringSplittingConfig",
"StringSplittingDataset",
"StringSynthesisConfig",
"StringSynthesisDataset",
]

View file

@ -0,0 +1,117 @@
import json
from dataclasses import dataclass
from random import Random
from typing import Dict, Optional
import cellpylib as cpl
from ..factory import ProceduralDataset, register_dataset
@dataclass
class GameOfLifeConfig:
    """Configuration for Game of Life puzzle generation."""

    grid_size_x: int = 10  # extent of the first board axis (one printed row per index)
    grid_size_y: int = 10  # extent of the second board axis (cells per printed row)
    filled_cells: int = 100  # number of random placements; an upper bound on live cells
    simulation_steps: int = 1  # generations to simulate before reading the answer
    seed: Optional[int] = None
    size: int = 500

    def validate(self):
        """Validate configuration parameters.

        Raises:
            AssertionError: if any parameter is outside its supported range.
        """
        assert 3 <= self.grid_size_x <= 999, "grid_size_x must be between 3 and 999"
        assert 3 <= self.grid_size_y <= 999, "grid_size_y must be between 3 and 999"
        assert self.simulation_steps >= 0, "simulation_steps must be gte 0"
        assert self.filled_cells >= 0, "filled_cells must be gte 0"
        assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
class GameOfLifeDataset(ProceduralDataset):
    """Generates Game of Life tasks: given a board, predict it after N steps."""

    def __init__(self, config: GameOfLifeConfig):
        # Only one template exists today; it is still picked via rng.choice in __getitem__.
        self._prompt_templates = [
            "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
        ]

        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single GameOfLife task.

        The item is derived from a Random seeded with ``self.seed + idx``, so the
        same index always yields the same board.

        Returns:
            dict with keys:
                - question: str, the task description including the start board
                - answer: str, compact JSON of the board after the simulation
                - metadata: dict with generation parameters
        """
        rng = Random(self.seed + idx)

        # Make the board and clear it completely (init_simple2d may pre-fill a cell).
        board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
        board[:, :, :] = 0

        # Add the cells. Positions are drawn with replacement, so the same cell can
        # be hit twice and filled_cells is only an upper bound on live cells.
        for _ in range(self.config.filled_cells):
            rx = rng.randint(0, self.config.grid_size_x - 1)
            ry = rng.randint(0, self.config.grid_size_y - 1)
            board[:, rx, ry] = 1

        # Simulate the result to get the answer.
        # NOTE(review): the +1 presumably accounts for cellpylib counting the
        # initial state as the first timestep - confirm against its docs.
        evolved = cpl.evolve2d(
            board,
            timesteps=self.config.simulation_steps + 1,
            apply_rule=cpl.game_of_life_rule,
            memoize="recursive",
        )

        # Render the start board one JSON row per line for readability.
        rows = [json.dumps(board[0, i].tolist(), separators=(",", ":")) for i in range(board.shape[1])]
        board_str = "[" + ",\n ".join(rows) + "]"

        final_step = evolved[-1]
        final_step_list = final_step.tolist()
        result_str = json.dumps(final_step_list, separators=(",", ":"))

        return {
            "question": rng.choice(self._prompt_templates).format(
                simulation_steps=self.config.simulation_steps, board=board_str
            ),
            "answer": result_str,
            "metadata": {
                "grid_size_x": self.config.grid_size_x,
                "grid_size_y": self.config.grid_size_y,
                "filled_cells": self.config.filled_cells,
                "simulation_steps": self.config.simulation_steps,
            },
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
        """Determine if the solution provided solves the GoL task.

        Both the answer and the reference are parsed as JSON and compared
        structurally, so whitespace differences do not matter.

        Args:
            answer (Optional[str]): The user's answer.
            entry (Dict[str, any]): The original dataset entry containing the correct answer.

        Returns:
            float: 1.0 for a correct board, 0.01 for a wrong or unparsable
            answer, 0.0 when no answer was given.
        """
        # NOTE(review): `any` in the annotation is the builtin; typing.Any was
        # probably intended.
        if answer is None:
            return 0.0

        try:
            ans_arr = json.loads(answer)
            correct_arr = json.loads(entry["answer"])
        except Exception:
            # Deliberately broad: the answer is untrusted model output and a
            # malformed reply must score low rather than abort scoring.
            return 0.01

        return 1.0 if correct_arr == ans_arr else 0.01
# Associate the name "game_of_life" with the dataset class and its config class.
# NOTE(review): what the registry does with this entry is defined in ..factory - confirm there.
register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)

View file

@ -0,0 +1,236 @@
import json
from dataclasses import dataclass
from random import Random
from typing import Dict, Optional
from ..factory import ProceduralDataset, register_dataset
def generate_random_graph(rng, num_vertices, edge_probability=0.3):
    """
    Build a random undirected graph by sampling every vertex pair independently.

    Args:
        rng: Random source; one ``rng.random()`` draw is made per vertex pair.
        num_vertices (int): The number of vertices.
        edge_probability (float): Probability for an edge to exist between any two vertices.

    Returns:
        tuple: (vertices, edges)
            - vertices: A list of vertex identifiers (0 to num_vertices-1).
            - edges: A list of tuples (u, v) with u < v representing undirected edges.
    """
    # Pairs are visited in lexicographic order so the draw sequence is stable.
    sampled_edges = [
        (low, high)
        for low in range(num_vertices)
        for high in range(low + 1, num_vertices)
        if rng.random() < edge_probability
    ]
    return list(range(num_vertices)), sampled_edges
def generate_graph_coloring_puzzle(rng, num_vertices=10, edge_probability=0.3, num_colors=3):
    """
    Create a graph coloring puzzle on a freshly sampled random graph.

    Args:
        rng: Random source forwarded to the graph generator.
        num_vertices (int): Number of vertices in the graph.
        edge_probability (float): Probability that an edge exists between any two vertices.
        num_colors (int): Number of allowed colors.

    Returns:
        dict: A dictionary with the following keys:
            - "vertices": List of vertices.
            - "edges": List of edges (tuples).
            - "num_colors": The number of allowed colors.
            - "color_options": A list of allowed colors (e.g., [1, 2, ..., num_colors]).
    """
    graph_vertices, graph_edges = generate_random_graph(rng, num_vertices, edge_probability)
    # Colors are numbered from 1, not 0.
    palette = list(range(1, num_colors + 1))
    return {
        "vertices": graph_vertices,
        "edges": graph_edges,
        "num_colors": num_colors,
        "color_options": palette,
    }
def verify_graph_coloring_solution(puzzle, coloring):
    """
    Verifies that a candidate coloring is a valid solution to the graph coloring puzzle.

    Checks run in a fixed order and the first failure is reported: every vertex
    has a color, every color is allowed, and no edge joins two equal colors.

    Args:
        puzzle (dict): The puzzle specification containing 'vertices', 'edges', and 'color_options'.
        coloring (dict): A dictionary mapping each vertex to a color. The keys can be
            integers or strings (JSON object keys are always strings). Any non-dict
            value is rejected as invalid instead of raising.

    Returns:
        tuple: (is_valid, message) where is_valid is a boolean and message is a string explanation.
    """
    vertices = puzzle["vertices"]
    edges = puzzle["edges"]
    # A tuple keeps membership tests equality-based, so unhashable colors
    # (e.g. a list from parsed JSON) are rejected instead of raising TypeError.
    allowed_colors = tuple(puzzle["color_options"])

    if not isinstance(coloring, dict):
        return False, "The coloring must be a mapping of vertices to colors."

    missing = object()  # sentinel: distinguishes "no entry" from a stored None

    def get_color(vertex):
        """Look up a vertex's color regardless of int/str key type."""
        if vertex in coloring:
            return coloring[vertex]
        if isinstance(vertex, int) and str(vertex) in coloring:
            return coloring[str(vertex)]
        if isinstance(vertex, str):
            try:
                return coloring.get(int(vertex), missing)
            except ValueError:
                pass
        return missing

    # Check that every vertex has been assigned a color.
    assigned = []
    for vertex in vertices:
        color = get_color(vertex)
        if color is missing:
            return False, f"Not all vertices have been assigned a color (missing vertex {vertex})."
        assigned.append((vertex, color))

    # Check that only allowed colors are used.
    for vertex, color in assigned:
        if color not in allowed_colors:
            return False, f"Vertex {vertex} uses an invalid color: {color}."

    # Ensure that adjacent vertices do not share the same color.
    for u, v in edges:
        color_u = get_color(u)
        if color_u is missing:
            # Only reachable when an edge names a vertex absent from 'vertices'.
            return False, f"Vertex {u} has not been assigned a color."
        color_v = get_color(v)
        if color_v is missing:
            return False, f"Vertex {v} has not been assigned a color."
        if color_u == color_v:
            return False, f"Adjacent vertices {u} and {v} both have color {color_u}."

    return True, "The coloring is valid."
def greedy_graph_coloring(puzzle):
    """
    Try to color the graph greedily, visiting vertices in their listed order.
    (Note: This may fail even when a valid coloring exists, because the greedy
    choice is never revisited.)

    Args:
        puzzle (dict): The puzzle specification.

    Returns:
        dict or None: A dictionary mapping vertices to colors if successful; otherwise, None.
    """
    palette = puzzle["color_options"]

    # Neighbor sets, one per vertex.
    neighbors_of = {}
    for vertex in puzzle["vertices"]:
        neighbors_of[vertex] = set()
    for a, b in puzzle["edges"]:
        neighbors_of[a].add(b)
        neighbors_of[b].add(a)

    assignment = {}
    for vertex in puzzle["vertices"]:
        # Colors already claimed by neighbors that were colored earlier.
        taken = {assignment[n] for n in neighbors_of[vertex] if n in assignment}
        for candidate in palette:
            if candidate not in taken:
                # First free color in palette order wins.
                assignment[vertex] = candidate
                break
        else:
            return None  # Every color is blocked for this vertex.
    return assignment
@dataclass
class GraphColorConfig:
    """Configuration for GraphColor puzzle generation"""

    num_colors: int = 4  # size of the allowed palette (colors are 1..num_colors)
    num_vertices: int = 10  # vertices per generated graph
    edge_probability: float = 0.4  # chance that any given vertex pair is connected
    seed: Optional[int] = None
    size: int = 500

    def validate(self):
        """Validate configuration parameters.

        Raises:
            AssertionError: if any parameter is outside its supported range.
        """
        # Without at least one color no vertex can ever be colored, and puzzle
        # generation would retry forever.
        assert self.num_colors >= 1, "num_colors must be at least 1"
        assert self.num_vertices >= 1, "num_vertices must be at least 1"
        assert self.edge_probability >= 0, "edge_probability must be non-negative"
        assert self.edge_probability < 1, "edge_probability must be less than 1"
class GraphColorDataset(ProceduralDataset):
    """Generates graph coloring problems with configurable parameters"""

    def __init__(self, config: GraphColorConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single GraphColor task.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: None, because many colorings can be valid; use
                  score_answer to judge a candidate
                - metadata: dict with the puzzle and one known-valid coloring
        """
        rng = Random(self.seed + idx)

        puzzle = None
        solution = None
        # Resample until the greedy solver succeeds, so every emitted puzzle has
        # at least one known solution.
        # NOTE(review): with too few colors for the edge density this can retry
        # many times; there is no attempt cap.
        while solution is None:
            puzzle = generate_graph_coloring_puzzle(
                rng=rng,
                num_vertices=self.config.num_vertices,
                edge_probability=self.config.edge_probability,
                num_colors=self.config.num_colors,
            )
            solution = greedy_graph_coloring(puzzle)

        edges = str(puzzle["edges"])
        # The example uses quoted keys because JSON object keys must be strings;
        # an unquoted example would lead solvers to produce unparsable answers.
        question = f"""Please provide a coloring for this graph such that every vertex is not connected to a vertex of the same color. The graph has these properties:
Vertices: {puzzle["vertices"]}
Edges: {edges}
Possible colors: {puzzle["color_options"]}
Return your solution as a JSON map of vertices to colors. (For example: {{"0": 1, "1": 2, "2": 3}})
"""

        return {
            "question": question,
            "answer": None,
            "metadata": {"possible_answer": solution, "puzzle": puzzle},
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
        """Determine if the solution provided solves the GraphColor task.

        The answer is parsed as JSON and checked against the puzzle stored in
        the entry's metadata; any valid coloring is accepted.

        Args:
            answer (Optional[str]): The user's answer.
            entry (Dict[str, any]): The original dataset entry containing the puzzle.

        Returns:
            float: 1.0 for a valid coloring, 0.01 for an invalid or unparsable
            answer, 0.0 when no answer was given.
        """
        if answer is None:
            return 0.0

        puzzle = entry["metadata"]["puzzle"]
        try:
            danswer = json.loads(answer)
            if not isinstance(danswer, dict):
                return 0.01
            solved, _ = verify_graph_coloring_solution(puzzle, danswer)
        except Exception:
            # Deliberately broad: the answer is untrusted model output and a
            # malformed reply must score low rather than abort scoring.
            return 0.01

        return 1.0 if solved else 0.01
# Associate the name "graph_color" with the dataset class and its config class.
# NOTE(review): what the registry does with this entry is defined in ..factory - confirm there.
register_dataset("graph_color", GraphColorDataset, GraphColorConfig)

View file

@ -3,7 +3,7 @@
import re
from dataclasses import dataclass
from random import Random
from typing import Optional
from typing import Dict, Optional
from reasoning_gym.data import read_data_file
@ -99,5 +99,27 @@ class LetterJumbleDataset(ProceduralDataset):
},
}
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Determine if the solution provided solves this task.

    The comparison ignores letter case and leading/trailing whitespace.

    Args:
        answer (Optional[str]): The user's answer.
        entry (Dict[str, any]): The original dataset entry containing the correct answer.

    Returns:
        float: 1.0 for a match, 0.01 for any other answer, 0.0 when no
        answer was given.
    """
    # NOTE(review): `any` in the annotation is the builtin; typing.Any was
    # probably intended.
    if answer is None:
        return 0.0

    expected = entry["answer"].strip().lower()
    return 1.0 if answer.strip().lower() == expected else 0.01
# Associate the name "letter_jumble" with the dataset class and its config class.
# NOTE(review): register_dataset is defined outside this view - confirm its semantics there.
register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)

View file

@ -34,6 +34,11 @@ class NumberSortingDataset(ProceduralDataset):
def __init__(self, config: NumberSortingConfig):
# Forward the config plus its seed and size to the base-class initializer.
super().__init__(config=config, seed=config.seed, size=config.size)
# Answer-format instructions; concatenated onto the end of every question text.
# The requested format is a bracketed list of quoted number strings.
self.added_instruction = """
Please follow the instruction below:
## 1. Let all your answers be a list of numbers. Instead of reporting your answer as -69, -13, 1, 7, 11, 43, 59, 61, use ['-69', '-13', '1', '7', '11', '43', '59', '61'] instead
## 2. Convert all numbers in the square brackets as strings. For example, ['-69', '-13', '1', '7', '11', '43', '59', '61']
"""
def _format_number(self, num: float, decimals: int) -> str:
"""Format number with specified decimal places"""
@ -78,9 +83,10 @@ class NumberSortingDataset(ProceduralDataset):
is_ascending = rng.choice([True, False])
direction = "ascending" if is_ascending else "descending"
answer = asc_answer if is_ascending else desc_answer
question = f"Sort these numbers in {direction} order: {', '.join(number_strs)}" + self.added_instruction
return {
"question": f"Sort these numbers in {direction} order: {', '.join(number_strs)}",
"question": question,
"answer": str(answer),
"metadata": {"original_numbers": number_strs, "direction": direction, "sorted_numbers": answer},
}

View file

@ -53,7 +53,7 @@ class PalindromeDataset(ProceduralDataset):
palindrome = self._assemble_palindrome(letters)
question_str = (
"Rearrange these letters to form a palindrome. A palindrome is a word, phrase, or sequence that reads the same forward and backward.\n\n"
"Rearrange these letters to form a palindrome. A palindrome is a word, phrase, or sequence that reads the same forward and backward. If there are multiple answers, only respond with one of them.\n\n"
"For example, if the letters are: a, a, b — a valid palindrome is: aba.\n\n"
f"Your letters: {', '.join(scrambled_letters)}\n\n"
"What palindrome can you form from these letters?"

View file

@ -7,7 +7,7 @@ https://leetcode.com/problems/ransom-note/description/
from collections import defaultdict
from dataclasses import dataclass
from random import Random
from typing import Optional
from typing import Dict, Optional
from ..factory import ProceduralDataset, register_dataset
@ -95,5 +95,27 @@ class RansomNoteDataset(ProceduralDataset):
"metadata": {"ransom_note": ransom_note, "magazine": magazine, "solution": answer, "solvable": solvable},
}
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Determine if the solution provided solves this task.

    The stripped answer must equal ``str(entry["answer"])`` exactly; the
    comparison is case-sensitive.

    Args:
        answer (Optional[str]): The user's answer.
        entry (Dict[str, any]): The original dataset entry containing the correct answer.

    Returns:
        float: 1.0 for a match, 0.01 for any other answer, 0.0 when no
        answer was given.
    """
    # NOTE(review): `any` in the annotation is the builtin; typing.Any was
    # probably intended.
    if answer is None:
        return 0.0

    # The stored answer is converted with str() so non-string references
    # (e.g. a bool) compare against their text form.
    expected = str(entry["answer"])
    return 1.0 if answer.strip() == expected else 0.01
# Associate the name "ransom_note" with the dataset class and its config class.
# NOTE(review): what the registry does with this entry is defined in ..factory - confirm there.
register_dataset("ransom_note", RansomNoteDataset, RansomNoteConfig)

View file

@ -0,0 +1,139 @@
"""Iteratively synthesizes new blocks from an initial set of blocks by repeatedly applying an ordered list of combination rules.
https://github.com/yongchao98/CodeSteer-v1.0/blob/main/create_dataset/create_dataset_string_synthesis.py
"""
from dataclasses import dataclass
from random import Random
from typing import Optional
from ..factory import ProceduralDataset, register_dataset
# Prompt text for one String Synthesis question. It is rendered with
# str.format, so:
#   - {A_square}, {B_square} and {C_square} are the placeholders for the
#     initial [A], [B] and [C] block counts;
#   - doubled braces such as {{A}} are escapes that render as the literal
#     block name {A}.
QUESTION_TEMPLATE = """There are nine different blocks [A] [B] [C] {{A}} {{B}} {{C}} (A) (B) (C)
1. One [A], one [B], and one [C] can be combined to form one {{A}}.
2. One [A] and one [B] can be combined to form one {{C}}.
3. One [B] and one [C] can be combined to form one {{B}}.
4. Two [C] can be combined to form one {{C}}.
5. One {{A}} and one {{C}} can be combined to form one (A) and one (B).
6. Two {{B}} can be combined to form one (C).
Given a certain number of initial blocks, your job is to cycle through the rules 1-6 above, synthesizing new blocks until no more rules can be applied, or until a state (counts of each block type) is repeated.
In the case a state is repeated the answer is the state before the repetition!
The output should be the count of each block type after the rules have been applied in the order they are listed above.
For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {{A}} 2 {{B}} 0 {{C}} 0 (A) 0 (B) 1 (C).
Example:
- Input: You have 2 [A], 3 [B], and 3 [C].
- Output: 0 0 0 2 1 0 0 0 0
- Explanation:
0. Initial state: 2 3 3 0 0 0 0 0 0
1. We can apply Rule 1 and obtain 1 {{A}}. New state: 1 2 2 1 0 0 0 0 0
2. We can apply Rule 1 again and obtain 1 {{A}}. New state 0 1 1 2 0 0 0 0 0
3. We can apply Rule 3 and obtain 1 {{B}}. New state 0 0 0 2 1 0 0 0 0
4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0
Now, you have {A_square} [A], {B_square} [B], and {C_square} [C] blocks. Provide the count of each block type after applying the above rules.
"""
@dataclass
class StringSynthesisConfig:
    """Settings that control String Synthesis dataset generation"""

    # Smallest initial count drawn for each of [A], [B], [C]
    min_initial_blocks: int = 0
    # Largest initial count drawn for each of [A], [B], [C]
    max_initial_blocks: int = 5
    # Upper bound on rule applications (guards against infinite loops)
    max_iterations: int = 1_000
    # Virtual dataset size
    size: int = 500
    seed: Optional[int] = None

    def validate(self):
        """Check that the configured bounds are consistent."""
        assert self.min_initial_blocks >= 0, "min_initial_blocks must be non-negative"
        assert (
            self.max_initial_blocks >= self.min_initial_blocks
        ), "min_initial_blocks must be less than or equal to max_initial_blocks"
        assert self.max_iterations > 0, "max_iterations must be positive"
class StringSynthesisDataset(ProceduralDataset):
    """Produces String Synthesis exercises whose difficulty follows the config"""

    def __init__(self, config: StringSynthesisConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)

    def _apply_rule(self, counts: list[int]) -> list[int]:
        """
        Fire the first rule (in rule order 1-6) whose inputs are available.

        ``counts`` is modified in place and also returned; when no rule can
        fire it comes back unchanged.
        """
        # Slot layout: [A] [B] [C] {A} {B} {C} (A) (B) (C)
        sq_a, sq_b, sq_c, cu_a, cu_b, cu_c, ro_a, ro_b, ro_c = range(9)

        # Each rule: (blocks consumed as slot -> quantity, slots that gain one block)
        rules = (
            ({sq_a: 1, sq_b: 1, sq_c: 1}, (cu_a,)),  # Rule 1: [A] + [B] + [C] -> {A}
            ({sq_a: 1, sq_b: 1}, (cu_c,)),  # Rule 2: [A] + [B] -> {C}
            ({sq_b: 1, sq_c: 1}, (cu_b,)),  # Rule 3: [B] + [C] -> {B}
            ({sq_c: 2}, (cu_c,)),  # Rule 4: 2 [C] -> {C}
            ({cu_a: 1, cu_c: 1}, (ro_a, ro_b)),  # Rule 5: {A} + {C} -> (A) + (B)
            ({cu_b: 2}, (ro_c,)),  # Rule 6: 2 {B} -> (C)
        )

        for consumed, produced in rules:
            if all(counts[slot] >= qty for slot, qty in consumed.items()):
                for slot, qty in consumed.items():
                    counts[slot] -= qty
                for slot in produced:
                    counts[slot] += 1
                break  # only one rule fires per call

        return counts

    def _get_answer(self, A_square: int, B_square: int, C_square: int) -> list[list[int]]:
        """Return every state visited, from the initial counts to the final one"""
        # [A] [B] [C] {A} {B} {C} (A) (B) (C)
        current = [A_square, B_square, C_square, 0, 0, 0, 0, 0, 0]
        history = [current]

        for _ in range(self.config.max_iterations):
            # Work on a copy so states already stored in the history stay intact.
            candidate = self._apply_rule(list(current))
            if candidate in history:
                # A repeated state (including "nothing changed") ends the run.
                break
            history.append(candidate)
            current = candidate

        return history

    def __getitem__(self, idx: int) -> dict:
        """Build the String Synthesis question for index ``idx``"""
        rng = Random(self.seed + idx)

        low = self.config.min_initial_blocks
        high = self.config.max_initial_blocks
        # Drawn in the order A, B, C.
        A_square, B_square, C_square = (rng.randint(low, high) for _ in range(3))

        states = self._get_answer(A_square, B_square, C_square)
        answer = states[-1]
        answer_str = " ".join(map(str, answer))

        question = QUESTION_TEMPLATE.format(A_square=A_square, B_square=B_square, C_square=C_square)
        return {
            "question": question,
            "answer": answer_str,
            "metadata": {"states": states, "solution": answer},
        }
# Associate the name "string_synthesis" with the dataset class and its config class.
# NOTE(review): what the registry does with this entry is defined in ..factory - confirm there.
register_dataset("string_synthesis", StringSynthesisDataset, StringSynthesisConfig)

View file

@ -4,7 +4,7 @@ import re
from dataclasses import dataclass
from enum import StrEnum
from random import Random
from typing import List, Optional, Tuple
from typing import Dict, List, Optional, Tuple
from ..data import read_data_file
from ..factory import ProceduralDataset, register_dataset
@ -105,5 +105,27 @@ class WordSortingDataset(ProceduralDataset):
},
}
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Determine if the solution provided solves this task.

    The comparison ignores all space characters and leading/trailing
    whitespace, so "a, b" and "a,b" are treated as equal. It is
    case-sensitive.

    Args:
        answer (Optional[str]): The user's answer.
        entry (Dict[str, any]): The original dataset entry containing the correct answer.

    Returns:
        float: 1.0 for a match, 0.01 for any other answer, 0.0 when no
        answer was given.
    """
    # NOTE(review): `any` in the annotation is the builtin; typing.Any was
    # probably intended.
    if answer is None:
        return 0.0

    given = answer.strip().replace(" ", "")
    expected = entry["answer"].strip().replace(" ", "")
    return 1.0 if given == expected else 0.01
# Associate the name "word_sorting" with the dataset class and its config class.
# NOTE(review): what the registry does with this entry is defined in ..factory - confirm there.
register_dataset("word_sorting", WordSortingDataset, WordSortingConfig)