mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-25 17:10:51 +00:00
Merge branch 'main' of https://github.com/open-thought/reasoning-gym into env/string-splitting
This commit is contained in:
commit
ba9b81ab45
26 changed files with 1400 additions and 169 deletions
|
|
@ -11,6 +11,8 @@ from .base_conversion import BaseConversionConfig, BaseConversionDataset
|
|||
from .binary_matrix import BinaryMatrixConfig, BinaryMatrixDataset
|
||||
from .caesar_cipher import CaesarCipherConfig, CaesarCipherDataset
|
||||
from .count_primes import CountPrimesConfig, CountPrimesDataset
|
||||
from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
|
||||
from .graph_color import GraphColorConfig, GraphColorDataset
|
||||
from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset
|
||||
from .isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset
|
||||
from .letter_counting import LetterCountingConfig, LetterCountingDataset
|
||||
|
|
@ -28,6 +30,7 @@ from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset
|
|||
from .string_insertion import StringInsertionConfig, StringInsertionDataset
|
||||
from .string_manipulation import StringManipulationConfig, StringManipulationDataset
|
||||
from .string_splitting import StringSplittingConfig, StringSplittingDataset
|
||||
from .string_synthesis import StringSynthesisConfig, StringSynthesisDataset
|
||||
from .word_ladder import WordLadderConfig, WordLadderDataset
|
||||
from .word_sequence_reversal import WordSequenceReversalConfig, WordSequenceReversalDataset
|
||||
from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset
|
||||
|
|
@ -39,6 +42,8 @@ __all__ = [
|
|||
"BaseConversionDataset",
|
||||
"CaesarCipherConfig",
|
||||
"CaesarCipherDataset",
|
||||
"GameOfLifeConfig",
|
||||
"GameOfLifeDataset",
|
||||
"LetterCountingConfig",
|
||||
"LetterCountingDataset",
|
||||
"LetterJumbleConfig",
|
||||
|
|
@ -78,10 +83,14 @@ __all__ = [
|
|||
"ABDataset",
|
||||
"CountPrimesConfig",
|
||||
"CountPrimesDataset",
|
||||
"GraphColorConfig",
|
||||
"GraphColorDataset",
|
||||
"StringInsertionConfig",
|
||||
"StringInsertionDataset",
|
||||
"StringManipulationConfig",
|
||||
"StringManipulationDataset",
|
||||
"StringSplittingConfig",
|
||||
"StringSplittingDataset",
|
||||
"StringSynthesisConfig",
|
||||
"StringSynthesisDataset",
|
||||
]
|
||||
|
|
|
|||
117
reasoning_gym/algorithmic/game_of_life.py
Normal file
117
reasoning_gym/algorithmic/game_of_life.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
import json
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Dict, Optional
|
||||
|
||||
import cellpylib as cpl
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
||||
@dataclass
class GameOfLifeConfig:
    """Configuration for Game of Life puzzle generation"""

    grid_size_x: int = 10  # first dimension handed to the board constructor
    grid_size_y: int = 10  # second dimension handed to the board constructor
    filled_cells: int = 100  # actually a max: random placements may hit the same cell twice
    simulation_steps: int = 1  # number of generations to simulate for the answer
    seed: Optional[int] = None
    size: int = 500  # virtual dataset size

    def validate(self):
        """Validate configuration parameters

        Raises:
            AssertionError: if a grid dimension is outside 3..999, if
                simulation_steps is negative, or if filled_cells is negative
                or larger than the number of cells on the board.
        """
        assert 3 <= self.grid_size_x <= 999, "grid_size_x must be between 3 and 999"
        assert 3 <= self.grid_size_y <= 999, "grid_size_y must be between 3 and 999"
        assert self.simulation_steps >= 0, "simulation_steps must be gte 0"
        assert self.filled_cells >= 0, "filled_cells must be gte 0"
        assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
|
||||
|
||||
|
||||
class GameOfLifeDataset(ProceduralDataset):
    """Generates Game of Life games with configurable parameters"""

    def __init__(self, config: GameOfLifeConfig):
        # The templates are assigned before the base-class initialiser runs.
        self._prompt_templates = [
            "What will this Game of Life board look like after {simulation_steps} steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]])\n\n{board}."
        ]

        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single GameOfLife task

        Args:
            idx: item index; it is added to the dataset seed so that each
                index gets its own reproducible random stream.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: str, the final board as compact JSON
                - metadata: dict with generation parameters
        """
        rng = Random(self.seed + idx)

        # Make the board and clear whatever initial state the constructor produced
        board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
        board[:, :, :] = 0

        # Add the cells. Coordinates are drawn independently, so the same cell
        # can be picked more than once and fewer than filled_cells may end up alive.
        for _ in range(self.config.filled_cells):
            rx = rng.randint(0, self.config.grid_size_x - 1)
            ry = rng.randint(0, self.config.grid_size_y - 1)
            board[:, rx, ry] = 1

        # Simulate the result to get the answer.
        # NOTE(review): the "+ 1" presumably accounts for the initial state
        # occupying the first timestep - confirm against the cellpylib docs.
        evolved = cpl.evolve2d(
            board,
            timesteps=self.config.simulation_steps + 1,
            apply_rule=cpl.game_of_life_rule,
            memoize="recursive",
        )

        # Render the starting board one JSON row per line for the prompt
        rows = [json.dumps(board[0, i].tolist(), separators=(",", ":")) for i in range(board.shape[1])]
        board_str = "[" + ",\n ".join(rows) + "]"

        # The last simulated state is the expected answer
        final_step = evolved[-1]
        final_step_list = final_step.tolist()
        result_str = json.dumps(final_step_list, separators=(",", ":"))

        return {
            "question": rng.choice(self._prompt_templates).format(
                simulation_steps=self.config.simulation_steps, board=board_str
            ),
            "answer": result_str,
            "metadata": {
                "grid_size_x": self.config.grid_size_x,
                "grid_size_y": self.config.grid_size_y,
                "filled_cells": self.config.filled_cells,
                "simulation_steps": self.config.simulation_steps,
            },
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
        """Determine if the solution provided solves the GoL task.

        The function awards 1.0 for a correct answer, 0.01 for an answer that
        is wrong or cannot be parsed as JSON, and 0.0 when no answer is given.

        Args:
            answer (Optional[str]): The user's answer.
            entry (Dict[str, any]): The original dataset entry containing the correct answer.

        Returns:
            float: The computed score between 0.0 and 1.0.
        """
        if answer is None:
            return 0.0

        try:
            ans_arr = json.loads(answer)
            correct_arr = json.loads(entry["answer"])
        except Exception:
            # Scoring is best effort: the answer is untrusted text, so any
            # failure to parse it is treated as a wrong answer, not a crash.
            return 0.01

        # Compare parsed structures so that whitespace differences do not matter
        return 1.0 if correct_arr == ans_arr else 0.01
|
||||
|
||||
|
||||
register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)
|
||||
236
reasoning_gym/algorithmic/graph_color.py
Normal file
236
reasoning_gym/algorithmic/graph_color.py
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
import json
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Dict, Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
||||
def generate_random_graph(rng, num_vertices, edge_probability=0.3):
    """
    Build an undirected random graph by sampling every vertex pair once.

    Args:
        rng: Random number source; its ``random()`` method is called once per
            candidate pair, in increasing (u, v) order.
        num_vertices (int): The number of vertices.
        edge_probability (float): Chance that any given pair is connected.

    Returns:
        tuple: (vertices, edges)
            - vertices: The list [0, 1, ..., num_vertices - 1].
            - edges: A list of (u, v) tuples with u < v, one per sampled edge.
    """
    sampled_edges = [
        (low, high)
        for low in range(num_vertices)
        for high in range(low + 1, num_vertices)
        if rng.random() < edge_probability
    ]
    return list(range(num_vertices)), sampled_edges
|
||||
|
||||
|
||||
def generate_graph_coloring_puzzle(rng, num_vertices=10, edge_probability=0.3, num_colors=3):
    """
    Create a graph coloring puzzle on a freshly sampled random graph.

    Args:
        rng: Random number source forwarded to the graph generator.
        num_vertices (int): Number of vertices in the graph.
        edge_probability (float): Chance that any two vertices are connected.
        num_colors (int): Number of allowed colors.

    Returns:
        dict: The puzzle specification with keys:
            - "vertices": List of vertices.
            - "edges": List of edges (tuples).
            - "num_colors": The number of allowed colors.
            - "color_options": The allowed colors, [1, 2, ..., num_colors].
    """
    graph_vertices, graph_edges = generate_random_graph(rng, num_vertices, edge_probability)
    palette = [*range(1, num_colors + 1)]
    return {
        "vertices": graph_vertices,
        "edges": graph_edges,
        "num_colors": num_colors,
        "color_options": palette,
    }
|
||||
|
||||
|
||||
def verify_graph_coloring_solution(puzzle, coloring):
    """
    Verifies that a candidate coloring is a valid solution to the graph coloring puzzle.

    Checks run in this order: every vertex has a color, every color is one of
    the allowed options, and no edge joins two vertices of the same color.
    The first failed check determines the returned message.

    Args:
        puzzle (dict): The puzzle specification containing 'vertices', 'edges', and 'color_options'.
        coloring (dict): A dictionary mapping each vertex to a color. The keys can be integers or
            strings (JSON object keys are always strings). Anything that is not a mapping is
            rejected instead of raising.

    Returns:
        tuple: (is_valid, message) where is_valid is a boolean and message is a string explanation.
    """
    from collections.abc import Mapping  # local import keeps this function self-contained

    vertices = puzzle["vertices"]
    edges = puzzle["edges"]
    allowed_colors = set(puzzle["color_options"])

    # A parsed answer can be any JSON value. Scalars used to raise TypeError
    # here and arrays could be mistaken for a vertex -> color mapping.
    if not isinstance(coloring, Mapping):
        return False, f"The coloring must map vertices to colors, got {type(coloring).__name__}."

    # Helper function to get a vertex's color regardless of key type.
    def get_color(vertex):
        # If the key matches as-is, return it.
        if vertex in coloring:
            return coloring[vertex]
        # If the vertex is an integer and its string form is a key, return that.
        if isinstance(vertex, int) and str(vertex) in coloring:
            return coloring[str(vertex)]
        # If the vertex is a string, try to convert it to int and look it up.
        if isinstance(vertex, str):
            try:
                vertex_int = int(vertex)
            except ValueError:
                vertex_int = None
            if vertex_int is not None and vertex_int in coloring:
                return coloring[vertex_int]
        # If no matching key is found, signal an error.
        raise KeyError(f"Vertex {vertex} has not been assigned a color.")

    # Membership test that treats unhashable values (lists, dicts) as invalid colors.
    def is_allowed(color):
        try:
            return color in allowed_colors
        except TypeError:
            return False

    # Check that every vertex has been assigned a color.
    assigned = []
    for vertex in vertices:
        try:
            assigned.append((vertex, get_color(vertex)))
        except KeyError:
            return False, f"Not all vertices have been assigned a color (missing vertex {vertex})."

    # Check that only allowed colors are used.
    for vertex, color in assigned:
        if not is_allowed(color):
            return False, f"Vertex {vertex} uses an invalid color: {color}."

    # Ensure that adjacent vertices do not share the same color.
    for u, v in edges:
        try:
            color_u = get_color(u)
            color_v = get_color(v)
        except KeyError as e:
            # args[0] is the plain message; str(KeyError) would wrap it in quotes.
            return False, e.args[0]
        if color_u == color_v:
            return False, f"Adjacent vertices {u} and {v} both have color {color_u}."

    return True, "The coloring is valid."
|
||||
|
||||
|
||||
def greedy_graph_coloring(puzzle):
    """
    Color the graph greedily, visiting vertices in their listed order.

    Each vertex receives the first color option that none of its already
    colored neighbors uses. The heuristic is not exhaustive, so it can give
    up on graphs that are colorable with the given options.

    Args:
        puzzle (dict): The puzzle specification.

    Returns:
        dict or None: A vertex -> color mapping, or None as soon as some
        vertex has no color left.
    """
    vertex_list = puzzle["vertices"]
    edge_list = puzzle["edges"]
    palette = puzzle["color_options"]

    # Neighbor sets, filled symmetrically because the graph is undirected.
    neighbors = {vertex: set() for vertex in vertex_list}
    for left, right in edge_list:
        neighbors[left].add(right)
        neighbors[right].add(left)

    assignment = {}
    for vertex in vertex_list:
        # Colors already claimed by neighbors that were processed earlier.
        taken = {assignment[other] for other in neighbors[vertex] if other in assignment}
        for candidate in palette:
            if candidate not in taken:
                assignment[vertex] = candidate
                break
        else:
            # Every option clashes with some neighbor.
            return None
    return assignment
|
||||
|
||||
|
||||
@dataclass
class GraphColorConfig:
    """Configuration for GraphColor puzzle generation"""

    num_colors: int = 4  # number of colors a solution may use
    num_vertices: int = 10  # number of vertices in each generated graph
    edge_probability: float = 0.4  # chance that any two vertices are connected
    seed: Optional[int] = None
    size: int = 500  # virtual dataset size

    def validate(self):
        """Validate configuration parameters

        Raises:
            AssertionError: if num_colors or num_vertices is smaller than 1,
                or if edge_probability is outside [0, 1).
        """
        # Without at least one color no vertex can ever be colored, so puzzle
        # generation could never produce a solvable instance.
        assert self.num_colors >= 1, "num_colors must be at least 1"
        assert self.num_vertices >= 1, "num_vertices must be at least 1"
        assert self.edge_probability >= 0, "edge_probability must be non-negative"
        assert self.edge_probability < 1, "edge_probability must be less than 1"
|
||||
|
||||
|
||||
class GraphColorDataset(ProceduralDataset):
    """Generates graph coloring problems with configurable parameters"""

    def __init__(self, config: GraphColorConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single GraphColor task

        Args:
            idx: item index; it is added to the dataset seed so that each
                index gets its own reproducible random stream.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: None, because many colorings can be valid; use
                  score_answer to check a candidate
                - metadata: dict with one possible answer and the puzzle
        """
        rng = Random(self.seed + idx)

        # Resample until the greedy heuristic finds a coloring, which proves
        # the puzzle is solvable. NOTE: this can take very long when the
        # configuration makes colorable graphs rare (few colors, dense graphs).
        puzzle = None
        solution = None
        while solution is None:
            puzzle = generate_graph_coloring_puzzle(
                rng=rng,
                num_vertices=self.config.num_vertices,
                edge_probability=self.config.edge_probability,
                num_colors=self.config.num_colors,
            )
            solution = greedy_graph_coloring(puzzle)

        edges = str(puzzle["edges"])
        # The example uses quoted keys so that it is valid JSON, which is the
        # format score_answer parses.
        question = f"""Please provide a coloring for this graph such that every vertex is not connected to a vertex of the same color. The graph has these properties:

Vertices: {puzzle["vertices"]}
Edges: {edges}
Possible colors: {puzzle["color_options"]}

Return your solution as a JSON map of vertices to colors. (For example: {{"0": 1, "1": 2, "2": 3}})
"""

        return {
            "question": question,
            "answer": None,
            "metadata": {"possible_answer": solution, "puzzle": puzzle},
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
        """Determine if the solution provided solves the GraphColor task.

        The function awards 1.0 for a valid coloring, 0.01 for an answer that
        is not valid JSON or not a valid coloring, and 0.0 when no answer is given.

        Args:
            answer (Optional[str]): The user's answer.
            entry (Dict[str, any]): The original dataset entry containing the puzzle in its metadata.

        Returns:
            float: The computed score between 0.0 and 1.0.
        """
        if answer is None:
            return 0.0

        # The answer is untrusted text: malformed JSON is a wrong answer, not a crash.
        try:
            danswer = json.loads(answer)
        except (TypeError, ValueError, RecursionError):
            return 0.01

        try:
            solved, _ = verify_graph_coloring_solution(entry["metadata"]["puzzle"], danswer)
        except TypeError:
            # Valid JSON of the wrong shape, e.g. a bare number or unhashable colors.
            return 0.01

        return 1.0 if solved else 0.01  # Yay
|
||||
|
||||
|
||||
register_dataset("graph_color", GraphColorDataset, GraphColorConfig)
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Optional
|
||||
from typing import Dict, Optional
|
||||
|
||||
from reasoning_gym.data import read_data_file
|
||||
|
||||
|
|
@ -99,5 +99,27 @@ class LetterJumbleDataset(ProceduralDataset):
|
|||
},
|
||||
}
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Determine if the solution provided solves this task.

    The function awards 1.0 for a correct answer, 0.01 for an incorrect one
    and 0.0 when no answer is given. The comparison ignores letter case and
    surrounding whitespace.

    Args:
        answer (Optional[str]): The user's answer.
        entry (Dict[str, any]): The original dataset entry containing the correct answer.

    Returns:
        float: The computed score between 0.0 and 1.0.
    """
    if answer is None:
        return 0.0

    expected = entry["answer"].strip().lower()
    return 1.0 if answer.strip().lower() == expected else 0.01
|
||||
|
||||
|
||||
register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)
|
||||
|
|
|
|||
|
|
@ -34,6 +34,11 @@ class NumberSortingDataset(ProceduralDataset):
|
|||
|
||||
def __init__(self, config: NumberSortingConfig):
    # Forward the config together with its seed and size to the base class.
    super().__init__(config=config, seed=config.seed, size=config.size)
    # Answer-format instructions that are appended to every generated question:
    # they ask for a bracketed list whose numbers are written as strings.
    self.added_instruction = """
Please follow the instruction below:
## 1. Let all your answers be a list of numbers. Instead of reporting your answer as -69, -13, 1, 7, 11, 43, 59, 61, use ['-69', '-13', '1', '7', '11', '43', '59', '61'] instead
## 2. Convert all numbers in the square brackets as strings. For example, ['-69', '-13', '1', '7', '11', '43', '59', '61']
"""
|
||||
|
||||
def _format_number(self, num: float, decimals: int) -> str:
|
||||
"""Format number with specified decimal places"""
|
||||
|
|
@ -78,9 +83,10 @@ class NumberSortingDataset(ProceduralDataset):
|
|||
is_ascending = rng.choice([True, False])
|
||||
direction = "ascending" if is_ascending else "descending"
|
||||
answer = asc_answer if is_ascending else desc_answer
|
||||
question = f"Sort these numbers in {direction} order: {', '.join(number_strs)}" + self.added_instruction
|
||||
|
||||
return {
|
||||
"question": f"Sort these numbers in {direction} order: {', '.join(number_strs)}",
|
||||
"question": question,
|
||||
"answer": str(answer),
|
||||
"metadata": {"original_numbers": number_strs, "direction": direction, "sorted_numbers": answer},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ class PalindromeDataset(ProceduralDataset):
|
|||
palindrome = self._assemble_palindrome(letters)
|
||||
|
||||
question_str = (
|
||||
"Rearrange these letters to form a palindrome. A palindrome is a word, phrase, or sequence that reads the same forward and backward.\n\n"
|
||||
"Rearrange these letters to form a palindrome. A palindrome is a word, phrase, or sequence that reads the same forward and backward. If there are multiple answers, only respond with one of them.\n\n"
|
||||
"For example, if the letters are: a, a, b — a valid palindrome is: aba.\n\n"
|
||||
f"Your letters: {', '.join(scrambled_letters)}\n\n"
|
||||
"What palindrome can you form from these letters?"
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ https://leetcode.com/problems/ransom-note/description/
|
|||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Optional
|
||||
from typing import Dict, Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
|
|
@ -95,5 +95,27 @@ class RansomNoteDataset(ProceduralDataset):
|
|||
"metadata": {"ransom_note": ransom_note, "magazine": magazine, "solution": answer, "solvable": solvable},
|
||||
}
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Determine if the solution provided solves this task.

    The function awards 1.0 for a correct answer, 0.01 for an incorrect one
    and 0.0 when no answer is given. Surrounding whitespace in the answer is
    ignored; the comparison is case-sensitive.

    Args:
        answer (Optional[str]): The user's answer.
        entry (Dict[str, any]): The original dataset entry containing the correct answer.

    Returns:
        float: The computed score between 0.0 and 1.0.
    """
    if answer is None:
        return 0.0

    # The stored answer is converted with str() so non-string values compare by their text.
    return 1.0 if answer.strip() == str(entry["answer"]) else 0.01
|
||||
|
||||
|
||||
register_dataset("ransom_note", RansomNoteDataset, RansomNoteConfig)
|
||||
|
|
|
|||
139
reasoning_gym/algorithmic/string_synthesis.py
Normal file
139
reasoning_gym/algorithmic/string_synthesis.py
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
"""Iteratively synthesizes a string by inserting characters according to a pattern.
|
||||
|
||||
https://github.com/yongchao98/CodeSteer-v1.0/blob/main/create_dataset/create_dataset_string_synthesis.py
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Optional
|
||||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
QUESTION_TEMPLATE = """There are nine different blocks [A] [B] [C] {{A}} {{B}} {{C}} (A) (B) (C)
|
||||
1. One [A], one [B], and one [C] can be combined to form one {{A}}.
|
||||
2. One [A] and one [B] can be combined to form one {{C}}.
|
||||
3. One [B] and one [C] can be combined to form one {{B}}.
|
||||
4. Two [C] can be combined to form one {{C}}.
|
||||
5. One {{A}} and one {{C}} can be combined to form one (A) and one (B).
|
||||
6. Two {{B}} can be combined to form one (C).
|
||||
|
||||
Given a certain number of initial blocks, your job is to cycle through the rules 1-6 above, synthesizing new blocks until no more rules can be applied, or until a state (counts of each block type) is repeated.
|
||||
In the case a state is repeated the answer is the state before the repetition!
|
||||
|
||||
The output should be the count of each block type after the rules have been applied in the order they are listed above.
|
||||
For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {{A}} 2 {{B}} 0 {{C}} 0 (A) 0 (B) 1 (C).
|
||||
|
||||
Example:
|
||||
- Input: You have 2 [A], 3 [B], and 3 [C].
|
||||
- Output: 0 0 0 2 1 0 0 0 0
|
||||
- Explanation:
|
||||
0. Initial state: 2 3 3 0 0 0 0 0 0
|
||||
1. We can apply Rule 1 and obtain 1 {{A}}. New state: 1 2 2 1 0 0 0 0 0
|
||||
2. We can apply Rule 1 again and obtain 1 {{A}}. New state 0 1 1 2 0 0 0 0 0
|
||||
3. We can apply Rule 3 and obtain 1 {{B}}. New state 0 0 0 2 1 0 0 0 0
|
||||
4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0
|
||||
|
||||
Now, you have {A_square} [A], {B_square} [B], and {C_square} [C] blocks. Provide the count of each block type after applying the above rules.
|
||||
"""
|
||||
|
||||
|
||||
@dataclass
class StringSynthesisConfig:
    """Settings that control String Synthesis dataset generation"""

    min_initial_blocks: int = 0  # Lower bound for each initial block count
    max_initial_blocks: int = 5  # Upper bound for each initial block count
    max_iterations: int = 1_000  # Cap on rule applications (guards against endless loops)

    size: int = 500  # Number of items in the virtual dataset
    seed: Optional[int] = None

    def validate(self):
        """Check that the block range and the iteration cap are sensible"""
        assert self.min_initial_blocks >= 0, "min_initial_blocks must be non-negative"
        assert (
            self.max_initial_blocks >= self.min_initial_blocks
        ), "min_initial_blocks must be less than or equal to max_initial_blocks"
        assert self.max_iterations > 0, "max_iterations must be positive"
|
||||
|
||||
|
||||
class StringSynthesisDataset(ProceduralDataset):
    """Produces String Synthesis exercises whose difficulty follows the config"""

    def __init__(self, config: StringSynthesisConfig):
        super().__init__(config=config, seed=config.seed, size=config.size)

    def _apply_rule(self, counts: list[int]) -> list[int]:
        """
        Fire the first rule whose ingredients are available.

        The list is modified in place and also returned; when no rule can
        fire it comes back unchanged.
        """
        # positions of the nine block types inside `counts`
        sq_a, sq_b, sq_c, cu_a, cu_b, cu_c, rd_a, rd_b, rd_c = range(9)

        # (ingredients needed per slot, slots that each gain one block), in rule order
        rules = (
            # Rule 1: One [A], one [B], and one [C] can be combined to form one {A}
            ({sq_a: 1, sq_b: 1, sq_c: 1}, (cu_a,)),
            # Rule 2: One [A] and one [B] can be combined to form one {C}
            ({sq_a: 1, sq_b: 1}, (cu_c,)),
            # Rule 3: One [B] and one [C] can be combined to form one {B}
            ({sq_b: 1, sq_c: 1}, (cu_b,)),
            # Rule 4: Two [C] can be combined to form one {C}
            ({sq_c: 2}, (cu_c,)),
            # Rule 5: One {A} and one {C} can be combined to form one (A) and one (B)
            ({cu_a: 1, cu_c: 1}, (rd_a, rd_b)),
            # Rule 6: Two {B} can be combined to form one (C)
            ({cu_b: 2}, (rd_c,)),
        )

        for ingredients, products in rules:
            if all(counts[slot] >= needed for slot, needed in ingredients.items()):
                for slot, needed in ingredients.items():
                    counts[slot] -= needed
                for slot in products:
                    counts[slot] += 1
                break  # only one rule fires per call
        return counts

    def _get_answer(self, A_square: int, B_square: int, C_square: int) -> list[list[int]]:
        """Return every state visited, from the initial counts to the final one"""
        # order: [A] [B] [C] {A} {B} {C} (A) (B) (C)
        current = [A_square, B_square, C_square, 0, 0, 0, 0, 0, 0]
        history = [current]

        for _ in range(self.config.max_iterations):
            # work on a copy so the states already recorded stay untouched
            following = self._apply_rule(list(current))
            # a state seen before (including "nothing changed") ends the run
            if following in history:
                break
            history.append(following)
            current = following

        return history

    def __getitem__(self, idx: int) -> dict:
        """Build the String Synthesis question for index `idx`"""
        rng = Random(self.seed + idx)

        # one draw per square block type, in the order [A], [B], [C]
        A_square, B_square, C_square = (
            rng.randint(self.config.min_initial_blocks, self.config.max_initial_blocks) for _ in range(3)
        )

        states = self._get_answer(A_square, B_square, C_square)
        answer = states[-1]
        answer_str = " ".join(map(str, answer))

        question = QUESTION_TEMPLATE.format(A_square=A_square, B_square=B_square, C_square=C_square)
        return {
            "question": question,
            "answer": answer_str,
            "metadata": {"states": states, "solution": answer},
        }
|
||||
|
||||
|
||||
register_dataset("string_synthesis", StringSynthesisDataset, StringSynthesisConfig)
|
||||
|
|
@ -4,7 +4,7 @@ import re
|
|||
from dataclasses import dataclass
|
||||
from enum import StrEnum
|
||||
from random import Random
|
||||
from typing import List, Optional, Tuple
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from ..data import read_data_file
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
|
@ -105,5 +105,27 @@ class WordSortingDataset(ProceduralDataset):
|
|||
},
|
||||
}
|
||||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Determine if the solution provided solves this task.

    The function awards 1.0 for a correct answer, 0.01 for an incorrect one
    and 0.0 when no answer is given. Space characters are removed from both
    sides before comparing, so spacing differences do not matter.

    Args:
        answer (Optional[str]): The user's answer.
        entry (Dict[str, any]): The original dataset entry containing the correct answer.

    Returns:
        float: The computed score between 0.0 and 1.0.
    """
    if answer is None:
        return 0.0

    given = answer.strip().replace(" ", "")
    expected = entry["answer"].strip().replace(" ", "")
    return 1.0 if given == expected else 0.01
|
||||
|
||||
|
||||
register_dataset("word_sorting", WordSortingDataset, WordSortingConfig)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue