mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-30 17:40:45 +00:00
Add GO hints, legend, disallow numeric answer, store expected string answer
This commit is contained in:
parent
2458d3a646
commit
81cb7aa42b
3 changed files with 150 additions and 32 deletions
127
GALLERY.md
127
GALLERY.md
|
|
@ -35,6 +35,7 @@ This gallery shows examples from all available datasets using their default conf
|
|||
- [number_sorting](#number_sorting)
|
||||
- [palindrome](#palindrome)
|
||||
- [polynomial_equations](#polynomial_equations)
|
||||
- [polynomial_multiplication](#polynomial_multiplication)
|
||||
- [prime_factorization](#prime_factorization)
|
||||
- [propositional_logic](#propositional_logic)
|
||||
- [quantum_lock](#quantum_lock)
|
||||
|
|
@ -49,6 +50,7 @@ This gallery shows examples from all available datasets using their default conf
|
|||
- [syllogism](#syllogism)
|
||||
- [time_intervals](#time_intervals)
|
||||
- [tower_of_hanoi](#tower_of_hanoi)
|
||||
- [tsumego](#tsumego)
|
||||
- [word_ladder](#word_ladder)
|
||||
- [word_sequence_reversal](#word_sequence_reversal)
|
||||
- [word_sorting](#word_sorting)
|
||||
|
|
@ -1659,6 +1661,46 @@ Metadata: {'polynomial_expr': '71*n**3 - 2*n - 29', 'variable': 'n', 'degree': 3
|
|||
|
||||
````
|
||||
|
||||
### polynomial_multiplication
|
||||
Generates between min_polynomials and max_polynomials random polynomials, each of degree in [min_degree, max_degree].
|
||||
- The polynomial is formed by summing random terms of the form: coeff * x^exponent.
|
||||
- Then we find "F = P_0 * ... * P_n" using SymPy.
|
||||
|
||||
Default configuration:
|
||||
```python
|
||||
min_terms = 2
|
||||
max_terms = 4
|
||||
min_value = 1
|
||||
max_value = 100
|
||||
min_degree = 1
|
||||
max_degree = 3
|
||||
min_polynomials = 2
|
||||
max_polynomials = 3
|
||||
single_variable = (True,)
|
||||
operators = ('+', '-')
|
||||
seed = 42
|
||||
size = 500
|
||||
```
|
||||
|
||||
Example tasks:
|
||||
````
|
||||
Example 1:
|
||||
Question: Calculate the following: (65*x - 72)*(105*x - 125)
|
||||
Answer: 6825*x**2 - 15685*x + 9000
|
||||
Metadata: {'polynomial_expr': '(65*x - 72)*(105*x - 125)', 'single_variable': (True,), 'result': '6825*x**2 - 15685*x + 9000'}
|
||||
|
||||
Example 2:
|
||||
Question: Calculate the following: (-9*x**2 - 28*x)*(86*x**2 - 2*x - 13)
|
||||
Answer: -774*x**4 - 2390*x**3 + 173*x**2 + 364*x
|
||||
Metadata: {'polynomial_expr': '(-9*x**2 - 28*x)*(86*x**2 - 2*x - 13)', 'single_variable': (True,), 'result': '-774*x**4 - 2390*x**3 + 173*x**2 + 364*x'}
|
||||
|
||||
Example 3:
|
||||
Question: Calculate the following: (43 - 91*x)*(3*x**2 - 10*x)*(71*x**3 - 2*x - 29)
|
||||
Answer: -19383*x**6 + 73769*x**5 - 29984*x**4 + 5839*x**3 - 29271*x**2 + 12470*x
|
||||
Metadata: {'polynomial_expr': '(43 - 91*x)*(3*x**2 - 10*x)*(71*x**3 - 2*x - 29)', 'single_variable': (True,), 'result': '-19383*x**6 + 73769*x**5 - 29984*x**4 + 5839*x**3 - 29271*x**2 + 12470*x'}
|
||||
|
||||
````
|
||||
|
||||
### prime_factorization
|
||||
Generates prime factorization tasks
|
||||
|
||||
|
|
@ -2356,6 +2398,91 @@ Metadata: {'num_disks': 6, 'num_pegs': 3, 'start_peg': 1, 'target_peg': 2, 'auxi
|
|||
|
||||
````
|
||||
|
||||
### tsumego
|
||||
Generates Tsumego problems with configurable parameters
|
||||
|
||||
Default configuration:
|
||||
```python
|
||||
min_board_size = 9
|
||||
max_board_size = 13
|
||||
max_stones = 15
|
||||
size = 100
|
||||
seed = 42
|
||||
```
|
||||
|
||||
Example tasks:
|
||||
````
|
||||
Example 1:
|
||||
Question: Tsumego time. Black to play and capture some stones.
|
||||
Find the key move.
|
||||
|
||||
A B C D E F G H I
|
||||
9 X . . . X . . . .
|
||||
8 . . . . . . . . .
|
||||
7 . O . O . . X . .
|
||||
6 . . . . . . . . O
|
||||
5 O . . O . . . . .
|
||||
4 . X O O . . . . .
|
||||
3 . . . O . . . . .
|
||||
2 . . . . . . . . .
|
||||
1 . O . O . . X . .
|
||||
|
||||
X - Black
|
||||
O - White
|
||||
|
||||
Specify your move in coordinates (e.g. 'C4' for column C, row 4)
|
||||
Answer: E6
|
||||
Metadata: {'difficulty': {'board_size': 9}, 'board': [['X', '.', '.', '.', 'X', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', '.', 'O', '.', '.', 'X', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', 'O'], ['O', '.', '.', 'O', '.', '.', '.', '.', '.'], ['.', 'X', 'O', 'O', '.', '.', '.', '.', '.'], ['.', '.', '.', 'O', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', '.', 'O', '.', '.', 'X', '.', '.']], 'solution': (5, 4)}
|
||||
|
||||
Example 2:
|
||||
Question: Tsumego time. Black to play and capture some stones.
|
||||
Find the key move.
|
||||
|
||||
A B C D E F G H I
|
||||
9 . . O . . . . . .
|
||||
8 . X O . . . . . .
|
||||
7 . . . O . . . . .
|
||||
6 . . O O . . . . .
|
||||
5 . . O O . . . . .
|
||||
4 . X . . . . . . O
|
||||
3 . X . . . . X . .
|
||||
2 O . O . . . . . .
|
||||
1 . . . . O . . . .
|
||||
|
||||
X - Black
|
||||
O - White
|
||||
|
||||
Specify your move in coordinates (e.g. 'C4' for column C, row 4)
|
||||
Answer: E4
|
||||
Metadata: {'difficulty': {'board_size': 9}, 'board': [['.', '.', 'O', '.', '.', '.', '.', '.', '.'], ['.', 'X', 'O', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', 'O', '.', '.', '.', '.', '.'], ['.', '.', 'O', 'O', '.', '.', '.', '.', '.'], ['.', '.', 'O', 'O', '.', '.', '.', '.', '.'], ['.', 'X', '.', '.', '.', '.', '.', '.', 'O'], ['.', 'X', '.', '.', '.', '.', 'X', '.', '.'], ['O', '.', 'O', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', 'O', '.', '.', '.', '.']], 'solution': (3, 4)}
|
||||
|
||||
Example 3:
|
||||
Question: Tsumego time. Black to play and capture some stones.
|
||||
Find the key move.
|
||||
|
||||
A B C D E F G H I J K L
|
||||
12 . . . . . . . . . . . .
|
||||
11 . . X . . . . . . . . .
|
||||
10 . . . . . . . . . . . .
|
||||
9 . . . . . . . . . . . .
|
||||
8 X . . . . X . . . X . .
|
||||
7 . X . . . . . O . . . .
|
||||
6 . . . . . . O O . . . O
|
||||
5 . . . . . . . O . . . .
|
||||
4 . O . . . . . . O . . O
|
||||
3 X . . . . . . . . . . .
|
||||
2 . . . . . . . . . . . .
|
||||
1 . . . . . . . . . . X .
|
||||
|
||||
X - Black
|
||||
O - White
|
||||
|
||||
Specify your move in coordinates (e.g. 'C4' for column C, row 4)
|
||||
Answer: I7
|
||||
Metadata: {'difficulty': {'board_size': 12}, 'board': [['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', 'X', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['X', '.', '.', '.', '.', 'X', '.', '.', '.', 'X', '.', '.'], ['.', 'X', '.', '.', '.', '.', '.', 'O', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', 'O', 'O', '.', '.', '.', 'O'], ['.', '.', '.', '.', '.', '.', '.', 'O', '.', '.', '.', '.'], ['.', 'O', '.', '.', '.', '.', '.', '.', 'O', '.', '.', 'O'], ['X', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', 'X', '.']], 'solution': (6, 8)}
|
||||
|
||||
````
|
||||
|
||||
### word_ladder
|
||||
Generates word ladder transformation tasks
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""Go problem (tsumego) generator"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from random import Random
|
||||
from typing import Dict, List, Optional, Set, Tuple
|
||||
|
|
@ -37,9 +38,9 @@ class TsumegoDataset(ProceduralDataset):
|
|||
|
||||
def __init__(self, config: TsumegoConfig):
|
||||
self._prompt_templates = [
|
||||
"Black to play and capture some stones.\nFind the key move.",
|
||||
"It's Black's turn. Capture the marked white stones.",
|
||||
"Play as Black. What's the best move to capture?",
|
||||
"Tsumego time. Black to play and capture some stones.\nFind the key move.",
|
||||
"I have a Go problem for you. Black moves next - can you capture some of the white stones?",
|
||||
"Here's a Go challenge. Playing as Black, how can you capture as many white stones as possible?",
|
||||
]
|
||||
self._ko_point = None
|
||||
super().__init__(config=config, seed=config.seed, size=config.size)
|
||||
|
|
@ -147,7 +148,7 @@ class TsumegoDataset(ProceduralDataset):
|
|||
|
||||
return True
|
||||
|
||||
def _generate_capture_problem(self, size: int, rng: Random) -> Tuple[List[List[str]], str]:
|
||||
def _generate_capture_problem(self, size: int, rng: Random) -> Tuple[List[List[str]], Tuple[int, int]]:
|
||||
"""Generate a capture problem"""
|
||||
board = [["." for _ in range(size)] for _ in range(size)]
|
||||
stones_placed = 0
|
||||
|
|
@ -172,7 +173,7 @@ class TsumegoDataset(ProceduralDataset):
|
|||
board[row + 1][col] = "O"
|
||||
board[row][col - 1] = "O"
|
||||
if self._is_valid_move(board, row, col + 1, "X"):
|
||||
return board, f"{row+1},{col+2}"
|
||||
return board, (row, col + 1)
|
||||
tries += 1
|
||||
raise RuntimeError("Failed to generate a capture problem")
|
||||
|
||||
|
|
@ -199,15 +200,18 @@ class TsumegoDataset(ProceduralDataset):
|
|||
|
||||
board, solution = self._generate_capture_problem(size, rng)
|
||||
board_str = self._board_to_string(board)
|
||||
solution_str = f"{chr(ord('A')+solution[1])}{solution[0]+1}"
|
||||
|
||||
return {
|
||||
"question": (
|
||||
rng.choice(self._prompt_templates) + "\n\n" + board_str + "\n\n"
|
||||
"X - Black\n"
|
||||
"O - White\n\n"
|
||||
"Specify your move in coordinates (e.g. 'C4' for column C, row 4)"
|
||||
),
|
||||
"answer": solution,
|
||||
"answer": solution_str,
|
||||
"metadata": {
|
||||
"board_size": size,
|
||||
"difficulty": {"board_size": size},
|
||||
"board": board,
|
||||
"solution": solution,
|
||||
},
|
||||
|
|
@ -221,28 +225,22 @@ class TsumegoDataset(ProceduralDataset):
|
|||
if not answer:
|
||||
return 0.01
|
||||
try:
|
||||
# Parse expected solution in the format "row,col"
|
||||
expected_row, expected_col = map(int, metadata["solution"].split(","))
|
||||
# get solution from (row, col) tuple
|
||||
expected_row, expected_col = metadata["solution"]
|
||||
except Exception:
|
||||
return 0.01
|
||||
try:
|
||||
if "," in answer:
|
||||
# Assume numeric format: "row,col"
|
||||
row, col = map(int, answer.split(","))
|
||||
else:
|
||||
# Assume letter-number format, e.g. "C4"
|
||||
import re
|
||||
|
||||
m = re.match(r"^([A-Za-z])(\d+)$", answer)
|
||||
if not m:
|
||||
return 0.01
|
||||
col_letter, row_str = m.group(1), m.group(2)
|
||||
row = int(row_str)
|
||||
col = ord(col_letter.upper()) - ord("A") + 1
|
||||
# Assume letter-number format, e.g. "C4"
|
||||
m = re.match(r"^([A-Za-z])(\d+)$", answer)
|
||||
if not m:
|
||||
return 0.01
|
||||
col_letter, row_str = m.group(1), m.group(2)
|
||||
row = int(row_str) - 1
|
||||
col = ord(col_letter.upper()) - ord("A")
|
||||
if (row, col) == (expected_row, expected_col):
|
||||
return 1.0
|
||||
board_size = metadata["board_size"]
|
||||
if 1 <= row <= board_size and 1 <= col <= board_size:
|
||||
if 0 <= row < board_size and 0 <= col < board_size:
|
||||
return 0.05
|
||||
except Exception:
|
||||
return 0.01
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
"""Tests for Tsumego problem generation"""
|
||||
|
||||
import pytest
|
||||
from random import Random
|
||||
|
||||
from reasoning_gym.games.tsumego import TsumegoConfig, TsumegoDataset
|
||||
|
||||
|
|
@ -30,7 +29,7 @@ def test_dataset_item_properties():
|
|||
assert key in item
|
||||
|
||||
metadata = item["metadata"]
|
||||
for key in ["board_size", "board", "solution"]:
|
||||
for key in ["difficulty", "board", "solution"]:
|
||||
assert key in metadata
|
||||
|
||||
board = metadata["board"]
|
||||
|
|
@ -101,17 +100,11 @@ def test_liberties_and_move():
|
|||
def test_score_answer():
|
||||
config = TsumegoConfig(min_board_size=9, max_board_size=9, max_stones=10)
|
||||
dataset = TsumegoDataset(config)
|
||||
metadata = {"board_size": 9, "solution": "5,5"}
|
||||
|
||||
# Correct numeric answer
|
||||
assert dataset.score_answer("5,5", metadata) == 1.0
|
||||
metadata = {"board_size": 9, "solution": (4, 4)}
|
||||
|
||||
# Correct letter-number answer (E corresponds to 5)
|
||||
assert dataset.score_answer("E5", metadata) == 1.0
|
||||
|
||||
# Valid but incorrect numeric move
|
||||
assert dataset.score_answer("4,4", metadata) == 0.05
|
||||
|
||||
# Valid but incorrect letter-number move (D corresponds to 4)
|
||||
assert dataset.score_answer("D4", metadata) == 0.05
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue