Add GO hints, legend, disallow numeric answer, store expected string answer

2026-04-30 17:40:45 +00:00 · 2025-02-07 15:18:01 +01:00 · 2025-02-07 15:18:01 +01:00 · 81cb7aa42b
commit 81cb7aa42b
parent 2458d3a646
3 changed files with 150 additions and 32 deletions
--- a/GALLERY.md
+++ b/GALLERY.md
@ -35,6 +35,7 @@ This gallery shows examples from all available datasets using their default conf
 - [number_sorting](#number_sorting)
 - [palindrome](#palindrome)
 - [polynomial_equations](#polynomial_equations)
+- [polynomial_multiplication](#polynomial_multiplication)
 - [prime_factorization](#prime_factorization)
 - [propositional_logic](#propositional_logic)
 - [quantum_lock](#quantum_lock)
@ -49,6 +50,7 @@ This gallery shows examples from all available datasets using their default conf
 - [syllogism](#syllogism)
 - [time_intervals](#time_intervals)
 - [tower_of_hanoi](#tower_of_hanoi)
+- [tsumego](#tsumego)
 - [word_ladder](#word_ladder)
 - [word_sequence_reversal](#word_sequence_reversal)
 - [word_sorting](#word_sorting)
@ -1659,6 +1661,46 @@ Metadata: {'polynomial_expr': '71*n**3 - 2*n - 29', 'variable': 'n', 'degree': 3

 ````

+### polynomial_multiplication
+Generates [min_polynomials, max_polynomials] random polynomials of degree in [min_degree, max_degree].
+    - The polynomial is formed by summing random terms of the form: coeff * x^exponent.
+    - Then we find "F = P_0 * ... * P_n" using SymPy.
+
+Default configuration:
+```python
+min_terms = 2
+max_terms = 4
+min_value = 1
+max_value = 100
+min_degree = 1
+max_degree = 3
+min_polynomials = 2
+max_polynomials = 3
+single_variable = (True,)
+operators = ('+', '-')
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Calculate the following: (65*x - 72)*(105*x - 125)
+Answer: 6825*x**2 - 15685*x + 9000
+Metadata: {'polynomial_expr': '(65*x - 72)*(105*x - 125)', 'single_variable': (True,), 'result': '6825*x**2 - 15685*x + 9000'}
+
+Example 2:
+Question: Calculate the following: (-9*x**2 - 28*x)*(86*x**2 - 2*x - 13)
+Answer: -774*x**4 - 2390*x**3 + 173*x**2 + 364*x
+Metadata: {'polynomial_expr': '(-9*x**2 - 28*x)*(86*x**2 - 2*x - 13)', 'single_variable': (True,), 'result': '-774*x**4 - 2390*x**3 + 173*x**2 + 364*x'}
+
+Example 3:
+Question: Calculate the following: (43 - 91*x)*(3*x**2 - 10*x)*(71*x**3 - 2*x - 29)
+Answer: -19383*x**6 + 73769*x**5 - 29984*x**4 + 5839*x**3 - 29271*x**2 + 12470*x
+Metadata: {'polynomial_expr': '(43 - 91*x)*(3*x**2 - 10*x)*(71*x**3 - 2*x - 29)', 'single_variable': (True,), 'result': '-19383*x**6 + 73769*x**5 - 29984*x**4 + 5839*x**3 - 29271*x**2 + 12470*x'}
+
+````
+
 ### prime_factorization
 Generates prime factorization tasks

@ -2356,6 +2398,91 @@ Metadata: {'num_disks': 6, 'num_pegs': 3, 'start_peg': 1, 'target_peg': 2, 'auxi

 ````

+### tsumego
+Generates Tsumego problems with configurable parameters
+
+Default configuration:
+```python
+min_board_size = 9
+max_board_size = 13
+max_stones = 15
+size = 100
+seed = 42
+```
+
+Example tasks:
+````
+Example 1:
+Question: Tsumego time. Black to play and capture some stones.
+Find the key move.
+
+   A B C D E F G H I
+ 9 X . . . X . . . .
+ 8 . . . . . . . . .
+ 7 . O . O . . X . .
+ 6 . . . . . . . . O
+ 5 O . . O . . . . .
+ 4 . X O O . . . . .
+ 3 . . . O . . . . .
+ 2 . . . . . . . . .
+ 1 . O . O . . X . .
+
+X - Black
+O - White
+
+Specify your move in coordinates (e.g. 'C4' for column C, row 4)
+Answer: E6
+Metadata: {'difficulty': {'board_size': 9}, 'board': [['X', '.', '.', '.', 'X', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', '.', 'O', '.', '.', 'X', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', 'O'], ['O', '.', '.', 'O', '.', '.', '.', '.', '.'], ['.', 'X', 'O', 'O', '.', '.', '.', '.', '.'], ['.', '.', '.', 'O', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', '.', 'O', '.', '.', 'X', '.', '.']], 'solution': (5, 4)}
+
+Example 2:
+Question: Tsumego time. Black to play and capture some stones.
+Find the key move.
+
+   A B C D E F G H I
+ 9 . . O . . . . . .
+ 8 . X O . . . . . .
+ 7 . . . O . . . . .
+ 6 . . O O . . . . .
+ 5 . . O O . . . . .
+ 4 . X . . . . . . O
+ 3 . X . . . . X . .
+ 2 O . O . . . . . .
+ 1 . . . . O . . . .
+
+X - Black
+O - White
+
+Specify your move in coordinates (e.g. 'C4' for column C, row 4)
+Answer: E4
+Metadata: {'difficulty': {'board_size': 9}, 'board': [['.', '.', 'O', '.', '.', '.', '.', '.', '.'], ['.', 'X', 'O', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', 'O', '.', '.', '.', '.', '.'], ['.', '.', 'O', 'O', '.', '.', '.', '.', '.'], ['.', '.', 'O', 'O', '.', '.', '.', '.', '.'], ['.', 'X', '.', '.', '.', '.', '.', '.', 'O'], ['.', 'X', '.', '.', '.', '.', 'X', '.', '.'], ['O', '.', 'O', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', 'O', '.', '.', '.', '.']], 'solution': (3, 4)}
+
+Example 3:
+Question: Tsumego time. Black to play and capture some stones.
+Find the key move.
+
+   A B C D E F G H I J K L
+12 . . . . . . . . . . . .
+11 . . X . . . . . . . . .
+10 . . . . . . . . . . . .
+ 9 . . . . . . . . . . . .
+ 8 X . . . . X . . . X . .
+ 7 . X . . . . . O . . . .
+ 6 . . . . . . O O . . . O
+ 5 . . . . . . . O . . . .
+ 4 . O . . . . . . O . . O
+ 3 X . . . . . . . . . . .
+ 2 . . . . . . . . . . . .
+ 1 . . . . . . . . . . X .
+
+X - Black
+O - White
+
+Specify your move in coordinates (e.g. 'C4' for column C, row 4)
+Answer: I7
+Metadata: {'difficulty': {'board_size': 12}, 'board': [['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', 'X', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['X', '.', '.', '.', '.', 'X', '.', '.', '.', 'X', '.', '.'], ['.', 'X', '.', '.', '.', '.', '.', 'O', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', 'O', 'O', '.', '.', '.', 'O'], ['.', '.', '.', '.', '.', '.', '.', 'O', '.', '.', '.', '.'], ['.', 'O', '.', '.', '.', '.', '.', '.', 'O', '.', '.', 'O'], ['X', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', 'X', '.']], 'solution': (6, 8)}
+
+````
+
 ### word_ladder
 Generates word ladder transformation tasks

--- a/reasoning_gym/games/tsumego.py
+++ b/reasoning_gym/games/tsumego.py
@ -1,5 +1,6 @@
 """Go problem (tsumego) generator"""

+import re
 from dataclasses import dataclass
 from random import Random
 from typing import Dict, List, Optional, Set, Tuple
@ -37,9 +38,9 @@ class TsumegoDataset(ProceduralDataset):

    def __init__(self, config: TsumegoConfig):
        self._prompt_templates = [
-            "Black to play and capture some stones.\nFind the key move.",
-            "It's Black's turn. Capture the marked white stones.",
-            "Play as Black. What's the best move to capture?",
+            "Tsumego time. Black to play and capture some stones.\nFind the key move.",
+            "I have a Go problem for you. Black moves next - can you capture some of the white stones?",
+            "Here's a Go challenge. Playing as Black, how can you capture as many white stones as possible?",
        ]
        self._ko_point = None
        super().__init__(config=config, seed=config.seed, size=config.size)
@ -147,7 +148,7 @@ class TsumegoDataset(ProceduralDataset):

        return True

-    def _generate_capture_problem(self, size: int, rng: Random) -> Tuple[List[List[str]], str]:
+    def _generate_capture_problem(self, size: int, rng: Random) -> Tuple[List[List[str]], Tuple[int, int]]:
        """Generate a capture problem"""
        board = [["." for _ in range(size)] for _ in range(size)]
        stones_placed = 0
@ -172,7 +173,7 @@ class TsumegoDataset(ProceduralDataset):
                board[row + 1][col] = "O"
                board[row][col - 1] = "O"
                if self._is_valid_move(board, row, col + 1, "X"):
-                    return board, f"{row+1},{col+2}"
+                    return board, (row, col + 1)
            tries += 1
        raise RuntimeError("Failed to generate a capture problem")

@ -199,15 +200,18 @@ class TsumegoDataset(ProceduralDataset):

        board, solution = self._generate_capture_problem(size, rng)
        board_str = self._board_to_string(board)
+        solution_str = f"{chr(ord('A')+solution[1])}{solution[0]+1}"

        return {
            "question": (
                rng.choice(self._prompt_templates) + "\n\n" + board_str + "\n\n"
+                "X - Black\n"
+                "O - White\n\n"
                "Specify your move in coordinates (e.g. 'C4' for column C, row 4)"
            ),
-            "answer": solution,
+            "answer": solution_str,
            "metadata": {
-                "board_size": size,
+                "difficulty": {"board_size": size},
                "board": board,
                "solution": solution,
            },
@ -221,28 +225,22 @@ class TsumegoDataset(ProceduralDataset):
        if not answer:
            return 0.01
        try:
-            # Parse expected solution in the format "row,col"
-            expected_row, expected_col = map(int, metadata["solution"].split(","))
+            #  get solution from (row, col) tuple
+            expected_row, expected_col = metadata["solution"]
        except Exception:
            return 0.01
        try:
-            if "," in answer:
-                # Assume numeric format: "row,col"
-                row, col = map(int, answer.split(","))
-            else:
-                # Assume letter-number format, e.g. "C4"
-                import re
-
-                m = re.match(r"^([A-Za-z])(\d+)$", answer)
-                if not m:
-                    return 0.01
-                col_letter, row_str = m.group(1), m.group(2)
-                row = int(row_str)
-                col = ord(col_letter.upper()) - ord("A") + 1
+            # Assume letter-number format, e.g. "C4"
+            m = re.match(r"^([A-Za-z])(\d+)$", answer)
+            if not m:
+                return 0.01
+            col_letter, row_str = m.group(1), m.group(2)
+            row = int(row_str) - 1
+            col = ord(col_letter.upper()) - ord("A")
            if (row, col) == (expected_row, expected_col):
                return 1.0
            board_size = metadata["board_size"]
-            if 1 <= row <= board_size and 1 <= col <= board_size:
+            if 0 <= row < board_size and 0 <= col < board_size:
                return 0.05
        except Exception:
            return 0.01
--- a/tests/test_tsumego.py
+++ b/tests/test_tsumego.py
@ -1,7 +1,6 @@
 """Tests for Ttsumego problem generation"""

 import pytest
-from random import Random

 from reasoning_gym.games.tsumego import TsumegoConfig, TsumegoDataset

@ -30,7 +29,7 @@ def test_dataset_item_properties():
        assert key in item

    metadata = item["metadata"]
-    for key in ["board_size", "board", "solution"]:
+    for key in ["difficulty", "board", "solution"]:
        assert key in metadata

    board = metadata["board"]
@ -101,17 +100,11 @@ def test_liberties_and_move():
 def test_score_answer():
    config = TsumegoConfig(min_board_size=9, max_board_size=9, max_stones=10)
    dataset = TsumegoDataset(config)
-    metadata = {"board_size": 9, "solution": "5,5"}
-
-    # Correct numeric answer
-    assert dataset.score_answer("5,5", metadata) == 1.0
+    metadata = {"board_size": 9, "solution": (4, 4)}

    # Correct letter-number answer (E corresponds to 5)
    assert dataset.score_answer("E5", metadata) == 1.0

-    # Valid but incorrect numeric move
-    assert dataset.score_answer("4,4", metadata) == 0.05
-
    # Valid but incorrect letter-number move (D corresponds to 4)
    assert dataset.score_answer("D4", metadata) == 0.05