diff --git a/reasoning_gym/games/tsumego.py b/reasoning_gym/games/tsumego.py index d6d93102..c56bbd25 100644 --- a/reasoning_gym/games/tsumego.py +++ b/reasoning_gym/games/tsumego.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from random import Random -from typing import Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple from ..factory import ProceduralDataset, register_dataset @@ -217,13 +217,14 @@ class TsumegoDataset(ProceduralDataset): }, } - def score_answer(self, answer: Optional[str], metadata: Dict[str, any]) -> float: + def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float: """Score the answer against the solution""" if answer is None: return 0.0 answer = answer.strip() if not answer: return 0.01 + metadata = entry["metadata"] try: # get solution from (row, col) tuple expected_row, expected_col = metadata["solution"] diff --git a/tests/test_tsumego.py b/tests/test_tsumego.py index ae7e7d49..a1e6e6b5 100644 --- a/tests/test_tsumego.py +++ b/tests/test_tsumego.py @@ -98,27 +98,30 @@ def test_liberties_and_move(): def test_score_answer(): - config = TsumegoConfig(min_board_size=9, max_board_size=9, max_stones=10) + config = TsumegoConfig(min_board_size=9, max_board_size=9, max_stones=10, size=5) dataset = TsumegoDataset(config) - metadata = {"board_size": 9, "solution": (4, 4)} + entry = {"metadata": {"board_size": 9, "solution": (4, 4)}} # Correct letter-number answer (E corresponds to 5) - assert dataset.score_answer("E5", metadata) == 1.0 + assert dataset.score_answer("E5", entry) == 1.0 # Valid but incorrect letter-number move (D corresponds to 4) - assert dataset.score_answer("D4", metadata) == 0.05 + assert dataset.score_answer("D4", entry) == 0.05 # Invalid format - assert dataset.score_answer("invalid", metadata) == 0.01 + assert dataset.score_answer("invalid", entry) == 0.01 # Empty answer - assert dataset.score_answer("", metadata) == 0.01 + assert dataset.score_answer("", entry) == 0.01 # None answer - assert dataset.score_answer(None, metadata) == 0.0 + assert dataset.score_answer(None, entry) == 0.0 # Out-of-bound letter-number move: 'J' corresponds to 10 which is greater than board size = 9 - assert dataset.score_answer("J9", metadata) == 0.01 + assert dataset.score_answer("J9", entry) == 0.01 + + for x in dataset: + assert dataset.score_answer(x["answer"], entry=x) == 1.0 # Additional tests for game logic edge cases