Merge pull request #132 from zafstojano/fix/n-queens

fix(env): N Queens
2026-05-01 17:45:24 +00:00 · 2025-02-15 21:18:12 +01:00 · 2025-02-15 21:18:12 +01:00 · 90f6ff2d07
commit 90f6ff2d07
parent 95d86464f2 427e3b4ff7
3 changed files with 35 additions and 12 deletions
--- a/reasoning_gym/games/n_queens.py
+++ b/reasoning_gym/games/n_queens.py
@@ -14,14 +14,29 @@ from ..factory import ProceduralDataset, register_dataset
 MIN_BOARD_SIZE = 4
 MAX_BOARD_SIZE = 12
-QUESTION_TEMPLATE = """Solve this N Queens puzzle:
+QUESTION_TEMPLATE = """Your job is to complete an n x n chess board with n Queens in total, such that no two attack each other.
 {puzzle}
 The board size is {n}x{n} and your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
 No two queens attack each other if they are not in the same row, column, or diagonal.
-Place a queen by replacing an underscore (_) with a Q.
+You can place a queen by replacing an underscore (_) with a Q.
 Example:
 - Input: Given the below board of size 4 x 4 your job is to place 2 queen(s) on the board such that no two queens attack each other.
 _ Q _ _
 _ _ _ _
 _ _ _ _
 _ _ Q _
 - Output:
 _ Q _ _
 _ _ _ Q
 Q _ _ _
 _ _ Q _
 - Explanation
    - None of the queens attack each other vertically, horizontally, or diagonally.
    - The added queens are marked with Q at the positions (1, 3) and (2, 0).
 Given the below board of size {n} x {n} your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
 {puzzle}
 """
@@ -137,13 +152,16 @@ class NQueensDataset(ProceduralDataset):
    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
        valid_solutions = entry["metadata"]["valid_answers"]
        reward = 0.0
        if answer is not None:
            if answer in valid_solutions:
-                reward = 1.0
+                return 1.0
-            else:
+            try:
-                reward = 0.01
+                answer = self._board_to_string(eval(answer))
-        return reward
+                if answer in valid_solutions:
                    return 0.5
            except Exception as e:
                return 0.01
        return 0.0
 register_dataset("n_queens", NQueensDataset, NQueensConfig)
--- a/reasoning_gym/utils.py
+++ b/reasoning_gym/utils.py
@@ -8,12 +8,12 @@ SYSTEM_PROMPTS = {
    "DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
 The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
 <answer>answer here</answer>
-Do not explain your reasoning inside the answer tags, provide only the final answer.
+Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
 """,
    "default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
 Once you have thought about the reasoning process, provide the answer in the following format:
 <answer>answer here</answer>
-Do not explain your reasoning inside the answer tags, provide only the final answer.
+Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
 """,
 }
--- a/tests/test_n_queens.py
+++ b/tests/test_n_queens.py
@@ -122,6 +122,11 @@ def test_nqueens_score_answer():
        # Test None answer gets score 0.0
        assert dataset.score_answer(None, item) == 0.0
    # Test python list representation of board (partial solution)
    answer = "[['_', 'Q', '_', '_'], ['_', '_', '_', 'Q'], ['Q', '_', '_', '_'], ['_', '_', 'Q', '_']]"
    entry = {"metadata": {"valid_answers": {"_ Q _ _\n_ _ _ Q\nQ _ _ _\n_ _ Q _"}}}
    assert dataset.score_answer(answer, entry) == 0.5
 def is_valid_solution(board: list[list[str]]) -> bool:
    """Helper function to verify N Queens solution validity"""