Merge pull request #132 from zafstojano/fix/n-queens

fix(env): N Queens
This commit is contained in:
Andreas Köpf 2025-02-15 21:18:12 +01:00 committed by GitHub
commit 90f6ff2d07
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 35 additions and 12 deletions

View file

@ -14,14 +14,29 @@ from ..factory import ProceduralDataset, register_dataset
MIN_BOARD_SIZE = 4 MIN_BOARD_SIZE = 4
MAX_BOARD_SIZE = 12 MAX_BOARD_SIZE = 12
QUESTION_TEMPLATE = """Solve this N Queens puzzle: QUESTION_TEMPLATE = """Your job is to complete an n x n chess board with n Queens in total, such that no two attack each other.
{puzzle}
The board size is {n}x{n} and your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
No two queens attack each other if they are not in the same row, column, or diagonal. No two queens attack each other if they are not in the same row, column, or diagonal.
Place a queen by replacing an underscore (_) with a Q. You can place a queen by replacing an underscore (_) with a Q.
Example:
- Input: Given the below board of size 4 x 4 your job is to place 2 queen(s) on the board such that no two queens attack each other.
_ Q _ _
_ _ _ _
_ _ _ _
_ _ Q _
- Output:
_ Q _ _
_ _ _ Q
Q _ _ _
_ _ Q _
- Explanation
- None of the queens attack each other vertically, horizontally, or diagonally.
- The added queens are marked with Q at the positions (1, 3) and (2, 0).
Given the below board of size {n} x {n} your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
{puzzle}
""" """
@ -137,13 +152,16 @@ class NQueensDataset(ProceduralDataset):
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Score a candidate N Queens answer against the entry's valid solutions.

    Args:
        answer: The answer text to score, or None when no answer was given.
        entry: Dataset item; ``entry["metadata"]["valid_answers"]`` holds the
            accepted board strings.

    Returns:
        1.0 if ``answer`` is exactly one of the valid board strings.
        0.5 if ``answer`` is a Python literal (e.g. a list of rows) that
        ``self._board_to_string`` renders as one of the valid board strings.
        0.01 if ``answer`` cannot be parsed as a literal or cannot be rendered.
        0.0 if ``answer`` is None, or parses and renders to a board that is
        not among the valid solutions.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    valid_solutions = entry["metadata"]["valid_answers"]
    if answer is None:
        return 0.0

    # Exact textual match with a known solution earns full credit.
    if answer in valid_solutions:
        return 1.0

    try:
        # SECURITY: the answer is untrusted text. ast.literal_eval only accepts
        # Python literals (lists, strings, numbers, ...), so unlike eval() it
        # can never execute code embedded in the answer.
        rendered = self._board_to_string(ast.literal_eval(answer))
        # Right board, wrong output format: partial credit.
        if rendered in valid_solutions:
            return 0.5
    except Exception:
        # Scoring must never crash on malformed answers; anything that cannot
        # be parsed or rendered gets the minimal non-zero score.
        return 0.01
    return 0.0
register_dataset("n_queens", NQueensDataset, NQueensConfig) register_dataset("n_queens", NQueensDataset, NQueensConfig)

View file

@ -8,12 +8,12 @@ SYSTEM_PROMPTS = {
"DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it. "DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
<answer>answer here</answer> <answer>answer here</answer>
Do not explain your reasoning inside the answer tags, provide only the final answer. Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
""", """,
"default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner. "default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
Once you have thought about the reasoning process, provide the answer in the following format: Once you have thought about the reasoning process, provide the answer in the following format:
<answer>answer here</answer> <answer>answer here</answer>
Do not explain your reasoning inside the answer tags, provide only the final answer. Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
""", """,
} }

View file

@ -122,6 +122,11 @@ def test_nqueens_score_answer():
# Test None answer gets score 0.0 # Test None answer gets score 0.0
assert dataset.score_answer(None, item) == 0.0 assert dataset.score_answer(None, item) == 0.0
# Test python list representation of board (partial solution)
answer = "[['_', 'Q', '_', '_'], ['_', '_', '_', 'Q'], ['Q', '_', '_', '_'], ['_', '_', 'Q', '_']]"
entry = {"metadata": {"valid_answers": {"_ Q _ _\n_ _ _ Q\nQ _ _ _\n_ _ Q _"}}}
assert dataset.score_answer(answer, entry) == 0.5
def is_valid_solution(board: list[list[str]]) -> bool: def is_valid_solution(board: list[list[str]]) -> bool:
"""Helper function to verify N Queens solution validity""" """Helper function to verify N Queens solution validity"""