mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-05-01 17:45:24 +00:00
Merge pull request #132 from zafstojano/fix/n-queens
fix(env): N Queens
This commit is contained in:
commit
90f6ff2d07
3 changed files with 35 additions and 12 deletions
|
|
@ -14,14 +14,29 @@ from ..factory import ProceduralDataset, register_dataset
|
||||||
MIN_BOARD_SIZE = 4
|
MIN_BOARD_SIZE = 4
|
||||||
MAX_BOARD_SIZE = 12
|
MAX_BOARD_SIZE = 12
|
||||||
|
|
||||||
QUESTION_TEMPLATE = """Solve this N Queens puzzle:
|
QUESTION_TEMPLATE = """Your job is to complete an n x n chess board with n Queens in total, such that no two attack each other.
|
||||||
{puzzle}
|
|
||||||
|
|
||||||
The board size is {n}x{n} and your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
|
|
||||||
|
|
||||||
No two queens attack each other if they are not in the same row, column, or diagonal.
|
No two queens attack each other if they are not in the same row, column, or diagonal.
|
||||||
|
|
||||||
Place a queen by replacing an underscore (_) with a Q.
|
You can place a queen by replacing an underscore (_) with a Q.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
- Input: Given the below board of size 4 x 4 your job is to place 2 queen(s) on the board such that no two queens attack each other.
|
||||||
|
_ Q _ _
|
||||||
|
_ _ _ _
|
||||||
|
_ _ _ _
|
||||||
|
_ _ Q _
|
||||||
|
- Output:
|
||||||
|
_ Q _ _
|
||||||
|
_ _ _ Q
|
||||||
|
Q _ _ _
|
||||||
|
_ _ Q _
|
||||||
|
- Explanation
|
||||||
|
- None of the queens attack each other vertically, horizontally, or diagonally.
|
||||||
|
- The added queens are marked with Q at the positions (1, 3) and (2, 0).
|
||||||
|
|
||||||
|
Given the below board of size {n} x {n} your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
|
||||||
|
{puzzle}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -137,13 +152,16 @@ class NQueensDataset(ProceduralDataset):
|
||||||
|
|
||||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
|
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
|
||||||
valid_solutions = entry["metadata"]["valid_answers"]
|
valid_solutions = entry["metadata"]["valid_answers"]
|
||||||
reward = 0.0
|
|
||||||
if answer is not None:
|
if answer is not None:
|
||||||
if answer in valid_solutions:
|
if answer in valid_solutions:
|
||||||
reward = 1.0
|
return 1.0
|
||||||
else:
|
try:
|
||||||
reward = 0.01
|
answer = self._board_to_string(eval(answer))
|
||||||
return reward
|
if answer in valid_solutions:
|
||||||
|
return 0.5
|
||||||
|
except Exception as e:
|
||||||
|
return 0.01
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
register_dataset("n_queens", NQueensDataset, NQueensConfig)
|
register_dataset("n_queens", NQueensDataset, NQueensConfig)
|
||||||
|
|
|
||||||
|
|
@ -8,12 +8,12 @@ SYSTEM_PROMPTS = {
|
||||||
"DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
|
"DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
|
||||||
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
|
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
|
||||||
<answer>answer here</answer>
|
<answer>answer here</answer>
|
||||||
Do not explain your reasoning inside the answer tags, provide only the final answer.
|
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
|
||||||
""",
|
""",
|
||||||
"default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
|
"default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
|
||||||
Once you have thought about the reasoning process, provide the answer in the following format:
|
Once you have thought about the reasoning process, provide the answer in the following format:
|
||||||
<answer>answer here</answer>
|
<answer>answer here</answer>
|
||||||
Do not explain your reasoning inside the answer tags, provide only the final answer.
|
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
|
||||||
""",
|
""",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -122,6 +122,11 @@ def test_nqueens_score_answer():
|
||||||
# Test None answer gets score 0.0
|
# Test None answer gets score 0.0
|
||||||
assert dataset.score_answer(None, item) == 0.0
|
assert dataset.score_answer(None, item) == 0.0
|
||||||
|
|
||||||
|
# Test Python list representation of the board (correct solution in the wrong format earns partial credit)
|
||||||
|
answer = "[['_', 'Q', '_', '_'], ['_', '_', '_', 'Q'], ['Q', '_', '_', '_'], ['_', '_', 'Q', '_']]"
|
||||||
|
entry = {"metadata": {"valid_answers": {"_ Q _ _\n_ _ _ Q\nQ _ _ _\n_ _ Q _"}}}
|
||||||
|
assert dataset.score_answer(answer, entry) == 0.5
|
||||||
|
|
||||||
|
|
||||||
def is_valid_solution(board: list[list[str]]) -> bool:
|
def is_valid_solution(board: list[list[str]]) -> bool:
|
||||||
"""Helper function to verify N Queens solution validity"""
|
"""Helper function to verify N Queens solution validity"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue