mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-05-01 17:45:24 +00:00
Merge pull request #132 from zafstojano/fix/n-queens
fix(env): N Queens
This commit is contained in:
commit
90f6ff2d07
3 changed files with 35 additions and 12 deletions
|
|
@ -14,14 +14,29 @@ from ..factory import ProceduralDataset, register_dataset
|
|||
MIN_BOARD_SIZE = 4
|
||||
MAX_BOARD_SIZE = 12
|
||||
|
||||
QUESTION_TEMPLATE = """Solve this N Queens puzzle:
|
||||
{puzzle}
|
||||
|
||||
The board size is {n}x{n} and your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
|
||||
QUESTION_TEMPLATE = """Your job is to complete an n x n chess board with n Queens in total, such that no two attack each other.
|
||||
|
||||
No two queens attack each other if they are not in the same row, column, or diagonal.
|
||||
|
||||
Place a queen by replacing an underscore (_) with a Q.
|
||||
You can place a queen by replacing an underscore (_) with a Q.
|
||||
|
||||
Example:
|
||||
- Input: Given the below board of size 4 x 4 your job is to place 2 queen(s) on the board such that no two queens attack each other.
|
||||
_ Q _ _
|
||||
_ _ _ _
|
||||
_ _ _ _
|
||||
_ _ Q _
|
||||
- Output:
|
||||
_ Q _ _
|
||||
_ _ _ Q
|
||||
Q _ _ _
|
||||
_ _ Q _
|
||||
- Explanation
|
||||
- None of the queens attack each other vertically, horizontally, or diagonally.
|
||||
- The added queens are marked with Q at the positions (1, 3) and (2, 0).
|
||||
|
||||
Given the below board of size {n} x {n} your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
|
||||
{puzzle}
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -137,13 +152,16 @@ class NQueensDataset(ProceduralDataset):
|
|||
|
||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Score a proposed N Queens board against the entry's valid solutions.

    Args:
        answer: The answer text to grade, or ``None`` when no answer exists.
        entry: Dataset item; ``entry["metadata"]["valid_answers"]`` is the
            collection of accepted boards in their string form.

    Returns:
        ``1.0`` when ``answer`` is exactly one of the valid solution strings.
        ``0.5`` when ``answer`` is a Python literal (e.g. a list of rows)
        that ``self._board_to_string`` converts into a valid solution.
        ``0.01`` for any other non-``None`` answer.
        ``0.0`` when ``answer`` is ``None``.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    valid_solutions = entry["metadata"]["valid_answers"]
    if answer is None:
        return 0.0
    if answer in valid_solutions:
        return 1.0

    try:
        # SECURITY: `answer` is untrusted model output. It used to be passed
        # to eval(), which executes arbitrary code; ast.literal_eval accepts
        # only literal syntax (lists, strings, numbers, ...), which is all a
        # list-of-rows board needs.
        board = ast.literal_eval(answer)
        if self._board_to_string(board) in valid_solutions:
            # Correct board, but not in the requested output format.
            return 0.5
    except Exception:
        # Best effort: anything that cannot be parsed or converted is simply
        # a wrong answer, not an error for the caller.
        pass

    # Every wrong but non-None answer earns the same small reward, whether
    # or not it happened to parse as a Python literal.
    return 0.01
|
||||
|
||||
|
||||
register_dataset("n_queens", NQueensDataset, NQueensConfig)
|
||||
|
|
|
|||
|
|
@ -8,12 +8,12 @@ SYSTEM_PROMPTS = {
|
|||
"DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
|
||||
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
|
||||
<answer>answer here</answer>
|
||||
Do not explain your reasoning inside the answer tags, provide only the final answer.
|
||||
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
|
||||
""",
|
||||
"default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
|
||||
Once you have thought about the reasoning process, provide the answer in the following format:
|
||||
<answer>answer here</answer>
|
||||
Do not explain your reasoning inside the answer tags, provide only the final answer.
|
||||
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
|
||||
""",
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -122,6 +122,11 @@ def test_nqueens_score_answer():
|
|||
# Test None answer gets score 0.0
|
||||
assert dataset.score_answer(None, item) == 0.0
|
||||
|
||||
# Test python list representation of board (partial solution)
|
||||
answer = "[['_', 'Q', '_', '_'], ['_', '_', '_', 'Q'], ['Q', '_', '_', '_'], ['_', '_', 'Q', '_']]"
|
||||
entry = {"metadata": {"valid_answers": {"_ Q _ _\n_ _ _ Q\nQ _ _ _\n_ _ Q _"}}}
|
||||
assert dataset.score_answer(answer, entry) == 0.5
|
||||
|
||||
|
||||
def is_valid_solution(board: list[list[str]]) -> bool:
|
||||
"""Helper function to verify N Queens solution validity"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue