mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
fix prompts
This commit is contained in:
parent
95d86464f2
commit
9ca18f07e0
2 changed files with 35 additions and 15 deletions
|
|
@ -7,7 +7,24 @@ from typing import Dict, Optional
|
||||||
|
|
||||||
from ..factory import ProceduralDataset, register_dataset
|
from ..factory import ProceduralDataset, register_dataset
|
||||||
|
|
||||||
# Prompt shown to the model for every item. It carries two worked examples
# (one integer base, one decimal base) so the expected output format is
# unambiguous, followed by the actual task line. `{base}` and `{exponent}`
# are filled in with str.format() when an item is generated.
QUESTION_TEMPLATE = """Your task is to compute an exponentiation of a number.

Example:
- Input: Compute 2^3
- Output: 8
- Explanation:
- 2^3 = 2 * 2 * 2 = 8
- Therefore, the final answer is 8

Example:
- Input: Compute 412.5^3
- Output: 70189453.125
- Explanation:
- 412.5^3 = 412.5 * 412.5 * 412.5 = 70189453.125
- Therefore, the final answer is 70189453.125

Compute {base}^{exponent}
"""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -32,28 +49,31 @@ class PowerFunctionDataset(ProceduralDataset):
|
||||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Score a model answer against the stored result of the power computation.

    Both the answer and ``entry["answer"]`` are parsed as floats and rounded
    to 4 decimal places; the reward depends on their absolute difference.

    Args:
        answer: The model's answer text, or None when no answer was produced.
        entry: The dataset item being scored; only ``entry["answer"]`` is read.

    Returns:
        1.0 when the rounded values differ by less than 1e-4,
        0.5 when they differ by less than 1e-1,
        0.01 for any other answer (including one that cannot be parsed as a
        number, or a NaN difference),
        0.0 when ``answer`` is None.

    Raises:
        KeyError: if ``entry`` has no ``"answer"`` key.
    """
    # NOTE(review): `any` in the annotation is the builtin function;
    # `typing.Any` was presumably intended - confirm and fix at file level.
    oracle_answer = entry["answer"]

    if answer is None:
        return 0.0

    try:
        predicted = round(float(answer), 4)
        expected = round(float(oracle_answer), 4)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text still counts as an attempt, so it gets the
        # minimal non-zero reward instead of 0.0.
        return 0.01

    difference = abs(predicted - expected)
    if difference < 1e-4:
        return 1.0
    if difference < 1e-1:
        return 0.5
    # Also reached when the difference is NaN (every comparison above is False).
    return 0.01
|
||||||
|
|
||||||
def __getitem__(self, idx: int) -> dict:
    """Generate a single Power Function question.

    A fresh ``Random`` seeded with ``self.seed + idx`` drives the draw, so for
    a fixed seed and config the same index yields the same item.

    Args:
        idx: Index of the item to generate.

    Returns:
        A dict with:
          - "question": ``QUESTION_TEMPLATE`` filled in with base and exponent,
          - "answer": the result of ``base ** exponent`` as a string,
          - "metadata": the base, the exponent and the numeric solution.
    """
    rng = Random(self.seed + idx)

    # Draw order matters for reproducibility: base first, then exponent.
    # The base is rounded to 4 decimals so the value printed in the question
    # is exactly the value the answer is computed from.
    base = round(rng.uniform(self.config.min_base, self.config.max_base), 4)
    exponent = rng.randint(self.config.min_exponent, self.config.max_exponent)
    answer = pow(base, exponent)

    return {
        "question": QUESTION_TEMPLATE.format(base=base, exponent=exponent),
        "answer": str(answer),
        "metadata": {"base": base, "exponent": exponent, "solution": answer},
    }
|
||||||
|
|
|
||||||
|
|
@ -8,12 +8,12 @@ SYSTEM_PROMPTS = {
|
||||||
"DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
|
"DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
|
||||||
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
|
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
|
||||||
<answer>answer here</answer>
|
<answer>answer here</answer>
|
||||||
Do not explain your reasoning inside the answer tags, provide only the final answer.
|
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
|
||||||
""",
|
""",
|
||||||
"default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
|
"default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
|
||||||
Once you have thought about the reasoning process, provide the answer in the following format:
|
Once you have thought about the reasoning process, provide the answer in the following format:
|
||||||
<answer>answer here</answer>
|
<answer>answer here</answer>
|
||||||
Do not explain your reasoning inside the answer tags, provide only the final answer.
|
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
|
||||||
""",
|
""",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue