mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
fix prompts
This commit is contained in:
parent
95d86464f2
commit
9ca18f07e0
2 changed files with 35 additions and 15 deletions
|
|
@@ -7,7 +7,24 @@ from typing import Dict, Optional
|
|||
|
||||
from ..factory import ProceduralDataset, register_dataset
|
||||
|
||||
# Prompt shown for every generated item. It contains two worked examples
# (an integer base and a decimal base) followed by the actual task line.
# `{base}` and `{exponent}` are filled in via `str.format`.
QUESTION_TEMPLATE = """Your task is to compute an exponentiation of a number.

Example:
- Input: Compute 2^3
- Output: 8
- Explanation:
- 2^3 = 2 * 2 * 2 = 8
- Therefore, the final answer is 8

Example:
- Input: Compute 412.5^3
- Output: 70189453.125
- Explanation:
- 412.5^3 = 412.5 * 412.5 * 412.5 = 70189453.125
- Therefore, the final answer is 70189453.125

Compute {base}^{exponent}
"""
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@@ -32,28 +49,31 @@ class PowerFunctionDataset(ProceduralDataset):
|
|||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Score a submitted answer against the numeric oracle answer.

    Both the submitted answer and the oracle answer are parsed as floats and
    rounded to 4 decimal places before their absolute difference is taken.

    Args:
        answer: The submitted answer text, or ``None`` when no answer was given.
        entry: Dataset entry; its ``"answer"`` value is the oracle answer.

    Returns:
        ``0.0`` when ``answer`` is ``None``; ``1.0`` when the rounded values
        differ by less than ``1e-4``; ``0.5`` when they differ by less than
        ``1e-1``; ``0.01`` otherwise, including when either value cannot be
        parsed as a number.

    Raises:
        KeyError: If ``entry`` has no ``"answer"`` key.
    """
    oracle_answer = entry["answer"]
    if answer is None:
        return 0.0

    try:
        predicted = round(float(answer), 4)
        expected = round(float(oracle_answer), 4)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text still earns the minimal "an answer was given" reward.
        return 0.01

    difference = abs(predicted - expected)
    if difference < 1e-4:
        return 1.0
    if difference < 1e-1:
        return 0.5
    # Also reached when the difference is NaN (e.g. a "nan" answer), because
    # every comparison against NaN is False.
    return 0.01
|
||||
|
||||
def __getitem__(self, idx: int) -> dict:
    """Generate a single Power Function question.

    The random generator is seeded with ``self.seed + idx``, so the same
    index always produces the same item.

    Args:
        idx: Index of the item to generate.

    Returns:
        A dict with the formatted ``"question"`` prompt, the ``"answer"`` as a
        string, and ``"metadata"`` holding the base, exponent and numeric
        solution.
    """
    rng = Random(self.seed + idx)

    # Round the base so the value shown in the prompt is exactly the value
    # used to compute the answer.
    base = round(rng.uniform(self.config.min_base, self.config.max_base), 4)
    exponent = rng.randint(self.config.min_exponent, self.config.max_exponent)
    answer = pow(base, exponent)

    return {
        "question": QUESTION_TEMPLATE.format(base=base, exponent=exponent),
        "answer": str(answer),
        "metadata": {"base": base, "exponent": exponent, "solution": answer},
    }
|
||||
|
|
|
|||
|
|
@@ -8,12 +8,12 @@ SYSTEM_PROMPTS = {
|
|||
"DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
|
||||
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
|
||||
<answer>answer here</answer>
|
||||
Do not explain your reasoning inside the answer tags, provide only the final answer.
|
||||
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
|
||||
""",
|
||||
"default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
|
||||
Once you have thought about the reasoning process, provide the answer in the following format:
|
||||
<answer>answer here</answer>
|
||||
Do not explain your reasoning inside the answer tags, provide only the final answer.
|
||||
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
|
||||
""",
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue