fix prompts

This commit is contained in:
Zafir Stojanovski 2025-02-15 20:59:07 +01:00
parent 95d86464f2
commit 9ca18f07e0
2 changed files with 35 additions and 15 deletions

View file

@ -7,7 +7,24 @@ from typing import Dict, Optional
from ..factory import ProceduralDataset, register_dataset
# Prompt shown for one dataset item: two worked examples (an integer base and a
# decimal base) followed by the actual task line. The ``{base}`` and
# ``{exponent}`` placeholders are substituted with ``str.format``; the example
# text deliberately contains no braces so formatting cannot fail on it.
QUESTION_TEMPLATE = """Your task is to compute an exponentiation of a number.
Example:
- Input: Compute 2^3
- Output: 8
- Explanation:
- 2^3 = 2 * 2 * 2 = 8
- Therefore, the final answer is 8
Example:
- Input: Compute 412.5^3
- Output: 70189453.125
- Explanation:
- 412.5^3 = 412.5 * 412.5 * 412.5 = 70189453.125
- Therefore, the final answer is 70189453.125
Compute {base}^{exponent}
"""
@dataclass
@ -32,28 +49,31 @@ class PowerFunctionDataset(ProceduralDataset):
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
    """Score a model answer against the oracle answer stored in ``entry``.

    Both values are parsed as floats and rounded to 4 decimal places, then
    compared by absolute difference.

    Args:
        answer: The model's answer as text, or ``None`` when no answer was given.
        entry: Dataset item; only ``entry["answer"]`` is read.

    Returns:
        ``0.0`` when ``answer`` is ``None``; ``1.0`` when the rounded values
        differ by less than 1e-4; ``0.5`` when they differ by less than 1e-1;
        ``0.01`` otherwise, including when either value cannot be parsed as a
        number.
    """
    oracle_answer = entry["answer"]
    if answer is None:
        return 0.0

    try:
        predicted = round(float(answer), 4)
        expected = round(float(oracle_answer), 4)
    except (TypeError, ValueError, OverflowError):
        # Unparseable text still counts as an attempt, so it earns the same
        # minimal reward as a wrong number rather than the "no answer" score.
        return 0.01

    difference = abs(predicted - expected)
    if difference < 1e-4:
        return 1.0
    if difference < 1e-1:
        return 0.5
    # Also reached for NaN, where every comparison above is False.
    return 0.01
def __getitem__(self, idx: int) -> dict:
    """Generate a single Power Function question.

    The random generator is seeded with ``self.seed + idx``, so the same
    index always produces the same item.

    Args:
        idx: Index of the item to generate.

    Returns:
        A dict with the formatted ``"question"`` prompt, the ``"answer"`` as a
        string, and ``"metadata"`` holding the base, the exponent and the
        numeric solution.
    """
    rng = Random(self.seed + idx)

    # Round once, up front: the rounded base is both the number shown in the
    # prompt and the number the answer is computed from.
    base = round(rng.uniform(self.config.min_base, self.config.max_base), 4)
    exponent = rng.randint(self.config.min_exponent, self.config.max_exponent)
    answer = pow(base, exponent)

    return {
        "question": QUESTION_TEMPLATE.format(base=base, exponent=exponent),
        "answer": str(answer),
        "metadata": {"base": base, "exponent": exponent, "solution": answer},
    }

View file

@ -8,12 +8,12 @@ SYSTEM_PROMPTS = {
"DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
<answer>answer here</answer>
Do not explain your reasoning inside the answer tags, provide only the final answer.
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
""",
"default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
Once you have thought about the reasoning process, provide the answer in the following format:
<answer>answer here</answer>
Do not explain your reasoning inside the answer tags, provide only the final answer.
Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
""",
}