From 9ca18f07e08dbd7d9cf1e35bbdbea7e82603acac Mon Sep 17 00:00:00 2001 From: Zafir Stojanovski Date: Sat, 15 Feb 2025 20:59:07 +0100 Subject: [PATCH] fix prompts --- reasoning_gym/arithmetic/power_function.py | 46 ++++++++++++++++------ reasoning_gym/utils.py | 4 +- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/reasoning_gym/arithmetic/power_function.py b/reasoning_gym/arithmetic/power_function.py index adbda12d..24391a12 100644 --- a/reasoning_gym/arithmetic/power_function.py +++ b/reasoning_gym/arithmetic/power_function.py @@ -7,7 +7,24 @@ from typing import Dict, Optional from ..factory import ProceduralDataset, register_dataset -QUESTION_TEMPLATE = """Compute {base}^{exponent}""" +QUESTION_TEMPLATE = """Your task is to compute an exponentiation of a number. + +Example: +- Input: Compute 2^3 +- Output: 8 +- Explanation: + - 2^3 = 2 * 2 * 2 = 8 + - Therefore, the final answer is 8 + +Example: +- Input: Compute 412.5^3 +- Output: 70189453.125 +- Explanation: + - 412.5^3 = 412.5 * 412.5 * 412.5 = 70189453.125 + - Therefore, the final answer is 70189453.125 + +Compute {base}^{exponent} +""" @dataclass @@ -32,28 +49,31 @@ class PowerFunctionDataset(ProceduralDataset): def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: """Overwrite this method in derived classes if a single oracle answer is not available.""" oracle_answer = entry["answer"] - reward = 0.0 if answer is not None: - difference = abs(float(answer) - float(oracle_answer)) - if difference < 1e-6: - reward = 1.0 - elif difference < 1e-1: - reward = 0.5 - else: - reward = 0.01 - - return reward + try: + answer = round(float(answer), 4) + oracle_answer = round(float(oracle_answer), 4) + difference = abs(float(answer) - float(oracle_answer)) + if difference < 1e-4: + return 1.0 + elif difference < 1e-1: + return 0.5 + else: + return 0.01 + except Exception as e: + return 0.01 + return 0.0 def __getitem__(self, idx: int) -> dict: """Generate a single Power Function 
question""" rng = Random(self.seed + idx) - base = rng.uniform(self.config.min_base, self.config.max_base) + base = round(rng.uniform(self.config.min_base, self.config.max_base), 4) exponent = rng.randint(self.config.min_exponent, self.config.max_exponent) answer = pow(base, exponent) return { - "question": f"Compute {base}^{exponent}", + "question": QUESTION_TEMPLATE.format(base=base, exponent=exponent), "answer": str(answer), "metadata": {"base": base, "exponent": exponent, "solution": answer}, } diff --git a/reasoning_gym/utils.py b/reasoning_gym/utils.py index c7d1b0d8..c59c06ca 100644 --- a/reasoning_gym/utils.py +++ b/reasoning_gym/utils.py @@ -8,12 +8,12 @@ SYSTEM_PROMPTS = { "DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer> -Do not explain your reasoning inside the answer tags, provide only the final answer. +Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example. """, "default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner. Once you have thought about the reasoning process, provide the answer in the following format: <answer> answer here </answer> -Do not explain your reasoning inside the answer tags, provide only the final answer. +Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example. """, }