fix prompts

2026-04-19 12:58:07 +00:00 · 2025-02-15 20:59:07 +01:00 · 2025-02-15 20:59:07 +01:00 · 9ca18f07e0
commit 9ca18f07e0
parent 95d86464f2
2 changed files with 35 additions and 15 deletions
--- a/reasoning_gym/arithmetic/power_function.py
+++ b/reasoning_gym/arithmetic/power_function.py
@@ -7,7 +7,24 @@ from typing import Dict, Optional

 from ..factory import ProceduralDataset, register_dataset

-QUESTION_TEMPLATE = """Compute {base}^{exponent}"""
+QUESTION_TEMPLATE = """Your task is to compute an exponentiation of a number.
+
+Example:
+- Input: Compute 2^3
+- Output: 8
+- Explanation:
+    - 2^3 = 2 * 2 * 2 = 8
+    - Therefore, the final answer is 8
+
+Example:
+- Input: Compute 412.5^3
+- Output: 70189453.125
+- Explanation:
+    - 412.5^3 = 412.5 * 412.5 * 412.5 = 70189453.125
+    - Therefore, the final answer is 70189453.125
+
+Compute {base}^{exponent}
+"""


@dataclass
@@ -32,28 +49,31 @@ class PowerFunctionDataset(ProceduralDataset):
    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
        """Overwrite this method in derived classes if a single oracle answer is not available."""
        oracle_answer = entry["answer"]
-        reward = 0.0
        if answer is not None:
-            difference = abs(float(answer) - float(oracle_answer))
-            if difference < 1e-6:
-                reward = 1.0
-            elif difference < 1e-1:
-                reward = 0.5
-            else:
-                reward = 0.01
-
-        return reward
+            try:
+                answer = round(float(answer), 4)
+                oracle_answer = round(float(oracle_answer), 4)
+                difference = abs(float(answer) - float(oracle_answer))
+                if difference < 1e-4:
+                    return 1.0
+                elif difference < 1e-1:
+                    return 0.5
+                else:
+                    return 0.01
+            except Exception as e:
+                return 0.01
+        return 0.0

    def __getitem__(self, idx: int) -> dict:
        """Generate a single Power Function question"""
        rng = Random(self.seed + idx)

-        base = rng.uniform(self.config.min_base, self.config.max_base)
+        base = round(rng.uniform(self.config.min_base, self.config.max_base), 4)
        exponent = rng.randint(self.config.min_exponent, self.config.max_exponent)
        answer = pow(base, exponent)

        return {
-            "question": f"Compute {base}^{exponent}",
+            "question": QUESTION_TEMPLATE.format(base=base, exponent=exponent),
            "answer": str(answer),
            "metadata": {"base": base, "exponent": exponent, "solution": answer},
        }
--- a/reasoning_gym/utils.py
+++ b/reasoning_gym/utils.py
@@ -8,12 +8,12 @@ SYSTEM_PROMPTS = {
    "DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
 The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
 <answer>answer here</answer>
-Do not explain your reasoning inside the answer tags, provide only the final answer.
+Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
 """,
    "default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner.
 Once you have thought about the reasoning process, provide the answer in the following format:
 <answer>answer here</answer>
-Do not explain your reasoning inside the answer tags, provide only the final answer.
+Do not explain your reasoning inside the answer tags, provide only the final answer. When an example is provided, you should strictly follow the format of the output/answer in that example.
 """,
 }