mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
Merge pull request #135 from zafstojano/fix/power-function
fix(env): Power Function
This commit is contained in:
commit
b1caebc09b
1 changed file with 33 additions and 13 deletions
|
|
@@ -7,7 +7,24 @@ from typing import Dict, Optional
|
||||||
|
|
||||||
from ..factory import ProceduralDataset, register_dataset
|
from ..factory import ProceduralDataset, register_dataset
|
||||||
|
|
||||||
QUESTION_TEMPLATE = """Compute {base}^{exponent}"""
|
QUESTION_TEMPLATE = """Your task is to compute an exponentiation of a number.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
- Input: Compute 2^3
|
||||||
|
- Output: 8
|
||||||
|
- Explanation:
|
||||||
|
- 2^3 = 2 * 2 * 2 = 8
|
||||||
|
- Therefore, the final answer is 8
|
||||||
|
|
||||||
|
Example:
|
||||||
|
- Input: Compute 412.5^3
|
||||||
|
- Output: 70189453.125
|
||||||
|
- Explanation:
|
||||||
|
- 412.5^3 = 412.5 * 412.5 * 412.5 = 70189453.125
|
||||||
|
- Therefore, the final answer is 70189453.125
|
||||||
|
|
||||||
|
Compute {base}^{exponent}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@@ -32,28 +49,31 @@ class PowerFunctionDataset(ProceduralDataset):
|
||||||
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
|
def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
|
||||||
"""Overwrite this method in derived classes if a single oracle answer is not available."""
|
"""Overwrite this method in derived classes if a single oracle answer is not available."""
|
||||||
oracle_answer = entry["answer"]
|
oracle_answer = entry["answer"]
|
||||||
reward = 0.0
|
|
||||||
if answer is not None:
|
if answer is not None:
|
||||||
|
try:
|
||||||
|
answer = round(float(answer), 4)
|
||||||
|
oracle_answer = round(float(oracle_answer), 4)
|
||||||
difference = abs(float(answer) - float(oracle_answer))
|
difference = abs(float(answer) - float(oracle_answer))
|
||||||
if difference < 1e-6:
|
if difference < 1e-4:
|
||||||
reward = 1.0
|
return 1.0
|
||||||
elif difference < 1e-1:
|
elif difference < 1e-1:
|
||||||
reward = 0.5
|
return 0.5
|
||||||
else:
|
else:
|
||||||
reward = 0.01
|
return 0.01
|
||||||
|
except Exception as e:
|
||||||
return reward
|
return 0.01
|
||||||
|
return 0.0
|
||||||
|
|
||||||
def __getitem__(self, idx: int) -> dict:
|
def __getitem__(self, idx: int) -> dict:
|
||||||
"""Generate a single Power Function question"""
|
"""Generate a single Power Function question"""
|
||||||
rng = Random(self.seed + idx)
|
rng = Random(self.seed + idx)
|
||||||
|
|
||||||
base = rng.uniform(self.config.min_base, self.config.max_base)
|
base = round(rng.uniform(self.config.min_base, self.config.max_base), 4)
|
||||||
exponent = rng.randint(self.config.min_exponent, self.config.max_exponent)
|
exponent = rng.randint(self.config.min_exponent, self.config.max_exponent)
|
||||||
answer = pow(base, exponent)
|
answer = pow(base, exponent)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"question": f"Compute {base}^{exponent}",
|
"question": QUESTION_TEMPLATE.format(base=base, exponent=exponent),
|
||||||
"answer": str(answer),
|
"answer": str(answer),
|
||||||
"metadata": {"base": base, "exponent": exponent, "solution": answer},
|
"metadata": {"base": base, "exponent": exponent, "solution": answer},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue