mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-26 17:13:17 +00:00
Minor question template & score_answer improvements (#261)
* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
This commit is contained in:
parent
bf24999bb0
commit
b2904ccab9
106 changed files with 403 additions and 507 deletions
|
|
@ -77,9 +77,10 @@ class IntermediateIntegrationDataset(ProceduralDataset):
|
|||
"Evaluate the indefinite integral: ∫ {integrand} dx",
|
||||
]
|
||||
self.added_instruction = """
|
||||
In addition, when doing calculation, use the following instructions together with your mathematical ingenuity to solve the integral problems
|
||||
## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2.
|
||||
## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C].
|
||||
When performing calculations, please follow these guidelines:
|
||||
1. Use ** instead of ^ to represent exponents. For example, write 7*X**2 instead of 7*X^2.
|
||||
2. Always include the * symbol for all multiplication operations in your reasoning steps. For example, write `-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C` instead of `-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C`.
|
||||
3. Use `exp(x)` or `E**(x)` for the exponential function (i.e. use capital E for Euler's number).
|
||||
"""
|
||||
|
||||
def _get_outer_constant(self, rng: random.Random) -> int:
|
||||
|
|
@ -245,7 +246,7 @@ In addition, when doing calculation, use the following instructions together wit
|
|||
"""Determine if the solution provided solves the problem"""
|
||||
reward = 0.0
|
||||
metadata = entry["metadata"]
|
||||
if answer is not None:
|
||||
if isinstance(answer, str):
|
||||
try:
|
||||
var = metadata["variable"]
|
||||
x = sympy.Symbol(var)
|
||||
|
|
@ -258,12 +259,8 @@ In addition, when doing calculation, use the following instructions together wit
|
|||
# Check mathematical equivalence through simplification
|
||||
if sympy.simplify(derivative - integrand) == 0:
|
||||
reward = 1.0
|
||||
elif answer.strip():
|
||||
reward = 0.05
|
||||
else:
|
||||
reward = 0.01
|
||||
except:
|
||||
reward = 0.01
|
||||
reward = 0.0
|
||||
return reward
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue