mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-26 17:13:17 +00:00
Minor question template & score_answer improvements (#261)
* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
This commit is contained in:
parent
bf24999bb0
commit
b2904ccab9
106 changed files with 403 additions and 507 deletions
|
|
@ -42,9 +42,9 @@ class SimpleIntegrationDataset(ProceduralDataset):
|
|||
"Evaluate the indefinite integral: ∫ {integrand} dx",
|
||||
]
|
||||
self.added_instruction = """
|
||||
In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems
|
||||
## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2.
|
||||
## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C].
|
||||
When performing calculations, please follow these guidelines:
|
||||
1. Use ** instead of ^ to represent exponents. For example, write 7*X**2 instead of 7*X^2.
|
||||
2. Always include the * symbol for all multiplication operations in your reasoning steps. For example, write `-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C` instead of `-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C`.
|
||||
"""
|
||||
super().__init__(config=config, seed=config.seed, size=config.size)
|
||||
|
||||
|
|
@ -103,12 +103,8 @@ In addition, When doing calculation, Use the following instructions together wit
|
|||
# Check mathematical equivalence through simplification
|
||||
if sympy.simplify(derivative - integrand) == 0:
|
||||
reward = 1.0
|
||||
elif answer.strip():
|
||||
reward = 0.05
|
||||
else:
|
||||
reward = 0.01
|
||||
except:
|
||||
reward = 0.01
|
||||
reward = 0.0
|
||||
return reward
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue