diff --git a/reasoning_gym/algebra/intermediate_integration.py b/reasoning_gym/algebra/intermediate_integration.py index 6335b6b7..34e144df 100644 --- a/reasoning_gym/algebra/intermediate_integration.py +++ b/reasoning_gym/algebra/intermediate_integration.py @@ -76,6 +76,12 @@ class IntermediateIntegrationDataset(ProceduralDataset): "Calculate the antiderivative: ∫ {integrand} dx", "Evaluate the indefinite integral: ∫ {integrand} dx", ] + self.added_instruction = """ + \n\n + In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems + ## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. + ## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. + """ def _get_outer_constant(self, rng: random.Random) -> int: """Helper to generate signed outer constant from config""" @@ -222,9 +228,10 @@ class IntermediateIntegrationDataset(ProceduralDataset): answer = sympy.integrate(integrand, x) answer_str = str(answer) + " + C" + question = rng.choice(self.prompt_template).format(integrand=integrand) + self.added_instruction return { - "question": rng.choice(self.prompt_template).format(integrand=integrand), + "question": question, "answer": answer_str, "metadata": { "integrand": str(integrand), diff --git a/reasoning_gym/algebra/polynomial_equations.py b/reasoning_gym/algebra/polynomial_equations.py index eec45285..f3e9d2da 100644 --- a/reasoning_gym/algebra/polynomial_equations.py +++ b/reasoning_gym/algebra/polynomial_equations.py @@ -62,6 +62,15 @@ class PolynomialEquationsDataset(ProceduralDataset): "Determine the real value(s) of {variable} that satisfies: {polynomial_expanded} = 0", "Solve the polynomial equation for real {variable}:\n{polynomial_expanded} = 0", ] + self.added_instruction = """ + \n\n + In solving the equations, please abide by the following instruction: + ## 1. All answers should be inserted in square brackets. For example [], [-0.3773, 0.4005] etc. + ## 2. In cases where your answer is b = [2 + sqrt(4560)] / 172 and b = [2 - sqrt(4560)] / 172. Since b can be 2 numbers Resolve your answer like this instead, [-0.3773, 0.4005] + ## 3. If there are no real values of i that satisfy the equation, report your answer as empty square bracket, [] + ## 4. If there are 2 answers, resolve the answers as floats and fill the 2 numbers in square bracket, if 3 answers, fill it with 3 answers. + ## 5. Resolve all numbers in square brackets as floats. Round the floats higher than 4 decimal place(d.p) down to 4 d.p. + """ super().__init__(config=config, seed=config.seed, size=config.size) def __getitem__(self, idx: int) -> dict: @@ -89,15 +98,13 @@ class PolynomialEquationsDataset(ProceduralDataset): for sol in solutions: if sol.is_real: # Evaluate symbolic solution to a floating approximation - real_solutions.append(float(sol.evalf())) + real_solutions.append(round(float(sol.evalf()), 4)) real_solutions.sort() answer_str = str(real_solutions) + question = rng.choice(self._prompt_templates).format(variable=variable, polynomial_expanded=polynomial_expanded) + self.added_instruction return { - "question": rng.choice(self._prompt_templates).format( - variable=variable, - polynomial_expanded=polynomial_expanded, - ), + "question": question, "answer": answer_str, "metadata": { "polynomial_expr": str(polynomial_expanded), diff --git a/reasoning_gym/algebra/polynomial_multiplication.py b/reasoning_gym/algebra/polynomial_multiplication.py index 9a74679f..d0c472b3 100644 --- a/reasoning_gym/algebra/polynomial_multiplication.py +++ b/reasoning_gym/algebra/polynomial_multiplication.py @@ -61,6 +61,12 @@ class PolynomialMultiplicationDataset(ProceduralDataset): "Simplify this expression: {polynomial_expr}", "Calculate the following: {polynomial_expr}", ] + self.added_instruction = """ + \n\n + In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems + ## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. + ## 2. Always use * when doing all sorts of multiplcation in your reasoning steps and even in reporting answers. + """ super().__init__(config=config, seed=config.seed, size=config.size) def __getitem__(self, idx: int) -> dict: @@ -79,11 +85,10 @@ class PolynomialMultiplicationDataset(ProceduralDataset): polynomial_expr = sp.prod(polynomials) product = sp.expand(polynomial_expr) + question = rng.choice(self._prompt_templates).format(polynomial_expr=polynomial_expr) + self.added_instruction return { - "question": rng.choice(self._prompt_templates).format( - polynomial_expr=polynomial_expr, - ), + "question": question, "answer": product, "metadata": { "polynomial_expr": str(polynomial_expr), diff --git a/reasoning_gym/algebra/simple_integration.py b/reasoning_gym/algebra/simple_integration.py index a8ca3be2..d056bc7c 100644 --- a/reasoning_gym/algebra/simple_integration.py +++ b/reasoning_gym/algebra/simple_integration.py @@ -41,6 +41,12 @@ class SimpleIntegrationDataset(ProceduralDataset): "Calculate the antiderivative: ∫ {integrand} dx", "Evaluate the indefinite integral: ∫ {integrand} dx", ] + self.added_instruction = """ + \n\n + In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems + ## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. + ## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. + """ super().__init__(config=config, seed=config.seed, size=config.size) def _generate_coefficient(self, rng: random.Random) -> Fraction: @@ -69,9 +75,10 @@ class SimpleIntegrationDataset(ProceduralDataset): rng = random.Random(self.seed + idx) symbol, polynomial = self._generate_polynomial(rng) derivative = sympy.diff(polynomial, symbol) + question = rng.choice(self._prompt_templates).format(integrand=derivative) + self.added_instruction return { - "question": rng.choice(self._prompt_templates).format(integrand=derivative), + "question": question, "answer": str(polynomial) + " + C", "metadata": { "integrand": str(derivative), diff --git a/reasoning_gym/algorithmic/number_sorting.py b/reasoning_gym/algorithmic/number_sorting.py index d922aa74..39dfda99 100644 --- a/reasoning_gym/algorithmic/number_sorting.py +++ b/reasoning_gym/algorithmic/number_sorting.py @@ -34,6 +34,12 @@ class NumberSortingDataset(ProceduralDataset): def __init__(self, config: NumberSortingConfig): super().__init__(config=config, seed=config.seed, size=config.size) + self.added_instruction = """ + \n\n + Please follow the instruction below: + # 1. Let all your answers be a list of numbers. Instead of reporting your answer as -69, -13, 1, 7, 11, 43, 59, 61, use ['-69', '-13', '1', '7', '11', '43', '59', '61'] instead + # 2. Convert all numbers in the square brackets as strings. For example, ['-69', '-13', '1', '7', '11', '43', '59', '61'] + """ def _format_number(self, num: float, decimals: int) -> str: """Format number with specified decimal places""" @@ -78,9 +84,10 @@ class NumberSortingDataset(ProceduralDataset): is_ascending = rng.choice([True, False]) direction = "ascending" if is_ascending else "descending" answer = asc_answer if is_ascending else desc_answer + question = f"Sort these numbers in {direction} order: {', '.join(number_strs)}" + self.added_instruction return { - "question": f"Sort these numbers in {direction} order: {', '.join(number_strs)}", + "question": question, "answer": str(answer), "metadata": {"original_numbers": number_strs, "direction": direction, "sorted_numbers": answer}, }