diff --git a/eval/yaml/openai-o1/games.yaml b/eval/yaml/openai-o1/games.yaml new file mode 100644 index 00000000..577f4d5c --- /dev/null +++ b/eval/yaml/openai-o1/games.yaml @@ -0,0 +1,9 @@ +model: openai/o1 +category: games +provider: OpenAI +datasets: + - emoji_mystery +eval_dir: eval/openai-01 +dataset_size: 50 +dataset_seed: 45 +developer_role: system diff --git a/reasoning_gym/algebra/__init__.py b/reasoning_gym/algebra/__init__.py index fc77b977..9ed7c5b1 100644 --- a/reasoning_gym/algebra/__init__.py +++ b/reasoning_gym/algebra/__init__.py @@ -1,9 +1,9 @@ from .complex_arithmetic import ComplexArithmeticConfig, ComplexArithmeticDataset from .intermediate_integration import IntermediateIntegrationConfig, IntermediateIntegrationDataset -from .polynomial_equations import PolynomialEquationsConfig, PolynomialEquationsDataset +from .polynomial_equations import PolynomialEquationsConfig, PolynomialEquationsCurriculum, PolynomialEquationsDataset from .polynomial_multiplication import PolynomialMultiplicationConfig, PolynomialMultiplicationDataset from .simple_equations import SimpleEquationsConfig, SimpleEquationsDataset -from .simple_integration import SimpleIntegrationConfig, SimpleIntegrationDataset +from .simple_integration import SimpleIntegrationConfig, SimpleIntegrationCurriculum, SimpleIntegrationDataset __all__ = [ "ComplexArithmeticConfig", @@ -12,9 +12,11 @@ __all__ = [ "IntermediateIntegrationDataset", "PolynomialEquationsConfig", "PolynomialEquationsDataset", + "PolynomialEquationsCurriculum", "SimpleEquationsDataset", "SimpleEquationsConfig", "SimpleIntegrationConfig", + "SimpleIntegrationCurriculum", "SimpleIntegrationDataset", "PolynomialMultiplicationConfig", "PolynomialMultiplicationDataset", diff --git a/reasoning_gym/algebra/polynomial_equations.py b/reasoning_gym/algebra/polynomial_equations.py index ac054427..58a97865 100644 --- a/reasoning_gym/algebra/polynomial_equations.py +++ b/reasoning_gym/algebra/polynomial_equations.py @@ -5,6 +5,7 @@ 
from typing import Any, Optional from sympy import Eq, Symbol, expand, solve +from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition from ..factory import ProceduralDataset, register_dataset @@ -86,11 +87,14 @@ In solving equations, please follow these instructions: - metadata: dict with details (polynomial_expr, degree, etc.) """ rng = random.Random(self.seed + idx) - for _ in range(8): + for _ in range( + 20 + ): # Increase the number of attempts to get a solvable polynomial - in many cases the only real solution is 0 # Get variable and generate polynomial equation in standard form variable = self._get_variable(rng) degree = rng.randint(self.config.min_degree, self.config.max_degree) - polynomial_expr = self._generate_polynomial_expr(rng, variable, degree) + num_terms = rng.randint(self.config.min_terms, self.config.max_terms) + polynomial_expr = self._generate_polynomial_expr(rng, variable, degree, num_terms) polynomial_expanded = expand(polynomial_expr) # Solve the polynomial = 0 @@ -120,6 +124,7 @@ In solving equations, please follow these instructions: "variable": variable, "degree": degree, "real_solutions": real_solutions, + "difficulty": {"terms": num_terms, "degree": degree}, }, } @@ -127,7 +132,7 @@ In solving equations, please follow these instructions: """Get a random lowercase variable name""" return rng.choice("abcdefghklmnopqrstuvwxyz") # remove ij to avoid confusion with complex numbers - def _generate_polynomial_expr(self, rng: random.Random, variable: Symbol, degree: int): + def _generate_polynomial_expr(self, rng: random.Random, variable: Symbol, degree: int, num_terms: int): """ Randomly generate a polynomial expression of 'degree'. 
We'll use the config parameters: @@ -146,7 +151,6 @@ In solving equations, please follow these instructions: x = Symbol(variable) # Choose the number of terms and their respective degrees - num_terms = rng.randint(self.config.min_terms, self.config.max_terms) # Keep track of exponents, exponents can repeat or skip but we force the highest exponent chosen_exponents = [degree] # Fill the rest randomly in [0, degree] @@ -266,4 +270,33 @@ In solving equations, please follow these instructions: return final_reward -register_dataset("polynomial_equations", PolynomialEquationsDataset, PolynomialEquationsConfig) +class PolynomialEquationsCurriculum(BaseCurriculum): + def __init__(self): + super().__init__(PolynomialEquationsCurriculum.__name__, PolynomialEquationsConfig) + self._define_attributes( + RangeAttributeDefinition( + name="degree", + levels=[1, 2, 3, 4], + default_level=0, + min_value=1, + attr_type=AttributeType.APPEND, + lower_field_name="min_degree", + upper_field_name="max_degree", + description="The degree of the polynomial equation", + ), + RangeAttributeDefinition( + name="terms", + levels=[2, 3, 4, 5], + default_level=0, + min_value=2, + attr_type=AttributeType.APPEND, + lower_field_name="min_terms", + upper_field_name="max_terms", + description="The number of terms in the polynomial equation", + ), + ) + + +register_dataset( + "polynomial_equations", PolynomialEquationsDataset, PolynomialEquationsConfig, PolynomialEquationsCurriculum +) diff --git a/reasoning_gym/algebra/simple_integration.py b/reasoning_gym/algebra/simple_integration.py index 321a7a9d..3e9df2dc 100644 --- a/reasoning_gym/algebra/simple_integration.py +++ b/reasoning_gym/algebra/simple_integration.py @@ -5,6 +5,7 @@ from typing import Any, Optional import sympy +from ..coaching import AttributeType, BaseCurriculum, RangeAttributeDefinition from ..factory import ProceduralDataset, register_dataset @@ -55,12 +56,12 @@ When performing calculations, please follow these guidelines: 
denominator = rng.randint(2, 10) return Fraction(rng.randint(self.config.min_bounds, self.config.max_bounds), denominator) - def _generate_polynomial(self, rng: random.Random) -> tuple[sympy.Symbol, sympy.Expr]: + def _generate_polynomial(self, rng: random.Random, num_terms: int) -> tuple[sympy.Symbol, sympy.Expr]: """Generate a random polynomial with one variable""" terms = [] x = sympy.Symbol(rng.choice(self.config.symbols)) - for _ in range(rng.randint(self.config.min_terms, self.config.max_terms)): + for _ in range(num_terms): coefficient = self._generate_coefficient(rng) degree = rng.randint(self.config.min_degree, self.config.max_degree) operator = rng.choice(self.config.operators) @@ -72,7 +73,8 @@ When performing calculations, please follow these guidelines: def __getitem__(self, idx: int) -> dict: rng = random.Random(self.seed + idx) - symbol, polynomial = self._generate_polynomial(rng) + num_terms = rng.randint(self.config.min_terms, self.config.max_terms) + symbol, polynomial = self._generate_polynomial(rng, num_terms) derivative = sympy.diff(polynomial, symbol) question = rng.choice(self._prompt_templates).format(integrand=derivative) + self.added_instruction @@ -83,6 +85,7 @@ When performing calculations, please follow these guidelines: "integrand": str(derivative), "variable": str(symbol), "expected_answer_expression": polynomial, + "difficulty": {"terms": num_terms}, }, } @@ -108,4 +111,21 @@ When performing calculations, please follow these guidelines: return reward -register_dataset("simple_integration", SimpleIntegrationDataset, SimpleIntegrationConfig) +class SimpleIntegrationCurriculum(BaseCurriculum): + def __init__(self): + super().__init__(SimpleIntegrationCurriculum.__name__, SimpleIntegrationConfig) + self._define_attributes( + RangeAttributeDefinition( + name="terms", + levels=[2, 3, 4, 5], + default_level=0, + min_value=2, + attr_type=AttributeType.APPEND, + lower_field_name="min_terms", + upper_field_name="max_terms", + description="The 
number of terms in the polynomial", + ) + ) + + +register_dataset("simple_integration", SimpleIntegrationDataset, SimpleIntegrationConfig, SimpleIntegrationCurriculum) diff --git a/tests/test_polynomial_equations.py b/tests/test_polynomial_equations.py index f99e6424..277fe041 100644 --- a/tests/test_polynomial_equations.py +++ b/tests/test_polynomial_equations.py @@ -3,7 +3,11 @@ from pytest import approx from sympy import Symbol, sympify from reasoning_gym import create_dataset -from reasoning_gym.algebra.polynomial_equations import PolynomialEquationsConfig, PolynomialEquationsDataset +from reasoning_gym.algebra.polynomial_equations import ( + PolynomialEquationsConfig, + PolynomialEquationsCurriculum, + PolynomialEquationsDataset, +) def test_polynomial_config_validation(): @@ -147,3 +151,27 @@ def test_polynomial_perfect_score(): for item in ds: assert ds.score_answer(item["answer"], item) == 1.0 + + +def test_polynomial_equations_curriculum(): + curriculum = PolynomialEquationsCurriculum() + + base_value = {"size": 150, "seed": 1} + + base_cfg: PolynomialEquationsConfig = curriculum.generate_configuration(base_value) + assert base_cfg.seed == 1 + assert base_cfg.size == 150 + # Check default values for degree attribute + assert base_cfg.min_degree == 1 and base_cfg.max_degree == 1 + # Check default values for terms attribute + assert base_cfg.min_terms == 2 and base_cfg.max_terms == 2 + + # Test incrementing attribute levels + curriculum.increment_attr_level("degree") + curriculum.increment_attr_level("terms") + + increased_cfg = curriculum.generate_configuration(base_value) + # Check increased values for degree attribute + assert increased_cfg.min_degree == 1 and increased_cfg.max_degree == 2 + # Check increased values for terms attribute + assert increased_cfg.min_terms == 2 and increased_cfg.max_terms == 3 diff --git a/tests/test_simple_integration.py b/tests/test_simple_integration.py index 994bf20f..4c82a5a4 100644 --- a/tests/test_simple_integration.py 
+++ b/tests/test_simple_integration.py @@ -2,7 +2,11 @@ import pytest import sympy from sympy.parsing.sympy_parser import parse_expr -from reasoning_gym.algebra.simple_integration import SimpleIntegrationConfig, SimpleIntegrationDataset +from reasoning_gym.algebra.simple_integration import ( + SimpleIntegrationConfig, + SimpleIntegrationCurriculum, + SimpleIntegrationDataset, +) def test_simple_integration_config_validation(): @@ -116,3 +120,40 @@ def test_score_answer_cases(): dummy_entry = {"metadata": metadata} score = dataset.score_answer(answer=answer, entry=dummy_entry) assert score == expected, f"Failed case: {answer} | Expected {expected}, got {score}" + + +def test_simple_integration_curriculum(): + """Test curriculum functionality for SimpleIntegration""" + curriculum = SimpleIntegrationCurriculum() + + base_value = {"size": 150, "seed": 1} + + base_cfg: SimpleIntegrationConfig = curriculum.generate_configuration(base_value) + assert base_cfg.seed == 1 + assert base_cfg.size == 150 + assert base_cfg.min_terms == 2 and base_cfg.max_terms == 2 + + # test incrementing attribute levels + curriculum.increment_attr_level("terms") + increased_cfg = curriculum.generate_configuration(base_value) + assert increased_cfg.min_terms == 2 and increased_cfg.max_terms == 3 + + # test decrementing attribute level for terms + curriculum.decrement_attr_level("terms") + partially_decreased_cfg = curriculum.generate_configuration(base_value) + assert partially_decreased_cfg.min_terms == 2 and partially_decreased_cfg.max_terms == 2 + + # test global level adjustments + curriculum = SimpleIntegrationCurriculum() # reset curriculum + assert curriculum.get_attr_level("terms") == 0 + + # Increase global level + curriculum.increment_global_level() + assert curriculum.get_attr_level("terms") == 1 + + global_level_cfg = curriculum.generate_configuration(base_value) + assert global_level_cfg.min_terms == 2 and global_level_cfg.max_terms == 3 + + # Increase global level again + 
curriculum.increment_global_level() + assert curriculum.get_attr_level("terms") == 2