mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-30 17:40:45 +00:00
Minor question template & score_answer improvements (#261)
* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
This commit is contained in:
parent
bf24999bb0
commit
b2904ccab9
106 changed files with 403 additions and 507 deletions
|
|
@ -92,7 +92,6 @@ def test_polynomial_equations_dataset_items():
|
|||
|
||||
# Check metadata
|
||||
assert isinstance(item["metadata"]["polynomial_expr"], str)
|
||||
assert isinstance(item["metadata"]["result"], str)
|
||||
assert isinstance(item["metadata"]["variables"], list)
|
||||
|
||||
# Check polynomial_expr existence
|
||||
|
|
@ -127,42 +126,6 @@ def test_cross_polynomial_equations_dataset_items():
|
|||
|
||||
# Check metadata
|
||||
assert isinstance(item["metadata"]["polynomial_expr"], str)
|
||||
assert isinstance(item["metadata"]["result"], str)
|
||||
assert isinstance(item["metadata"]["variables"], list)
|
||||
|
||||
# Check polynomial_expr existence
|
||||
poly_str = item["metadata"]["polynomial_expr"]
|
||||
# Ensure it can parse with sympy
|
||||
sp.sympify(poly_str)
|
||||
|
||||
|
||||
def test_cross_polynomial_equations_dataset_items():
|
||||
"""Test that generated items have correct structure"""
|
||||
ds = create_dataset(
|
||||
"polynomial_multiplication",
|
||||
min_terms=2,
|
||||
max_terms=3,
|
||||
min_value=1,
|
||||
max_value=5,
|
||||
min_degree=1,
|
||||
max_degree=2,
|
||||
min_polynomials=2,
|
||||
max_polynomials=5,
|
||||
variables=tuple("xyz"),
|
||||
allow_cross_variable_product=True,
|
||||
allow_multivariate_polynomials=False,
|
||||
size=3,
|
||||
seed=100,
|
||||
)
|
||||
|
||||
for item in ds:
|
||||
assert "question" in item
|
||||
assert "answer" in item
|
||||
assert "metadata" in item
|
||||
|
||||
# Check metadata
|
||||
assert isinstance(item["metadata"]["polynomial_expr"], str)
|
||||
assert isinstance(item["metadata"]["result"], str)
|
||||
assert isinstance(item["metadata"]["variables"], list)
|
||||
|
||||
# Check polynomial_expr existence
|
||||
|
|
@ -197,7 +160,6 @@ def test_multivariate_polynomial_equations_dataset_items():
|
|||
|
||||
# Check metadata
|
||||
assert isinstance(item["metadata"]["polynomial_expr"], str)
|
||||
assert isinstance(item["metadata"]["result"], str)
|
||||
assert isinstance(item["metadata"]["variables"], list)
|
||||
|
||||
# Check polynomial_expr existence
|
||||
|
|
@ -242,7 +204,7 @@ def test_polynomial_solutions_evaluation():
|
|||
poly_expr = sp.expand(poly_str)
|
||||
|
||||
# Verify that each solution satisfies the polynomial
|
||||
assert poly_expr == item["answer"]
|
||||
assert str(poly_expr) == item["answer"]
|
||||
|
||||
|
||||
def test_score_function():
|
||||
|
|
@ -266,11 +228,11 @@ def test_score_function():
|
|||
|
||||
for item in ds:
|
||||
poly_str = item["metadata"]["polynomial_expr"]
|
||||
assert ds.score_answer(poly_str, item) == 0.05
|
||||
assert ds.score_answer(poly_str, item) == 0.0
|
||||
|
||||
poly_expr = str(sp.expand(poly_str))
|
||||
assert ds.score_answer(poly_expr, item) == 1.0
|
||||
|
||||
assert ds.score_answer(None, item) == 0.00
|
||||
assert ds.score_answer("Not a polynomial", item) == 0.01
|
||||
assert ds.score_answer("x**4", item) == 0.05
|
||||
assert ds.score_answer(None, item) == 0.0
|
||||
assert ds.score_answer("Not a polynomial", item) == 0.0
|
||||
assert ds.score_answer("x**4", item) == 0.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue