From c0e98f93b4e2a5cb0820c387cc93767618370fd5 Mon Sep 17 00:00:00 2001 From: Oliver Stanley Date: Mon, 2 Jun 2025 07:57:15 +0100 Subject: [PATCH] make task entries json serializable (#443) * make sympy-based task entries json serializable * remove datetime objs from time_intervals metadata * make adv geometry json serializable * make futoshiki metadata json serializable * fixes * futoshiki tweaks * fix adv geometry * deal with fractions in str representations * fix * restore start_time, end_time as str --- .../algebra/intermediate_integration.py | 1 - .../algebra/polynomial_multiplication.py | 2 +- reasoning_gym/algebra/simple_integration.py | 1 - reasoning_gym/arithmetic/time_intervals.py | 4 +-- reasoning_gym/games/futoshiki.py | 4 ++- reasoning_gym/games/puzzle24.py | 1 - reasoning_gym/geometry/advanced_geometry.py | 31 +++++++++++-------- tests/test_futoshiki.py | 10 ++++-- tests/test_puzzle24.py | 1 - tests/test_simple_integration.py | 1 - tests/test_time_intervals.py | 17 +++------- 11 files changed, 36 insertions(+), 37 deletions(-) diff --git a/reasoning_gym/algebra/intermediate_integration.py b/reasoning_gym/algebra/intermediate_integration.py index 78daad06..6449bd55 100644 --- a/reasoning_gym/algebra/intermediate_integration.py +++ b/reasoning_gym/algebra/intermediate_integration.py @@ -242,7 +242,6 @@ Use same variable symbols as given in the question "integrand": str(integrand), "problem_type": problem_type, "variable": str(x), - "expected_answer_expression": answer, "difficulty": { "problem_type_weights": self.config.problem_type_weights, }, diff --git a/reasoning_gym/algebra/polynomial_multiplication.py b/reasoning_gym/algebra/polynomial_multiplication.py index 4a388532..f0541ac3 100644 --- a/reasoning_gym/algebra/polynomial_multiplication.py +++ b/reasoning_gym/algebra/polynomial_multiplication.py @@ -114,7 +114,7 @@ When performing calculations, please follow these guidelines: "source_dataset": DATASET_NAME, "source_index": idx, "polynomial_expr": str(polynomial_expr), - "variables": list(product.free_symbols), + "variables": [str(x) for x in product.free_symbols], "difficulty": { "min_terms": self.config.min_terms, "max_terms": self.config.max_terms, diff --git a/reasoning_gym/algebra/simple_integration.py b/reasoning_gym/algebra/simple_integration.py index 78823ca8..d9fdfb2c 100644 --- a/reasoning_gym/algebra/simple_integration.py +++ b/reasoning_gym/algebra/simple_integration.py @@ -88,7 +88,6 @@ When performing calculations, please follow these guidelines: "source_index": idx, "integrand": str(derivative), "variable": str(symbol), - "expected_answer_expression": polynomial, "num_terms": num_terms, "difficulty": { "terms": (self.config.min_terms, self.config.max_terms), diff --git a/reasoning_gym/arithmetic/time_intervals.py b/reasoning_gym/arithmetic/time_intervals.py index 249ff5aa..1cf7c1b0 100644 --- a/reasoning_gym/arithmetic/time_intervals.py +++ b/reasoning_gym/arithmetic/time_intervals.py @@ -139,8 +139,8 @@ class TimeIntervalsDataset(ProceduralDataset): "source_dataset": DATASET_NAME, "source_index": idx, "task_type": task_type, - "start_time": start_dt, - "end_time": end_dt, + "start_time": str(start_dt), + "end_time": str(end_dt), "format": format_str, "expected_format": expected_format, "difficulty": { diff --git a/reasoning_gym/games/futoshiki.py b/reasoning_gym/games/futoshiki.py index 5e377fdb..4246e8b9 100644 --- a/reasoning_gym/games/futoshiki.py +++ b/reasoning_gym/games/futoshiki.py @@ -79,6 +79,8 @@ class FutoshikiDataset(ProceduralDataset): f"Remember, in Futoshiki each row and column must contain each number from 1 to {board_size} exactly once." ) + constraints_meta = [(r1, c1, r2, c2, sign) for ((r1, c1), (r2, c2)), sign in constraints.items()] + return { "question": question, "answer": solution_str, @@ -86,7 +88,7 @@ class FutoshikiDataset(ProceduralDataset): "source_dataset": DATASET_NAME, "source_index": idx, "puzzle": puzzle, - "constraints": constraints, + "constraints": constraints_meta, "solution": solution, "board_size": board_size, "difficulty_rating": difficulty, diff --git a/reasoning_gym/games/puzzle24.py b/reasoning_gym/games/puzzle24.py index 015873ac..4334be81 100644 --- a/reasoning_gym/games/puzzle24.py +++ b/reasoning_gym/games/puzzle24.py @@ -107,7 +107,6 @@ class Puzzle24Dataset(ProceduralDataset): "source_dataset": DATASET_NAME, "source_index": idx, "numbers": numbers, - "expression": expr, "difficulty": {"value": (self.config.min_value, self.config.max_value)}, }, } diff --git a/reasoning_gym/geometry/advanced_geometry.py b/reasoning_gym/geometry/advanced_geometry.py index 9d661a23..d45cfc4a 100644 --- a/reasoning_gym/geometry/advanced_geometry.py +++ b/reasoning_gym/geometry/advanced_geometry.py @@ -150,11 +150,10 @@ class AdvancedGeometryDataset(ProceduralDataset): question = question_template.format(A=(A.x, A.y), B=(B.x, B.y), C=(C.x, C.y), a="a", b="b") answer_str = f"({x_ortho_approx:.3f}, {y_ortho_approx:.3f})" metadata = { - "A": (A.x, A.y), - "B": (B.x, B.y), - "C": (C.x, C.y), - "ortho": (ortho.x, ortho.y), - "orthocenter_exact": (str(ortho.x), str(ortho.y)), + "A": (str(A.x), str(A.y)), + "B": (str(B.x), str(B.y)), + "C": (str(C.x), str(C.y)), + "ortho": (str(ortho.x), str(ortho.y)), "orthocenter_approx": (x_ortho_approx, y_ortho_approx), } return question, answer_str, metadata @@ -185,9 +184,9 @@ class AdvancedGeometryDataset(ProceduralDataset): answer_str = f"{radius_approx:.3f}" metadata = { - "A": (A.x, A.y), - "B": (B.x, B.y), - "C": (C.x, C.y), + "A": (str(A.x), str(A.y)), + "B": (str(B.x), str(B.y)), + "C": (str(C.x), str(C.y)), "incircle_radius_exact": str(radius), "incircle_radius_approx": radius_approx, } @@ -224,9 +223,9 @@ class AdvancedGeometryDataset(ProceduralDataset): answer_str = f"{angle_deg:.2f}°" metadata = { - "A": (A.x, A.y), - "B": (B.x, B.y), - "C": (C.x, C.y), + "A": (str(A.x), str(A.y)), + "B": (str(B.x), str(B.y)), + "C": (str(C.x), str(C.y)), "angle_ABC_degrees": angle_deg, } return question, answer_str, metadata @@ -252,8 +251,14 @@ class AdvancedGeometryDataset(ProceduralDataset): x_coord = float(answer.split(",")[0].replace("(", "").strip()) y_coord = float(answer.split(",")[1].replace(")", "").strip()) - expected_x = float(metadata["ortho"][0]) - expected_y = float(metadata["ortho"][1]) + ortho_0, ortho_1 = metadata["ortho"] + if "/" in ortho_0: + ortho_0 = int(ortho_0.split("/")[0]) / int(ortho_0.split("/")[1]) + if "/" in ortho_1: + ortho_1 = int(ortho_1.split("/")[0]) / int(ortho_1.split("/")[1]) + + expected_x = float(ortho_0) + expected_y = float(ortho_1) if x_coord == expected_x and y_coord == expected_y: reward = 1.0 elif (np.round(x_coord, 3) == np.round(expected_x, 3)) and ( diff --git a/tests/test_futoshiki.py b/tests/test_futoshiki.py index dd89a61c..156b2fdd 100644 --- a/tests/test_futoshiki.py +++ b/tests/test_futoshiki.py @@ -58,7 +58,7 @@ def test_futoshiki_items(): assert len(row) <= config.max_board_size # Verify constraints format constraints = metadata["constraints"] - for ((r1, c1), (r2, c2)), rel in constraints.items(): + for r1, c1, r2, c2, rel in constraints: assert 0 <= r1 < config.max_board_size assert 0 <= c1 < config.max_board_size assert 0 <= r2 < config.max_board_size @@ -97,7 +97,9 @@ def test_futoshiki_solution_validity(): item = dataset[i] metadata = item["metadata"] solution = metadata["solution"] - constraints = metadata["constraints"] + constraints_meta = metadata["constraints"] + + constraints = {((r1, c1), (r2, c2)): rel for (r1, c1, r2, c2, rel) in constraints_meta} assert is_valid_solution(solution, config.min_board_size, constraints) @@ -111,7 +113,9 @@ def test_futoshiki_puzzle_solvability(): item = dataset[i] metadata = item["metadata"] puzzle = metadata["puzzle"] - constraints = metadata["constraints"] + constraints_meta = metadata["constraints"] + + constraints = {((r1, c1), (r2, c2)): rel for (r1, c1, r2, c2, rel) in constraints_meta} # Verify puzzle has exactly one solution assert dataset.count_solutions(puzzle, constraints, limit=2) == 1 diff --git a/tests/test_puzzle24.py b/tests/test_puzzle24.py index 5d8801dc..4b6d3036 100644 --- a/tests/test_puzzle24.py +++ b/tests/test_puzzle24.py @@ -47,7 +47,6 @@ def test_puzzle24_basic_properties(): # Check metadata contains required fields assert "numbers" in item["metadata"] - assert "expression" in item["metadata"] # Check question format assert "Make 24 using" in item["question"] diff --git a/tests/test_simple_integration.py b/tests/test_simple_integration.py index 4c82a5a4..641928f4 100644 --- a/tests/test_simple_integration.py +++ b/tests/test_simple_integration.py @@ -64,7 +64,6 @@ def test_simple_integration_dataset_items(): assert "integrand" in item["metadata"] assert "variable" in item["metadata"] - assert "expected_answer_expression" in item["metadata"] # Verify answer is a mathematical expression answer = item["answer"] diff --git a/tests/test_time_intervals.py b/tests/test_time_intervals.py index 041651b9..1ca5cfbf 100644 --- a/tests/test_time_intervals.py +++ b/tests/test_time_intervals.py @@ -51,8 +51,6 @@ def test_time_intervals_items(): assert "answer" in item assert "metadata" in item assert "task_type" in item["metadata"] - assert "start_time" in item["metadata"] - assert "end_time" in item["metadata"] def test_time_intervals_scoring(): @@ -93,23 +91,18 @@ def test_time_intervals_scoring(): assert 0 < score < 1 -def test_time_format_patterns(): - """Test that generated times match expected formats""" +def test_oracle_answer(): + """Test that generated answer is marked correct""" config = TimeIntervalsConfig(seed=42, size=500) dataset = TimeIntervalsDataset(config) for i in range(len(dataset)): item = dataset[i] - start_dt = item["metadata"]["start_time"] - end_dt = item["metadata"]["end_time"] + metadata = item["metadata"] + assert "start_time" in metadata + assert "end_time" in metadata - # Verify both are datetime objects - assert isinstance(start_dt, datetime) - assert isinstance(end_dt, datetime) - - # Verify end is after start - assert end_dt >= start_dt, item["question"] assert dataset.score_answer(item["answer"], item) == 1.0