diff --git a/GALLERY.md b/GALLERY.md index 2bd381fa..d4290623 100644 --- a/GALLERY.md +++ b/GALLERY.md @@ -26,6 +26,7 @@ This gallery shows examples from all available datasets using their default conf - [fraction_simplification](#fraction_simplification) - [game_of_life](#game_of_life) - [gcd](#gcd) +- [graph_color](#graph_color) - [group_anagrams](#group_anagrams) - [gsm_symbolic](#gsm_symbolic) - [intermediate_integration](#intermediate_integration) @@ -46,8 +47,10 @@ This gallery shows examples from all available datasets using their default conf - [palindrome](#palindrome) - [polynomial_equations](#polynomial_equations) - [polynomial_multiplication](#polynomial_multiplication) +- [pool_matrix](#pool_matrix) - [power_function](#power_function) - [prime_factorization](#prime_factorization) +- [products](#products) - [propositional_logic](#propositional_logic) - [quantum_lock](#quantum_lock) - [ransom_note](#ransom_note) @@ -63,6 +66,9 @@ This gallery shows examples from all available datasets using their default conf - [sokoban](#sokoban) - [spell_backward](#spell_backward) - [spiral_matrix](#spiral_matrix) +- [string_insertion](#string_insertion) +- [string_manipulation](#string_manipulation) +- [string_synthesis](#string_synthesis) - [sudoku](#sudoku) - [syllogism](#syllogism) - [time_intervals](#time_intervals) @@ -1488,8 +1494,8 @@ Generates Game of Life games with configurable parameters Default configuration: ```python -grid_size_x = 20 -grid_size_y = 20 +grid_size_x = 10 +grid_size_y = 10 filled_cells = 100 simulation_steps = 1 seed = 42 @@ -1499,139 +1505,52 @@ size = 500 Example tasks: ```` Example 1: -Question: What will this Game of Life board look like after 1 steps of simulation? +Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. 
(An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]]) -[[0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0] - [0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0] - [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0] - [1 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 0 0] - [0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 0 1 0 0 0] - [0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0] - [0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0] - [1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0] - [1 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0] - [0 0 1 0 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 1] - [1 1 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0] - [0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 1 1] - [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1] - [0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1] - [0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0] - [1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0] - [1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1] - [0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0] - [0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0] - [0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0]] -Answer: [[0 0 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0] - [0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0] - [0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0] - [0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 0 1 1 0] - [0 0 0 0 0 1 1 1 0 0 1 1 0 1 0 0 1 1 0 0] - [0 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 1 1 1 0] - [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1] - [1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0] - [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0] - [0 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 1] - [1 1 1 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 1 0] - [0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1] - [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1] - [1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1] - [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1] - [1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0] - [1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1] - [0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0] - [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - [0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -Metadata: {'grid_size_x': 20, 'grid_size_y': 20, 'filled_cells': 100, 'simulation_steps': 1} +[[0,1,0,1,1,0,0,0,1,0], + [1,0,0,1,0,1,1,1,1,1], + 
[0,0,1,1,1,1,1,1,1,0], + [1,1,1,1,0,0,0,0,1,1], + [1,1,1,1,0,0,1,0,1,1], + [1,1,0,1,1,0,1,1,0,1], + [1,0,0,1,1,0,0,0,0,1], + [1,1,1,0,0,1,1,0,1,1], + [1,1,1,1,1,0,0,1,0,1], + [0,1,1,1,0,1,1,0,1,0]]. +Answer: [[0,1,0,0,0,0,0,0,0,0],[1,1,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,1,1,0,0,0],[0,0,0,0,0,0,1,1,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,1,1,1,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,1,1,0,1,0]] +Metadata: {'grid_size_x': 10, 'grid_size_y': 10, 'filled_cells': 100, 'simulation_steps': 1} Example 2: -Question: What will this Game of Life board look like after 1 steps of simulation? +Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. (An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]]) -[[1 0 0 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0] - [0 0 1 1 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0] - [0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 1 0 0 0] - [0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 1 1] - [0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0] - [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0] - [0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0] - [1 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0] - [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0] - [0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0] - [0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0] - [0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1] - [0 0 1 1 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 1] - [0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1] - [0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1] - [0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1] - [0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0] - [0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1] - [0 1 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0] - [0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0]] -Answer: [[0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1] - [0 0 1 1 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0] - [0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1] - [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1] - [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1] - [0 0 1 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 1 0] - [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - [1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0] - [0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0] - [0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0] - [0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - [1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0] - [1 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1] - [1 0 1 1 0 1 0 0 0 1 1 0 0 0 0 0 0 1 0 0] - [1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0] - [0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 1] - [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - [0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - [0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0] - [0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]] -Metadata: {'grid_size_x': 20, 'grid_size_y': 20, 'filled_cells': 100, 'simulation_steps': 1} +[[1,1,1,1,1,1,0,1,1,1], + [0,0,1,1,1,1,1,1,1,1], + [0,1,0,0,0,0,0,1,1,1], + [1,0,0,1,1,1,1,0,0,1], + [0,1,0,1,1,0,0,1,1,0], + [1,1,1,1,0,1,1,0,1,1], + [0,1,1,0,1,1,1,0,0,1], + [0,0,1,0,1,1,0,0,1,1], + [0,1,1,0,1,0,1,0,1,1], + [1,1,1,0,1,1,1,0,0,1]]. +Answer: [[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0],[0,1,0,1,0,1,1,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,1,0,0,0],[0,0,0,0,0,0,0,0,0,0]] +Metadata: {'grid_size_x': 10, 'grid_size_y': 10, 'filled_cells': 100, 'simulation_steps': 1} Example 3: -Question: What will this Game of Life board look like after 1 steps of simulation? +Question: What will this Game of Life board look like after 1 steps of simulation? Reply as array of array representing rows in the grid from top to bottom in JSON format. 
(An empty 3x3 grid would look like this: [[0,0,0],[0,0,0],[0,0,0]]) -[[0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 0 0 0 1 1] - [0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 1] - [0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0] - [0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0] - [0 0 1 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0] - [0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0] - [0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0] - [0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 1] - [0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0] - [1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1] - [0 0 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0] - [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0] - [0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0] - [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0] - [0 0 1 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0] - [0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0] - [0 0 1 1 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0] - [1 0 0 1 1 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0] - [0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0] - [0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0]] -Answer: [[1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1] - [0 0 1 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 1] - [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - [0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0] - [0 0 0 0 1 1 1 1 1 0 0 1 1 0 0 0 0 0 0 0] - [0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0] - [0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0] - [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0] - [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1] - [0 1 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0] - [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1] - [0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0] - [0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0] - [0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0] - [0 0 1 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0] - [0 1 0 0 1 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0] - [0 1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 0] - [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0] - [0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0] - [0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0]] -Metadata: {'grid_size_x': 20, 'grid_size_y': 20, 'filled_cells': 100, 'simulation_steps': 1} +[[0,1,0,1,1,1,1,0,0,1], + [0,1,0,0,1,1,1,0,1,1], + 
[0,1,1,1,1,0,1,0,1,0], + [1,0,0,1,1,0,1,1,1,1], + [1,1,1,0,0,1,1,0,1,1], + [0,1,0,0,1,1,0,1,0,1], + [0,1,1,0,0,0,1,0,1,1], + [0,1,1,0,1,1,1,1,0,1], + [1,1,1,1,1,1,0,1,1,0], + [1,1,1,0,0,1,1,0,1,0]]. +Answer: [[0,0,0,1,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,1,1],[0,1,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,1,0,0,0,0,0,0,0],[0,0,0,1,1,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,1],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,1,0]] +Metadata: {'grid_size_x': 10, 'grid_size_y': 10, 'filled_cells': 100, 'simulation_steps': 1} ```` @@ -1667,6 +1586,58 @@ Metadata: {'numbers': [297, 30], 'result': 3} ```` +### graph_color +Generates graph coloring problems with configurable parameters + +Default configuration: +```python +num_colors = 4 +num_vertices = 10 +edge_probability = 0.4 +seed = 42 +size = 500 +``` + +Example tasks: +```` +Example 1: +Question: Please provide a coloring for this graph such that every vertex is not connected to a vertex of the same color. The graph has these properties: + +Vertices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +Edges: [(0, 2), (0, 3), (0, 4), (0, 8), (1, 2), (1, 3), (1, 5), (1, 6), (1, 9), (2, 5), (2, 8), (2, 9), (3, 5), (3, 6), (3, 7), (4, 9), (6, 9), (7, 8), (7, 9), (8, 9)] +Possible colors: [1, 2, 3, 4] + +Return your solution as a JSON map of verteces to colors. (For example: {0: 1, 1: 2, 2: 3}) + +Answer: None +Metadata: {'possible_answer': {0: 1, 1: 1, 2: 2, 3: 2, 4: 2, 5: 3, 6: 3, 7: 1, 8: 3, 9: 4}, 'puzzle': {'vertices': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'edges': [(0, 2), (0, 3), (0, 4), (0, 8), (1, 2), (1, 3), (1, 5), (1, 6), (1, 9), (2, 5), (2, 8), (2, 9), (3, 5), (3, 6), (3, 7), (4, 9), (6, 9), (7, 8), (7, 9), (8, 9)], 'num_colors': 4, 'color_options': [1, 2, 3, 4]}} + +Example 2: +Question: Please provide a coloring for this graph such that every vertex is not connected to a vertex of the same color. 
The graph has these properties: + +Vertices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +Edges: [(0, 1), (0, 3), (0, 9), (1, 3), (1, 8), (2, 4), (2, 5), (3, 6), (3, 7), (3, 8), (4, 6), (4, 9), (6, 7), (7, 9)] +Possible colors: [1, 2, 3, 4] + +Return your solution as a JSON map of vertices to colors. (For example: {0: 1, 1: 2, 2: 3}) + +Answer: None +Metadata: {'possible_answer': {0: 1, 1: 2, 2: 1, 3: 3, 4: 2, 5: 2, 6: 1, 7: 2, 8: 1, 9: 3}, 'puzzle': {'vertices': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'edges': [(0, 1), (0, 3), (0, 9), (1, 3), (1, 8), (2, 4), (2, 5), (3, 6), (3, 7), (3, 8), (4, 6), (4, 9), (6, 7), (7, 9)], 'num_colors': 4, 'color_options': [1, 2, 3, 4]}} + +Example 3: +Question: Please provide a coloring for this graph such that every vertex is not connected to a vertex of the same color. The graph has these properties: + +Vertices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +Edges: [(0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9), (1, 5), (1, 8), (1, 9), (2, 5), (2, 6), (2, 7), (2, 9), (3, 6), (3, 7), (4, 5), (4, 6), (4, 7), (4, 8), (5, 8), (6, 9)] +Possible colors: [1, 2, 3, 4] + +Return your solution as a JSON map of vertices to colors. (For example: {0: 1, 1: 2, 2: 3}) + +Answer: None +Metadata: {'possible_answer': {0: 1, 1: 1, 2: 1, 3: 1, 4: 2, 5: 3, 6: 3, 7: 3, 8: 4, 9: 2}, 'puzzle': {'vertices': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'edges': [(0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9), (1, 5), (1, 8), (1, 9), (2, 5), (2, 6), (2, 7), (2, 9), (3, 6), (3, 7), (4, 5), (4, 6), (4, 7), (4, 8), (5, 8), (6, 9)], 'num_colors': 4, 'color_options': [1, 2, 3, 4]}} + +```` + ### group_anagrams Generates Group Anagrams exercises with configurable difficulty @@ -1790,16 +1761,28 @@ Example tasks: ```` Example 1: Question: Find the indefinite integral: ∫ -3*exp(3*x + 9) dx +In addition, when doing calculation, use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. 
+## 2. Always use * when doing all sorts of multiplication in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. + Answer: -exp(3*x + 9) + C Metadata: {'integrand': '-3*exp(3*x + 9)', 'problem_type': 'substitution', 'variable': 'x', 'type': 'exponential', 'expected_answer_expression': -exp(3*x + 9)} Example 2: Question: Evaluate the indefinite integral: ∫ -6*sin(2*X + 10)*cos(2*X + 10)**4 dx +In addition, when doing calculation, use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead of ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplication in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. + Answer: 3*cos(2*X + 10)**5/5 + C Metadata: {'integrand': '-6*sin(2*X + 10)*cos(2*X + 10)**4', 'problem_type': 'substitution', 'variable': 'X', 'type': 'trigonometric', 'expected_answer_expression': 3*cos(2*X + 10)**5/5} Example 3: Question: Find the indefinite integral: ∫ 2*asin(x) dx +In addition, when doing calculation, use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead of ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplication in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. 
+ Answer: 2*Integral(asin(x), x) + C Metadata: {'integrand': '2*asin(x)', 'problem_type': 'by_parts', 'variable': 'x', 'type': 'log_inverse_trig', 'expected_answer_expression': 2*Integral(asin(x), x)} @@ -2636,16 +2619,28 @@ Example tasks: ```` Example 1: Question: Sort these numbers in ascending order: 48, -51, -72, -80 +Please follow the instruction below: +## 1. Let all your answers be a list of numbers. Instead of reporting your answer as -69, -13, 1, 7, 11, 43, 59, 61, use ['-69', '-13', '1', '7', '11', '43', '59', '61'] instead +## 2. Convert all numbers in the square brackets as strings. For example, ['-69', '-13', '1', '7', '11', '43', '59', '61'] + Answer: ['-80', '-72', '-51', '48'] Metadata: {'original_numbers': ['48', '-51', '-72', '-80'], 'direction': 'ascending', 'sorted_numbers': ['-80', '-72', '-51', '48']} Example 2: Question: Sort these numbers in ascending order: 39.2, -71.2, -7.5 +Please follow the instruction below: +## 1. Let all your answers be a list of numbers. Instead of reporting your answer as -69, -13, 1, 7, 11, 43, 59, 61, use ['-69', '-13', '1', '7', '11', '43', '59', '61'] instead +## 2. Convert all numbers in the square brackets as strings. For example, ['-69', '-13', '1', '7', '11', '43', '59', '61'] + Answer: ['-71.2', '-7.5', '39.2'] Metadata: {'original_numbers': ['39.2', '-71.2', '-7.5'], 'direction': 'ascending', 'sorted_numbers': ['-71.2', '-7.5', '39.2']} Example 3: Question: Sort these numbers in descending order: 8.39, 72.41, -64.67, -54.97, -94.18, -76.67, -98.24, -68.66, 2.74 +Please follow the instruction below: +## 1. Let all your answers be a list of numbers. Instead of reporting your answer as -69, -13, 1, 7, 11, 43, 59, 61, use ['-69', '-13', '1', '7', '11', '43', '59', '61'] instead +## 2. Convert all numbers in the square brackets as strings. 
For example, ['-69', '-13', '1', '7', '11', '43', '59', '61'] + Answer: ['72.41', '8.39', '2.74', '-54.97', '-64.67', '-68.66', '-76.67', '-94.18', '-98.24'] Metadata: {'original_numbers': ['8.39', '72.41', '-64.67', '-54.97', '-94.18', '-76.67', '-98.24', '-68.66', '2.74'], 'direction': 'descending', 'sorted_numbers': ['72.41', '8.39', '2.74', '-54.97', '-64.67', '-68.66', '-76.67', '-94.18', '-98.24']} @@ -2721,19 +2716,40 @@ size = 500 Example tasks: ```` Example 1: -Question: Find the real value(s) of u in the equation: -127*u = 0 -Answer: [0.0] -Metadata: {'polynomial_expr': '-127*u', 'variable': 'u', 'degree': 1, 'real_solutions': [0.0]} +Question: Find the real value(s) of w in the equation: -127*w = 0 +In solving the equations, please abide by the following instruction: +## 1. All answers should be comma-separated. For example "-0.3773, 0.4005" etc. +## 2. In cases where your answer is b = 2 + sqrt(4560) / 172 and b = 2 - sqrt(4560) / 172. Since b can be 2 numbers, resolve your answer like this instead, "-0.3773, 0.4005". +## 3. If there are no real values of i that satisfy the equation, report your answer as empty string, "". +## 4. If there are 2 answers, resolve the answers as comma-separated floats of 2 numbers, if 3 answers, make it comma-separated floats of 3 numbers. +## 5. Resolve all numbers as floats in the string of comma-separated numbers. Round the floats higher than 4 decimal place(d.p) down to 4 d.p. + +Answer: 0.0 +Metadata: {'polynomial_expr': '-127*w', 'variable': 'w', 'degree': 1, 'real_solutions': [0.0]} Example 2: Question: Determine the real value(s) of b that satisfies: 86*b**2 - 2*b - 13 = 0 -Answer: [-0.3773425275273891, 0.4005983414808775] -Metadata: {'polynomial_expr': '86*b**2 - 2*b - 13', 'variable': 'b', 'degree': 2, 'real_solutions': [-0.3773425275273891, 0.4005983414808775]} +In solving the equations, please abide by the following instruction: +## 1. All answers should be comma-separated. For example "-0.3773, 0.4005" etc. 
+## 2. In cases where your answer is b = 2 + sqrt(4560) / 172 and b = 2 - sqrt(4560) / 172. Since b can be 2 numbers, resolve your answer like this instead, "-0.3773, 0.4005". +## 3. If there are no real values of i that satisfy the equation, report your answer as empty string, "". +## 4. If there are 2 answers, resolve the answers as comma-separated floats of 2 numbers, if 3 answers, make it comma-separated floats of 3 numbers. +## 5. Resolve all numbers as floats in the string of comma-separated numbers. Round the floats higher than 4 decimal place(d.p) down to 4 d.p. + +Answer: -0.3773, 0.4006 +Metadata: {'polynomial_expr': '86*b**2 - 2*b - 13', 'variable': 'b', 'degree': 2, 'real_solutions': [-0.3773, 0.4006]} Example 3: -Question: Determine the real value(s) of n that satisfies: 71*n**3 - 2*n - 29 = 0 -Answer: [0.7546129960163634] -Metadata: {'polynomial_expr': '71*n**3 - 2*n - 29', 'variable': 'n', 'degree': 3, 'real_solutions': [0.7546129960163634]} +Question: Determine the real value(s) of p that satisfies: 71*p**3 - 2*p - 29 = 0 +In solving the equations, please abide by the following instruction: +## 1. All answers should be comma-separated. For example "-0.3773, 0.4005" etc. +## 2. In cases where your answer is b = 2 + sqrt(4560) / 172 and b = 2 - sqrt(4560) / 172. Since b can be 2 numbers, resolve your answer like this instead, "-0.3773, 0.4005". +## 3. If there are no real values of i that satisfy the equation, report your answer as empty string, "". +## 4. If there are 2 answers, resolve the answers as comma-separated floats of 2 numbers, if 3 answers, make it comma-separated floats of 3 numbers. +## 5. Resolve all numbers as floats in the string of comma-separated numbers. Round the floats higher than 4 decimal place(d.p) down to 4 d.p. 
+ +Answer: 0.7546 +Metadata: {'polynomial_expr': '71*p**3 - 2*p - 29', 'variable': 'p', 'degree': 3, 'real_solutions': [0.7546]} ```` @@ -2762,21 +2778,153 @@ Example tasks: ```` Example 1: Question: Calculate the following: (65*x - 72)*(105*x - 125) +In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplcation in your reasoning steps and even in reporting answers. + Answer: 6825*x**2 - 15685*x + 9000 Metadata: {'polynomial_expr': '(65*x - 72)*(105*x - 125)', 'single_variable': True, 'result': '6825*x**2 - 15685*x + 9000'} Example 2: Question: Calculate the following: (-9*x**2 - 28*x)*(86*x**2 - 2*x - 13) +In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplcation in your reasoning steps and even in reporting answers. + Answer: -774*x**4 - 2390*x**3 + 173*x**2 + 364*x Metadata: {'polynomial_expr': '(-9*x**2 - 28*x)*(86*x**2 - 2*x - 13)', 'single_variable': True, 'result': '-774*x**4 - 2390*x**3 + 173*x**2 + 364*x'} Example 3: Question: Calculate the following: (43 - 91*x)*(3*x**2 - 10*x)*(71*x**3 - 2*x - 29) +In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplcation in your reasoning steps and even in reporting answers. 
+ Answer: -19383*x**6 + 73769*x**5 - 29984*x**4 + 5839*x**3 - 29271*x**2 + 12470*x Metadata: {'polynomial_expr': '(43 - 91*x)*(3*x**2 - 10*x)*(71*x**3 - 2*x - 29)', 'single_variable': True, 'result': '-19383*x**6 + 73769*x**5 - 29984*x**4 + 5839*x**3 - 29271*x**2 + 12470*x'} ```` +### pool_matrix +Generates Pool Matrix exercises with configurable difficulty + +Default configuration: +```python +min_rows = 2 +min_cols = 2 +max_rows = 10 +max_cols = 10 +max_pool_size = 3 +size = 500 +seed = 42 +``` + +Example tasks: +```` +Example 1: +Question: Your job is to perform max/average pooling on the given matrix. +The stride is equal to the kernel size, meaning there is no overlap between the pooling regions. + +Example 1: +- Input: Perform max pooling on the following matrix with a kernel size of 2: +1 2 3 4 +5 6 7 8 +9 10 11 12 +13 14 15 16 +- Output: +6 8 +14 16 + +Example 2: +- Input: Perform average pooling on the following matrix with a kernel size of 2: +1 2 3 4 +5 6 7 8 +9 10 11 12 +13 14 15 16 +- Output: +3.5 5.5 +11.5 13.5 + +Perform max pooling on the following matrix with a kernel size of 3: +6 3 +7 4 +6 9 + +Answer: 9 +Metadata: {'matrix': [[6, 3], [7, 4], [6, 9]], 'pool_type': 'max', 'pool_size': 3, 'solution': [[9]]} + +Example 2: +Question: Your job is to perform max/average pooling on the given matrix. +The stride is equal to the kernel size, meaning there is no overlap between the pooling regions. 
+ +Example 1: +- Input: Perform max pooling on the following matrix with a kernel size of 2: +1 2 3 4 +5 6 7 8 +9 10 11 12 +13 14 15 16 +- Output: +6 8 +14 16 + +Example 2: +- Input: Perform average pooling on the following matrix with a kernel size of 2: +1 2 3 4 +5 6 7 8 +9 10 11 12 +13 14 15 16 +- Output: +3.5 5.5 +11.5 13.5 + +Perform average pooling on the following matrix with a kernel size of 3: +4 0 1 5 0 3 +1 2 7 0 3 2 + +Answer: 2.5 2.17 +Metadata: {'matrix': [[4, 0, 1, 5, 0, 3], [1, 2, 7, 0, 3, 2]], 'pool_type': 'average', 'pool_size': 3, 'solution': [[2.5, 2.1666666666666665]]} + +Example 3: +Question: Your job is to perform max/average pooling on the given matrix. +The stride is equal to the kernel size, meaning there is no overlap between the pooling regions. + +Example 1: +- Input: Perform max pooling on the following matrix with a kernel size of 2: +1 2 3 4 +5 6 7 8 +9 10 11 12 +13 14 15 16 +- Output: +6 8 +14 16 + +Example 2: +- Input: Perform average pooling on the following matrix with a kernel size of 2: +1 2 3 4 +5 6 7 8 +9 10 11 12 +13 14 15 16 +- Output: +3.5 5.5 +11.5 13.5 + +Perform average pooling on the following matrix with a kernel size of 3: +4 3 1 3 0 4 3 8 7 7 +6 9 3 7 3 3 6 5 4 5 +9 1 8 7 4 5 3 0 4 9 +2 8 8 6 2 0 3 4 8 3 +2 2 1 2 2 9 8 1 8 9 +4 2 4 6 7 5 5 6 2 5 +1 8 9 1 8 0 9 3 5 9 +5 0 8 0 4 2 9 7 6 6 + +Answer: 4.89 4.0 4.44 7.0 +3.67 4.33 5.0 5.67 +5.17 2.5 6.5 7.5 +Metadata: {'matrix': [[4, 3, 1, 3, 0, 4, 3, 8, 7, 7], [6, 9, 3, 7, 3, 3, 6, 5, 4, 5], [9, 1, 8, 7, 4, 5, 3, 0, 4, 9], [2, 8, 8, 6, 2, 0, 3, 4, 8, 3], [2, 2, 1, 2, 2, 9, 8, 1, 8, 9], [4, 2, 4, 6, 7, 5, 5, 6, 2, 5], [1, 8, 9, 1, 8, 0, 9, 3, 5, 9], [5, 0, 8, 0, 4, 2, 9, 7, 6, 6]], 'pool_type': 'average', 'pool_size': 3, 'solution': [[4.888888888888889, 4.0, 4.444444444444445, 7.0], [3.6666666666666665, 4.333333333333333, 5.0, 5.666666666666667], [5.166666666666667, 2.5, 6.5, 7.5]]} + +```` + ### power_function Generates Power Function exercises with configurable 
difficulty @@ -2839,6 +2987,38 @@ Metadata: {'number': 420, 'factors': [2, 2, 3, 5, 7]} ```` +### products +Generates multiplication tasks with configurable number of terms + +Default configuration: +```python +min_terms = 2 +max_terms = 2 +min_digits = 1 +max_digits = 5 +seed = 42 +size = 500 +``` + +Example tasks: +```` +Example 1: +Question: 4 * 3 = +Answer: 12 +Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 1}, 'expression': '4 * 3'} + +Example 2: +Question: 812 * 880 = +Answer: 714560 +Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 3}, 'expression': '812 * 880'} + +Example 3: +Question: 81037 * 25290 = +Answer: 2049425730 +Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 5}, 'expression': '81037 * 25290'} + +```` + ### propositional_logic Generates propositional logic reasoning tasks @@ -3874,16 +4054,28 @@ Example tasks: ```` Example 1: Question: Find the indefinite integral: ∫ 70*x**6 + 12*x**2/5 dx +In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. + Answer: 10*x**7 + 4*x**3/5 + C Metadata: {'integrand': '70*x**6 + 12*x**2/5', 'variable': 'x', 'expected_answer_expression': 10*x**7 + 4*x**3/5} Example 2: Question: Find the indefinite integral: ∫ 49*x**6/10 + 48*x**5 - 4*x - 10/9 dx +In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. 
For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. + Answer: 7*x**7/10 + 8*x**6 - 2*x**2 - 10*x/9 + C Metadata: {'integrand': '49*x**6/10 + 48*x**5 - 4*x - 10/9', 'variable': 'x', 'expected_answer_expression': 7*x**7/10 + 8*x**6 - 2*x**2 - 10*x/9} Example 3: Question: Find the indefinite integral: ∫ -28*X**3 + 8*X dx +In addition, when doing calculation, use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead of ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplication in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. + Answer: -7*X**4 + 4*X**2 + C Metadata: {'integrand': '-28*X**3 + 8*X', 'variable': 'X', 'expected_answer_expression': -7*X**4 + 4*X**2} @@ -4094,6 +4286,321 @@ Metadata: {'matrix': [[6, 4, 1, 8, 2, 6, 2], [9, 5, 1, 3, 4, 8, 0], [1, 2, 1, 4, ```` +### string_insertion +Generates String Insertion exercises with configurable difficulty + +Default configuration: +```python +min_string_length = 5 +max_string_length = 20 +size = 500 +seed = 42 +``` + +Example tasks: +```` +Example 1: +Question: Given a string consisting of characters A, B, C, D, and E, your job is to insert a character according to the following pattern: +1. If there is a substring ABCD in the string, insert the character A after the substring. +2. If there is a substring BCDE in the string, insert the character B after the substring. +3. If there is a substring CDEA in the string, insert the character C after the substring. +4. If there is a substring DEAB in the string, insert the character D after the substring. +5. If there is a substring EABC in the string, insert the character E after the substring. 
+ +Once you have inserted a character, you have to skip over the substring and the inserted character and continue the search from the next character. + +Example +- Input: DDABCDEEDEAB +- Output: DDABCDAEEDEABD +- Explanation: + - There are two inserted characters: DDABCD[A]EEDEAB[D] (shown in square brackets) + - First, we insert A after ABCD. + - Even though with the newly inserted 'A' we can obtain the substring BCD[A], we can't use it to insert another character. + - Lastly, we insert D after DEAB. + +Given the following string, provide the answer after inserting the characters according to the pattern: ['A', 'C', 'B', 'B', 'B', 'A', 'E', 'A'] + +Answer: ACBBBAEA +Metadata: {'string': ['A', 'C', 'B', 'B', 'B', 'A', 'E', 'A'], 'solution': 'ACBBBAEA'} + +Example 2: +Question: Given a string consisting of characters A, B, C, D, and E, your job is to insert a character according to the following pattern: +1. If there is a substring ABCD in the string, insert the character A after the substring. +2. If there is a substring BCDE in the string, insert the character B after the substring. +3. If there is a substring CDEA in the string, insert the character C after the substring. +4. If there is a substring DEAB in the string, insert the character D after the substring. +5. If there is a substring EABC in the string, insert the character E after the substring. + +Once you have inserted a character, you have to skip over the substring and the inserted character and continue the search from the next character. + +Example +- Input: DDABCDEEDEAB +- Output: DDABCDAEEDEABD +- Explanation: + - There are two inserted characters: DDABCD[A]EEDEAB[D] (shown in square brackets) + - First, we insert A after ABCD. + - Even though with the newly inserted 'A' we can obtain the substring BCD[A], we can't use it to insert another character. + - Lastly, we insert D after DEAB. 
+ +Given the following string, provide the answer after inserting the characters according to the pattern: ['C', 'B', 'D', 'C', 'A', 'D'] + +Answer: CBDCAD +Metadata: {'string': ['C', 'B', 'D', 'C', 'A', 'D'], 'solution': 'CBDCAD'} + +Example 3: +Question: Given a string consisting of characters A, B, C, D, and E, your job is to insert a character according to the following pattern: +1. If there is a substring ABCD in the string, insert the character A after the substring. +2. If there is a substring BCDE in the string, insert the character B after the substring. +3. If there is a substring CDEA in the string, insert the character C after the substring. +4. If there is a substring DEAB in the string, insert the character D after the substring. +5. If there is a substring EABC in the string, insert the character E after the substring. + +Once you have inserted a character, you have to skip over the substring and the inserted character and continue the search from the next character. + +Example +- Input: DDABCDEEDEAB +- Output: DDABCDAEEDEABD +- Explanation: + - There are two inserted characters: DDABCD[A]EEDEAB[D] (shown in square brackets) + - First, we insert A after ABCD. + - Even though with the newly inserted 'A' we can obtain the substring BCD[A], we can't use it to insert another character. + - Lastly, we insert D after DEAB. 
+ +Given the following string, provide the answer after inserting the characters according to the pattern: ['E', 'E', 'A', 'B', 'D', 'B', 'C', 'A', 'B', 'A', 'E', 'A', 'A', 'B', 'E', 'C', 'D', 'E'] + +Answer: EEABDBCABAEAABECDE +Metadata: {'string': ['E', 'E', 'A', 'B', 'D', 'B', 'C', 'A', 'B', 'A', 'E', 'A', 'A', 'B', 'E', 'C', 'D', 'E'], 'solution': 'EEABDBCABAEAABECDE'} + +```` + +### string_manipulation +Generates String Manipulation exercises with configurable difficulty + +Default configuration: +```python +min_string_length = 5 +max_string_length = 20 +min_num_rules = 3 +max_num_rules = 8 +size = 500 +seed = 42 +``` + +Example tasks: +```` +Example 1: +Question: Your job is to repeatedly transform a string according to a set of rules until no further transformations can be performed, or a state is repeated. + +Evaluate the following rules in order, and apply the first applicable rule to the string: +1. If the string contains an even number of 'b's (and at least one 'b'), append 'ab' at the end. +2. If the string prefix is 'bc', delete the first two characters and append 'aa' to the end. +3. If the string ends with 'ca', remove the last character. +4. If the string suffix is 'ac', replace it with 'cb'. +5. If the string prefix is 'ab', replace it with 'ca'. +6. If the string contains 'ca' (not at the start), remove the first occurrence found after the first character. +7. If the string suffix is 'bb', delete the last two characters. +8. If the string starts with 'ac', replace the first two characters with 'zz'. + +Once you have applied a rule, repeat the process with the new string until no further transformations can be performed (i.e. the string doesn't change), or a state is repeated. +If a state is repeated, the process is terminated, and the repeated state is discarded (i.e. is not considered as the final answer) and the state before the repeated state is considered as the final answer. + +Example: +- Input: + - String: abbac + - Rules: + 1.
If the string prefix is 'ab', replace it with 'ca'. + 2. If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end. + 3. If the string ends with 'aa', replace it with 'cc'. +- Output: bbbacc +- Explanation: + - In the first iteration, rule 1 is applied to the string abbac, resulting in cabac + - In the second iteration, rule 1 doesn't apply, but rule 2 is applied to the string cabac, resulting in bbbacc + - In the third iteration, none of the rules (1, 2, 3) apply, so the process is terminated, and the final answer is bbbacc + +Transform the following string according to the above list of rules: +acbaaaca + +Answer: zzbaacbab +Metadata: {'string': 'acbaaaca', 'solution': 'zzbaacbab', 'states': ['acbaaaca', 'acbaaac', 'acbaacb', 'acbaacbab', 'zzbaacbab'], 'selected_rules': ["If the string contains an even number of 'b's (and at least one 'b'), append 'ab' at the end.", "If the string prefix is 'bc', delete the first two characters and append 'aa' to the end.", "If the string ends with 'ca', remove the last character.", "If the string suffix is 'ac', replace it with 'cb'.", "If the string prefix is 'ab', replace it with 'ca'.", "If the string contains 'ca' (not at the start), remove the first occurrence found after the first character.", "If the string suffix is 'bb', delete the last two characters.", "If the string starts with 'ac', replace the first two characters with 'zz'."]} + +Example 2: +Question: Your job is to repeatedly transform a string according to a set of rules until no further transformations can be performed, or a state is repeated. + +Evaluate the following rules in order, and apply the first applicable rule to the string: +1. If the string suffix is 'bb', delete the last two characters. +2. If the string starts with 'bb', remove the second character. +3. If the string ends with 'aa', replace it with 'cc'. +4. If the string prefix is 'ab', replace it with 'ca'. +5. If the string ends with 'ca', remove the last character. +6.
If the string contains 'bca', delete the first occurrence entirely. +7. If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end. +8. If the string length is greater than 15, remove the middle character. + +Once you have applied a rule, repeat the process with the new string until no further transformations can be performed (i.e. the string doesn't change), or a state is repeated. +If a state is repeated, the process is terminated, and the repeated state is discarded (i.e. is not considered as the final answer) and the state before the repeated state is considered as the final answer. + +Example: +- Input: + - String: abbac + - Rules: + 1. If the string prefix is 'ab', replace it with 'ca'. + 2. If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end. + 3. If the string ends with 'aa', replace it with 'cc'. +- Output: bbbacc +- Explanation: + - In the first iteration, rule 1 is applied to the string abbac, resulting in cabac + - In the second iteration, rule 1 doesn't apply, but rule 2 is applied to the string cabac, resulting in bbbacc + - In the third iteration, none of the rules (1, 2, 3) apply, so the process is terminated, and the final answer is bbbacc + +Transform the following string according to the above list of rules: +bcabbc + +Answer: bc +Metadata: {'string': 'bcabbc', 'solution': 'bc', 'states': ['bcabbc', 'bbc', 'bc'], 'selected_rules': ["If the string suffix is 'bb', delete the last two characters.", "If the string starts with 'bb', remove the second character.", "If the string ends with 'aa', replace it with 'cc'.", "If the string prefix is 'ab', replace it with 'ca'.", "If the string ends with 'ca', remove the last character.", "If the string contains 'bca', delete the first occurrence entirely.", "If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end.", 'If the string length is greater than 15, remove the middle character.']} + +Example 3: +Question: Your job is to repeatedly
transform a string according to a set of rules until no further transformations can be performed, or a state is repeated. + +Evaluate the following rules in order, and apply the first applicable rule to the string: +1. If the string contains 'acb', replace the first occurrence with its reverse ('bca'). +2. If the string length is greater than 15, remove the middle character. +3. If the string starts with 'ac', replace the first two characters with 'zz'. +4. If the string ends with 'ba', replace it with 'ab'. +5. If the string starts with 'cc', remove the first two characters. +6. If the string suffix is 'ac', replace it with 'cb'. +7. If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end. +8. If the string prefix is 'cb', replace it with 'aa' and delete the last character. + +Once you have applied a rule, repeat the process with the new string until no further transformations can be performed (i.e. the string doesn't change), or a state is repeated. +If a state is repeated, the process is terminated, and the repeated state is discarded (i.e. is not considered as the final answer) and the state before the repeated state is considered as the final answer. + +Example: +- Input: + - String: abbac + - Rules: + 1. If the string prefix is 'ab', replace it with 'ca'. + 2. If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end. + 3. If the string ends with 'aa', replace it with 'cc'. 
+- Output: bbbacc +- Explanation: + - In the first iteration, rule 1 is applied to the string abbac, resulting in cabac + - In the second iteration, rule 1 doesn't apply, but rule 2 is applied to the string cabac, resulting in bbbacc + - In the third iteration, none of the rules (1, 2, 3) apply, so the process is terminated, and the final answer is bbbacc + +Transform the following string according to the above list of rules: +cccaababaaacaaaccb + +Answer: bbababcaaaccbc +Metadata: {'string': 'cccaababaaacaaaccb', 'solution': 'bbababcaaaccbc', 'states': ['cccaababaaacaaaccb', 'cccaababaacaaaccb', 'cccaababacaaaccb', 'cccaababcaaaccb', 'caababcaaaccb', 'bbababcaaaccbc'], 'selected_rules': ["If the string contains 'acb', replace the first occurrence with its reverse ('bca').", 'If the string length is greater than 15, remove the middle character.', "If the string starts with 'ac', replace the first two characters with 'zz'.", "If the string ends with 'ba', replace it with 'ab'.", "If the string starts with 'cc', remove the first two characters.", "If the string suffix is 'ac', replace it with 'cb'.", "If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end.", "If the string prefix is 'cb', replace it with 'aa' and delete the last character."]} + +```` + +### string_synthesis +Generates String Synthesis exercises with configurable difficulty + +Default configuration: +```python +min_initial_blocks = 0 +max_initial_blocks = 5 +max_iterations = 1000 +size = 500 +seed = 42 +``` + +Example tasks: +```` +Example 1: +Question: There are nine different blocks [A] [B] [C] {A} {B} {C} (A) (B) (C) +1. One [A], one [B], and one [C] can be combined to form one {A}. +2. One [A] and one [B] can be combined to form one {C}. +3. One [B] and one [C] can be combined to form one {B}. +4. Two [C] can be combined to form one {C}. +5. One {A} and one {C} can be combined to form one (A) and one (B). +6. Two {B} can be combined to form one (C).
+ +Given a certain number of initial blocks, your job is to cycle through the rules 1-6 above, synthesizing new blocks until no more rules can be applied, or until a state (counts of each block type) is repeated. +In the case a state is repeated the answer is the state before the repetition! + +The output should be the count of each block type after the rules have been applied in the order they are listed above. +For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {A} 2 {B} 0 {C} 0 (A) 0 (B) 1 (C). + +Example: +- Input: You have 2 [A], 3 [B], and 3 [C]. +- Output: 0 0 0 2 1 0 0 0 0 +- Explanation: + 0. Initial state: 2 3 3 0 0 0 0 0 0 + 1. We can apply Rule 1 and obtain 1 {A}. New state: 1 2 2 1 0 0 0 0 0 + 2. We can apply Rule 1 again and obtain 1 {A}. New state 0 1 1 2 0 0 0 0 0 + 3. We can apply Rule 3 and obtain 1 {B}. New state 0 0 0 2 1 0 0 0 0 + 4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0 + +Now, you have 5 [A], 0 [B], and 0 [C] blocks. Provide the count of each block type after applying the above rules. + +Answer: 5 0 0 0 0 0 0 0 0 +Metadata: {'states': [[5, 0, 0, 0, 0, 0, 0, 0, 0]], 'solution': [5, 0, 0, 0, 0, 0, 0, 0, 0]} + +Example 2: +Question: There are nine different blocks [A] [B] [C] {A} {B} {C} (A) (B) (C) +1. One [A], one [B], and one [C] can be combined to form one {A}. +2. One [A] and one [B] can be combined to form one {C}. +3. One [B] and one [C] can be combined to form one {B}. +4. Two [C] can be combined to form one {C}. +5. One {A} and one {C} can be combined to form one (A) and one (B). +6. Two {B} can be combined to form one (C). + +Given a certain number of initial blocks, your job is to cycle through the rules 1-6 above, synthesizing new blocks until no more rules can be applied, or until a state (counts of each block type) is repeated. +In the case a state is repeated the answer is the state before the repetition! 
+ +The output should be the count of each block type after the rules have been applied in the order they are listed above. +For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {A} 2 {B} 0 {C} 0 (A) 0 (B) 1 (C). + +Example: +- Input: You have 2 [A], 3 [B], and 3 [C]. +- Output: 0 0 0 2 1 0 0 0 0 +- Explanation: + 0. Initial state: 2 3 3 0 0 0 0 0 0 + 1. We can apply Rule 1 and obtain 1 {A}. New state: 1 2 2 1 0 0 0 0 0 + 2. We can apply Rule 1 again and obtain 1 {A}. New state 0 1 1 2 0 0 0 0 0 + 3. We can apply Rule 3 and obtain 1 {B}. New state 0 0 0 2 1 0 0 0 0 + 4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0 + +Now, you have 0 [A], 2 [B], and 5 [C] blocks. Provide the count of each block type after applying the above rules. + +Answer: 0 0 1 0 0 1 0 0 1 +Metadata: {'states': [[0, 2, 5, 0, 0, 0, 0, 0, 0], [0, 1, 4, 0, 1, 0, 0, 0, 0], [0, 0, 3, 0, 2, 0, 0, 0, 0], [0, 0, 1, 0, 2, 1, 0, 0, 0], [0, 0, 1, 0, 0, 1, 0, 0, 1]], 'solution': [0, 0, 1, 0, 0, 1, 0, 0, 1]} + +Example 3: +Question: There are nine different blocks [A] [B] [C] {A} {B} {C} (A) (B) (C) +1. One [A], one [B], and one [C] can be combined to form one {A}. +2. One [A] and one [B] can be combined to form one {C}. +3. One [B] and one [C] can be combined to form one {B}. +4. Two [C] can be combined to form one {C}. +5. One {A} and one {C} can be combined to form one (A) and one (B). +6. Two {B} can be combined to form one (C). + +Given a certain number of initial blocks, your job is to cycle through the rules 1-6 above, synthesizing new blocks until no more rules can be applied, or until a state (counts of each block type) is repeated. +In the case a state is repeated the answer is the state before the repetition! + +The output should be the count of each block type after the rules have been applied in the order they are listed above. +For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {A} 2 {B} 0 {C} 0 (A) 0 (B) 1 (C). 
+ +Example: +- Input: You have 2 [A], 3 [B], and 3 [C]. +- Output: 0 0 0 2 1 0 0 0 0 +- Explanation: + 0. Initial state: 2 3 3 0 0 0 0 0 0 + 1. We can apply Rule 1 and obtain 1 {A}. New state: 1 2 2 1 0 0 0 0 0 + 2. We can apply Rule 1 again and obtain 1 {A}. New state 0 1 1 2 0 0 0 0 0 + 3. We can apply Rule 3 and obtain 1 {B}. New state 0 0 0 2 1 0 0 0 0 + 4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0 + +Now, you have 3 [A], 4 [B], and 4 [C] blocks. Provide the count of each block type after applying the above rules. + +Answer: 0 0 0 3 1 0 0 0 0 +Metadata: {'states': [[3, 4, 4, 0, 0, 0, 0, 0, 0], [2, 3, 3, 1, 0, 0, 0, 0, 0], [1, 2, 2, 2, 0, 0, 0, 0, 0], [0, 1, 1, 3, 0, 0, 0, 0, 0], [0, 0, 0, 3, 1, 0, 0, 0, 0]], 'solution': [0, 0, 0, 3, 1, 0, 0, 0, 0]} + +```` + ### sudoku Generates sudoku puzzles with configurable difficulty @@ -4253,7 +4760,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6, Example 2: Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM. Answer: 02:38 -Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 11, 9, 44), 'end_time': datetime.datetime(2025, 2, 11, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'} +Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 14, 9, 44), 'end_time': datetime.datetime(2025, 2, 14, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'} Example 3: Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days. 
diff --git a/reasoning_gym/algebra/intermediate_integration.py b/reasoning_gym/algebra/intermediate_integration.py index 6335b6b7..84e01d9b 100644 --- a/reasoning_gym/algebra/intermediate_integration.py +++ b/reasoning_gym/algebra/intermediate_integration.py @@ -76,6 +76,11 @@ class IntermediateIntegrationDataset(ProceduralDataset): "Calculate the antiderivative: ∫ {integrand} dx", "Evaluate the indefinite integral: ∫ {integrand} dx", ] + self.added_instruction = """ +In addition, when doing calculation, use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. +""" def _get_outer_constant(self, rng: random.Random) -> int: """Helper to generate signed outer constant from config""" @@ -222,9 +227,10 @@ class IntermediateIntegrationDataset(ProceduralDataset): answer = sympy.integrate(integrand, x) answer_str = str(answer) + " + C" + question = rng.choice(self.prompt_template).format(integrand=integrand) + self.added_instruction return { - "question": rng.choice(self.prompt_template).format(integrand=integrand), + "question": question, "answer": answer_str, "metadata": { "integrand": str(integrand), diff --git a/reasoning_gym/algebra/polynomial_equations.py b/reasoning_gym/algebra/polynomial_equations.py index 058d5dbb..cd4842ee 100644 --- a/reasoning_gym/algebra/polynomial_equations.py +++ b/reasoning_gym/algebra/polynomial_equations.py @@ -62,6 +62,14 @@ class PolynomialEquationsDataset(ProceduralDataset): "Determine the real value(s) of {variable} that satisfies: {polynomial_expanded} = 0", "Solve the polynomial equation for real {variable}:\n{polynomial_expanded} = 0", ] + self.added_instruction = """ +In solving the 
equations, please abide by the following instruction: +## 1. All answers should be comma-separated. For example "-0.3773, 0.4005" etc. +## 2. In cases where your answer is b = 2 + sqrt(4560) / 172 and b = 2 - sqrt(4560) / 172. Since b can be 2 numbers, resolve your answer like this instead, "-0.3773, 0.4005". +## 3. If there are no real values of i that satisfy the equation, report your answer as empty string, "". +## 4. If there are 2 answers, resolve the answers as comma-separated floats of 2 numbers, if 3 answers, make it comma-separated floats of 3 numbers. +## 5. Resolve all numbers as floats in the string of comma-separated numbers. Round the floats higher than 4 decimal place(d.p) down to 4 d.p. +""" super().__init__(config=config, seed=config.seed, size=config.size) def __getitem__(self, idx: int) -> dict: @@ -89,19 +97,20 @@ class PolynomialEquationsDataset(ProceduralDataset): for sol in solutions: if sol.is_real: # Evaluate symbolic solution to a floating approximation - real_solutions.append(float(sol.evalf())) + real_solutions.append(round(float(sol.evalf()), 4)) if len(real_solutions) > 0: real_solutions.sort() break answer_str = ", ".join(str(x) for x in real_solutions) + question = ( + rng.choice(self._prompt_templates).format(variable=variable, polynomial_expanded=polynomial_expanded) + + self.added_instruction + ) return { - "question": rng.choice(self._prompt_templates).format( - variable=variable, - polynomial_expanded=polynomial_expanded, - ), + "question": question, "answer": answer_str, "metadata": { "polynomial_expr": str(polynomial_expanded), diff --git a/reasoning_gym/algebra/polynomial_multiplication.py b/reasoning_gym/algebra/polynomial_multiplication.py index 9a74679f..6076c32a 100644 --- a/reasoning_gym/algebra/polynomial_multiplication.py +++ b/reasoning_gym/algebra/polynomial_multiplication.py @@ -61,6 +61,11 @@ class PolynomialMultiplicationDataset(ProceduralDataset): "Simplify this expression: {polynomial_expr}", "Calculate the 
following: {polynomial_expr}", ] + self.added_instruction = """ +In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplcation in your reasoning steps and even in reporting answers. +""" super().__init__(config=config, seed=config.seed, size=config.size) def __getitem__(self, idx: int) -> dict: @@ -79,11 +84,10 @@ class PolynomialMultiplicationDataset(ProceduralDataset): polynomial_expr = sp.prod(polynomials) product = sp.expand(polynomial_expr) + question = rng.choice(self._prompt_templates).format(polynomial_expr=polynomial_expr) + self.added_instruction return { - "question": rng.choice(self._prompt_templates).format( - polynomial_expr=polynomial_expr, - ), + "question": question, "answer": product, "metadata": { "polynomial_expr": str(polynomial_expr), diff --git a/reasoning_gym/algebra/simple_integration.py b/reasoning_gym/algebra/simple_integration.py index a8ca3be2..8dfa775b 100644 --- a/reasoning_gym/algebra/simple_integration.py +++ b/reasoning_gym/algebra/simple_integration.py @@ -41,6 +41,11 @@ class SimpleIntegrationDataset(ProceduralDataset): "Calculate the antiderivative: ∫ {integrand} dx", "Evaluate the indefinite integral: ∫ {integrand} dx", ] + self.added_instruction = """ +In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems +## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2. +## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C]. 
+""" super().__init__(config=config, seed=config.seed, size=config.size) def _generate_coefficient(self, rng: random.Random) -> Fraction: @@ -69,9 +74,10 @@ class SimpleIntegrationDataset(ProceduralDataset): rng = random.Random(self.seed + idx) symbol, polynomial = self._generate_polynomial(rng) derivative = sympy.diff(polynomial, symbol) + question = rng.choice(self._prompt_templates).format(integrand=derivative) + self.added_instruction return { - "question": rng.choice(self._prompt_templates).format(integrand=derivative), + "question": question, "answer": str(polynomial) + " + C", "metadata": { "integrand": str(derivative), diff --git a/reasoning_gym/algorithmic/__init__.py b/reasoning_gym/algorithmic/__init__.py index 9cb50027..7b4baacf 100644 --- a/reasoning_gym/algorithmic/__init__.py +++ b/reasoning_gym/algorithmic/__init__.py @@ -11,6 +11,8 @@ from .base_conversion import BaseConversionConfig, BaseConversionDataset from .binary_matrix import BinaryMatrixConfig, BinaryMatrixDataset from .caesar_cipher import CaesarCipherConfig, CaesarCipherDataset from .count_primes import CountPrimesConfig, CountPrimesDataset +from .game_of_life import GameOfLifeConfig, GameOfLifeDataset +from .graph_color import GraphColorConfig, GraphColorDataset from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset from .isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset from .letter_counting import LetterCountingConfig, LetterCountingDataset @@ -28,6 +30,7 @@ from .spiral_matrix import SpiralMatrixConfig, SpiralMatrixDataset from .string_insertion import StringInsertionConfig, StringInsertionDataset from .string_manipulation import StringManipulationConfig, StringManipulationDataset from .string_splitting import StringSplittingConfig, StringSplittingDataset +from .string_synthesis import StringSynthesisConfig, StringSynthesisDataset from .word_ladder import WordLadderConfig, WordLadderDataset from .word_sequence_reversal import 
WordSequenceReversalConfig, WordSequenceReversalDataset from .word_sorting import TextTransformation, WordSortingConfig, WordSortingDataset @@ -39,6 +42,8 @@ __all__ = [ "BaseConversionDataset", "CaesarCipherConfig", "CaesarCipherDataset", + "GameOfLifeConfig", + "GameOfLifeDataset", "LetterCountingConfig", "LetterCountingDataset", "LetterJumbleConfig", @@ -78,10 +83,14 @@ __all__ = [ "ABDataset", "CountPrimesConfig", "CountPrimesDataset", + "GraphColorConfig", + "GraphColorDataset", "StringInsertionConfig", "StringInsertionDataset", "StringManipulationConfig", "StringManipulationDataset", "StringSplittingConfig", "StringSplittingDataset", + "StringSynthesisConfig", + "StringSynthesisDataset", ] diff --git a/reasoning_gym/games/game_of_life.py b/reasoning_gym/algorithmic/game_of_life.py similarity index 98% rename from reasoning_gym/games/game_of_life.py rename to reasoning_gym/algorithmic/game_of_life.py index 2b5e369b..c43f8345 100644 --- a/reasoning_gym/games/game_of_life.py +++ b/reasoning_gym/algorithmic/game_of_life.py @@ -67,7 +67,7 @@ class GameOfLifeDataset(ProceduralDataset): ) rows = [json.dumps(board[0, i].tolist(), separators=(",", ":")) for i in range(board.shape[1])] - board_str = "[" + ", \n ".join(rows) + "]" + board_str = "[" + ",\n ".join(rows) + "]" final_step = evolved[-1] final_step_list = final_step.tolist() diff --git a/reasoning_gym/algorithmic/graph_color.py b/reasoning_gym/algorithmic/graph_color.py new file mode 100644 index 00000000..39244944 --- /dev/null +++ b/reasoning_gym/algorithmic/graph_color.py @@ -0,0 +1,236 @@ +import json +from dataclasses import dataclass +from random import Random +from typing import Dict, Optional + +from ..factory import ProceduralDataset, register_dataset + + +def generate_random_graph(rng, num_vertices, edge_probability=0.3): + """ + Generate an undirected random graph. + + Args: + num_vertices (int): The number of vertices. 
+ edge_probability (float): Probability for an edge to exist between any two vertices. + + Returns: + tuple: (vertices, edges) + - vertices: A list of vertex identifiers (0 to num_vertices-1). + - edges: A list of tuples (u, v) representing undirected edges. + """ + vertices = list(range(num_vertices)) + edges = [] + for i in range(num_vertices): + for j in range(i + 1, num_vertices): + if rng.random() < edge_probability: + edges.append((i, j)) + return vertices, edges + + +def generate_graph_coloring_puzzle(rng, num_vertices=10, edge_probability=0.3, num_colors=3): + """ + Generates a graph coloring puzzle. + + Args: + num_vertices (int): Number of vertices in the graph. + edge_probability (float): Probability that an edge exists between any two vertices. + num_colors (int): Number of allowed colors. + + Returns: + dict: A dictionary with the following keys: + - "vertices": List of vertices. + - "edges": List of edges (tuples). + - "num_colors": The number of allowed colors. + - "color_options": A list of allowed colors (e.g., [1, 2, ..., num_colors]). + """ + vertices, edges = generate_random_graph(rng, num_vertices, edge_probability) + puzzle = { + "vertices": vertices, + "edges": edges, + "num_colors": num_colors, + "color_options": list(range(1, num_colors + 1)), + } + return puzzle + + +def verify_graph_coloring_solution(puzzle, coloring): + """ + Verifies that a candidate coloring is a valid solution to the graph coloring puzzle. + + Args: + puzzle (dict): The puzzle specification containing 'vertices', 'edges', and 'color_options'. + coloring (dict): A dictionary mapping each vertex to a color. The keys can be integers or strings. + + Returns: + tuple: (is_valid, message) where is_valid is a boolean and message is a string explanation. + """ + vertices = puzzle["vertices"] + edges = puzzle["edges"] + allowed_colors = set(puzzle["color_options"]) + + # Helper function to get a vertex's color regardless of key type. 
+ def get_color(vertex): + # If the key matches as-is, return it. + if vertex in coloring: + return coloring[vertex] + # If the vertex is an integer and its string form is a key, return that. + elif isinstance(vertex, int) and str(vertex) in coloring: + return coloring[str(vertex)] + # If the vertex is a string, try to convert it to int and look it up. + elif isinstance(vertex, str): + try: + vertex_int = int(vertex) + if vertex_int in coloring: + return coloring[vertex_int] + except ValueError: + pass + # If no matching key is found, signal an error. + raise KeyError(f"Vertex {vertex} has not been assigned a color.") + + # Check that every vertex has been assigned a color. + for vertex in vertices: + try: + get_color(vertex) + except KeyError: + return False, f"Not all vertices have been assigned a color (missing vertex {vertex})." + + # Check that only allowed colors are used. + for vertex in vertices: + try: + color = get_color(vertex) + except KeyError as e: + return False, str(e) + if color not in allowed_colors: + return False, f"Vertex {vertex} uses an invalid color: {color}." + + # Ensure that adjacent vertices do not share the same color. + for u, v in edges: + try: + color_u = get_color(u) + color_v = get_color(v) + except KeyError as e: + return False, str(e) + if color_u == color_v: + return False, f"Adjacent vertices {u} and {v} both have color {color_u}." + + return True, "The coloring is valid." + + +def greedy_graph_coloring(puzzle): + """ + Attempts to color the graph using a simple greedy algorithm. + (Note: This may fail if the graph requires more than the given number of colors.) + + Args: + puzzle (dict): The puzzle specification. + + Returns: + dict or None: A dictionary mapping vertices to colors if successful; otherwise, None. + """ + vertices = puzzle["vertices"] + edges = puzzle["edges"] + color_options = puzzle["color_options"] + + # Build an adjacency list for each vertex. 
+ adjacency = {v: set() for v in vertices} + for u, v in edges: + adjacency[u].add(v) + adjacency[v].add(u) + + coloring = {} + for v in vertices: + # Find colors already used by neighbors. + neighbor_colors = {coloring.get(neighbor) for neighbor in adjacency[v] if neighbor in coloring} + # Pick the first available color not used by any neighbor. + available = [color for color in color_options if color not in neighbor_colors] + if not available: + return None # Failed to color with the given number of colors. + coloring[v] = available[0] + return coloring + + +@dataclass +class GraphColorConfig: + """Configuration for GraphColor puzzle generation""" + + num_colors: int = 4 + num_vertices: int = 10 + edge_probability: float = 0.4 + seed: Optional[int] = None + size: int = 500 + + def validate(self): + """Validate configuration parameters""" + assert self.edge_probability < 1, "edge_probability must be less than 1" + + +class GraphColorDataset(ProceduralDataset): + """Generates graph coloring problems with configurable parameters""" + + def __init__(self, config: GraphColorConfig): + super().__init__(config=config, seed=config.seed, size=config.size) + + def __getitem__(self, idx: int) -> dict: + """Generate a single GraphColor task + + Returns: + dict with keys: + - question: str, the task description + - answer: str, a solution string + - metadata: dict with generation parameters + """ + rng = Random(self.seed + idx) + + puzzle = None + solution = None + while solution is None: + puzzle = generate_graph_coloring_puzzle( + rng=rng, + num_vertices=self.config.num_vertices, + edge_probability=self.config.edge_probability, + num_colors=self.config.num_colors, + ) + solution = greedy_graph_coloring(puzzle) + + edges = str(puzzle["edges"]) + question = f"""Please provide a coloring for this graph such that every vertex is not connected to a vertex of the same color. 
The graph has these properties: + +Vertices: {puzzle["vertices"]} +Edges: {edges} +Possible colors: {puzzle["color_options"]} + +Return your solution as a JSON map of verteces to colors. (For example: {{0: 1, 1: 2, 2: 3}}) +""" + + return { + "question": question, + "answer": None, + "metadata": {"possible_answer": solution, "puzzle": puzzle}, + } + + def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: + """Determine if the solution provided solves the GraphColor task. + + The function awards 1.0 for a correct answer. + + Args: + answer (Optional[str]): The user's answer. + entry (Dict[str, any]): The original dataset entry containing the correct answer. + + Returns: + float: The computed score between 0.0 and 1.0. + """ + + if answer == None: + return 0.0 + + danswer = json.loads(answer) + solved, failure = verify_graph_coloring_solution(entry["metadata"]["puzzle"], danswer) + if not solved: + return 0.01 + else: + return 1.0 # Yay + + +register_dataset("graph_color", GraphColorDataset, GraphColorConfig) diff --git a/reasoning_gym/algorithmic/letter_jumble.py b/reasoning_gym/algorithmic/letter_jumble.py index f92d8f2b..728c9c67 100644 --- a/reasoning_gym/algorithmic/letter_jumble.py +++ b/reasoning_gym/algorithmic/letter_jumble.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from random import Random -from typing import Optional +from typing import Dict, Optional from reasoning_gym.data import read_data_file @@ -99,5 +99,27 @@ class LetterJumbleDataset(ProceduralDataset): }, } + def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: + """Determine if the solution provided solves this task. + + The function awards 1.0 for a correct answer. + + Args: + answer (Optional[str]): The user's answer. + entry (Dict[str, any]): The original dataset entry containing the correct answer. + + Returns: + float: The computed score between 0.0 and 1.0. 
+ """ + + if answer == None: + return 0.0 + + s_answer = answer.strip().lower() + if not s_answer == entry["answer"].strip().lower(): + return 0.01 + else: + return 1.0 + register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig) diff --git a/reasoning_gym/algorithmic/number_sorting.py b/reasoning_gym/algorithmic/number_sorting.py index d922aa74..f906d230 100644 --- a/reasoning_gym/algorithmic/number_sorting.py +++ b/reasoning_gym/algorithmic/number_sorting.py @@ -34,6 +34,11 @@ class NumberSortingDataset(ProceduralDataset): def __init__(self, config: NumberSortingConfig): super().__init__(config=config, seed=config.seed, size=config.size) + self.added_instruction = """ +Please follow the instruction below: +## 1. Let all your answers be a list of numbers. Instead of reporting your answer as -69, -13, 1, 7, 11, 43, 59, 61, use ['-69', '-13', '1', '7', '11', '43', '59', '61'] instead +## 2. Convert all numbers in the square brackets as strings. For example, ['-69', '-13', '1', '7', '11', '43', '59', '61'] +""" def _format_number(self, num: float, decimals: int) -> str: """Format number with specified decimal places""" @@ -78,9 +83,10 @@ class NumberSortingDataset(ProceduralDataset): is_ascending = rng.choice([True, False]) direction = "ascending" if is_ascending else "descending" answer = asc_answer if is_ascending else desc_answer + question = f"Sort these numbers in {direction} order: {', '.join(number_strs)}" + self.added_instruction return { - "question": f"Sort these numbers in {direction} order: {', '.join(number_strs)}", + "question": question, "answer": str(answer), "metadata": {"original_numbers": number_strs, "direction": direction, "sorted_numbers": answer}, } diff --git a/reasoning_gym/algorithmic/palindrome_generation.py b/reasoning_gym/algorithmic/palindrome_generation.py index a663770b..c17e8751 100644 --- a/reasoning_gym/algorithmic/palindrome_generation.py +++ b/reasoning_gym/algorithmic/palindrome_generation.py @@ -53,7 +53,7 @@ 
class PalindromeDataset(ProceduralDataset): palindrome = self._assemble_palindrome(letters) question_str = ( - "Rearrange these letters to form a palindrome. A palindrome is a word, phrase, or sequence that reads the same forward and backward.\n\n" + "Rearrange these letters to form a palindrome. A palindrome is a word, phrase, or sequence that reads the same forward and backward. If there are multiple answers, only respond with one of them.\n\n" "For example, if the letters are: a, a, b — a valid palindrome is: aba.\n\n" f"Your letters: {', '.join(scrambled_letters)}\n\n" "What palindrome can you form from these letters?" diff --git a/reasoning_gym/algorithmic/ransom_note.py b/reasoning_gym/algorithmic/ransom_note.py index d3367bfc..633cb62c 100644 --- a/reasoning_gym/algorithmic/ransom_note.py +++ b/reasoning_gym/algorithmic/ransom_note.py @@ -7,7 +7,7 @@ https://leetcode.com/problems/ransom-note/description/ from collections import defaultdict from dataclasses import dataclass from random import Random -from typing import Optional +from typing import Dict, Optional from ..factory import ProceduralDataset, register_dataset @@ -95,5 +95,27 @@ class RansomNoteDataset(ProceduralDataset): "metadata": {"ransom_note": ransom_note, "magazine": magazine, "solution": answer, "solvable": solvable}, } + def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: + """Determine if the solution provided solves this task. + + The function awards 1.0 for a correct answer. + + Args: + answer (Optional[str]): The user's answer. + entry (Dict[str, any]): The original dataset entry containing the correct answer. + + Returns: + float: The computed score between 0.0 and 1.0. 
+ """ + + if answer == None: + return 0.0 + + s_answer = answer.strip() + if not s_answer == str(entry["answer"]): + return 0.01 + else: + return 1.0 + register_dataset("ransom_note", RansomNoteDataset, RansomNoteConfig) diff --git a/reasoning_gym/algorithmic/string_synthesis.py b/reasoning_gym/algorithmic/string_synthesis.py new file mode 100644 index 00000000..c78ed35b --- /dev/null +++ b/reasoning_gym/algorithmic/string_synthesis.py @@ -0,0 +1,139 @@ +"""Iteratively synthesizes a string by inserting characters according to a pattern. + +https://github.com/yongchao98/CodeSteer-v1.0/blob/main/create_dataset/create_dataset_string_synthesis.py +""" + +from dataclasses import dataclass +from random import Random +from typing import Optional + +from ..factory import ProceduralDataset, register_dataset + +QUESTION_TEMPLATE = """There are nine different blocks [A] [B] [C] {{A}} {{B}} {{C}} (A) (B) (C) +1. One [A], one [B], and one [C] can be combined to form one {{A}}. +2. One [A] and one [B] can be combined to form one {{C}}. +3. One [B] and one [C] can be combined to form one {{B}}. +4. Two [C] can be combined to form one {{C}}. +5. One {{A}} and one {{C}} can be combined to form one (A) and one (B). +6. Two {{B}} can be combined to form one (C). + +Given a certain number of initial blocks, your job is to cycle through the rules 1-6 above, synthesizing new blocks until no more rules can be applied, or until a state (counts of each block type) is repeated. +In the case a state is repeated the answer is the state before the repetition! + +The output should be the count of each block type after the rules have been applied in the order they are listed above. +For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {{A}} 2 {{B}} 0 {{C}} 0 (A) 0 (B) 1 (C). + +Example: +- Input: You have 2 [A], 3 [B], and 3 [C]. +- Output: 0 0 0 2 1 0 0 0 0 +- Explanation: + 0. Initial state: 2 3 3 0 0 0 0 0 0 + 1. We can apply Rule 1 and obtain 1 {{A}}. 
New state: 1 2 2 1 0 0 0 0 0 + 2. We can apply Rule 1 again and obtain 1 {{A}}. New state 0 1 1 2 0 0 0 0 0 + 3. We can apply Rule 3 and obtain 1 {{B}}. New state 0 0 0 2 1 0 0 0 0 + 4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0 + +Now, you have {A_square} [A], {B_square} [B], and {C_square} [C] blocks. Provide the count of each block type after applying the above rules. +""" + + +@dataclass +class StringSynthesisConfig: + """Configuration for String Synthesis dataset generation""" + + min_initial_blocks: int = 0 # Minimum number of initial blocks + max_initial_blocks: int = 5 # Maximum number of initial blocks + max_iterations: int = 1_000 # Maximum number of iterations to apply the rules (Safety check for infinite loops) + + size: int = 500 # Virtual dataset size + seed: Optional[int] = None + + def validate(self): + """Validate configuration parameters""" + assert 0 <= self.min_initial_blocks, "min_initial_blocks must be non-negative" + assert ( + self.min_initial_blocks <= self.max_initial_blocks + ), "min_initial_blocks must be less than or equal to max_initial_blocks" + assert 0 < self.max_iterations, "max_iterations must be positive" + + +class StringSynthesisDataset(ProceduralDataset): + """Generates String Synthesis exercises with configurable difficulty""" + + def __init__(self, config: StringSynthesisConfig): + super().__init__(config=config, seed=config.seed, size=config.size) + + def _apply_rule(self, counts: list[int]) -> list[int]: + """ + Apply the first applicable rule to the given counts. + In case no rule is applicable, the counts are returned unchanged. 
+ """ + # label the indices for the counts + A_square, B_square, C_square, A_curly, B_curly, C_curly, A_round, B_round, C_round = range(9) + # Rule 1: One [A], one [B], and one [C] can be combined to form one {A} + if counts[A_square] >= 1 and counts[B_square] >= 1 and counts[C_square] >= 1: + counts[A_square] -= 1 + counts[B_square] -= 1 + counts[C_square] -= 1 + counts[A_curly] += 1 + # Rule 2: One [A] and one [B] can be combined to form one {C} + elif counts[A_square] >= 1 and counts[B_square] >= 1: + counts[A_square] -= 1 + counts[B_square] -= 1 + counts[C_curly] += 1 + # Rule 3: One [B] and one [C] can be combined to form one {B} + elif counts[B_square] >= 1 and counts[C_square] >= 1: + counts[B_square] -= 1 + counts[C_square] -= 1 + counts[B_curly] += 1 + # Rule 4: Two [C] can be combined to form one {C} + elif counts[C_square] >= 2: + counts[C_square] -= 2 + counts[C_curly] += 1 + # Rule 5: One {A} and one {C} can be combined to form one (A) and one (B) + elif counts[A_curly] >= 1 and counts[C_curly] >= 1: + counts[A_curly] -= 1 + counts[C_curly] -= 1 + counts[A_round] += 1 + counts[B_round] += 1 + # Rule 6: Two {B} can be combined to form one (C) + elif counts[B_curly] >= 2: + counts[B_curly] -= 2 + counts[C_round] += 1 + return counts + + def _get_answer(self, A_square: int, B_square: int, C_square: int) -> list[list[int]]: + """Calculate the answer for a given input""" + # [A] [B] [C] {A} {B} {C} (A) (B) (C) + counts = [A_square, B_square, C_square] + [0 for _ in range(6)] + states = [counts] + + for _ in range(self.config.max_iterations): + new_counts = self._apply_rule(counts[:]) + if new_counts in states: + break + states.append(new_counts) + counts = new_counts + + return states + + def __getitem__(self, idx: int) -> dict: + """Generate a single String Synthesis question""" + rng = Random(self.seed + idx) + + A_square = rng.randint(self.config.min_initial_blocks, self.config.max_initial_blocks) + B_square = rng.randint(self.config.min_initial_blocks, 
self.config.max_initial_blocks) + C_square = rng.randint(self.config.min_initial_blocks, self.config.max_initial_blocks) + + states = self._get_answer(A_square, B_square, C_square) + answer = states[-1] + answer_str = " ".join(str(x) for x in answer) + + return { + "question": QUESTION_TEMPLATE.format(A_square=A_square, B_square=B_square, C_square=C_square), + "answer": answer_str, + "metadata": {"states": states, "solution": answer}, + } + + +register_dataset("string_synthesis", StringSynthesisDataset, StringSynthesisConfig) diff --git a/reasoning_gym/algorithmic/word_sorting.py b/reasoning_gym/algorithmic/word_sorting.py index 8ac683b6..bc20177c 100644 --- a/reasoning_gym/algorithmic/word_sorting.py +++ b/reasoning_gym/algorithmic/word_sorting.py @@ -4,7 +4,7 @@ import re from dataclasses import dataclass from enum import StrEnum from random import Random -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple from ..data import read_data_file from ..factory import ProceduralDataset, register_dataset @@ -105,5 +105,27 @@ class WordSortingDataset(ProceduralDataset): }, } + def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: + """Determine if the solution provided solves this task. + + The function awards 1.0 for a correct answer. + + Args: + answer (Optional[str]): The user's answer. + entry (Dict[str, any]): The original dataset entry containing the correct answer. + + Returns: + float: The computed score between 0.0 and 1.0. 
+ """ + + if answer == None: + return 0.0 + + s_answer = answer.strip().replace(" ", "") + if not s_answer == entry["answer"].strip().replace(" ", ""): + return 0.01 + else: + return 1.0 + register_dataset("word_sorting", WordSortingDataset, WordSortingConfig) diff --git a/reasoning_gym/cognition/figlet_fonts.py b/reasoning_gym/cognition/figlet_fonts.py index 22e81256..7274a4ff 100644 --- a/reasoning_gym/cognition/figlet_fonts.py +++ b/reasoning_gym/cognition/figlet_fonts.py @@ -4,7 +4,6 @@ from typing import Dict, Optional import pyfiglet -from ..data.wordle_words import wordle_words from ..factory import ProceduralDataset, register_dataset @@ -23,6 +22,9 @@ class FigletFontDataset(ProceduralDataset): """Generates FigletFont tasks""" def __init__(self, config: FigletFontConfig): + from ..data.wordle_words import wordle_words + + self.wordle_words = wordle_words self._prompt_templates = [ "What word does this say?\n\n{figlet_render}", "Please read the following figlet font:\n\n{figlet_render}", @@ -40,7 +42,7 @@ class FigletFontDataset(ProceduralDataset): """ rng = Random(self.seed + idx) - word = self.config.static_word if self.config.static_word is not None else rng.choice(wordle_words).upper() + word = self.config.static_word if self.config.static_word is not None else rng.choice(self.wordle_words).upper() if self.config.space_letters: render_word = " ".join(word) else: diff --git a/reasoning_gym/dataset.py b/reasoning_gym/dataset.py index 0cb89240..8d126536 100644 --- a/reasoning_gym/dataset.py +++ b/reasoning_gym/dataset.py @@ -59,7 +59,7 @@ class ProceduralDataset(ABC, Sized, Iterable[Dict[str, Any]]): if answer == oracle_answer: reward = 1.0 elif oracle_answer in answer: - reward = 0.5 + reward = len(oracle_answer) / len(answer) else: reward = 0.01 diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py index dd1ed898..9a228ea3 100644 --- a/reasoning_gym/games/__init__.py +++ b/reasoning_gym/games/__init__.py @@ -7,7 +7,6 @@ Game tasks 
for training reasoning capabilities: """ from .countdown import CountdownConfig, CountdownDataset -from .game_of_life import GameOfLifeConfig, GameOfLifeDataset from .knight_swap import KnightSwapConfig, KnightSwapDataset from .maze import MazeConfig, MazeDataset from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset @@ -28,8 +27,6 @@ __all__ = [ "SokobanDataset", "MazeConfig", "MazeDataset", - "GameOfLifeConfig", - "GameOfLifeDataset", "HanoiConfig", "HanoiDataset", "NQueensDataset", diff --git a/reasoning_gym/utils.py b/reasoning_gym/utils.py index 457004ce..c7d1b0d8 100644 --- a/reasoning_gym/utils.py +++ b/reasoning_gym/utils.py @@ -7,17 +7,19 @@ from typing import Any, Optional, Union SYSTEM_PROMPTS = { "DeepSeekZero": """A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> -<answer> answer here </answer> +<answer>answer here</answer> +Do not explain your reasoning inside the answer tags, provide only the final answer. """, "default": """Given a problem, your task is to answer the question by thinking step-by-step in a clear and specific manner. Once you have thought about the reasoning process, provide the answer in the following format: -<answer> answer here </answer> +<answer>answer here</answer> +Do not explain your reasoning inside the answer tags, provide only the final answer. """, } def extract_answer(completion: str, tag_name: str = "answer") -> Optional[str]: - regex = f"<{tag_name}>(.*?)</{tag_name}>" + regex = f"<{tag_name}>\\s?(.*?)\\s?</{tag_name}>"
matches = list( re.finditer( regex, diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 9ba6bcc3..f321ad2a 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -2,6 +2,7 @@ import pytest from reasoning_gym.arithmetic.basic_arithmetic import BasicArithmeticDataset, BasicArithmeticDatasetConfig from reasoning_gym.dataset import ReseedingDataset +from reasoning_gym.utils import extract_answer def test_reseeding_dataset_iteration(): @@ -38,3 +39,19 @@ def test_reseeding_dataset_iteration(): test_item = next(iter(infinite_dataset)) assert infinite_dataset.score_answer("wrong", test_item) == 0.01 assert infinite_dataset.score_answer(test_item["answer"], test_item) == 1.0 + + +def test_extract_answer(): + assert extract_answer("This is a text. <final_answer>1234</final_answer>", tag_name="final_answer") == "1234" + + # ignore single whitespace + assert extract_answer("This is a text. <answer>\n1234 </answer>", tag_name="answer") == "1234" + + config = BasicArithmeticDatasetConfig( + min_terms=2, max_terms=3, min_digits=1, max_digits=2, operators=["+"], allow_parentheses=False, seed=42, size=10 + ) + + base_dataset = BasicArithmeticDataset(config) + item = base_dataset[0] + assert base_dataset.score_answer(item["answer"] + " + x", item) > 0.1 + assert base_dataset.score_answer(item["answer"], item) == 1.0 diff --git a/tests/test_game_of_life.py b/tests/test_game_of_life.py index 10ec5c7a..abdc17ae 100644 --- a/tests/test_game_of_life.py +++ b/tests/test_game_of_life.py @@ -1,12 +1,35 @@ import pytest -from reasoning_gym.games.game_of_life import GameOfLifeConfig, GameOfLifeDataset +from reasoning_gym.algorithmic.game_of_life import GameOfLifeConfig, GameOfLifeDataset -def test_game_of_life(): +def test_game_of_life_config_validation(): + """Test that invalid configs raise appropriate errors""" + with pytest.raises(AssertionError): + config = GameOfLifeConfig(grid_size_x=2) # Too small + config.validate() + + with pytest.raises(AssertionError): + config = GameOfLifeConfig(grid_size_y=1000) #
Too large + config.validate() + + with pytest.raises(AssertionError): + config = GameOfLifeConfig(grid_size_x=5, grid_size_y=5, filled_cells=26) # Too many cells + config.validate() + + +def test_game_of_life_deterministic(): + """Test that dataset generates same items with same seed""" + config = GameOfLifeConfig(seed=42, size=10) + dataset1 = GameOfLifeDataset(config) + dataset2 = GameOfLifeDataset(config) + + for i in range(len(dataset1)): + assert dataset1[i] == dataset2[i] + + +def test_game_of_life_basic_properties(): """Test basic properties and solution of generated items""" - - # Easy config = GameOfLifeConfig(seed=42, size=10, grid_size_x=20, grid_size_y=20, filled_cells=200, simulation_steps=1) dataset = GameOfLifeDataset(config) @@ -16,12 +39,34 @@ def test_game_of_life(): assert "answer" in item assert "metadata" in item - # # Check metadata contains required fields + # Check metadata contains required fields assert "grid_size_x" in item["metadata"] assert "grid_size_y" in item["metadata"] assert "filled_cells" in item["metadata"] assert "simulation_steps" in item["metadata"] - # # Test the scoring + # Test the scoring assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0 assert dataset.score_answer(answer=None, entry=item) == 0.0 + assert dataset.score_answer(answer="invalid json", entry=item) == 0.01 + + +def test_game_of_life_iteration(): + """Test that iteration respects dataset size""" + config = GameOfLifeConfig(size=5, seed=42) # Small size for testing + dataset = GameOfLifeDataset(config) + + # Test manual iteration + items = [] + for item in dataset: + items.append(item) + assert len(items) == config.size, "Iterator should yield exactly size items" + + # Test list conversion + items = list(dataset) + assert len(items) == config.size, "Iterator should yield exactly size items" + + # Test multiple iterations + first_items = list(dataset) + second_items = list(dataset) + assert first_items == second_items, "Multiple iterations 
should yield same items" diff --git a/tests/test_graph_color.py b/tests/test_graph_color.py new file mode 100644 index 00000000..01e80e04 --- /dev/null +++ b/tests/test_graph_color.py @@ -0,0 +1,46 @@ +import json + +import pytest + +from reasoning_gym.algorithmic.graph_color import GraphColorConfig, GraphColorDataset + + +def test_graph_color(): + """Test basic properties and solution of generated items""" + config = GraphColorConfig(seed=42, size=10, num_vertices=10, num_colors=4, edge_probability=0.4) + dataset = GraphColorDataset(config) + + # easy + for item in dataset: + assert isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + # Test the scoring + assert dataset.score_answer(answer=json.dumps(item["metadata"]["possible_answer"]), entry=item) == 1.0 + assert dataset.score_answer(answer=None, entry=item) == 0.0 + + # medium + config = GraphColorConfig(seed=42, size=1, num_vertices=10, num_colors=3, edge_probability=0.3) + dataset = GraphColorDataset(config) + + for item in dataset: + assert dataset.score_answer(answer=json.dumps(item["metadata"]["possible_answer"]), entry=item) == 1.0 + assert dataset.score_answer(answer=None, entry=item) == 0.0 + + # hard + config = GraphColorConfig(seed=42, size=1, num_vertices=40, num_colors=4, edge_probability=0.2) + dataset = GraphColorDataset(config) + + for item in dataset: + assert dataset.score_answer(answer=json.dumps(item["metadata"]["possible_answer"]), entry=item) == 1.0 + assert dataset.score_answer(answer=None, entry=item) == 0.0 + + # v hard + config = GraphColorConfig(seed=42, size=1, num_vertices=50, num_colors=3, edge_probability=0.1) + dataset = GraphColorDataset(config) + + for item in dataset: + assert dataset.score_answer(answer=json.dumps(item["metadata"]["possible_answer"]), entry=item) == 1.0 + assert dataset.score_answer(answer=None, entry=item) == 0.0 diff --git a/tests/test_letter_jumble.py b/tests/test_letter_jumble.py index 
8203f2f0..89f860b5 100644 --- a/tests/test_letter_jumble.py +++ b/tests/test_letter_jumble.py @@ -108,6 +108,11 @@ def test_letter_jumble_dataset_items(): assert config.min_word_len <= len(word) <= config.max_word_len assert word.isalpha() + # Test the scoring + assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0 + assert dataset.score_answer(answer="gibberish", entry=item) == 0.01 + assert dataset.score_answer(answer=None, entry=item) == 0.0 + def test_letter_jumble_iteration(): """Test that iteration respects dataset size""" diff --git a/tests/test_polynomial_equations.py b/tests/test_polynomial_equations.py index 420187de..e4e72b18 100644 --- a/tests/test_polynomial_equations.py +++ b/tests/test_polynomial_equations.py @@ -112,7 +112,7 @@ def test_polynomial_solutions_evaluation(): evaluated_value = poly_expr.subs(x, solution) # Ensure the evaluated value is close to zero (numerical stability threshold) - assert abs(evaluated_value) < 1e-6, ( + assert abs(evaluated_value) < 1e-5, ( f"Solution {solution} does not satisfy the polynomial {poly_str}. 
" f"Evaluated value: {evaluated_value}" ) diff --git a/tests/test_ransom_note.py b/tests/test_ransom_note.py index 9615e8b6..f452ca2e 100644 --- a/tests/test_ransom_note.py +++ b/tests/test_ransom_note.py @@ -84,6 +84,11 @@ def test_group_anagrams_dataset_items(): assert len(magazine) <= config.max_magazine_length assert solution == solvable + # Test the scoring + assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0 + assert dataset.score_answer(answer="gibberish", entry=item) == 0.01 + assert dataset.score_answer(answer=None, entry=item) == 0.0 + def test_ransom_note_dataset_iteration(): """Test that iteration respects dataset size""" diff --git a/tests/test_string_synthesis.py b/tests/test_string_synthesis.py new file mode 100644 index 00000000..39fa4133 --- /dev/null +++ b/tests/test_string_synthesis.py @@ -0,0 +1,119 @@ +"""Tests for String Synthesis questions generation""" + +import pytest + +from reasoning_gym.algorithmic.string_synthesis import StringSynthesisConfig, StringSynthesisDataset + + +def test_string_synthesis_config_validation(): + """Test that invalid configs raise appropriate errors""" + + with pytest.raises(AssertionError): + config = StringSynthesisConfig(min_initial_blocks=-1) # Negative not allowed + config.validate() + + with pytest.raises(AssertionError): + config = StringSynthesisConfig(min_initial_blocks=3, max_initial_blocks=2) # Min > Max + config.validate() + + with pytest.raises(AssertionError): + config = StringSynthesisConfig(max_iterations=0) # Zero not allowed + config.validate() + + +def test_string_synthesis_dataset_deterministic(): + """Test that dataset generates same items with same seed""" + config = StringSynthesisConfig(seed=42, size=10) + dataset1 = StringSynthesisDataset(config) + dataset2 = StringSynthesisDataset(config) + + for i in range(len(dataset1)): + assert dataset1[i] == dataset2[i] + + +def test_string_synthesis_dataset_items(): + """Test basic properties of generated items""" + config = 
StringSynthesisConfig(min_initial_blocks=1, max_initial_blocks=3, size=10, seed=42) + dataset = StringSynthesisDataset(config) + + for i in range(len(dataset)): + item = dataset[i] + # Check item structure + assert isinstance(item, dict) + assert "question" in item + assert "answer" in item + assert "metadata" in item + + # Check metadata + assert "states" in item["metadata"] + assert "solution" in item["metadata"] + + states = item["metadata"]["states"] + solution = item["metadata"]["solution"] + + # Verify dimensions + assert len(states) >= 1 + first_state = states[0] + assert len(first_state) == 9 + for i in range(3): + assert 0 <= first_state[i] <= 3 + for i in range(3, 9): + assert first_state[i] == 0 + assert solution == states[-1] + for i in range(9): + assert 0 <= solution[i] + + +def test_string_synthesis_dataset_iteration(): + """Test that iteration respects dataset size""" + config = StringSynthesisConfig(size=5, seed=42) + dataset = StringSynthesisDataset(config) + + items = list(dataset) + assert len(items) == config.size + + # Test multiple iterations yield same items + assert items == list(dataset) + + +def test_string_synthesis_answer(): + """Test the _get_answer method""" + config = StringSynthesisConfig(seed=42) + dataset = StringSynthesisDataset(config) + + # Empty input + counts = [0, 0, 0, 0, 0, 0, 0, 0, 0] + assert dataset._apply_rule(counts) == [0, 0, 0, 0, 0, 0, 0, 0, 0] + + # Rule 1 + counts = [1, 1, 1, 0, 0, 0, 0, 0, 0] + assert dataset._apply_rule(counts) == [0, 0, 0, 1, 0, 0, 0, 0, 0] + + # Rule 2 + counts = [1, 1, 0, 0, 0, 0, 0, 0, 0] + assert dataset._apply_rule(counts) == [0, 0, 0, 0, 0, 1, 0, 0, 0] + + # Rule 3 + counts = [0, 1, 1, 0, 0, 0, 0, 0, 0] + assert dataset._apply_rule(counts) == [0, 0, 0, 0, 1, 0, 0, 0, 0] + + # Rule 4 + counts = [0, 0, 2, 0, 0, 0, 0, 0, 0] + assert dataset._apply_rule(counts) == [0, 0, 0, 0, 0, 1, 0, 0, 0] + + # Rule 5 + counts = [0, 0, 0, 1, 0, 1, 0, 0, 0] + assert dataset._apply_rule(counts) == [0, 0, 0, 
0, 0, 0, 1, 1, 0] + + # Rule 6 + counts = [0, 0, 0, 0, 2, 0, 0, 0, 0] + assert dataset._apply_rule(counts) == [0, 0, 0, 0, 0, 0, 0, 0, 1] + + # 1-shot example provided in the prompt + A_square, B_square, C_square = 2, 3, 3 + assert dataset._get_answer(A_square, B_square, C_square) == [ + [2, 3, 3, 0, 0, 0, 0, 0, 0], # Initial state + [1, 2, 2, 1, 0, 0, 0, 0, 0], # Rule 1 + [0, 1, 1, 2, 0, 0, 0, 0, 0], # Rule 1 again + [0, 0, 0, 2, 1, 0, 0, 0, 0], # Rule 3 (final state) + ] diff --git a/tests/test_word_sorting.py b/tests/test_word_sorting.py index 14f44204..ea66b86f 100644 --- a/tests/test_word_sorting.py +++ b/tests/test_word_sorting.py @@ -100,6 +100,11 @@ def test_word_sorting_dataset_items(): else: assert sorted_words == sorted(sorted_words, reverse=True) + # Test the scoring + assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0 + assert dataset.score_answer(answer="gibberish", entry=item) == 0.01 + assert dataset.score_answer(answer=None, entry=item) == 0.0 + def test_word_sorting_dataset_iteration(): """Test that iteration respects dataset size"""